diff --git a/.github/labeler.yml b/.github/labeler.yml
index 48967417af3..90cdda4d3ca 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -12,7 +12,7 @@ cudf.polars:
   - 'python/cudf_polars/**'
 
 pylibcudf:
-  - 'python/cudf/cudf/_lib/pylibcudf/**'
+  - 'python/cudf/pylibcudf/**'
 
 libcudf:
   - 'cpp/**'
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 2fc39c06fad..b5d17022a3a 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -67,7 +67,51 @@ jobs:
       node_type: "gpu-v100-latest-1"
       run_script: "ci/build_docs.sh"
       sha: ${{ inputs.sha }}
+  wheel-build-libcudf:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
+    with:
+      # build for every combination of arch and CUDA version, but only for the latest Python
+      matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      script: ci/build_wheel_libcudf.sh
+  wheel-publish-libcudf:
+    needs: wheel-build-libcudf
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10
+    with:
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      package-name: libcudf
+      package-type: cpp
+  wheel-build-pylibcudf:
+    needs: [wheel-publish-libcudf]
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
+    with:
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      script: ci/build_wheel_pylibcudf.sh
+  wheel-publish-pylibcudf:
+    needs: wheel-build-pylibcudf
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10
+    with:
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      package-name: pylibcudf
+      package-type: python
   wheel-build-cudf:
+    needs: wheel-publish-pylibcudf
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
     with:
@@ -86,6 +130,7 @@ jobs:
       sha: ${{ inputs.sha }}
       date: ${{ inputs.date }}
       package-name: cudf
+      package-type: python
   wheel-build-dask-cudf:
     needs: wheel-publish-cudf
     secrets: inherit
@@ -108,8 +153,9 @@ jobs:
       sha: ${{ inputs.sha }}
       date: ${{ inputs.date }}
       package-name: dask_cudf
+      package-type: python
   wheel-build-cudf-polars:
-    needs: wheel-publish-cudf
+    needs: wheel-publish-pylibcudf
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
     with:
@@ -130,6 +176,7 @@ jobs:
       sha: ${{ inputs.sha }}
       date: ${{ inputs.date }}
       package-name: cudf_polars
+      package-type: python
   trigger-pandas-tests:
     if: inputs.build_type == 'nightly'
     needs: wheel-build-cudf
diff --git a/.github/workflows/pandas-tests.yaml b/.github/workflows/pandas-tests.yaml
index cf0c2b377dd..10c803f7921 100644
--- a/.github/workflows/pandas-tests.yaml
+++ b/.github/workflows/pandas-tests.yaml
@@ -19,7 +19,8 @@ jobs:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
    with:
-      matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.9" and (.CUDA_VER | startswith("12.5.")) ))
+      # This selects "ARCH=amd64 + the latest supported Python + CUDA".
+      matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
       build_type: nightly
       branch: ${{ inputs.branch }}
       date: ${{ inputs.date }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index c2e7f64f952..8730804e8b6 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -12,6 +12,7 @@ concurrency:
 jobs:
   pr-builder:
     needs:
+      - changed-files
       - checks
       - conda-cpp-build
       - conda-cpp-checks
@@ -23,6 +24,8 @@ jobs:
       - static-configure
       - conda-notebook-tests
       - docs-build
+      - wheel-build-libcudf
+      - wheel-build-pylibcudf
       - wheel-build-cudf
       - wheel-tests-cudf
       - wheel-build-cudf-polars
@@ -35,6 +38,70 @@ jobs:
       - pandas-tests-diff
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10
+    if: always()
+    with:
+      needs: ${{ toJSON(needs) }}
+  changed-files:
+    runs-on: ubuntu-latest
+    name: "Check changed files"
+    outputs:
+      test_cpp: ${{ steps.changed-files.outputs.cpp_any_changed == 'true' }}
+      test_java: ${{ steps.changed-files.outputs.java_any_changed == 'true' }}
+      test_notebooks: ${{ steps.changed-files.outputs.notebooks_any_changed == 'true' }}
+      test_python: ${{ steps.changed-files.outputs.python_any_changed == 'true' }}
+    steps:
+      - name: Get PR info
+        id: get-pr-info
+        uses: rapidsai/shared-actions/get-pr-info@main
+      - name: Checkout code repo
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+      - name: Calculate merge base
+        id: calculate-merge-base
+        env:
+          PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
+        run: |
+          (echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") > "$GITHUB_OUTPUT"
+      - name: Get changed files
+        id: changed-files
+        uses: tj-actions/changed-files@v45
+        with:
+          base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
+          sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          files_yaml: |
+            cpp:
+              - '**'
+              - '!CONTRIBUTING.md'
+              - '!README.md'
+              - '!docs/**'
+              - '!img/**'
+              - '!java/**'
+              - '!notebooks/**'
+              - '!python/**'
+            java:
+              - '**'
+              - '!CONTRIBUTING.md'
+              - '!README.md'
+              - '!docs/**'
+              - '!img/**'
+              - '!notebooks/**'
+              - '!python/**'
+            notebooks:
+              - '**'
+              - '!CONTRIBUTING.md'
+              - '!README.md'
+              - '!java/**'
+            python:
+              - '**'
+              - '!CONTRIBUTING.md'
+              - '!README.md'
+              - '!docs/**'
+              - '!img/**'
+              - '!java/**'
+              - '!notebooks/**'
   checks:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10
@@ -54,9 +121,10 @@ jobs:
       build_type: pull-request
       enable_check_symbols: true
   conda-cpp-tests:
-    needs: conda-cpp-build
+    needs: [conda-cpp-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_cpp == 'true'
     with:
       build_type: pull-request
   conda-python-build:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10
     with:
       build_type: pull-request
   conda-python-cudf-tests:
-    needs: conda-python-build
+    needs: [conda-python-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       build_type: pull-request
       script: "ci/test_python_cudf.sh"
   conda-python-other-tests:
     # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
-    needs: conda-python-build
+    needs: [conda-python-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       build_type: pull-request
       script: "ci/test_python_other.sh"
   conda-java-tests:
-    needs: conda-cpp-build
+    needs: [conda-cpp-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_java == 'true'
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -101,9 +172,10 @@ jobs:
       container_image: "rapidsai/ci-wheel:latest"
       run_script: "ci/configure_cpp_static.sh"
   conda-notebook-tests:
-    needs: conda-python-build
+    needs: [conda-python-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_notebooks == 'true'
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -120,22 +192,39 @@ jobs:
       arch: "amd64"
       container_image: "rapidsai/ci-conda:latest"
       run_script: "ci/build_docs.sh"
-  wheel-build-cudf:
+  wheel-build-libcudf:
     needs: checks
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
+    with:
+      # build for every combination of arch and CUDA version, but only for the latest Python
+      matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
+      build_type: pull-request
+      script: "ci/build_wheel_libcudf.sh"
+  wheel-build-pylibcudf:
+    needs: [checks, wheel-build-libcudf]
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
+    with:
+      build_type: pull-request
+      script: "ci/build_wheel_pylibcudf.sh"
+  wheel-build-cudf:
+    needs: wheel-build-pylibcudf
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
     with:
       build_type: pull-request
       script: "ci/build_wheel_cudf.sh"
   wheel-tests-cudf:
-    needs: wheel-build-cudf
+    needs: [wheel-build-cudf, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       build_type: pull-request
       script: ci/test_wheel_cudf.sh
   wheel-build-cudf-polars:
-    needs: wheel-build-cudf
+    needs: wheel-build-pylibcudf
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
     with:
@@ -144,9 +233,10 @@ jobs:
       build_type: pull-request
       script: "ci/build_wheel_cudf_polars.sh"
   wheel-tests-cudf-polars:
-    needs: wheel-build-cudf-polars
+    needs: [wheel-build-cudf-polars, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -164,9 +254,10 @@ jobs:
       build_type: pull-request
       script: "ci/build_wheel_dask_cudf.sh"
   wheel-tests-dask-cudf:
-    needs: wheel-build-dask-cudf
+    needs: [wheel-build-dask-cudf, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -183,20 +274,24 @@ jobs:
         build-all -DBUILD_BENCHMARKS=ON --verbose;
         sccache -s;
   unit-tests-cudf-pandas:
-    needs: wheel-build-cudf
+    needs: [wheel-build-cudf, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
+      # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
       build_type: pull-request
       script: ci/cudf_pandas_scripts/run_tests.sh
   pandas-tests:
     # run the Pandas unit tests using PR branch
-    needs: wheel-build-cudf
+    needs: [wheel-build-cudf, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
-      matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.9" and (.CUDA_VER | startswith("12.5.")) ))
+      # This selects "ARCH=amd64 + the latest supported Python + CUDA".
+      matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
       build_type: pull-request
       script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
       # Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit.
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 9feea050b19..8605fa46f68 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -124,3 +124,15 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       script: ci/cudf_pandas_scripts/run_tests.sh
+  third-party-integration-tests-cudf-pandas:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10
+    with:
+      build_type: nightly
+      branch: ${{ inputs.branch }}
+      date: ${{ inputs.date }}
+      sha: ${{ inputs.sha }}
+      node_type: "gpu-v100-latest-1"
+      container_image: "rapidsai/ci-conda:latest"
+      run_script: |
+        ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
diff --git a/.gitignore b/.gitignore
index c89fb49697a..619e1464b2a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -79,6 +79,8 @@ Debug
 build/
 cpp/build/
 cpp/examples/*/install/
+cpp/examples/*/build/
+cpp/examples/tpch/datagen/datafusion
 cpp/include/cudf/ipc_generated/*.h
 cpp/thirdparty/googletest/
 
@@ -176,3 +178,7 @@ jupyter_execute
 # clang tooling
 compile_commands.json
 .clangd/
+
+# pytest artifacts
+rmm_log.txt
+python/cudf/cudf_pandas_tests/data/rmm_log.txt
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index bbcd78d051f..f861fb57916 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -144,7 +144,7 @@ repos:
       - id: ruff-format
        files: python/.*$
   - repo: https://github.com/rapidsai/pre-commit-hooks
-    rev: v0.2.0
+    rev: v0.4.0
     hooks:
       - id: verify-copyright
         exclude: |
diff --git a/CHANGELOG.md b/CHANGELOG.md
deleted file mode 100644
index f2a7c337675..00000000000
--- a/CHANGELOG.md
+++ /dev/null
@@ -1,8241 +0,0 @@
-# cudf 24.08.00 (7 Aug 2024)
-
-## 🚨 Breaking Changes
-
-- Align Index __init__ APIs with pandas 2.x ([#16362](https://github.com/rapidsai/cudf/pull/16362)) [@mroeschke](https://github.com/mroeschke)
-- Align Series APIs with pandas 2.x ([#16333](https://github.com/rapidsai/cudf/pull/16333)) [@mroeschke](https://github.com/mroeschke)
-- Add missing `stream` param to dictionary factory APIs ([#16319](https://github.com/rapidsai/cudf/pull/16319)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub)
-- Deprecate dtype= parameter in reduction methods ([#16313](https://github.com/rapidsai/cudf/pull/16313)) [@mroeschke](https://github.com/mroeschke)
-- Remove squeeze argument from groupby ([#16312](https://github.com/rapidsai/cudf/pull/16312)) [@mroeschke](https://github.com/mroeschke)
-- Align more DataFrame APIs with pandas ([#16310](https://github.com/rapidsai/cudf/pull/16310)) [@mroeschke](https://github.com/mroeschke)
-- Remove `mr` param from `write_csv` and `write_json` ([#16231](https://github.com/rapidsai/cudf/pull/16231)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub)
-- Report number of rows per file read by PQ reader when no row selection and fix segfault in chunked PQ reader when skip_rows > 0 ([#16195](https://github.com/rapidsai/cudf/pull/16195)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Refactor from_arrow_device/host to use resource_ref ([#16160](https://github.com/rapidsai/cudf/pull/16160)) [@harrism](https://github.com/harrism)
-- Deprecate Arrow support in I/O ([#16132](https://github.com/rapidsai/cudf/pull/16132)) [@lithomas1](https://github.com/lithomas1)
-- Return `FrozenList` for `Index.names` ([#16047](https://github.com/rapidsai/cudf/pull/16047)) [@galipremsagar](https://github.com/galipremsagar)
-- Add compile option to enable large strings support ([#16037](https://github.com/rapidsai/cudf/pull/16037)) [@davidwendt](https://github.com/davidwendt)
-- Hide visibility of non public symbols ([#15982](https://github.com/rapidsai/cudf/pull/15982)) [@robertmaynard](https://github.com/robertmaynard)
-- Rename strings multiple target replace API ([#15898](https://github.com/rapidsai/cudf/pull/15898)) [@davidwendt](https://github.com/davidwendt)
-- Pinned vector factory that uses the global pool ([#15895](https://github.com/rapidsai/cudf/pull/15895)) [@vuule](https://github.com/vuule)
-- Apply clang-tidy autofixes ([#15894](https://github.com/rapidsai/cudf/pull/15894)) [@vyasr](https://github.com/vyasr)
-- Support `arrow:schema` in Parquet writer to faithfully roundtrip `duration` types with Arrow ([#15875](https://github.com/rapidsai/cudf/pull/15875)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Expose stream parameter to public rolling APIs ([#15865](https://github.com/rapidsai/cudf/pull/15865)) [@srinivasyadav18](https://github.com/srinivasyadav18)
-- Fix large strings handling in nvtext::character_tokenize ([#15829](https://github.com/rapidsai/cudf/pull/15829)) [@davidwendt](https://github.com/davidwendt)
-- Remove legacy JSON reader and concurrent_unordered_map.cuh. ([#15813](https://github.com/rapidsai/cudf/pull/15813)) [@bdice](https://github.com/bdice)
-
-## 🐛 Bug Fixes
-
-- Add `flatbuffers` to `libcudf` build ([#16446](https://github.com/rapidsai/cudf/pull/16446)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix parquet_field_list read_func lambda capture invalid this pointer ([#16440](https://github.com/rapidsai/cudf/pull/16440)) [@davidwendt](https://github.com/davidwendt)
-- Enable prefetching in cudf.pandas.install() ([#16439](https://github.com/rapidsai/cudf/pull/16439)) [@bdice](https://github.com/bdice)
-- Enable prefetching before `runpy` ([#16427](https://github.com/rapidsai/cudf/pull/16427)) [@galipremsagar](https://github.com/galipremsagar)
-- Support thread-safe for `prefetch_config::get` and `prefetch_config::set` ([#16425](https://github.com/rapidsai/cudf/pull/16425)) [@ttnghia](https://github.com/ttnghia)
-- Fix a `pandas-2.0` missing attribute error ([#16416](https://github.com/rapidsai/cudf/pull/16416)) [@galipremsagar](https://github.com/galipremsagar)
-- [Bug] Remove loud `NativeFile` deprecation noise for `read_parquet` from S3 ([#16415](https://github.com/rapidsai/cudf/pull/16415)) [@rjzamora](https://github.com/rjzamora)
-- Fix nightly memcheck error for empty STREAM_INTEROP_TEST ([#16406](https://github.com/rapidsai/cudf/pull/16406)) [@davidwendt](https://github.com/davidwendt)
-- Gate ArrowStringArrayNumpySemantics cudf.pandas proxy behind version check ([#16401](https://github.com/rapidsai/cudf/pull/16401)) [@mroeschke](https://github.com/mroeschke)
-- Don't export bs_thread_pool ([#16398](https://github.com/rapidsai/cudf/pull/16398)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Require fixed width types for casting in `cudf-polars` ([#16381](https://github.com/rapidsai/cudf/pull/16381)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Fix docstring of `DataFrame.apply` ([#16351](https://github.com/rapidsai/cudf/pull/16351)) [@galipremsagar](https://github.com/galipremsagar)
-- Make __bool__ raise for more cudf objects ([#16311](https://github.com/rapidsai/cudf/pull/16311)) [@mroeschke](https://github.com/mroeschke)
-- Rename `.devcontainer`s for CUDA 12.5 ([#16293](https://github.com/rapidsai/cudf/pull/16293)) [@jakirkham](https://github.com/jakirkham)
-- Fix split_record for all empty strings column ([#16291](https://github.com/rapidsai/cudf/pull/16291)) [@davidwendt](https://github.com/davidwendt)
-- Fix logic in to_arrow for empty list column ([#16279](https://github.com/rapidsai/cudf/pull/16279)) [@wence-](https://github.com/wence-)
-- [BUG] Make name attr of Index fast slow attrs ([#16270](https://github.com/rapidsai/cudf/pull/16270)) [@Matt711](https://github.com/Matt711)
-- Add custom name setter and getter for proxy objects in `cudf.pandas` ([#16234](https://github.com/rapidsai/cudf/pull/16234)) [@Matt711](https://github.com/Matt711)
-- Fall back when casting a timestamp to numeric in cudf-polars ([#16232](https://github.com/rapidsai/cudf/pull/16232)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Disable large string support for Java build ([#16216](https://github.com/rapidsai/cudf/pull/16216)) [@jlowe](https://github.com/jlowe)
-- Remove CCCL patch for PR 211. ([#16207](https://github.com/rapidsai/cudf/pull/16207)) [@bdice](https://github.com/bdice)
-- Add single offset to an empty ListArray in cudf::to_arrow ([#16201](https://github.com/rapidsai/cudf/pull/16201)) [@davidwendt](https://github.com/davidwendt)
-- Fix `memory_usage` when calculating nested list column ([#16193](https://github.com/rapidsai/cudf/pull/16193)) [@mroeschke](https://github.com/mroeschke)
-- Support at/iat indexers in cudf.pandas ([#16177](https://github.com/rapidsai/cudf/pull/16177)) [@mroeschke](https://github.com/mroeschke)
-- Fix unused-return-value debug build error in from_arrow_stream_test.cpp ([#16168](https://github.com/rapidsai/cudf/pull/16168)) [@davidwendt](https://github.com/davidwendt)
-- Fix cudf::strings::replace_multiple hang on empty target ([#16167](https://github.com/rapidsai/cudf/pull/16167)) [@davidwendt](https://github.com/davidwendt)
-- Refactor from_arrow_device/host to use resource_ref ([#16160](https://github.com/rapidsai/cudf/pull/16160)) [@harrism](https://github.com/harrism)
-- interpolate returns new column if no values are interpolated ([#16158](https://github.com/rapidsai/cudf/pull/16158)) [@mroeschke](https://github.com/mroeschke)
-- Use provided memory resource for allocating mixed join results. ([#16153](https://github.com/rapidsai/cudf/pull/16153)) [@bdice](https://github.com/bdice)
-- Run DFG after verify-alpha-spec ([#16151](https://github.com/rapidsai/cudf/pull/16151)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Use size_t to allow large conditional joins ([#16127](https://github.com/rapidsai/cudf/pull/16127)) [@bdice](https://github.com/bdice)
-- Allow only scale=0 fixed-point values in fixed_width_column_wrapper ([#16120](https://github.com/rapidsai/cudf/pull/16120)) [@davidwendt](https://github.com/davidwendt)
-- Fix pylibcudf Table.num_rows for 0 columns case and add interop to docs ([#16108](https://github.com/rapidsai/cudf/pull/16108)) [@lithomas1](https://github.com/lithomas1)
-- Add support for proxy `np.flatiter` objects ([#16107](https://github.com/rapidsai/cudf/pull/16107)) [@Matt711](https://github.com/Matt711)
-- Ensure cudf objects can astype to any type when empty ([#16106](https://github.com/rapidsai/cudf/pull/16106)) [@mroeschke](https://github.com/mroeschke)
-- Support `pd.read_pickle` and `pd.to_pickle` in `cudf.pandas` ([#16105](https://github.com/rapidsai/cudf/pull/16105)) [@Matt711](https://github.com/Matt711)
-- Fix unnecessarily strict check in parquet chunked reader for choosing split locations. ([#16099](https://github.com/rapidsai/cudf/pull/16099)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix `is_monotonic_*` APIs to include `nan's` ([#16085](https://github.com/rapidsai/cudf/pull/16085)) [@galipremsagar](https://github.com/galipremsagar)
-- More safely parse CUDA versions when subprocess output is contaminated ([#16067](https://github.com/rapidsai/cudf/pull/16067)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- fast_slow_proxy: Don't import assert_eq at top-level ([#16063](https://github.com/rapidsai/cudf/pull/16063)) [@wence-](https://github.com/wence-)
-- Prevent bad ColumnAccessor state after .sort_index(axis=1, ignore_index=True) ([#16061](https://github.com/rapidsai/cudf/pull/16061)) [@mroeschke](https://github.com/mroeschke)
-- Fix ArrowDeviceArray interface to pass address of event ([#16058](https://github.com/rapidsai/cudf/pull/16058)) [@zeroshade](https://github.com/zeroshade)
-- Fix a size overflow bug in hash groupby ([#16053](https://github.com/rapidsai/cudf/pull/16053)) [@PointKernel](https://github.com/PointKernel)
-- Fix `atomic_ref` scope when multiple blocks are updating the same output ([#16051](https://github.com/rapidsai/cudf/pull/16051)) [@vuule](https://github.com/vuule)
-- Fix initialization error in to_arrow for empty string views ([#16033](https://github.com/rapidsai/cudf/pull/16033)) [@wence-](https://github.com/wence-)
-- Fix the int32 overflow when computing page fragment sizes for large string columns ([#16028](https://github.com/rapidsai/cudf/pull/16028)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Fix the pool size alignment issue ([#16024](https://github.com/rapidsai/cudf/pull/16024)) [@PointKernel](https://github.com/PointKernel)
-- Improve multibyte-split byte-range performance ([#16019](https://github.com/rapidsai/cudf/pull/16019)) [@davidwendt](https://github.com/davidwendt)
-- Fix target counting in strings char-parallel replace ([#16017](https://github.com/rapidsai/cudf/pull/16017)) [@davidwendt](https://github.com/davidwendt)
-- Support IntervalDtype in cudf.from_pandas ([#16014](https://github.com/rapidsai/cudf/pull/16014)) [@mroeschke](https://github.com/mroeschke)
-- Fix memory size in create_byte_range_infos_consecutive ([#16012](https://github.com/rapidsai/cudf/pull/16012)) [@davidwendt](https://github.com/davidwendt)
-- Hide visibility of non public symbols ([#15982](https://github.com/rapidsai/cudf/pull/15982)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix Cython typo preventing proper inheritance ([#15978](https://github.com/rapidsai/cudf/pull/15978)) [@vyasr](https://github.com/vyasr)
-- Fix convert_dtypes with convert_integer=False/convert_floating=True ([#15964](https://github.com/rapidsai/cudf/pull/15964)) [@mroeschke](https://github.com/mroeschke)
-- Fix nunique for `MultiIndex`, `DataFrame`, and all NA case with `dropna=False` ([#15962](https://github.com/rapidsai/cudf/pull/15962)) [@mroeschke](https://github.com/mroeschke)
-- Explicitly build for all GPU architectures ([#15959](https://github.com/rapidsai/cudf/pull/15959)) [@vyasr](https://github.com/vyasr)
-- Preserve column type and class information in more DataFrame operations ([#15949](https://github.com/rapidsai/cudf/pull/15949)) [@mroeschke](https://github.com/mroeschke)
-- Add __array_interface__ to cudf.pandas numpy.ndarray proxy ([#15936](https://github.com/rapidsai/cudf/pull/15936)) [@mroeschke](https://github.com/mroeschke)
-- Allow tests to be built when stream util is disabled ([#15933](https://github.com/rapidsai/cudf/pull/15933)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix JSON multi-source reading when total source size exceeds `INT_MAX` bytes ([#15930](https://github.com/rapidsai/cudf/pull/15930)) [@shrshi](https://github.com/shrshi)
-- Fix `dask_cudf.read_parquet` regression for legacy timestamp data ([#15929](https://github.com/rapidsai/cudf/pull/15929)) [@rjzamora](https://github.com/rjzamora)
-- Fix offsetalator when accessing over 268 million rows ([#15921](https://github.com/rapidsai/cudf/pull/15921)) [@davidwendt](https://github.com/davidwendt)
-- Fix debug assert in rowgroup_char_counts_kernel ([#15902](https://github.com/rapidsai/cudf/pull/15902)) [@davidwendt](https://github.com/davidwendt)
-- Fix categorical conversion from chunked arrow arrays ([#15886](https://github.com/rapidsai/cudf/pull/15886)) [@vyasr](https://github.com/vyasr)
-- Handling for `NaN` and `inf` when converting floating point to fixed point types ([#15885](https://github.com/rapidsai/cudf/pull/15885)) [@ttnghia](https://github.com/ttnghia)
-- Manual merge of Branch 24.08 from 24.06 ([#15869](https://github.com/rapidsai/cudf/pull/15869)) [@galipremsagar](https://github.com/galipremsagar)
-- Avoid unnecessary `Index` cast in `IndexedFrame.index` setter ([#15843](https://github.com/rapidsai/cudf/pull/15843)) [@charlesbluca](https://github.com/charlesbluca)
-- Fix large strings handling in nvtext::character_tokenize ([#15829](https://github.com/rapidsai/cudf/pull/15829)) [@davidwendt](https://github.com/davidwendt)
-- Fix multi-replace target count logic for large strings ([#15807](https://github.com/rapidsai/cudf/pull/15807)) [@davidwendt](https://github.com/davidwendt)
-- Fix JSON parsing memory corruption - Fix Mixed types nested children removal ([#15798](https://github.com/rapidsai/cudf/pull/15798)) [@karthikeyann](https://github.com/karthikeyann)
-- Allow anonymous user in devcontainer name. ([#15784](https://github.com/rapidsai/cudf/pull/15784)) [@bdice](https://github.com/bdice)
-- Add support for additional metaclasses of proxies and use for ExcelWriter ([#15399](https://github.com/rapidsai/cudf/pull/15399)) [@vyasr](https://github.com/vyasr)
-
-## 📖 Documentation
-
-- Add docstring for from_dataframe ([#16260](https://github.com/rapidsai/cudf/pull/16260)) [@mroeschke](https://github.com/mroeschke)
-- Update libcudf compiler requirements in contributing doc ([#16103](https://github.com/rapidsai/cudf/pull/16103)) [@davidwendt](https://github.com/davidwendt)
-- Add libcudf public/detail API pattern to developer guide ([#16086](https://github.com/rapidsai/cudf/pull/16086)) [@davidwendt](https://github.com/davidwendt)
-- Explain line profiler and how to know which functions are GPU-accelerated. ([#16079](https://github.com/rapidsai/cudf/pull/16079)) [@bdice](https://github.com/bdice)
-- cudf.pandas documentation improvement ([#15948](https://github.com/rapidsai/cudf/pull/15948)) [@Matt711](https://github.com/Matt711)
-- Reland "Fix docs for IO readers and strings_convert" ([#15872)" (#15941](https://github.com/rapidsai/cudf/pull/15872)" (#15941)) [@lithomas1](https://github.com/lithomas1)
-- Document how to use cudf.pandas in tandem with multiprocessing ([#15940](https://github.com/rapidsai/cudf/pull/15940)) [@wence-](https://github.com/wence-)
-- DOC: Add documentation for cudf.pandas in the Developer Guide ([#15889](https://github.com/rapidsai/cudf/pull/15889)) [@Matt711](https://github.com/Matt711)
-- Improve options docs ([#15888](https://github.com/rapidsai/cudf/pull/15888)) [@bdice](https://github.com/bdice)
-- DOC: add linkcode to docs ([#15860](https://github.com/rapidsai/cudf/pull/15860)) [@raybellwaves](https://github.com/raybellwaves)
-- DOC: use intersphinx mapping in pandas-compat ext ([#15846](https://github.com/rapidsai/cudf/pull/15846)) [@raybellwaves](https://github.com/raybellwaves)
-- Fix inconsistent usage of 'results' and 'records' in read-json.md ([#15766](https://github.com/rapidsai/cudf/pull/15766)) [@dagardner-nv](https://github.com/dagardner-nv)
-- Update PandasCompat.py to resolve references ([#15704](https://github.com/rapidsai/cudf/pull/15704)) [@raybellwaves](https://github.com/raybellwaves)
-
-## 🚀 New Features
-
-- Warn on cuDF failure when `POLARS_VERBOSE` is true ([#16308](https://github.com/rapidsai/cudf/pull/16308)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add `drop_nulls` in `cudf-polars` ([#16290](https://github.com/rapidsai/cudf/pull/16290)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- [JNI] Add setKernelPinnedCopyThreshold and setPinnedAllocationThreshold ([#16288](https://github.com/rapidsai/cudf/pull/16288)) [@abellina](https://github.com/abellina)
-- Implement support for scan_ndjson in cudf-polars ([#16263](https://github.com/rapidsai/cudf/pull/16263)) [@lithomas1](https://github.com/lithomas1)
-- Publish cudf-polars nightlies ([#16213](https://github.com/rapidsai/cudf/pull/16213)) [@lithomas1](https://github.com/lithomas1)
-- Modify `make_host_vector` and `make_device_uvector` factories to optionally use pinned memory and kernel copy ([#16206](https://github.com/rapidsai/cudf/pull/16206)) [@vuule](https://github.com/vuule)
-- Migrate lists/set_operations to pylibcudf ([#16190](https://github.com/rapidsai/cudf/pull/16190)) [@Matt711](https://github.com/Matt711)
-- Migrate lists/filling to pylibcudf ([#16189](https://github.com/rapidsai/cudf/pull/16189)) [@Matt711](https://github.com/Matt711)
-- Fall back to CPU for unsupported libcudf binaryops in cudf-polars ([#16188](https://github.com/rapidsai/cudf/pull/16188)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Use resource_ref for upstream in stream_checking_resource_adaptor ([#16187](https://github.com/rapidsai/cudf/pull/16187)) [@harrism](https://github.com/harrism)
-- Migrate lists/modifying to pylibcudf ([#16185](https://github.com/rapidsai/cudf/pull/16185)) [@Matt711](https://github.com/Matt711)
-- Migrate lists/filtering to pylibcudf ([#16184](https://github.com/rapidsai/cudf/pull/16184)) [@Matt711](https://github.com/Matt711)
-- Migrate lists/sorting to pylibcudf ([#16179](https://github.com/rapidsai/cudf/pull/16179)) [@Matt711](https://github.com/Matt711)
-- Add missing methods to lists/list_column_view.pxd in pylibcudf ([#16175](https://github.com/rapidsai/cudf/pull/16175)) [@Matt711](https://github.com/Matt711)
-- Migrate pylibcudf lists gathering ([#16170](https://github.com/rapidsai/cudf/pull/16170)) [@Matt711](https://github.com/Matt711)
-- Move kernel vis over to CUDF_HIDDEN ([#16165](https://github.com/rapidsai/cudf/pull/16165)) [@robertmaynard](https://github.com/robertmaynard)
-- Add groupby_max multi-threaded benchmark ([#16154](https://github.com/rapidsai/cudf/pull/16154)) [@srinivasyadav18](https://github.com/srinivasyadav18)
-- Promote has_nested_columns to cudf public API ([#16131](https://github.com/rapidsai/cudf/pull/16131)) [@robertmaynard](https://github.com/robertmaynard)
-- Promote IO support queries to cudf API ([#16125](https://github.com/rapidsai/cudf/pull/16125)) [@robertmaynard](https://github.com/robertmaynard)
-- cudf::merge public API now support passing a user stream ([#16124](https://github.com/rapidsai/cudf/pull/16124)) [@robertmaynard](https://github.com/robertmaynard)
-- Add TPC-H inspired examples for Libcudf ([#16088](https://github.com/rapidsai/cudf/pull/16088)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub)
-- Installed cudf header use cudf::allocate_like ([#16087](https://github.com/rapidsai/cudf/pull/16087)) [@robertmaynard](https://github.com/robertmaynard)
-- `cudf-polars` string slicing ([#16082](https://github.com/rapidsai/cudf/pull/16082)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Migrate Parquet reader to pylibcudf ([#16078](https://github.com/rapidsai/cudf/pull/16078)) [@lithomas1](https://github.com/lithomas1)
-- Migrate lists/count_elements to pylibcudf ([#16072](https://github.com/rapidsai/cudf/pull/16072)) [@Matt711](https://github.com/Matt711)
-- Migrate lists/extract to pylibcudf ([#16071](https://github.com/rapidsai/cudf/pull/16071)) [@Matt711](https://github.com/Matt711)
-- Move common string utilities to public api ([#16070](https://github.com/rapidsai/cudf/pull/16070)) [@robertmaynard](https://github.com/robertmaynard)
-- stable_distinct public api now has a stream parameter ([#16068](https://github.com/rapidsai/cudf/pull/16068)) [@robertmaynard](https://github.com/robertmaynard)
-- Migrate expressions to pylibcudf ([#16056](https://github.com/rapidsai/cudf/pull/16056)) [@lithomas1](https://github.com/lithomas1)
-- Add support to ArrowDataSource in SourceInfo ([#16050](https://github.com/rapidsai/cudf/pull/16050)) [@lithomas1](https://github.com/lithomas1)
-- Experimental support for configurable prefetching ([#16020](https://github.com/rapidsai/cudf/pull/16020)) [@vyasr](https://github.com/vyasr)
-- Migrate CSV reader to pylibcudf ([#16011](https://github.com/rapidsai/cudf/pull/16011)) [@lithomas1](https://github.com/lithomas1)
-- Migrate string `slice` APIs to `pylibcudf` ([#15988](https://github.com/rapidsai/cudf/pull/15988)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Migrate lists/contains to pylibcudf ([#15981](https://github.com/rapidsai/cudf/pull/15981)) [@Matt711](https://github.com/Matt711)
-- Remove CCCL 2.2 patches as we now always use 2.5+ ([#15969](https://github.com/rapidsai/cudf/pull/15969)) [@robertmaynard](https://github.com/robertmaynard)
-- Migrate JSON reader to pylibcudf ([#15966](https://github.com/rapidsai/cudf/pull/15966)) [@lithomas1](https://github.com/lithomas1)
-- Add a developer check for proxy objects ([#15956](https://github.com/rapidsai/cudf/pull/15956)) [@Matt711](https://github.com/Matt711)
-- Start migrating I/O writers to pylibcudf (starting with JSON) ([#15952](https://github.com/rapidsai/cudf/pull/15952)) [@lithomas1](https://github.com/lithomas1)
-- Kernel copy for pinned memory ([#15934](https://github.com/rapidsai/cudf/pull/15934)) [@vuule](https://github.com/vuule)
-- Migrate left join and conditional join benchmarks to use nvbench ([#15931](https://github.com/rapidsai/cudf/pull/15931)) [@srinivasyadav18](https://github.com/srinivasyadav18)
-- Migrate lists/combine to pylibcudf ([#15928](https://github.com/rapidsai/cudf/pull/15928)) [@Matt711](https://github.com/Matt711)
-- Plumb pylibcudf strings `contains_re` through cudf_polars ([#15918](https://github.com/rapidsai/cudf/pull/15918)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Start migrating I/O to pylibcudf ([#15899](https://github.com/rapidsai/cudf/pull/15899)) [@lithomas1](https://github.com/lithomas1)
-- Pinned vector factory that uses the global pool ([#15895](https://github.com/rapidsai/cudf/pull/15895)) [@vuule](https://github.com/vuule)
-- Migrate strings `contains` operations to `pylibcudf` ([#15880](https://github.com/rapidsai/cudf/pull/15880)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Migrate quantile.pxd to pylibcudf ([#15874](https://github.com/rapidsai/cudf/pull/15874)) [@lithomas1](https://github.com/lithomas1)
-- Migrate round to pylibcudf ([#15863](https://github.com/rapidsai/cudf/pull/15863)) [@lithomas1](https://github.com/lithomas1)
-- Migrate string replace.pxd to pylibcudf ([#15839](https://github.com/rapidsai/cudf/pull/15839)) [@lithomas1](https://github.com/lithomas1)
-- Add an Environment Variable for debugging the fast path in cudf.pandas ([#15837](https://github.com/rapidsai/cudf/pull/15837)) [@Matt711](https://github.com/Matt711)
-- Add an option to run cuIO benchmarks with pinned buffers as input ([#15830](https://github.com/rapidsai/cudf/pull/15830)) [@vuule](https://github.com/vuule)
-- Update `pylibcudf` testing utilities ([#15772](https://github.com/rapidsai/cudf/pull/15772)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Migrate string `capitalize` APIs to `pylibcudf` ([#15503](https://github.com/rapidsai/cudf/pull/15503)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add tests for `pylibcudf` binaryops ([#15470](https://github.com/rapidsai/cudf/pull/15470)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Migrate column factories to pylibcudf ([#15257](https://github.com/rapidsai/cudf/pull/15257)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- cuDF/libcudf exponentially weighted moving averages ([#9027](https://github.com/rapidsai/cudf/pull/9027)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-
-## 🛠️ Improvements
-
-- Ensure objects with __interface__ are converted to cupy/numpy arrays ([#16436](https://github.com/rapidsai/cudf/pull/16436)) [@mroeschke](https://github.com/mroeschke)
-- Add about rmm modes in `cudf.pandas` docs ([#16404](https://github.com/rapidsai/cudf/pull/16404)) [@galipremsagar](https://github.com/galipremsagar)
-- Gracefully CUDF_FAIL when `skip_rows > 0` in Chunked Parquet reader ([#16385](https://github.com/rapidsai/cudf/pull/16385)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Make C++ compilation warning free after #16297 ([#16379](https://github.com/rapidsai/cudf/pull/16379)) [@wence-](https://github.com/wence-)
-- Align Index __init__ APIs with pandas 2.x ([#16362](https://github.com/rapidsai/cudf/pull/16362)) [@mroeschke](https://github.com/mroeschke)
-- Use rapids_cpm_bs_thread_pool() ([#16360](https://github.com/rapidsai/cudf/pull/16360)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Rename PrefetchConfig to prefetch_config. ([#16358](https://github.com/rapidsai/cudf/pull/16358)) [@bdice](https://github.com/bdice)
-- Implement parquet reading using pylibcudf in cudf-polars ([#16346](https://github.com/rapidsai/cudf/pull/16346)) [@lithomas1](https://github.com/lithomas1)
-- Fix compile warnings with `jni_utils.hpp` ([#16336](https://github.com/rapidsai/cudf/pull/16336)) [@ttnghia](https://github.com/ttnghia)
-- Align Series APIs with pandas 2.x ([#16333](https://github.com/rapidsai/cudf/pull/16333)) [@mroeschke](https://github.com/mroeschke)
-- Add missing `stream` param to dictionary factory APIs ([#16319](https://github.com/rapidsai/cudf/pull/16319)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub)
-- Mark cudf._typing as a typing module in ruff ([#16318](https://github.com/rapidsai/cudf/pull/16318)) [@mroeschke](https://github.com/mroeschke)
-- Add `stream` param to list explode APIs ([#16317](https://github.com/rapidsai/cudf/pull/16317)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub)
-- Fix polars for 1.2.1 ([#16316](https://github.com/rapidsai/cudf/pull/16316)) [@lithomas1](https://github.com/lithomas1)
-- Use workflow branch 24.08 again ([#16314](https://github.com/rapidsai/cudf/pull/16314)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Deprecate dtype= parameter in reduction methods ([#16313](https://github.com/rapidsai/cudf/pull/16313)) [@mroeschke](https://github.com/mroeschke)
-- Remove squeeze argument from groupby ([#16312](https://github.com/rapidsai/cudf/pull/16312)) [@mroeschke](https://github.com/mroeschke)
-- Align more DataFrame APIs with pandas ([#16310](https://github.com/rapidsai/cudf/pull/16310)) [@mroeschke](https://github.com/mroeschke)
-- Clean unneeded/redudant dtype utils ([#16309](https://github.com/rapidsai/cudf/pull/16309)) [@mroeschke](https://github.com/mroeschke)
-- Implement read_csv in cudf-polars using pylibcudf ([#16307](https://github.com/rapidsai/cudf/pull/16307)) [@lithomas1](https://github.com/lithomas1)
-- Use Column.can_cast_safely instead of some ad-hoc dtype functions in .where ([#16303](https://github.com/rapidsai/cudf/pull/16303)) [@mroeschke](https://github.com/mroeschke)
-- Drop `{{ pin_compatible('numpy', max_pin='x') }}` ([#16301](https://github.com/rapidsai/cudf/pull/16301)) [@jakirkham](https://github.com/jakirkham)
-- Host implementation of `to_arrow` using nanoarrow ([#16297](https://github.com/rapidsai/cudf/pull/16297)) [@zeroshade](https://github.com/zeroshade)
-- Add ability to prefetch in `cudf.pandas` and change default to managed pool ([#16296](https://github.com/rapidsai/cudf/pull/16296)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix tests for polars 1.2 ([#16292](https://github.com/rapidsai/cudf/pull/16292)) [@lithomas1](https://github.com/lithomas1)
-- Introduce dedicated options for low memory readers ([#16289](https://github.com/rapidsai/cudf/pull/16289)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove decimal/floating 64/128bit switches due to register pressure ([#16287](https://github.com/rapidsai/cudf/pull/16287)) [@pmattione-nvidia](https://github.com/pmattione-nvidia)
-- Make ColumnAccessor strictly require a mapping of columns ([#16285](https://github.com/rapidsai/cudf/pull/16285)) [@mroeschke](https://github.com/mroeschke)
-- Introduce version file so we can conditionally handle things in tests ([#16280](https://github.com/rapidsai/cudf/pull/16280)) [@wence-](https://github.com/wence-)
-- Type & reduce cupy usage ([#16277](https://github.com/rapidsai/cudf/pull/16277)) [@mroeschke](https://github.com/mroeschke)
-- Update cudf::detail::grid_1d to use thread_index_type ([#16276](https://github.com/rapidsai/cudf/pull/16276)) [@davidwendt](https://github.com/davidwendt)
-- Replace np.isscalar/issubdtype checks with is_scalar/.kind checks ([#16275](https://github.com/rapidsai/cudf/pull/16275)) [@mroeschke](https://github.com/mroeschke)
-- Remove xml from sort_ninja_log.py utility ([#16274](https://github.com/rapidsai/cudf/pull/16274)) [@davidwendt](https://github.com/davidwendt)
-- Fix issue in horizontal concat implementation in cudf-polars ([#16271](https://github.com/rapidsai/cudf/pull/16271)) [@wence-](https://github.com/wence-)
-- Preserve order in left join for cudf-polars ([#16268](https://github.com/rapidsai/cudf/pull/16268)) [@wence-](https://github.com/wence-)
-- Replace is_datetime/timedelta_dtype checks with .kind checks ([#16262](https://github.com/rapidsai/cudf/pull/16262)) [@mroeschke](https://github.com/mroeschke)
-- Replace is_float/integer_dtype checks with .kind checks ([#16261](https://github.com/rapidsai/cudf/pull/16261)) [@mroeschke](https://github.com/mroeschke)
-- Build and test with CUDA 12.5.1 ([#16259](https://github.com/rapidsai/cudf/pull/16259)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Replace is_bool_type with checking .dtype.kind ([#16255](https://github.com/rapidsai/cudf/pull/16255)) [@mroeschke](https://github.com/mroeschke)
-- remove `cuco_noexcept.diff` ([#16254](https://github.com/rapidsai/cudf/pull/16254)) [@trxcllnt](https://github.com/trxcllnt)
-- Update contains_tests.cpp to use public cudf::slice ([#16253](https://github.com/rapidsai/cudf/pull/16253)) [@davidwendt](https://github.com/davidwendt)
-- Improve the test data for pylibcudf I/O tests ([#16247](https://github.com/rapidsai/cudf/pull/16247)) [@lithomas1](https://github.com/lithomas1)
-- Short circuit some Column methods ([#16246](https://github.com/rapidsai/cudf/pull/16246)) [@mroeschke](https://github.com/mroeschke)
-- Make nvcomp adapter compatible with new version macros ([#16245](https://github.com/rapidsai/cudf/pull/16245)) [@vuule](https://github.com/vuule)
-- Add Column.strftime/strptime instead of overloading `as_string/datetime/timedelta_column` ([#16243](https://github.com/rapidsai/cudf/pull/16243)) [@mroeschke](https://github.com/mroeschke)
-- Remove temporary functor overloads required by cuco version bump ([#16242](https://github.com/rapidsai/cudf/pull/16242)) [@PointKernel](https://github.com/PointKernel)
-- Remove hash_character_ngrams dependency from jaccard_index ([#16241](https://github.com/rapidsai/cudf/pull/16241)) [@davidwendt](https://github.com/davidwendt)
-- Expose sorted groupby parameters to pylibcudf ([#16240](https://github.com/rapidsai/cudf/pull/16240)) [@wence-](https://github.com/wence-)
-- Expose reflection to check if casting between two types is supported ([#16239](https://github.com/rapidsai/cudf/pull/16239)) [@wence-](https://github.com/wence-)
-- Handle nans in groupby-aggregations in polars executor ([#16233](https://github.com/rapidsai/cudf/pull/16233)) [@wence-](https://github.com/wence-)
-- Remove `mr` param from `write_csv` and `write_json` ([#16231](https://github.com/rapidsai/cudf/pull/16231)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub)
-- Support Literals in groupby-agg ([#16218](https://github.com/rapidsai/cudf/pull/16218)) [@wence-](https://github.com/wence-)
-- Handler csv reader options in cudf-polars ([#16211](https://github.com/rapidsai/cudf/pull/16211)) [@wence-](https://github.com/wence-)
-- Update vendored thread_pool implementation ([#16210](https://github.com/rapidsai/cudf/pull/16210)) [@wence-](https://github.com/wence-)
-- Add low memory JSON reader for `cudf.pandas` ([#16204](https://github.com/rapidsai/cudf/pull/16204)) [@galipremsagar](https://github.com/galipremsagar)
-- Clean up state variables in MultiIndex ([#16203](https://github.com/rapidsai/cudf/pull/16203)) [@mroeschke](https://github.com/mroeschke)
-- skip CMake 3.30.0 ([#16202](https://github.com/rapidsai/cudf/pull/16202)) [@jameslamb](https://github.com/jameslamb)
-- Assert valid metadata is passed in to_arrow for list_view ([#16198](https://github.com/rapidsai/cudf/pull/16198)) [@wence-](https://github.com/wence-)
-- Expose type traits to pylibcudf ([#16197](https://github.com/rapidsai/cudf/pull/16197)) [@wence-](https://github.com/wence-)
-- Report number of rows per file read by PQ reader when no row selection and fix segfault in chunked PQ reader when skip_rows > 0 ([#16195](https://github.com/rapidsai/cudf/pull/16195)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Cast count aggs to correct dtype in translation ([#16192](https://github.com/rapidsai/cudf/pull/16192)) [@wence-](https://github.com/wence-)
-- Some small fixes in cudf-polars ([#16191](https://github.com/rapidsai/cudf/pull/16191)) [@wence-](https://github.com/wence-)
-- split up CUDA-suffixed dependencies in dependencies.yaml ([#16183](https://github.com/rapidsai/cudf/pull/16183)) [@jameslamb](https://github.com/jameslamb)
-- Define PTDS for the stream hook libs ([#16182](https://github.com/rapidsai/cudf/pull/16182)) [@trxcllnt](https://github.com/trxcllnt)
-- Make `test_python_cudf_pandas` generate `requirements.txt` ([#16181](https://github.com/rapidsai/cudf/pull/16181)) [@trxcllnt](https://github.com/trxcllnt)
-- Add environment-agnostic `ci/run_cudf_polars_pytest.sh` ([#16178](https://github.com/rapidsai/cudf/pull/16178)) [@trxcllnt](https://github.com/trxcllnt)
-- Implement translation for some unary functions and a single datetime extraction ([#16173](https://github.com/rapidsai/cudf/pull/16173)) [@wence-](https://github.com/wence-)
-- Remove size constraints on source files in batched JSON reading ([#16162](https://github.com/rapidsai/cudf/pull/16162)) [@shrshi](https://github.com/shrshi)
-- CI: Build wheels for cudf-polars ([#16156](https://github.com/rapidsai/cudf/pull/16156)) [@lithomas1](https://github.com/lithomas1)
-- Update cudf-polars for v1 release of polars ([#16149](https://github.com/rapidsai/cudf/pull/16149)) [@wence-](https://github.com/wence-)
-- Use strings concatenate to support large strings in CSV writer ([#16148](https://github.com/rapidsai/cudf/pull/16148)) [@davidwendt](https://github.com/davidwendt)
-- Use verify-alpha-spec hook ([#16144](https://github.com/rapidsai/cudf/pull/16144)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Adds write-coalescing code path optimization to FST ([#16143](https://github.com/rapidsai/cudf/pull/16143)) [@elstehle](https://github.com/elstehle)
-- MAINT: Adapt to NumPy 2 promotion changes ([#16141](https://github.com/rapidsai/cudf/pull/16141)) [@seberg](https://github.com/seberg)
-- API: Check for integer overflows when creating scalar form python int ([#16140](https://github.com/rapidsai/cudf/pull/16140)) [@seberg](https://github.com/seberg)
-- Remove the (unused) implementation of `host_parse_nested_json` ([#16135](https://github.com/rapidsai/cudf/pull/16135)) [@vuule](https://github.com/vuule)
-- Deprecate Arrow support in I/O ([#16132](https://github.com/rapidsai/cudf/pull/16132)) [@lithomas1](https://github.com/lithomas1)
-- Disable dict support for split-page kernel in the parquet reader. ([#16128](https://github.com/rapidsai/cudf/pull/16128)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Add throughput metrics for REDUCTION_BENCH/REDUCTION_NVBENCH benchmarks ([#16126](https://github.com/rapidsai/cudf/pull/16126)) [@jihoonson](https://github.com/jihoonson)
-- Add ensure_index to not unnecessarily shallow copy cudf.Index ([#16117](https://github.com/rapidsai/cudf/pull/16117)) [@mroeschke](https://github.com/mroeschke)
-- Make binary operators work between fixed-point and floating args ([#16116](https://github.com/rapidsai/cudf/pull/16116)) [@pmattione-nvidia](https://github.com/pmattione-nvidia)
-- Implement Ternary copy_if_else ([#16114](https://github.com/rapidsai/cudf/pull/16114)) [@wence-](https://github.com/wence-)
-- Implement handlers for series literal in cudf-polars ([#16113](https://github.com/rapidsai/cudf/pull/16113)) [@wence-](https://github.com/wence-)
-- Fix dtype errors in `StringArrays` ([#16111](https://github.com/rapidsai/cudf/pull/16111)) [@galipremsagar](https://github.com/galipremsagar)
-- Ensure MultiIndex.to_frame deep copies columns ([#16110](https://github.com/rapidsai/cudf/pull/16110)) [@mroeschke](https://github.com/mroeschke)
-- Parallelize `gpuInitStringDescriptors` for fixed length byte array data ([#16109](https://github.com/rapidsai/cudf/pull/16109)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Finish implementation of cudf-polars boolean function handlers ([#16098](https://github.com/rapidsai/cudf/pull/16098)) [@wence-](https://github.com/wence-)
-- Expose and then implement support for cross joins in cudf-polars ([#16097](https://github.com/rapidsai/cudf/pull/16097)) [@wence-](https://github.com/wence-)
-- Defer copying in Column.astype(copy=True) ([#16095](https://github.com/rapidsai/cudf/pull/16095)) [@mroeschke](https://github.com/mroeschke)
-- Fix segfault in conditional join ([#16094](https://github.com/rapidsai/cudf/pull/16094)) [@bdice](https://github.com/bdice)
-- Free temp memory no longer needed in multibyte_split processing ([#16091](https://github.com/rapidsai/cudf/pull/16091)) [@davidwendt](https://github.com/davidwendt)
-- Rename gather/scatter benchmarks to clarify coalesced behavior. ([#16083](https://github.com/rapidsai/cudf/pull/16083)) [@bdice](https://github.com/bdice)
-- Adapt to polars upstream changes and turn on CI testing ([#16081](https://github.com/rapidsai/cudf/pull/16081)) [@wence-](https://github.com/wence-)
-- Reduce/clean copy usage in Series, reshaping ([#16080](https://github.com/rapidsai/cudf/pull/16080)) [@mroeschke](https://github.com/mroeschke)
-- Account for FIXED_LEN_BYTE_ARRAY when calculating fragment sizes in Parquet writer ([#16064](https://github.com/rapidsai/cudf/pull/16064)) [@etseidl](https://github.com/etseidl)
-- Reduce (shallow) copies in DataFrame ops ([#16060](https://github.com/rapidsai/cudf/pull/16060)) [@mroeschke](https://github.com/mroeschke)
-- Add multi-file support to `dask_cudf.read_json` ([#16057](https://github.com/rapidsai/cudf/pull/16057)) [@rjzamora](https://github.com/rjzamora)
-- Reduce deep copies in Index ops ([#16054](https://github.com/rapidsai/cudf/pull/16054)) [@mroeschke](https://github.com/mroeschke)
-- Implement chunked column wise concat in chunked parquet reader ([#16052](https://github.com/rapidsai/cudf/pull/16052)) [@galipremsagar](https://github.com/galipremsagar)
-- Add exception when trying to create large strings with cudf::test::strings_column_wrapper ([#16049](https://github.com/rapidsai/cudf/pull/16049)) [@davidwendt](https://github.com/davidwendt)
-- Return `FrozenList` for `Index.names` ([#16047](https://github.com/rapidsai/cudf/pull/16047)) [@galipremsagar](https://github.com/galipremsagar)
-- Add ast cast test ([#16045](https://github.com/rapidsai/cudf/pull/16045)) [@pmattione-nvidia](https://github.com/pmattione-nvidia)
-- Remove `override_dtypes` and `include_index` from `Frame._copy_type_metadata` ([#16043](https://github.com/rapidsai/cudf/pull/16043)) [@mroeschke](https://github.com/mroeschke)
-- Add ruff rules to avoid importing from typing ([#16040](https://github.com/rapidsai/cudf/pull/16040)) [@mroeschke](https://github.com/mroeschke)
-- Fix decimal -> float cast in ast code ([#16038](https://github.com/rapidsai/cudf/pull/16038)) [@pmattione-nvidia](https://github.com/pmattione-nvidia)
-- Add compile option to enable large strings support ([#16037](https://github.com/rapidsai/cudf/pull/16037)) [@davidwendt](https://github.com/davidwendt)
-- Reduce conditional_join nvbench configurations ([#16036](https://github.com/rapidsai/cudf/pull/16036)) [@srinivasyadav18](https://github.com/srinivasyadav18)
-- Project automation update: skip if not in project ([#16035](https://github.com/rapidsai/cudf/pull/16035)) [@jarmak-nv](https://github.com/jarmak-nv)
-- Add stream parameter to cudf::io::text::multibyte_split ([#16034](https://github.com/rapidsai/cudf/pull/16034)) [@davidwendt](https://github.com/davidwendt)
-- Delete unused code from stringfunction evaluator ([#16032](https://github.com/rapidsai/cudf/pull/16032)) [@wence-](https://github.com/wence-)
-- Fix exclude regex in pre-commit clang-format hook ([#16030](https://github.com/rapidsai/cudf/pull/16030)) [@wence-](https://github.com/wence-)
-- Refactor rmm usage in `cudf.pandas` ([#16021](https://github.com/rapidsai/cudf/pull/16021)) [@galipremsagar](https://github.com/galipremsagar)
-- Enable ruff TCH: typing imports under if TYPE_CHECKING ([#16015](https://github.com/rapidsai/cudf/pull/16015)) [@mroeschke](https://github.com/mroeschke)
-- Restrict the allowed pandas timezone objects in cudf ([#16013](https://github.com/rapidsai/cudf/pull/16013)) [@mroeschke](https://github.com/mroeschke)
-- orc multithreaded benchmark ([#16009](https://github.com/rapidsai/cudf/pull/16009)) [@zpuller](https://github.com/zpuller)
-- Add tests of expression-based sort and sort-by ([#16008](https://github.com/rapidsai/cudf/pull/16008)) [@wence-](https://github.com/wence-)
-- Add tests of implemented StringFunctions ([#16007](https://github.com/rapidsai/cudf/pull/16007)) [@wence-](https://github.com/wence-)
-- Add test that diagonal concat with mismatching schemas raises ([#16006](https://github.com/rapidsai/cudf/pull/16006)) [@wence-](https://github.com/wence-)
-- Add coverage selecting len from a dataframe (number of rows) ([#16005](https://github.com/rapidsai/cudf/pull/16005)) [@wence-](https://github.com/wence-)
-- Add basic tests of dataframe scan ([#16003](https://github.com/rapidsai/cudf/pull/16003)) [@wence-](https://github.com/wence-)
-- Add coverage for both expression and dataframe filter ([#16002](https://github.com/rapidsai/cudf/pull/16002)) [@wence-](https://github.com/wence-)
-- Remove deprecated ExtContext node ([#16001](https://github.com/rapidsai/cudf/pull/16001)) [@wence-](https://github.com/wence-)
-- Fix typo bug in gather implementation ([#16000](https://github.com/rapidsai/cudf/pull/16000)) [@wence-](https://github.com/wence-)
-- Extend coverage of groupby and rolling window nodes ([#15999](https://github.com/rapidsai/cudf/pull/15999)) [@wence-](https://github.com/wence-)
-- Coverage of binops where one or both operands are a scalar ([#15998](https://github.com/rapidsai/cudf/pull/15998)) [@wence-](https://github.com/wence-)
-- Add full coverage for whole-frame Agg expressions ([#15997](https://github.com/rapidsai/cudf/pull/15997)) [@wence-](https://github.com/wence-)
-- Add tests covering magic methods of Expr objects ([#15996](https://github.com/rapidsai/cudf/pull/15996)) [@wence-](https://github.com/wence-)
-- Add full coverage of utility functions ([#15995](https://github.com/rapidsai/cudf/pull/15995)) [@wence-](https://github.com/wence-)
-- Test behaviour of containers ([#15994](https://github.com/rapidsai/cudf/pull/15994)) [@wence-](https://github.com/wence-)
-- Fix implemention of any, all, and isbetween ([#15993](https://github.com/rapidsai/cudf/pull/15993)) [@wence-](https://github.com/wence-)
-- Raise early on unhandled PythonScan node ([#15992](https://github.com/rapidsai/cudf/pull/15992)) [@wence-](https://github.com/wence-)
-- Remove mapfunction nodes that don't exist/aren't supported ([#15991](https://github.com/rapidsai/cudf/pull/15991)) [@wence-](https://github.com/wence-)
-- Add test coverage for slicing with "out of bounds" negative indices ([#15990](https://github.com/rapidsai/cudf/pull/15990)) [@wence-](https://github.com/wence-)
-- Standardize and type `Series.dt` methods ([#15987](https://github.com/rapidsai/cudf/pull/15987)) [@mroeschke](https://github.com/mroeschke)
-- Refactor distinct with hashset-based algorithms ([#15984](https://github.com/rapidsai/cudf/pull/15984)) [@srinivasyadav18](https://github.com/srinivasyadav18)
-- resolve dependency-file-generator warning, remove unnecessary rapids-build-backend configuration ([#15980](https://github.com/rapidsai/cudf/pull/15980)) [@jameslamb](https://github.com/jameslamb)
-- Project automation bug fixes ([#15971](https://github.com/rapidsai/cudf/pull/15971)) [@jarmak-nv](https://github.com/jarmak-nv)
-- Add typing to single_column_frame ([#15965](https://github.com/rapidsai/cudf/pull/15965)) [@mroeschke](https://github.com/mroeschke)
-- Move some misc Frame methods to appropriate locations ([#15963](https://github.com/rapidsai/cudf/pull/15963)) [@mroeschke](https://github.com/mroeschke)
-- Condense pylibcudf data fixtures ([#15958](https://github.com/rapidsai/cudf/pull/15958)) [@lithomas1](https://github.com/lithomas1)
-- Refactor fillna logic to push specifics toward Frame subclasses and Column subclasses ([#15957](https://github.com/rapidsai/cudf/pull/15957)) [@mroeschke](https://github.com/mroeschke)
-- Remove unused parsing utilities ([#15955](https://github.com/rapidsai/cudf/pull/15955)) [@vuule](https://github.com/vuule)
-- Remove `Scalar` container type from polars interpreter ([#15953](https://github.com/rapidsai/cudf/pull/15953)) [@wence-](https://github.com/wence-)
-- Support arbitrary CUDA versions in UDF code ([#15950](https://github.com/rapidsai/cudf/pull/15950)) [@bdice](https://github.com/bdice)
-- Support large strings in cudf::io::text::multibyte_split ([#15947](https://github.com/rapidsai/cudf/pull/15947)) [@davidwendt](https://github.com/davidwendt)
-- Add external issue label and project automation ([#15945](https://github.com/rapidsai/cudf/pull/15945)) [@jarmak-nv](https://github.com/jarmak-nv)
-- Enable round-tripping of large strings in `cudf` ([#15944](https://github.com/rapidsai/cudf/pull/15944)) [@galipremsagar](https://github.com/galipremsagar)
-- Add more complete type annotations in polars interpreter ([#15942](https://github.com/rapidsai/cudf/pull/15942)) [@wence-](https://github.com/wence-)
-- Update implementations to build with the latest cuco ([#15938](https://github.com/rapidsai/cudf/pull/15938)) [@PointKernel](https://github.com/PointKernel)
-- Support timezone aware pandas inputs in cudf ([#15935](https://github.com/rapidsai/cudf/pull/15935)) [@mroeschke](https://github.com/mroeschke)
-- Define Column.nan_as_null to return self ([#15923](https://github.com/rapidsai/cudf/pull/15923)) [@mroeschke](https://github.com/mroeschke)
-- Make Frame._dtype an iterator instead of a dict ([#15920](https://github.com/rapidsai/cudf/pull/15920)) [@mroeschke](https://github.com/mroeschke)
-- Port start of datetime.hpp to pylibcudf ([#15916](https://github.com/rapidsai/cudf/pull/15916)) [@wence-](https://github.com/wence-)
-- Introduce `NamedColumn` concept in cudf-polars ([#15914](https://github.com/rapidsai/cudf/pull/15914)) [@wence-](https://github.com/wence-)
-- Avoid redefining Frame._get_columns_by_label in subclasses ([#15912](https://github.com/rapidsai/cudf/pull/15912)) [@mroeschke](https://github.com/mroeschke)
-- Templatization of fixed-width parquet decoding kernels.
-- New Decimal <--> Floating conversion ([#15905](https://github.com/rapidsai/cudf/pull/15905)) [@pmattione-nvidia](https://github.com/pmattione-nvidia)
-- Use Arrow C Data Interface functions for Python interop ([#15904](https://github.com/rapidsai/cudf/pull/15904)) [@vyasr](https://github.com/vyasr)
-- Use offsetalator in cudf::io::json::detail::parse_string ([#15900](https://github.com/rapidsai/cudf/pull/15900)) [@davidwendt](https://github.com/davidwendt)
-- Rename strings multiple target replace API ([#15898](https://github.com/rapidsai/cudf/pull/15898)) [@davidwendt](https://github.com/davidwendt)
-- Apply clang-tidy autofixes ([#15894](https://github.com/rapidsai/cudf/pull/15894)) [@vyasr](https://github.com/vyasr)
-- Update Python labels and remove unnecessary ones ([#15893](https://github.com/rapidsai/cudf/pull/15893)) [@vyasr](https://github.com/vyasr)
-- Clean up pylibcudf test assertations ([#15892](https://github.com/rapidsai/cudf/pull/15892)) [@lithomas1](https://github.com/lithomas1)
-- Use offsetalator in orc rowgroup_char_counts_kernel ([#15891](https://github.com/rapidsai/cudf/pull/15891)) [@davidwendt](https://github.com/davidwendt)
-- Ensure literals have correct dtype ([#15890](https://github.com/rapidsai/cudf/pull/15890)) [@wence-](https://github.com/wence-)
-- Add overflow check when converting large strings to lists columns ([#15887](https://github.com/rapidsai/cudf/pull/15887)) [@davidwendt](https://github.com/davidwendt)
-- Use offsetalator in nvtext::tokenize_with_vocabulary ([#15878](https://github.com/rapidsai/cudf/pull/15878)) [@davidwendt](https://github.com/davidwendt)
-- Update interleave lists column for large strings ([#15877](https://github.com/rapidsai/cudf/pull/15877)) [@davidwendt](https://github.com/davidwendt)
-- Simple NumPy 2 fixes that are clearly no behavior change ([#15876](https://github.com/rapidsai/cudf/pull/15876)) [@seberg](https://github.com/seberg)
-- Support `arrow:schema` in Parquet writer to faithfully roundtrip `duration` types with Arrow ([#15875](https://github.com/rapidsai/cudf/pull/15875)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Refactor join benchmarks to target public APIs with the default stream ([#15873](https://github.com/rapidsai/cudf/pull/15873)) [@PointKernel](https://github.com/PointKernel)
-- Fix url-decode benchmark to use offsetalator ([#15871](https://github.com/rapidsai/cudf/pull/15871)) [@davidwendt](https://github.com/davidwendt)
-- Use offsetalator in strings shift functor ([#15870](https://github.com/rapidsai/cudf/pull/15870)) [@davidwendt](https://github.com/davidwendt)
-- Memory Profiling ([#15866](https://github.com/rapidsai/cudf/pull/15866)) [@madsbk](https://github.com/madsbk)
-- Expose stream parameter to public rolling APIs ([#15865](https://github.com/rapidsai/cudf/pull/15865)) [@srinivasyadav18](https://github.com/srinivasyadav18)
-- Make Frame.astype return Self instead of a ColumnAccessor ([#15861](https://github.com/rapidsai/cudf/pull/15861)) [@mroeschke](https://github.com/mroeschke)
-- Use ColumnAccessor row and column length attributes more consistently ([#15857](https://github.com/rapidsai/cudf/pull/15857)) [@mroeschke](https://github.com/mroeschke)
-- add unit test setup for cudf_kafka ([#15853](https://github.com/rapidsai/cudf/pull/15853)) [@jameslamb](https://github.com/jameslamb)
-- Remove internal usage of core.index.as_index in favor of cudf.Index ([#15851](https://github.com/rapidsai/cudf/pull/15851)) [@mroeschke](https://github.com/mroeschke)
-- Ensure cudf.Series(cudf.Series(...)) creates a reference to the same index ([#15845](https://github.com/rapidsai/cudf/pull/15845)) [@mroeschke](https://github.com/mroeschke)
-- Remove benchmark-specific use of pinned-pooled memory in Parquet multithreaded benchmark. ([#15838](https://github.com/rapidsai/cudf/pull/15838)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Implement `on_bad_lines` in json reader ([#15834](https://github.com/rapidsai/cudf/pull/15834)) [@galipremsagar](https://github.com/galipremsagar)
-- Make Column.to_pandas return Index instead of Series ([#15833](https://github.com/rapidsai/cudf/pull/15833)) [@mroeschke](https://github.com/mroeschke)
-- Add test of interoperability of cuDF and arrow BYTE_STREAM_SPLIT encoders ([#15832](https://github.com/rapidsai/cudf/pull/15832)) [@etseidl](https://github.com/etseidl)
-- Refactor Parquet writer options and builders ([#15831](https://github.com/rapidsai/cudf/pull/15831)) [@etseidl](https://github.com/etseidl)
-- Migrate reshape.pxd to pylibcudf ([#15827](https://github.com/rapidsai/cudf/pull/15827)) [@lithomas1](https://github.com/lithomas1)
-- Remove legacy JSON reader and concurrent_unordered_map.cuh. ([#15813](https://github.com/rapidsai/cudf/pull/15813)) [@bdice](https://github.com/bdice)
-- Switch cuIO benchmarks to use pinned-pool host allocations by default. ([#15805](https://github.com/rapidsai/cudf/pull/15805)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Change thrust::count_if call to raw kernel in strings split APIs ([#15762](https://github.com/rapidsai/cudf/pull/15762)) [@davidwendt](https://github.com/davidwendt)
-- Improve performance for long strings for nvtext::replace_tokens ([#15756](https://github.com/rapidsai/cudf/pull/15756)) [@davidwendt](https://github.com/davidwendt)
-- Implement chunked parquet reader in cudf-python ([#15728](https://github.com/rapidsai/cudf/pull/15728)) [@galipremsagar](https://github.com/galipremsagar)
-- Add `from_arrow_host` functions for cudf interop with nanoarrow ([#15645](https://github.com/rapidsai/cudf/pull/15645)) [@zeroshade](https://github.com/zeroshade)
-- Add ability to enable rmm pool on `cudf.pandas` import ([#15628](https://github.com/rapidsai/cudf/pull/15628)) [@galipremsagar](https://github.com/galipremsagar)
-- Executor for polars logical plans ([#15504](https://github.com/rapidsai/cudf/pull/15504)) [@wence-](https://github.com/wence-)
-- Implement day_name and month_name to match pandas ([#15479](https://github.com/rapidsai/cudf/pull/15479)) [@btepera](https://github.com/btepera)
-- Utilities for decimal <--> floating conversion ([#15359](https://github.com/rapidsai/cudf/pull/15359)) [@pmattione-nvidia](https://github.com/pmattione-nvidia)
-- For powers of 10, replace ipow with switch ([#15353](https://github.com/rapidsai/cudf/pull/15353)) [@pmattione-nvidia](https://github.com/pmattione-nvidia)
-- Use rapids-build-backend. ([#15245](https://github.com/rapidsai/cudf/pull/15245)) [@vyasr](https://github.com/vyasr)
-- Add `codecov` coverage for `pandas_tests` ([#14513](https://github.com/rapidsai/cudf/pull/14513)) [@galipremsagar](https://github.com/galipremsagar)
-
-# cudf 24.06.00 (5 Jun 2024)
-
-## 🚨 Breaking Changes
-
-- Deprecate `Groupby.collect` ([#15808](https://github.com/rapidsai/cudf/pull/15808)) [@galipremsagar](https://github.com/galipremsagar)
-- Raise FileNotFoundError when a literal JSON string that looks like a json filename is passed ([#15806](https://github.com/rapidsai/cudf/pull/15806)) [@lithomas1](https://github.com/lithomas1)
-- Support filtered I/O in `chunked_parquet_reader` and simplify the use of `parquet_reader_options` ([#15764](https://github.com/rapidsai/cudf/pull/15764)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Raise errors for unsupported operations on certain types ([#15712](https://github.com/rapidsai/cudf/pull/15712)) [@galipremsagar](https://github.com/galipremsagar)
-- Support `DurationType` in cudf parquet reader via `arrow:schema` ([#15617](https://github.com/rapidsai/cudf/pull/15617)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Remove protobuf and use parsed ORC statistics from libcudf ([#15564](https://github.com/rapidsai/cudf/pull/15564)) [@bdice](https://github.com/bdice)
-- Remove legacy JSON reader from Python ([#15538](https://github.com/rapidsai/cudf/pull/15538)) [@bdice](https://github.com/bdice)
-- Removing all batching code from parquet writer ([#15528](https://github.com/rapidsai/cudf/pull/15528)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Convert libcudf resource parameters to rmm::device_async_resource_ref ([#15507](https://github.com/rapidsai/cudf/pull/15507)) [@harrism](https://github.com/harrism)
-- Remove deprecated strings offsets_begin ([#15454](https://github.com/rapidsai/cudf/pull/15454)) [@davidwendt](https://github.com/davidwendt)
-- Floating <--> fixed-point conversion must now be called explicitly ([#15438](https://github.com/rapidsai/cudf/pull/15438)) [@pmattione-nvidia](https://github.com/pmattione-nvidia)
-- Bind `read_parquet_metadata` API to libcudf instead of pyarrow and extract `RowGroup` information ([#15398](https://github.com/rapidsai/cudf/pull/15398)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Remove deprecated hash() and spark_murmurhash3_x86_32() ([#15375](https://github.com/rapidsai/cudf/pull/15375)) [@davidwendt](https://github.com/davidwendt)
-- Remove empty elements from exploded character-ngrams output ([#15371](https://github.com/rapidsai/cudf/pull/15371)) [@davidwendt](https://github.com/davidwendt)
-- [FEA] Performance improvement for mixed left semi/anti join ([#15288](https://github.com/rapidsai/cudf/pull/15288)) [@tgujar](https://github.com/tgujar)
-- Align date_range defaults with pandas, support tz ([#15139](https://github.com/rapidsai/cudf/pull/15139)) [@mroeschke](https://github.com/mroeschke)
-
-## 🐛 Bug Fixes
-
-- Revert "Fix docs for IO readers and strings_convert" ([#15872](https://github.com/rapidsai/cudf/pull/15872)) [@vyasr](https://github.com/vyasr)
-- Remove problematic call of index setter to unblock dask-cuda CI ([#15844](https://github.com/rapidsai/cudf/pull/15844)) [@charlesbluca](https://github.com/charlesbluca)
-- Use rapids_cpm_nvtx3 to get same nvtx3 target state as rmm ([#15840](https://github.com/rapidsai/cudf/pull/15840)) [@robertmaynard](https://github.com/robertmaynard)
-- Return boolean from config_host_memory_resource instead of throwing ([#15815](https://github.com/rapidsai/cudf/pull/15815)) [@abellina](https://github.com/abellina)
-- Add temporary dask-cudf workaround for categorical sorting ([#15801](https://github.com/rapidsai/cudf/pull/15801)) [@rjzamora](https://github.com/rjzamora)
-- Fix row group alignment in ORC writer ([#15789](https://github.com/rapidsai/cudf/pull/15789)) [@vuule](https://github.com/vuule)
-- Raise error when sorting by categorical column in dask-cudf ([#15788](https://github.com/rapidsai/cudf/pull/15788)) [@rjzamora](https://github.com/rjzamora)
-- Upgrade `arrow` to 16.1 ([#15787](https://github.com/rapidsai/cudf/pull/15787)) [@galipremsagar](https://github.com/galipremsagar)
-- Add support for `PandasArray` for `pandas<2.1.0` ([#15786](https://github.com/rapidsai/cudf/pull/15786)) [@galipremsagar](https://github.com/galipremsagar)
-- Limit runtime dependency to `libarrow>=16.0.0,<16.1.0a0` ([#15782](https://github.com/rapidsai/cudf/pull/15782)) [@pentschev](https://github.com/pentschev)
-- Fix cat.as_ordered not propogating correct size ([#15780](https://github.com/rapidsai/cudf/pull/15780)) [@mroeschke](https://github.com/mroeschke)
-- Handle mixed-like homogeneous types in `isin` ([#15771](https://github.com/rapidsai/cudf/pull/15771)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix id_vars and value_vars not accepting string scalars in melt ([#15765](https://github.com/rapidsai/cudf/pull/15765)) [@mroeschke](https://github.com/mroeschke)
-- Fix `DatetimeIndex.loc` for all types of ordering cases ([#15761](https://github.com/rapidsai/cudf/pull/15761)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix arrow versioning logic ([#15755](https://github.com/rapidsai/cudf/pull/15755)) [@vyasr](https://github.com/vyasr)
-- Avoid running sanitizer on Java test designed to cause an error ([#15753](https://github.com/rapidsai/cudf/pull/15753)) [@jlowe](https://github.com/jlowe)
-- Handle empty dataframe object with index present in setitem of `loc` ([#15752](https://github.com/rapidsai/cudf/pull/15752)) [@galipremsagar](https://github.com/galipremsagar)
-- Eliminate circular reference in DataFrame/Series.iloc/loc ([#15749](https://github.com/rapidsai/cudf/pull/15749)) [@mroeschke](https://github.com/mroeschke)
-- Cap the absolute row index per pass in parquet chunked reader. ([#15735](https://github.com/rapidsai/cudf/pull/15735)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix `Index.repeat` for `datetime64` types ([#15722](https://github.com/rapidsai/cudf/pull/15722)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix multibyte check for case convert for large strings ([#15721](https://github.com/rapidsai/cudf/pull/15721)) [@davidwendt](https://github.com/davidwendt)
-- Fix `get_loc` to properly fetch results from an index that is in decreasing order ([#15719](https://github.com/rapidsai/cudf/pull/15719)) [@galipremsagar](https://github.com/galipremsagar)
-- Return same type as the original index for `.loc` operations ([#15717](https://github.com/rapidsai/cudf/pull/15717)) [@galipremsagar](https://github.com/galipremsagar)
-- Correct static builds + static arrow ([#15715](https://github.com/rapidsai/cudf/pull/15715)) [@robertmaynard](https://github.com/robertmaynard)
-- Raise errors for unsupported operations on certain types ([#15712](https://github.com/rapidsai/cudf/pull/15712)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix ColumnAccessor caching of nrows if empty previously ([#15710](https://github.com/rapidsai/cudf/pull/15710)) [@mroeschke](https://github.com/mroeschke)
-- Allow `None` when `nan_as_null=False` in column constructor ([#15709](https://github.com/rapidsai/cudf/pull/15709)) [@galipremsagar](https://github.com/galipremsagar)
-- Refine `CudaTest.testCudaException` in case throwing wrong type of CudaError under aarch64 ([#15706](https://github.com/rapidsai/cudf/pull/15706)) [@sperlingxx](https://github.com/sperlingxx)
-- Fix maxima of categorical column ([#15701](https://github.com/rapidsai/cudf/pull/15701)) [@rjzamora](https://github.com/rjzamora)
-- Add proxy for inplace operations in `cudf.pandas` ([#15695](https://github.com/rapidsai/cudf/pull/15695)) [@galipremsagar](https://github.com/galipremsagar)
-- Make `nan_as_null` behavior consistent across all APIs ([#15692](https://github.com/rapidsai/cudf/pull/15692)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix CI s3 api command to fetch latest results ([#15687](https://github.com/rapidsai/cudf/pull/15687)) [@galipremsagar](https://github.com/galipremsagar)
-- Add `NumpyExtensionArray` proxy type in `cudf.pandas` ([#15686](https://github.com/rapidsai/cudf/pull/15686)) [@galipremsagar](https://github.com/galipremsagar)
-- Properly implement binaryops for proxy types ([#15684](https://github.com/rapidsai/cudf/pull/15684)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix copy assignment and the comparison operator of `rmm_host_allocator` ([#15677](https://github.com/rapidsai/cudf/pull/15677)) [@vuule](https://github.com/vuule)
-- Fix multi-source reading in JSON byte range reader ([#15671](https://github.com/rapidsai/cudf/pull/15671)) [@shrshi](https://github.com/shrshi)
-- Return `int64` when pandas compatible mode is turned on for `get_indexer` ([#15659](https://github.com/rapidsai/cudf/pull/15659)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix Index contains for error validations and float vs int comparisons ([#15657](https://github.com/rapidsai/cudf/pull/15657)) [@galipremsagar](https://github.com/galipremsagar)
-- Preserve sub-second data for time scalars in column construction ([#15655](https://github.com/rapidsai/cudf/pull/15655)) [@galipremsagar](https://github.com/galipremsagar)
-- Check row limit size in cudf::strings::join_strings ([#15643](https://github.com/rapidsai/cudf/pull/15643)) [@davidwendt](https://github.com/davidwendt)
-- Enable sorting on column with nulls using query-planning ([#15639](https://github.com/rapidsai/cudf/pull/15639)) [@rjzamora](https://github.com/rjzamora)
-- Fix operator precedence problem in Parquet reader ([#15638](https://github.com/rapidsai/cudf/pull/15638)) [@etseidl](https://github.com/etseidl)
-- Fix decoding of dictionary encoded FIXED_LEN_BYTE_ARRAY data in Parquet reader ([#15601](https://github.com/rapidsai/cudf/pull/15601)) [@etseidl](https://github.com/etseidl)
-- Fix debug warnings/errors in from_arrow_device_test.cpp ([#15596](https://github.com/rapidsai/cudf/pull/15596)) [@davidwendt](https://github.com/davidwendt)
-- Add "collect" aggregation support to dask-cudf ([#15593](https://github.com/rapidsai/cudf/pull/15593)) [@rjzamora](https://github.com/rjzamora)
-- Fix categorical-accessor support and testing in dask-cudf ([#15591](https://github.com/rapidsai/cudf/pull/15591)) [@rjzamora](https://github.com/rjzamora)
-- Disable compute-sanitizer usage in CI tests with CUDA<11.6 ([#15584](https://github.com/rapidsai/cudf/pull/15584)) [@davidwendt](https://github.com/davidwendt)
-- Preserve RangeIndex.step in to_arrow/from_arrow ([#15581](https://github.com/rapidsai/cudf/pull/15581)) [@mroeschke](https://github.com/mroeschke)
-- Ignore new cupy warning ([#15574](https://github.com/rapidsai/cudf/pull/15574)) [@vyasr](https://github.com/vyasr)
-- Add cuda-sanitizer-api dependency for test-cpp matrix 11.4 ([#15573](https://github.com/rapidsai/cudf/pull/15573)) [@davidwendt](https://github.com/davidwendt)
-- Allow apply udf to reference global modules in cudf.pandas ([#15569](https://github.com/rapidsai/cudf/pull/15569)) [@mroeschke](https://github.com/mroeschke)
-- Fix deprecation warnings for json legacy reader ([#15563](https://github.com/rapidsai/cudf/pull/15563)) [@davidwendt](https://github.com/davidwendt)
-- Fix millisecond resampling in cudf Python ([#15560](https://github.com/rapidsai/cudf/pull/15560)) [@mroeschke](https://github.com/mroeschke)
-- Rename JSON_READER_OPTION to JSON_READER_OPTION_NVBENCH. ([#15553](https://github.com/rapidsai/cudf/pull/15553)) [@bdice](https://github.com/bdice)
-- Fix a JNI bug in JSON parsing fixup ([#15550](https://github.com/rapidsai/cudf/pull/15550)) [@revans2](https://github.com/revans2)
-- Remove conda channel setup from wheel CI image script. ([#15539](https://github.com/rapidsai/cudf/pull/15539)) [@bdice](https://github.com/bdice)
-- cudf.pandas: Series dt accessor is CombinedDatetimelikeProperties ([#15523](https://github.com/rapidsai/cudf/pull/15523)) [@wence-](https://github.com/wence-)
-- Fix for some compiler warnings in parquet/page_decode.cuh ([#15518](https://github.com/rapidsai/cudf/pull/15518)) [@etseidl](https://github.com/etseidl)
-- Fix exponent overflow in strings-to-double conversion ([#15517](https://github.com/rapidsai/cudf/pull/15517)) [@davidwendt](https://github.com/davidwendt)
-- nanoarrow uses package override for proper pinned versions generation ([#15515](https://github.com/rapidsai/cudf/pull/15515)) [@robertmaynard](https://github.com/robertmaynard)
-- Remove index name overrides in dask-cudf pyarrow table dispatch ([#15514](https://github.com/rapidsai/cudf/pull/15514)) [@charlesbluca](https://github.com/charlesbluca)
-- Fix async synchronization issues in json_column.cu ([#15497](https://github.com/rapidsai/cudf/pull/15497)) [@karthikeyann](https://github.com/karthikeyann)
-- Add new patch to hide more CCCL APIs ([#15493](https://github.com/rapidsai/cudf/pull/15493)) [@vyasr](https://github.com/vyasr)
-- Make improvements in pandas-test reporting ([#15485](https://github.com/rapidsai/cudf/pull/15485)) [@galipremsagar](https://github.com/galipremsagar)
-- Fixed page data truncation in parquet writer under certain conditions. ([#15474](https://github.com/rapidsai/cudf/pull/15474)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Only use data_type constructor with scale for decimal types ([#15472](https://github.com/rapidsai/cudf/pull/15472)) [@wence-](https://github.com/wence-)
-- Avoid "p2p" shuffle as a default when `dask_cudf` is imported ([#15469](https://github.com/rapidsai/cudf/pull/15469)) [@rjzamora](https://github.com/rjzamora)
-- Fix debug build errors from to_arrow_device_test.cpp ([#15463](https://github.com/rapidsai/cudf/pull/15463)) [@davidwendt](https://github.com/davidwendt)
-- Fix base_normalator::integer_sizeof_fn integer dispatch ([#15457](https://github.com/rapidsai/cudf/pull/15457)) [@davidwendt](https://github.com/davidwendt)
-- Allow consumers of static builds to find nanoarrow ([#15456](https://github.com/rapidsai/cudf/pull/15456)) [@robertmaynard](https://github.com/robertmaynard)
-- Allow jit compilation when using a splayed CUDA toolkit ([#15451](https://github.com/rapidsai/cudf/pull/15451)) [@robertmaynard](https://github.com/robertmaynard)
-- Handle case of scan aggregation in groupby-transform ([#15450](https://github.com/rapidsai/cudf/pull/15450)) [@wence-](https://github.com/wence-)
-- Test static builds in CI and fix nanoarrow configure ([#15437](https://github.com/rapidsai/cudf/pull/15437)) [@vyasr](https://github.com/vyasr)
-- Fixes potential race in JSON parser when parsing JSON lines format and when recovering from invalid lines ([#15419](https://github.com/rapidsai/cudf/pull/15419)) [@elstehle](https://github.com/elstehle)
-- Fix errors in chunked ORC writer when no tables were (successfully) written ([#15393](https://github.com/rapidsai/cudf/pull/15393)) [@vuule](https://github.com/vuule)
-- Support implicit array conversion with query-planning enabled ([#15378](https://github.com/rapidsai/cudf/pull/15378)) [@rjzamora](https://github.com/rjzamora)
-- Fix arrow-based round trip of empty dataframes ([#15373](https://github.com/rapidsai/cudf/pull/15373)) [@wence-](https://github.com/wence-)
-- Remove empty elements from exploded character-ngrams output ([#15371](https://github.com/rapidsai/cudf/pull/15371)) [@davidwendt](https://github.com/davidwendt)
-- Remove boundscheck=False setting in cython files ([#15362](https://github.com/rapidsai/cudf/pull/15362)) [@wence-](https://github.com/wence-)
-- Patch dask-expr `var` logic in dask-cudf ([#15347](https://github.com/rapidsai/cudf/pull/15347)) [@rjzamora](https://github.com/rjzamora)
-- Fix for logical and syntactical errors in libcudf c++ examples ([#15346](https://github.com/rapidsai/cudf/pull/15346)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Disable dask-expr in docs builds. ([#15343](https://github.com/rapidsai/cudf/pull/15343)) [@bdice](https://github.com/bdice)
-- Apply the cuFile error work around to data_sink as well ([#15335](https://github.com/rapidsai/cudf/pull/15335)) [@vuule](https://github.com/vuule)
-- Fix parquet predicate filtering with column projection ([#15113](https://github.com/rapidsai/cudf/pull/15113)) [@karthikeyann](https://github.com/karthikeyann)
-- Check column type equality, handling nested types correctly. ([#14531](https://github.com/rapidsai/cudf/pull/14531)) [@bdice](https://github.com/bdice)
-
-## 📖 Documentation
-
-- Fix docs for IO readers and strings_convert ([#15842](https://github.com/rapidsai/cudf/pull/15842)) [@bdice](https://github.com/bdice)
-- Update cudf.pandas docs for GA ([#15744](https://github.com/rapidsai/cudf/pull/15744)) [@beckernick](https://github.com/beckernick)
-- Add contributing warning about circular imports ([#15691](https://github.com/rapidsai/cudf/pull/15691)) [@er-eis](https://github.com/er-eis)
-- Update libcudf developer guide for strings offsets column ([#15661](https://github.com/rapidsai/cudf/pull/15661)) [@davidwendt](https://github.com/davidwendt)
-- Update developer guide with device_async_resource_ref guidelines ([#15562](https://github.com/rapidsai/cudf/pull/15562)) [@harrism](https://github.com/harrism)
-- DOC: add pandas intersphinx mapping ([#15531](https://github.com/rapidsai/cudf/pull/15531)) [@raybellwaves](https://github.com/raybellwaves)
-- rm-dup-doc in frame.py ([#15530](https://github.com/rapidsai/cudf/pull/15530)) [@raybellwaves](https://github.com/raybellwaves)
-- Update CONTRIBUTING.md to use latest cuda env ([#15467](https://github.com/rapidsai/cudf/pull/15467)) [@raybellwaves](https://github.com/raybellwaves)
-- Doc: interleave columns pandas compat ([#15383](https://github.com/rapidsai/cudf/pull/15383)) [@raybellwaves](https://github.com/raybellwaves)
-- Simplified README Examples ([#15338](https://github.com/rapidsai/cudf/pull/15338)) [@wkaisertexas](https://github.com/wkaisertexas)
-- Add debug tips section to libcudf developer guide ([#15329](https://github.com/rapidsai/cudf/pull/15329)) [@davidwendt](https://github.com/davidwendt)
-- Fix and clarify notes on result ordering ([#13255](https://github.com/rapidsai/cudf/pull/13255)) [@shwina](https://github.com/shwina)
-
-## 🚀 New Features
-
-- Add JNI bindings for zstd compression of NVCOMP. ([#15729](https://github.com/rapidsai/cudf/pull/15729)) [@firestarman](https://github.com/firestarman)
-- Fix spaces around CSV quoted strings ([#15727](https://github.com/rapidsai/cudf/pull/15727)) [@thabetx](https://github.com/thabetx)
-- Add default pinned pool that falls back to new pinned allocations ([#15665](https://github.com/rapidsai/cudf/pull/15665)) [@vuule](https://github.com/vuule)
-- Overhaul ops-codeowners coverage ([#15660](https://github.com/rapidsai/cudf/pull/15660)) [@raydouglass](https://github.com/raydouglass)
-- Concatenate dictionary of objects along axis=1 ([#15623](https://github.com/rapidsai/cudf/pull/15623)) [@er-eis](https://github.com/er-eis)
-- Construct `pylibcudf` columns from objects supporting `__cuda_array_interface__` ([#15615](https://github.com/rapidsai/cudf/pull/15615)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Expose some Parquet per-column configuration options via the python API ([#15613](https://github.com/rapidsai/cudf/pull/15613)) [@etseidl](https://github.com/etseidl)
-- Migrate string `find` operations to `pylibcudf` ([#15604](https://github.com/rapidsai/cudf/pull/15604)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Round trip FIXED_LEN_BYTE_ARRAY data properly in Parquet writer ([#15600](https://github.com/rapidsai/cudf/pull/15600)) [@etseidl](https://github.com/etseidl)
-- Reading multi-line JSON in string columns using runtime configurable delimiter ([#15556](https://github.com/rapidsai/cudf/pull/15556)) [@shrshi](https://github.com/shrshi)
-- Remove public gtest dependency from libcudf conda package ([#15534](https://github.com/rapidsai/cudf/pull/15534)) [@robertmaynard](https://github.com/robertmaynard)
-- Fea/move to latest nanoarrow ([#15526](https://github.com/rapidsai/cudf/pull/15526)) [@robertmaynard](https://github.com/robertmaynard)
-- Migrate string `case` operations to `pylibcudf` ([#15489](https://github.com/rapidsai/cudf/pull/15489)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add Parquet encoding statistics to column chunk metadata ([#15452](https://github.com/rapidsai/cudf/pull/15452)) [@etseidl](https://github.com/etseidl)
-- Implement JNI for chunked ORC reader ([#15446](https://github.com/rapidsai/cudf/pull/15446)) [@ttnghia](https://github.com/ttnghia)
-- Add some missing optional fields to the Parquet RowGroup metadata ([#15421](https://github.com/rapidsai/cudf/pull/15421)) [@etseidl](https://github.com/etseidl)
-- Adding parquet transcoding example ([#15420](https://github.com/rapidsai/cudf/pull/15420)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Add fields to Parquet Statistics structure that were added in parquet-format 2.10 ([#15412](https://github.com/rapidsai/cudf/pull/15412)) [@etseidl](https://github.com/etseidl)
-- Add option to Parquet writer to skip compressing individual columns ([#15411](https://github.com/rapidsai/cudf/pull/15411)) [@etseidl](https://github.com/etseidl)
-- Add BYTE_STREAM_SPLIT support to Parquet ([#15311](https://github.com/rapidsai/cudf/pull/15311)) [@etseidl](https://github.com/etseidl)
-- Introduce benchmark suite for JSON reader options ([#15124](https://github.com/rapidsai/cudf/pull/15124)) [@shrshi](https://github.com/shrshi)
-- Implement ORC chunked reader ([#15094](https://github.com/rapidsai/cudf/pull/15094)) [@ttnghia](https://github.com/ttnghia)
-- Extend cudf devcontainers to specify jitify2 kernel cache ([#15068](https://github.com/rapidsai/cudf/pull/15068)) [@robertmaynard](https://github.com/robertmaynard)
-- Add `to_arrow_device` function to cudf interop using nanoarrow ([#15047](https://github.com/rapidsai/cudf/pull/15047)) [@zeroshade](https://github.com/zeroshade)
-- Add JSON option to prune columns ([#14996](https://github.com/rapidsai/cudf/pull/14996)) [@karthikeyann](https://github.com/karthikeyann)
-
-## 🛠️ Improvements
-
-- Deprecate `Groupby.collect` ([#15808](https://github.com/rapidsai/cudf/pull/15808)) [@galipremsagar](https://github.com/galipremsagar)
-- Raise FileNotFoundError when a literal JSON string that looks like a json filename is passed ([#15806](https://github.com/rapidsai/cudf/pull/15806)) [@lithomas1](https://github.com/lithomas1)
-- Deprecate `divisions='quantile'` support in `set_index` ([#15804](https://github.com/rapidsai/cudf/pull/15804)) [@rjzamora](https://github.com/rjzamora)
-- Improve performance of Series.to_numpy/to_cupy ([#15792](https://github.com/rapidsai/cudf/pull/15792)) [@mroeschke](https://github.com/mroeschke)
-- Access `self.index` instead of `self._index` where possible ([#15781](https://github.com/rapidsai/cudf/pull/15781)) [@mroeschke](https://github.com/mroeschke)
-- Support filtered I/O in `chunked_parquet_reader` and simplify the use of `parquet_reader_options` ([#15764](https://github.com/rapidsai/cudf/pull/15764)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Avoid index-to-column conversion in some DataFrame ops ([#15763](https://github.com/rapidsai/cudf/pull/15763)) [@mroeschke](https://github.com/mroeschke)
-- Fix `chunked_parquet_reader` behavior when input has no more rows to read ([#15757](https://github.com/rapidsai/cudf/pull/15757)) [@mhaseeb123](https://github.com/mhaseeb123)
-- [JNI] Expose java API for cudf::io::config_host_memory_resource ([#15745](https://github.com/rapidsai/cudf/pull/15745)) [@abellina](https://github.com/abellina)
-- Migrate all cpp pxd files into pylibcudf ([#15740](https://github.com/rapidsai/cudf/pull/15740)) [@vyasr](https://github.com/vyasr)
-- Validate and materialize iterators earlier in as_column ([#15739](https://github.com/rapidsai/cudf/pull/15739)) [@mroeschke](https://github.com/mroeschke)
-- Push some as_column arrow logic to ColumnBase.from_arrow ([#15738](https://github.com/rapidsai/cudf/pull/15738)) [@mroeschke](https://github.com/mroeschke)
-- Expose stream parameter in public reduction APIs ([#15737](https://github.com/rapidsai/cudf/pull/15737)) [@srinivasyadav18](https://github.com/srinivasyadav18)
-- remove unnecessary 'setuptools' host dependency, simplify dependencies.yaml ([#15736](https://github.com/rapidsai/cudf/pull/15736)) [@jameslamb](https://github.com/jameslamb)
-- Defer to C++ equality and hashing for pylibcudf DataType and Aggregation objects ([#15732](https://github.com/rapidsai/cudf/pull/15732)) [@wence-](https://github.com/wence-)
-- Implement null-aware NOT_EQUALS binop ([#15731](https://github.com/rapidsai/cudf/pull/15731)) [@wence-](https://github.com/wence-)
-- Fix split-record result list column offset type ([#15707](https://github.com/rapidsai/cudf/pull/15707)) [@davidwendt](https://github.com/davidwendt)
-- Upgrade `arrow` to `16` ([#15703](https://github.com/rapidsai/cudf/pull/15703)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove experimental namespace from make_strings_children ([#15702](https://github.com/rapidsai/cudf/pull/15702)) [@davidwendt](https://github.com/davidwendt)
-- Rework get_json_object benchmark to use nvbench ([#15698](https://github.com/rapidsai/cudf/pull/15698)) [@davidwendt](https://github.com/davidwendt)
-- Rework some python tests of Parquet delta encodings ([#15693](https://github.com/rapidsai/cudf/pull/15693)) [@etseidl](https://github.com/etseidl)
-- Skeleton cudf polars package ([#15688](https://github.com/rapidsai/cudf/pull/15688)) [@wence-](https://github.com/wence-)
-- Upgrade pre commit hooks ([#15685](https://github.com/rapidsai/cudf/pull/15685)) [@wence-](https://github.com/wence-)
-- Allow `fillna` to validate for `CategoricalColumn.fillna` ([#15683](https://github.com/rapidsai/cudf/pull/15683)) [@galipremsagar](https://github.com/galipremsagar)
-- Misc Column cleanups ([#15682](https://github.com/rapidsai/cudf/pull/15682)) [@mroeschke](https://github.com/mroeschke)
-- Reducing runtime of JSON reader options benchmark ([#15681](https://github.com/rapidsai/cudf/pull/15681)) [@shrshi](https://github.com/shrshi)
-- Add `Timestamp` and `Timedelta` proxy types ([#15680](https://github.com/rapidsai/cudf/pull/15680)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove host_parse_nested_json. ([#15674](https://github.com/rapidsai/cudf/pull/15674)) [@bdice](https://github.com/bdice)
-- Reduce runtime for ParquetChunkedReaderInputLimitTest gtests ([#15672](https://github.com/rapidsai/cudf/pull/15672)) [@davidwendt](https://github.com/davidwendt)
-- Add large-strings gtest for cudf::interleave_columns ([#15669](https://github.com/rapidsai/cudf/pull/15669)) [@davidwendt](https://github.com/davidwendt)
-- Use experimental make_strings_children for multi-replace_re ([#15667](https://github.com/rapidsai/cudf/pull/15667)) [@davidwendt](https://github.com/davidwendt)
-- Enabled `Holiday` types in `cudf.pandas` ([#15664](https://github.com/rapidsai/cudf/pull/15664)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove obsolete `XFAIL` markers for query-planning ([#15662](https://github.com/rapidsai/cudf/pull/15662)) [@rjzamora](https://github.com/rjzamora)
-- Clean up join benchmarks ([#15644](https://github.com/rapidsai/cudf/pull/15644)) [@PointKernel](https://github.com/PointKernel)
-- Enable warnings as errors in custreamz ([#15642](https://github.com/rapidsai/cudf/pull/15642)) [@mroeschke](https://github.com/mroeschke)
-- Improve distinct join with set `retrieve` ([#15636](https://github.com/rapidsai/cudf/pull/15636)) [@PointKernel](https://github.com/PointKernel)
-- Fix -Werror=type-limits. ([#15635](https://github.com/rapidsai/cudf/pull/15635)) [@bdice](https://github.com/bdice)
-- Enable FutureWarnings/DeprecationWarnings as errors for dask_cudf ([#15634](https://github.com/rapidsai/cudf/pull/15634)) [@mroeschke](https://github.com/mroeschke)
-- Remove NVBench SHA override. ([#15633](https://github.com/rapidsai/cudf/pull/15633)) [@alliepiper](https://github.com/alliepiper)
-- Add support for large string columns to Parquet reader and writer ([#15632](https://github.com/rapidsai/cudf/pull/15632)) [@etseidl](https://github.com/etseidl)
-- Large strings support in MD5 and SHA hashers ([#15631](https://github.com/rapidsai/cudf/pull/15631)) [@davidwendt](https://github.com/davidwendt)
-- Fix make_offsets_child_column usage in cudf::strings::detail::shift ([#15630](https://github.com/rapidsai/cudf/pull/15630)) [@davidwendt](https://github.com/davidwendt)
-- Use experimental make_strings_children for strings convert ([#15629](https://github.com/rapidsai/cudf/pull/15629)) [@davidwendt](https://github.com/davidwendt)
-- Forward-merge branch-24.04 to branch-24.06 ([#15627](https://github.com/rapidsai/cudf/pull/15627)) [@bdice](https://github.com/bdice)
-- Avoid accessing attributes via `_column` if not needed ([#15624](https://github.com/rapidsai/cudf/pull/15624)) [@mroeschke](https://github.com/mroeschke)
-- Make ColumnBase.__cuda_array_interface__ opt out instead of opt in ([#15622](https://github.com/rapidsai/cudf/pull/15622)) [@mroeschke](https://github.com/mroeschke)
-- Large strings support for cudf::gather ([#15621](https://github.com/rapidsai/cudf/pull/15621)) [@davidwendt](https://github.com/davidwendt)
-- Remove jni-docker-build workflow ([#15619](https://github.com/rapidsai/cudf/pull/15619)) [@bdice](https://github.com/bdice)
-- Support `DurationType` in cudf parquet reader via `arrow:schema` ([#15617](https://github.com/rapidsai/cudf/pull/15617)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Drop Centos7 support ([#15608](https://github.com/rapidsai/cudf/pull/15608)) [@NvTimLiu](https://github.com/NvTimLiu)
-- Use experimental make_strings_children for json/csv writers ([#15599](https://github.com/rapidsai/cudf/pull/15599)) [@davidwendt](https://github.com/davidwendt)
-- Use experimental make_strings_children for strings join/url_encode/slice ([#15598](https://github.com/rapidsai/cudf/pull/15598)) [@davidwendt](https://github.com/davidwendt)
-- Use experimental make_strings_children in nvtext APIs ([#15595](https://github.com/rapidsai/cudf/pull/15595)) [@davidwendt](https://github.com/davidwendt)
-- Migrate to `{{ stdlib("c") }}` ([#15594](https://github.com/rapidsai/cudf/pull/15594)) [@hcho3](https://github.com/hcho3)
-- Deprecate `to/from_dask_dataframe` APIs in dask-cudf ([#15592](https://github.com/rapidsai/cudf/pull/15592)) [@rjzamora](https://github.com/rjzamora)
-- Minor fixups for future NumPy 2 compatibility ([#15590](https://github.com/rapidsai/cudf/pull/15590)) [@seberg](https://github.com/seberg)
-- Delay materializing RangeIndex in .reset_index ([#15588](https://github.com/rapidsai/cudf/pull/15588)) [@mroeschke](https://github.com/mroeschke)
-- Use experimental make_strings_children for capitalize/case/pad functions ([#15587](https://github.com/rapidsai/cudf/pull/15587)) [@davidwendt](https://github.com/davidwendt)
-- Use experimental make_strings_children for strings replace/filter/translate ([#15586](https://github.com/rapidsai/cudf/pull/15586)) [@davidwendt](https://github.com/davidwendt)
-- Add multithreaded parquet reader benchmarks. ([#15585](https://github.com/rapidsai/cudf/pull/15585)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Don't materialize column during RangeIndex methods ([#15582](https://github.com/rapidsai/cudf/pull/15582)) [@mroeschke](https://github.com/mroeschke)
-- Improve performance for cudf::strings::count_re ([#15578](https://github.com/rapidsai/cudf/pull/15578)) [@davidwendt](https://github.com/davidwendt)
-- Replace RangeIndex._start/_stop/_step with _range ([#15576](https://github.com/rapidsai/cudf/pull/15576)) [@mroeschke](https://github.com/mroeschke)
-- add --rm and --name to devcontainer run args ([#15572](https://github.com/rapidsai/cudf/pull/15572)) [@trxcllnt](https://github.com/trxcllnt)
-- Change the default dictionary policy in Parquet writer from `ALWAYS` to `ADAPTIVE` ([#15570](https://github.com/rapidsai/cudf/pull/15570)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Rename experimental JSON tests. ([#15568](https://github.com/rapidsai/cudf/pull/15568)) [@bdice](https://github.com/bdice)
-- Refactor JNI native dependency loading to allow returning of library path ([#15566](https://github.com/rapidsai/cudf/pull/15566)) [@jlowe](https://github.com/jlowe)
-- Remove protobuf and use parsed ORC statistics from libcudf ([#15564](https://github.com/rapidsai/cudf/pull/15564)) [@bdice](https://github.com/bdice)
-- Deprecate legacy JSON reader options. ([#15558](https://github.com/rapidsai/cudf/pull/15558)) [@bdice](https://github.com/bdice)
-- Use same .clang-format in cuDF JNI ([#15557](https://github.com/rapidsai/cudf/pull/15557)) [@bdice](https://github.com/bdice)
-- Large strings support for cudf::fill ([#15555](https://github.com/rapidsai/cudf/pull/15555)) [@davidwendt](https://github.com/davidwendt)
-- Upgrade upper bound pinning to `pandas-2.2.2` ([#15554](https://github.com/rapidsai/cudf/pull/15554)) [@galipremsagar](https://github.com/galipremsagar)
-- Work around issues with cccl main ([#15552](https://github.com/rapidsai/cudf/pull/15552)) [@miscco](https://github.com/miscco)
-- Enable pandas plotting unit tests for cudf.pandas ([#15547](https://github.com/rapidsai/cudf/pull/15547)) [@mroeschke](https://github.com/mroeschke)
-- Move timezone conversion logic to `DatetimeColumn` ([#15545](https://github.com/rapidsai/cudf/pull/15545)) [@mroeschke](https://github.com/mroeschke)
-- Large strings support for cudf::interleave_columns ([#15544](https://github.com/rapidsai/cudf/pull/15544)) [@davidwendt](https://github.com/davidwendt)
-- [skip ci] Switch back to 24.06 branch for pandas tests ([#15543](https://github.com/rapidsai/cudf/pull/15543)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove checks dependency from static-configure test job. ([#15542](https://github.com/rapidsai/cudf/pull/15542)) [@bdice](https://github.com/bdice)
-- Remove legacy JSON reader from Python ([#15538](https://github.com/rapidsai/cudf/pull/15538)) [@bdice](https://github.com/bdice)
-- Enable more ignored pandas unit tests for cudf.pandas ([#15535](https://github.com/rapidsai/cudf/pull/15535)) [@mroeschke](https://github.com/mroeschke)
-- Large strings support for cudf::clamp ([#15533](https://github.com/rapidsai/cudf/pull/15533)) [@davidwendt](https://github.com/davidwendt)
-- Remove version hard-coding ([#15529](https://github.com/rapidsai/cudf/pull/15529)) [@galipremsagar](https://github.com/galipremsagar)
-- Removing all batching code from parquet writer ([#15528](https://github.com/rapidsai/cudf/pull/15528)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Make some private class properties not settable ([#15527](https://github.com/rapidsai/cudf/pull/15527)) [@mroeschke](https://github.com/mroeschke)
-- Large strings support in regex replace APIs ([#15524](https://github.com/rapidsai/cudf/pull/15524)) [@davidwendt](https://github.com/davidwendt)
-- Skip pandas unit tests that crash pytest workers in `cudf.pandas` ([#15521](https://github.com/rapidsai/cudf/pull/15521)) [@mroeschke](https://github.com/mroeschke)
-- Preserve column metadata during more DataFrame operations ([#15519](https://github.com/rapidsai/cudf/pull/15519)) [@mroeschke](https://github.com/mroeschke)
-- Move to pandas-tests to a dedicated workflow file and trigger it from branch.yaml ([#15516](https://github.com/rapidsai/cudf/pull/15516)) [@galipremsagar](https://github.com/galipremsagar)
-- Large strings gtest fixture and utilities ([#15513](https://github.com/rapidsai/cudf/pull/15513)) [@davidwendt](https://github.com/davidwendt)
-- Convert libcudf resource parameters to rmm::device_async_resource_ref ([#15507](https://github.com/rapidsai/cudf/pull/15507)) [@harrism](https://github.com/harrism)
-- Relax protobuf lower bound to 3.20. ([#15506](https://github.com/rapidsai/cudf/pull/15506)) [@bdice](https://github.com/bdice)
-- Clean up index methods ([#15496](https://github.com/rapidsai/cudf/pull/15496)) [@mroeschke](https://github.com/mroeschke)
-- Update strings contains benchmarks to nvbench ([#15495](https://github.com/rapidsai/cudf/pull/15495)) [@davidwendt](https://github.com/davidwendt)
-- Update NVBench fixture to use new hooks, fix pinned memory segfault. ([#15492](https://github.com/rapidsai/cudf/pull/15492)) [@alliepiper](https://github.com/alliepiper)
([#15492](https://github.com/rapidsai/cudf/pull/15492)) [@alliepiper](https://github.com/alliepiper) -- Enable tests/scalar and test/series in cudf.pandas tests ([#15486](https://github.com/rapidsai/cudf/pull/15486)) [@mroeschke](https://github.com/mroeschke) -- Clean up __cuda_array_interface__ handling in as_column ([#15477](https://github.com/rapidsai/cudf/pull/15477)) [@mroeschke](https://github.com/mroeschke) -- Avoid .ordered and .categories from being settable in CategoricalColumn and CategoricalDtype ([#15475](https://github.com/rapidsai/cudf/pull/15475)) [@mroeschke](https://github.com/mroeschke) -- Ignore pandas tests for cudf.pandas that need motoserver ([#15468](https://github.com/rapidsai/cudf/pull/15468)) [@mroeschke](https://github.com/mroeschke) -- Use cached_property for NumericColumn.nan_count instead of ._nan_count variable ([#15466](https://github.com/rapidsai/cudf/pull/15466)) [@mroeschke](https://github.com/mroeschke) -- Add to_arrow_device() functions that accept views ([#15465](https://github.com/rapidsai/cudf/pull/15465)) [@davidwendt](https://github.com/davidwendt) -- Add custom status check workflow ([#15464](https://github.com/rapidsai/cudf/pull/15464)) [@galipremsagar](https://github.com/galipremsagar) -- Disable pandas 2.x clipboard tests in cudf.pandas tests ([#15462](https://github.com/rapidsai/cudf/pull/15462)) [@mroeschke](https://github.com/mroeschke) -- Enable tests/strings/test_api.py and tests/io/pytables in cudf.pandas tests ([#15461](https://github.com/rapidsai/cudf/pull/15461)) [@mroeschke](https://github.com/mroeschke) -- Enable test_parsing in cudf.pandas tests ([#15460](https://github.com/rapidsai/cudf/pull/15460)) [@mroeschke](https://github.com/mroeschke) -- Add `from_arrow_device` function to cudf interop using nanoarrow ([#15458](https://github.com/rapidsai/cudf/pull/15458)) [@zeroshade](https://github.com/zeroshade) -- Remove deprecated strings offsets_begin ([#15454](https://github.com/rapidsai/cudf/pull/15454)) [@davidwendt](https://github.com/davidwendt) -- Enable tests/windows/ in cudf.pandas tests ([#15444](https://github.com/rapidsai/cudf/pull/15444)) [@mroeschke](https://github.com/mroeschke) -- Enable tests/interchange/test_impl.py in cudf.pandas tests ([#15443](https://github.com/rapidsai/cudf/pull/15443)) [@mroeschke](https://github.com/mroeschke) -- Enable tests/io/test_user_agent.py in cudf pandas tests ([#15442](https://github.com/rapidsai/cudf/pull/15442)) [@mroeschke](https://github.com/mroeschke) -- Performance improvement in libcudf case conversion for long strings ([#15441](https://github.com/rapidsai/cudf/pull/15441)) [@davidwendt](https://github.com/davidwendt) -- Remove prior test skipping in run-pandas-tests with testing 2.2.1 ([#15440](https://github.com/rapidsai/cudf/pull/15440)) [@mroeschke](https://github.com/mroeschke) -- Support orc and text IO with dask-expr using legacy conversion ([#15439](https://github.com/rapidsai/cudf/pull/15439)) [@rjzamora](https://github.com/rjzamora) -- Floating <--> fixed-point conversion must now be called explicitly ([#15438](https://github.com/rapidsai/cudf/pull/15438)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) -- Unify Copy-On-Write and Spilling 
-- Enable ``dask_cudf`` json and s3 tests with query-planning on ([#15408](https://github.com/rapidsai/cudf/pull/15408)) [@rjzamora](https://github.com/rjzamora)
-- Bump ruff and codespell pre-commit checks ([#15407](https://github.com/rapidsai/cudf/pull/15407)) [@mroeschke](https://github.com/mroeschke)
-- Enable all tests for `arm` arch ([#15402](https://github.com/rapidsai/cudf/pull/15402)) [@galipremsagar](https://github.com/galipremsagar)
-- Bind `read_parquet_metadata` API to libcudf instead of pyarrow and extract `RowGroup` information ([#15398](https://github.com/rapidsai/cudf/pull/15398)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Optimizing multi-source byte range reading in JSON reader ([#15396](https://github.com/rapidsai/cudf/pull/15396)) [@shrshi](https://github.com/shrshi)
-- add correct labels to pandas_function_request.md ([#15381](https://github.com/rapidsai/cudf/pull/15381)) [@raybellwaves](https://github.com/raybellwaves)
-- Remove deprecated hash() and spark_murmurhash3_x86_32() ([#15375](https://github.com/rapidsai/cudf/pull/15375)) [@davidwendt](https://github.com/davidwendt)
-- Large strings support in cudf::merge ([#15374](https://github.com/rapidsai/cudf/pull/15374)) [@davidwendt](https://github.com/davidwendt)
-- Enable test-reporting for pandas pytests in CI ([#15369](https://github.com/rapidsai/cudf/pull/15369)) [@galipremsagar](https://github.com/galipremsagar)
-- Use logical types in Parquet reader ([#15365](https://github.com/rapidsai/cudf/pull/15365)) [@etseidl](https://github.com/etseidl)
-- Add experimental make_strings_children utility ([#15363](https://github.com/rapidsai/cudf/pull/15363)) [@davidwendt](https://github.com/davidwendt)
-- Forward-merge branch-24.04 to branch-24.06 ([#15349](https://github.com/rapidsai/cudf/pull/15349)) [@bdice](https://github.com/bdice)
-- Fix CMake files in libcudf C++ examples to use existing libcudf build if present ([#15348](https://github.com/rapidsai/cudf/pull/15348)) [@mhaseeb123](https://github.com/mhaseeb123)
-- Use ruff pydocstyle over pydocstyle pre-commit hook ([#15345](https://github.com/rapidsai/cudf/pull/15345)) [@mroeschke](https://github.com/mroeschke)
-- Refactor stream mode setup for gtests ([#15337](https://github.com/rapidsai/cudf/pull/15337)) [@davidwendt](https://github.com/davidwendt)
-- Benchmark decimal <--> floating conversions. ([#15334](https://github.com/rapidsai/cudf/pull/15334)) [@pmattione-nvidia](https://github.com/pmattione-nvidia)
-- Avoid duplicate dask-cudf testing ([#15333](https://github.com/rapidsai/cudf/pull/15333)) [@rjzamora](https://github.com/rjzamora)
-- Skip decode steps in Parquet reader when nullable columns have no nulls ([#15332](https://github.com/rapidsai/cudf/pull/15332)) [@etseidl](https://github.com/etseidl)
-- Update udf_cpp to use rapids_cpm_cccl. ([#15331](https://github.com/rapidsai/cudf/pull/15331)) [@bdice](https://github.com/bdice)
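For the multi-source byte-range JSON entry above, a rough usage sketch (the parameter shapes follow the public `read_json` signature as an assumption; illustrative only):

```python
# Sketch only: read a byte range (offset, size) from JSON Lines input;
# multiple sources can be passed as a list of paths.
import cudf

df = cudf.read_json("data.jsonl", lines=True, byte_range=(0, 1024 * 1024))
```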
-- Allow ``numeric_only=True`` for simple groupby reductions ([#15326](https://github.com/rapidsai/cudf/pull/15326)) [@rjzamora](https://github.com/rjzamora)
-- Drop CentOS 7 support. ([#15323](https://github.com/rapidsai/cudf/pull/15323)) [@bdice](https://github.com/bdice)
-- Rework cudf::find_and_replace_all to use gather-based make_strings_column ([#15305](https://github.com/rapidsai/cudf/pull/15305)) [@davidwendt](https://github.com/davidwendt)
-- First pass at adding testing for pylibcudf ([#15300](https://github.com/rapidsai/cudf/pull/15300)) [@vyasr](https://github.com/vyasr)
-- [FEA] Performance improvement for mixed left semi/anti join ([#15288](https://github.com/rapidsai/cudf/pull/15288)) [@tgujar](https://github.com/tgujar)
-- Rework cudf::replace_nulls to use strings::detail::copy_if_else ([#15286](https://github.com/rapidsai/cudf/pull/15286)) [@davidwendt](https://github.com/davidwendt)
-- Clean up special casing in `as_column` for non-typed input ([#15276](https://github.com/rapidsai/cudf/pull/15276)) [@mroeschke](https://github.com/mroeschke)
-- Large strings support in cudf::concatenate ([#15195](https://github.com/rapidsai/cudf/pull/15195)) [@davidwendt](https://github.com/davidwendt)
-- Use less _is_categorical_dtype ([#15148](https://github.com/rapidsai/cudf/pull/15148)) [@mroeschke](https://github.com/mroeschke)
-- Align date_range defaults with pandas, support tz ([#15139](https://github.com/rapidsai/cudf/pull/15139)) [@mroeschke](https://github.com/mroeschke)
-- `ModuleAccelerator` performance: cache the result of checking if a caller is in the denylist ([#15056](https://github.com/rapidsai/cudf/pull/15056)) [@shwina](https://github.com/shwina)
-- Use offsetalator in cudf::strings::replace functions ([#14824](https://github.com/rapidsai/cudf/pull/14824)) [@davidwendt](https://github.com/davidwendt)
-- Cleanup some timedelta/datetime column logic ([#14715](https://github.com/rapidsai/cudf/pull/14715)) [@mroeschke](https://github.com/mroeschke)
-- Refactor numpy array input in as_column ([#14651](https://github.com/rapidsai/cudf/pull/14651)) [@mroeschke](https://github.com/mroeschke)
-- Refactor joins for conditional semis and antis ([#14646](https://github.com/rapidsai/cudf/pull/14646)) [@DanialJavady96](https://github.com/DanialJavady96)
-- Eagerly populate the class dict for cudf.pandas proxy types ([#14534](https://github.com/rapidsai/cudf/pull/14534)) [@shwina](https://github.com/shwina)
-- Some additional kernel thread index refactoring. ([#14107](https://github.com/rapidsai/cudf/pull/14107)) [@bdice](https://github.com/bdice)
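The `numeric_only=True` entry above refers to dask_cudf groupby reductions. A minimal sketch, assuming the pandas-style keyword semantics (illustrative):

```python
# Sketch only: non-numeric columns are excluded from the reduction.
import cudf
import dask_cudf

df = cudf.DataFrame({"k": [1, 1, 2], "x": [1.0, 2.0, 3.0], "s": ["a", "b", "c"]})
ddf = dask_cudf.from_cudf(df, npartitions=1)
print(ddf.groupby("k").mean(numeric_only=True).compute())  # aggregates "x" only
```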
-
-# cuDF 24.04.00 (10 Apr 2024)
-
-## 🚨 Breaking Changes
-
-- Restructure pylibcudf/arrow interop facilities ([#15325](https://github.com/rapidsai/cudf/pull/15325)) [@vyasr](https://github.com/vyasr)
-- Change exceptions thrown by copying APIs ([#15319](https://github.com/rapidsai/cudf/pull/15319)) [@vyasr](https://github.com/vyasr)
-- Change strings_column_view::char_size to return int64 ([#15197](https://github.com/rapidsai/cudf/pull/15197)) [@davidwendt](https://github.com/davidwendt)
-- Upgrade to `arrow-14.0.2` ([#15108](https://github.com/rapidsai/cudf/pull/15108)) [@galipremsagar](https://github.com/galipremsagar)
-- Add support for `pandas-2.2` in `cudf` ([#15100](https://github.com/rapidsai/cudf/pull/15100)) [@galipremsagar](https://github.com/galipremsagar)
-- Deprecate cudf::hashing::spark_murmurhash3_x86_32 ([#15074](https://github.com/rapidsai/cudf/pull/15074)) [@davidwendt](https://github.com/davidwendt)
-- Align MultiIndex.get_indexer with pandas 2.2 change ([#15059](https://github.com/rapidsai/cudf/pull/15059)) [@mroeschke](https://github.com/mroeschke)
-- Raise an error on import for unsupported GPUs. ([#15053](https://github.com/rapidsai/cudf/pull/15053)) [@bdice](https://github.com/bdice)
-- Deprecate datelike isin casting strings to dates to match pandas 2.2 ([#15046](https://github.com/rapidsai/cudf/pull/15046)) [@mroeschke](https://github.com/mroeschke)
-- Align concat Series name behavior in pandas 2.2 ([#15032](https://github.com/rapidsai/cudf/pull/15032)) [@mroeschke](https://github.com/mroeschke)
-- Add `future_stack` to `DataFrame.stack` ([#15015](https://github.com/rapidsai/cudf/pull/15015)) [@galipremsagar](https://github.com/galipremsagar)
-- Deprecate groupby fillna ([#15000](https://github.com/rapidsai/cudf/pull/15000)) [@mroeschke](https://github.com/mroeschke)
-- Deprecate replace with categorical columns ([#14988](https://github.com/rapidsai/cudf/pull/14988)) [@mroeschke](https://github.com/mroeschke)
-- Deprecate delim_whitespace in read_csv for pandas 2.2 ([#14986](https://github.com/rapidsai/cudf/pull/14986)) [@mroeschke](https://github.com/mroeschke)
-- Deprecate parameters similar to pandas 2.2 ([#14984](https://github.com/rapidsai/cudf/pull/14984)) [@mroeschke](https://github.com/mroeschke)
-- Add missing atomic operators, refactor atomic operators, move atomic operators to detail namespace. ([#14962](https://github.com/rapidsai/cudf/pull/14962)) [@bdice](https://github.com/bdice)
-- Add `pandas-2.x` support in `cudf` ([#14916](https://github.com/rapidsai/cudf/pull/14916)) [@galipremsagar](https://github.com/galipremsagar)
-- Use cuco::static_set in the hash-based groupby ([#14813](https://github.com/rapidsai/cudf/pull/14813)) [@PointKernel](https://github.com/PointKernel)
-
-## 🐛 Bug Fixes
-
-- Fix an issue with creating a series from scalar when `dtype='category'` ([#15476](https://github.com/rapidsai/cudf/pull/15476)) [@galipremsagar](https://github.com/galipremsagar)
-- Update pre-commit-hooks to v0.0.3 ([#15355](https://github.com/rapidsai/cudf/pull/15355)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- [BUG][JNI] Trigger MemoryBuffer.onClosed after memory is freed ([#15351](https://github.com/rapidsai/cudf/pull/15351)) [@abellina](https://github.com/abellina)
-- Fix an issue with multiple short list rowgroups using the Parquet chunked reader. ([#15342](https://github.com/rapidsai/cudf/pull/15342)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Avoid importing dask-expr if "query-planning" config is `False` ([#15340](https://github.com/rapidsai/cudf/pull/15340)) [@rjzamora](https://github.com/rjzamora)
-- Fix gtests/ERROR_TEST errors when run in Debug ([#15317](https://github.com/rapidsai/cudf/pull/15317)) [@davidwendt](https://github.com/davidwendt)
-- Fix OOB read in `inflate_kernel` ([#15309](https://github.com/rapidsai/cudf/pull/15309)) [@vuule](https://github.com/vuule)
-- Work around a cuFile error when running CSV tests with memcheck ([#15293](https://github.com/rapidsai/cudf/pull/15293)) [@vuule](https://github.com/vuule)
-- Fix Doxygen upload directory ([#15291](https://github.com/rapidsai/cudf/pull/15291)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Fix Doxygen check ([#15289](https://github.com/rapidsai/cudf/pull/15289)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Reintroduce PANDAS_GE_220 import ([#15287](https://github.com/rapidsai/cudf/pull/15287)) [@wence-](https://github.com/wence-)
-- Fix mean computation for the geometric distribution in the data generator ([#15282](https://github.com/rapidsai/cudf/pull/15282)) [@vuule](https://github.com/vuule)
-- Fix Parquet decimal64 stats ([#15281](https://github.com/rapidsai/cudf/pull/15281)) [@etseidl](https://github.com/etseidl)
-- Make linking of nvtx3-cpp BUILD_LOCAL_INTERFACE ([#15271](https://github.com/rapidsai/cudf/pull/15271)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Workaround compute-sanitizer memcheck bug ([#15259](https://github.com/rapidsai/cudf/pull/15259)) [@davidwendt](https://github.com/davidwendt)
-- Cleanup `hostdevice_vector` and add more APIs ([#15252](https://github.com/rapidsai/cudf/pull/15252)) [@ttnghia](https://github.com/ttnghia)
-- Fix number of rows in randomly generated lists columns ([#15248](https://github.com/rapidsai/cudf/pull/15248)) [@vuule](https://github.com/vuule)
-- Fix wrong output for `collect_list`/`collect_set` of lists column ([#15243](https://github.com/rapidsai/cudf/pull/15243)) [@ttnghia](https://github.com/ttnghia)
-- Fix testchunkedPackTwoPasses to copy from the bounce buffer ([#15220](https://github.com/rapidsai/cudf/pull/15220)) [@abellina](https://github.com/abellina)
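For the "query-planning" entry above: the config is read at import time, so it must be set before `dask_cudf` is imported. A sketch (config key per Dask at the time; illustrative):

```python
# Sketch only: disable query planning so dask-expr is never imported.
import dask

dask.config.set({"dataframe.query-planning": False})

import dask_cudf  # noqa: E402  (imported after the config is set)
```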
-- Fix accessing `.columns` by an external API ([#15212](https://github.com/rapidsai/cudf/pull/15212)) [@galipremsagar](https://github.com/galipremsagar)
-- [JNI] Disable testChunkedPackTwoPasses for now ([#15210](https://github.com/rapidsai/cudf/pull/15210)) [@abellina](https://github.com/abellina)
-- Update labeler and codeowner configs for CMake files ([#15208](https://github.com/rapidsai/cudf/pull/15208)) [@PointKernel](https://github.com/PointKernel)
-- Avoid dict normalization in ``__dask_tokenize__`` ([#15187](https://github.com/rapidsai/cudf/pull/15187)) [@rjzamora](https://github.com/rjzamora)
-- Fix memcheck error in distinct inner join ([#15164](https://github.com/rapidsai/cudf/pull/15164)) [@PointKernel](https://github.com/PointKernel)
-- Remove unneeded script parameters in test_cpp_memcheck.sh ([#15158](https://github.com/rapidsai/cudf/pull/15158)) [@davidwendt](https://github.com/davidwendt)
-- Fix `ListColumn.to_pandas()` to retain `list` type ([#15155](https://github.com/rapidsai/cudf/pull/15155)) [@galipremsagar](https://github.com/galipremsagar)
-- Avoid factorization in MultiIndex.to_pandas ([#15150](https://github.com/rapidsai/cudf/pull/15150)) [@mroeschke](https://github.com/mroeschke)
-- Fix GroupBy.get_group and GroupBy.indices ([#15143](https://github.com/rapidsai/cudf/pull/15143)) [@wence-](https://github.com/wence-)
-- Remove `const` from `range_window_bounds::_extent`. ([#15138](https://github.com/rapidsai/cudf/pull/15138)) [@mythrocks](https://github.com/mythrocks)
-- DataFrame.columns = ... retains RangeIndex & set dtype ([#15129](https://github.com/rapidsai/cudf/pull/15129)) [@mroeschke](https://github.com/mroeschke)
-- Correctly handle output for `GroupBy.apply` when chunk results are reindexed series ([#15109](https://github.com/rapidsai/cudf/pull/15109)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Fix Series.groupby.shift with a MultiIndex ([#15098](https://github.com/rapidsai/cudf/pull/15098)) [@mroeschke](https://github.com/mroeschke)
-- Fix reductions when DataFrame has MultiIndex columns ([#15097](https://github.com/rapidsai/cudf/pull/15097)) [@mroeschke](https://github.com/mroeschke)
-- Fix deprecation warnings for deprecated hash() calls ([#15095](https://github.com/rapidsai/cudf/pull/15095)) [@davidwendt](https://github.com/davidwendt)
-- Add support for arrow `large_string` in `cudf` ([#15093](https://github.com/rapidsai/cudf/pull/15093)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix `sort_values` pytest failure with pandas-2.x regression ([#15092](https://github.com/rapidsai/cudf/pull/15092)) [@galipremsagar](https://github.com/galipremsagar)
-- Resolve path parsing issues in `get_json_object` ([#15082](https://github.com/rapidsai/cudf/pull/15082)) [@SurajAralihalli](https://github.com/SurajAralihalli)
-- Fix bugs in handling of delta encodings ([#15075](https://github.com/rapidsai/cudf/pull/15075)) [@etseidl](https://github.com/etseidl)
-- Fix `is_device_write_preferred` in `void_sink` and `user_sink_wrapper` ([#15064](https://github.com/rapidsai/cudf/pull/15064)) [@vuule](https://github.com/vuule)
-- Eliminate duplicate allocation of nested string columns ([#15061](https://github.com/rapidsai/cudf/pull/15061)) [@vuule](https://github.com/vuule)
-- Raise an error on import for unsupported GPUs. ([#15053](https://github.com/rapidsai/cudf/pull/15053)) [@bdice](https://github.com/bdice)
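For the Arrow `large_string` entry above, a minimal sketch (illustrative): a pyarrow `large_string` array now converts into a cudf string column like regular string input.

```python
# Sketch only: large_string input is accepted when building a Series.
import pyarrow as pa
import cudf

arr = pa.array(["hello", "world"], type=pa.large_string())
s = cudf.Series(arr)
print(s)
```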
-- Align concat Series name behavior in pandas 2.2 ([#15032](https://github.com/rapidsai/cudf/pull/15032)) [@mroeschke](https://github.com/mroeschke)
-- Fix `Index.difference` to handle duplicate values when one of the inputs is empty ([#15016](https://github.com/rapidsai/cudf/pull/15016)) [@galipremsagar](https://github.com/galipremsagar)
-- Add `future_stack` to `DataFrame.stack` ([#15015](https://github.com/rapidsai/cudf/pull/15015)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix handling of values=None in pylibcudf GroupBy.get_groups ([#14998](https://github.com/rapidsai/cudf/pull/14998)) [@shwina](https://github.com/shwina)
-- Fix `DataFrame.sort_index` to respect `ignore_index` on all axis ([#14995](https://github.com/rapidsai/cudf/pull/14995)) [@galipremsagar](https://github.com/galipremsagar)
-- Raise for pyarrow array that is tz-aware ([#14980](https://github.com/rapidsai/cudf/pull/14980)) [@mroeschke](https://github.com/mroeschke)
-- Direct ``SeriesGroupBy.aggregate`` to ``SeriesGroupBy.agg`` ([#14971](https://github.com/rapidsai/cudf/pull/14971)) [@rjzamora](https://github.com/rjzamora)
-- Respect IntervalDtype and CategoricalDtype objects passed by users ([#14961](https://github.com/rapidsai/cudf/pull/14961)) [@mroeschke](https://github.com/mroeschke)
-- unset `CUDF_SPILL` after a pytest ([#14958](https://github.com/rapidsai/cudf/pull/14958)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix Null literals to be not parsed as string when mixed types as string is enabled in JSON reader ([#14939](https://github.com/rapidsai/cudf/pull/14939)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix chunked reads of Parquet delta encoded pages ([#14921](https://github.com/rapidsai/cudf/pull/14921)) [@etseidl](https://github.com/etseidl)
-- Fix reading offset for data stream in ORC reader ([#14911](https://github.com/rapidsai/cudf/pull/14911)) [@ttnghia](https://github.com/ttnghia)
-- Enable sanitizer check for a test case testORCReadAndWriteForDecimal128 ([#14897](https://github.com/rapidsai/cudf/pull/14897)) [@res-life](https://github.com/res-life)
-- Fix dask token normalization ([#14829](https://github.com/rapidsai/cudf/pull/14829)) [@rjzamora](https://github.com/rjzamora)
-- Fix 24.04 versions ([#14825](https://github.com/rapidsai/cudf/pull/14825)) [@raydouglass](https://github.com/raydouglass)
-- Ensure slow private attrs are maybe proxies ([#14380](https://github.com/rapidsai/cudf/pull/14380)) [@mroeschke](https://github.com/mroeschke)
-
-## 📖 Documentation
-
-- Ignore DLManagedTensor in the docs build ([#15392](https://github.com/rapidsai/cudf/pull/15392)) [@davidwendt](https://github.com/davidwendt)
-- Revert "Temporarily disable docs errors. ([#15265](https://github.com/rapidsai/cudf/pull/15265))" ([#15269](https://github.com/rapidsai/cudf/pull/15269)) [@bdice](https://github.com/bdice)
-- Temporarily disable docs errors. ([#15265](https://github.com/rapidsai/cudf/pull/15265)) [@bdice](https://github.com/bdice)
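Regarding the `future_stack` entry above: the keyword mirrors the pandas 2.x stacking rework. A minimal sketch, assuming cudf follows the pandas semantics (illustrative):

```python
# Sketch only: the "future" implementation keeps NA rows and ignores dropna.
import cudf

df = cudf.DataFrame({"a": [1, 2], "b": [3, 4]})
print(df.stack(future_stack=True))
```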
-- Update `developer_guide.md` with new guidance on quoted internal includes ([#15238](https://github.com/rapidsai/cudf/pull/15238)) [@harrism](https://github.com/harrism)
-- Fix broken link for developer guide ([#15025](https://github.com/rapidsai/cudf/pull/15025)) [@sanjana098](https://github.com/sanjana098)
-- [DOC] Update typo in docs example of structs_column_wrapper ([#14949](https://github.com/rapidsai/cudf/pull/14949)) [@karthikeyann](https://github.com/karthikeyann)
-- Update cudf.pandas FAQ. ([#14940](https://github.com/rapidsai/cudf/pull/14940)) [@bdice](https://github.com/bdice)
-- Optimize doc builds ([#14856](https://github.com/rapidsai/cudf/pull/14856)) [@vyasr](https://github.com/vyasr)
-- Add developer guideline to use east const. ([#14836](https://github.com/rapidsai/cudf/pull/14836)) [@bdice](https://github.com/bdice)
-- Document how cuDF is pronounced ([#14753](https://github.com/rapidsai/cudf/pull/14753)) [@pentschev](https://github.com/pentschev)
-- Notes convert to Pandas-compat ([#12641](https://github.com/rapidsai/cudf/pull/12641)) [@Touutae-lab](https://github.com/Touutae-lab)
-
-## 🚀 New Features
-
-- Address inconsistency in single quote normalization in JSON reader ([#15324](https://github.com/rapidsai/cudf/pull/15324)) [@shrshi](https://github.com/shrshi)
-- Use JNI pinned pool resource with cuIO ([#15255](https://github.com/rapidsai/cudf/pull/15255)) [@abellina](https://github.com/abellina)
-- Add DELTA_BYTE_ARRAY encoder for Parquet ([#15239](https://github.com/rapidsai/cudf/pull/15239)) [@etseidl](https://github.com/etseidl)
-- Migrate filling operations to pylibcudf ([#15225](https://github.com/rapidsai/cudf/pull/15225)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- [JNI] rmm based pinned pool ([#15219](https://github.com/rapidsai/cudf/pull/15219)) [@abellina](https://github.com/abellina)
-- Implement zero-copy host buffer source instead of using an arrow implementation ([#15189](https://github.com/rapidsai/cudf/pull/15189)) [@vuule](https://github.com/vuule)
-- Enable creation of columns from scalar ([#15181](https://github.com/rapidsai/cudf/pull/15181)) [@vyasr](https://github.com/vyasr)
-- Use NVTX from GitHub. ([#15178](https://github.com/rapidsai/cudf/pull/15178)) [@bdice](https://github.com/bdice)
-- Implement `segmented_row_bit_count` for computing row sizes by segments of rows ([#15169](https://github.com/rapidsai/cudf/pull/15169)) [@ttnghia](https://github.com/ttnghia)
-- Implement search using pylibcudf ([#15166](https://github.com/rapidsai/cudf/pull/15166)) [@vyasr](https://github.com/vyasr)
-- Add distinct left join ([#15149](https://github.com/rapidsai/cudf/pull/15149)) [@PointKernel](https://github.com/PointKernel)
-- Add cardinality control for groupby benchs with flat types ([#15134](https://github.com/rapidsai/cudf/pull/15134)) [@PointKernel](https://github.com/PointKernel)
-- Add ability to request Parquet encodings on a per-column basis ([#15081](https://github.com/rapidsai/cudf/pull/15081)) [@etseidl](https://github.com/etseidl)
-- Automate include grouping order in .clang-format ([#15063](https://github.com/rapidsai/cudf/pull/15063)) [@harrism](https://github.com/harrism)
-- Requesting a clean build directory also clears Jitify cache ([#15052](https://github.com/rapidsai/cudf/pull/15052)) [@robertmaynard](https://github.com/robertmaynard)
-- API for JSON unquoted whitespace normalization ([#15033](https://github.com/rapidsai/cudf/pull/15033)) [@shrshi](https://github.com/shrshi)
-- Implement concatenate, lists.explode, merge, sorting, and stream compaction in pylibcudf ([#15011](https://github.com/rapidsai/cudf/pull/15011)) [@vyasr](https://github.com/vyasr)
-- Implement replace in pylibcudf ([#15005](https://github.com/rapidsai/cudf/pull/15005)) [@vyasr](https://github.com/vyasr)
-- Add distinct key inner join ([#14990](https://github.com/rapidsai/cudf/pull/14990)) [@PointKernel](https://github.com/PointKernel)
-- Implement rolling in pylibcudf ([#14982](https://github.com/rapidsai/cudf/pull/14982)) [@vyasr](https://github.com/vyasr)
-- Implement joins in pylibcudf ([#14972](https://github.com/rapidsai/cudf/pull/14972)) [@vyasr](https://github.com/vyasr)
-- Implement scans and reductions in pylibcudf ([#14970](https://github.com/rapidsai/cudf/pull/14970)) [@vyasr](https://github.com/vyasr)
-- Rewrite cudf internals using pylibcudf groupby ([#14946](https://github.com/rapidsai/cudf/pull/14946)) [@vyasr](https://github.com/vyasr)
-- Implement groupby in pylibcudf ([#14945](https://github.com/rapidsai/cudf/pull/14945)) [@vyasr](https://github.com/vyasr)
-- Support casting of Map type to string in JSON reader ([#14936](https://github.com/rapidsai/cudf/pull/14936)) [@karthikeyann](https://github.com/karthikeyann)
-- POC for whitespace removal in input JSON data using FST ([#14931](https://github.com/rapidsai/cudf/pull/14931)) [@shrshi](https://github.com/shrshi)
-- Support for LZ4 compression in ORC and Parquet ([#14906](https://github.com/rapidsai/cudf/pull/14906)) [@vuule](https://github.com/vuule)
-- Remove supports_streams from cuDF custom memory resources. ([#14857](https://github.com/rapidsai/cudf/pull/14857)) [@harrism](https://github.com/harrism)
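For the LZ4 entry above, a rough sketch of how the Python writers would be asked for LZ4 output (the accepted compression spelling is an assumption; illustrative only):

```python
# Sketch only: request LZ4 compression from the Parquet and ORC writers.
import cudf

df = cudf.DataFrame({"x": [1, 2, 3]})
df.to_parquet("data.parquet", compression="LZ4")  # spelling assumed
df.to_orc("data.orc", compression="LZ4")
```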
-- Migrate unary operations to pylibcudf ([#14850](https://github.com/rapidsai/cudf/pull/14850)) [@vyasr](https://github.com/vyasr)
-- Migrate binary operations to pylibcudf ([#14821](https://github.com/rapidsai/cudf/pull/14821)) [@vyasr](https://github.com/vyasr)
-- Add row index and stripe size options to Python ORC chunked writer ([#14785](https://github.com/rapidsai/cudf/pull/14785)) [@vuule](https://github.com/vuule)
-- Support CUDA 12.2 ([#14712](https://github.com/rapidsai/cudf/pull/14712)) [@jameslamb](https://github.com/jameslamb)
-
-## 🛠️ Improvements
-
-- Use `conda env create --yes` instead of `--force` ([#15403](https://github.com/rapidsai/cudf/pull/15403)) [@bdice](https://github.com/bdice)
-- Restructure pylibcudf/arrow interop facilities ([#15325](https://github.com/rapidsai/cudf/pull/15325)) [@vyasr](https://github.com/vyasr)
-- Change exceptions thrown by copying APIs ([#15319](https://github.com/rapidsai/cudf/pull/15319)) [@vyasr](https://github.com/vyasr)
-- Enable branch testing for `cudf.pandas` ([#15316](https://github.com/rapidsai/cudf/pull/15316)) [@galipremsagar](https://github.com/galipremsagar)
-- Replace black with ruff-format ([#15312](https://github.com/rapidsai/cudf/pull/15312)) [@mroeschke](https://github.com/mroeschke)
-- This fixes an NPE when trying to read empty JSON data by adding a new API for missing information ([#15307](https://github.com/rapidsai/cudf/pull/15307)) [@revans2](https://github.com/revans2)
-- Address poor performance of Parquet string decoding ([#15304](https://github.com/rapidsai/cudf/pull/15304)) [@etseidl](https://github.com/etseidl)
-- Update script input name ([#15301](https://github.com/rapidsai/cudf/pull/15301)) [@AyodeAwe](https://github.com/AyodeAwe)
-- Make test_read_parquet_partitioned_filtered data deterministic ([#15296](https://github.com/rapidsai/cudf/pull/15296)) [@mroeschke](https://github.com/mroeschke)
-- Add timeout for `cudf.pandas` pandas tests ([#15284](https://github.com/rapidsai/cudf/pull/15284)) [@galipremsagar](https://github.com/galipremsagar)
-- Add upper bound to prevent usage of NumPy 2 ([#15283](https://github.com/rapidsai/cudf/pull/15283)) [@bdice](https://github.com/bdice)
-- Fix cudf::test::to_host return of host_vector ([#15263](https://github.com/rapidsai/cudf/pull/15263)) [@davidwendt](https://github.com/davidwendt)
-- Implement grouped product scan ([#15254](https://github.com/rapidsai/cudf/pull/15254)) [@wence-](https://github.com/wence-)
-- Add CUDA 12.4 to supported PTX versions ([#15247](https://github.com/rapidsai/cudf/pull/15247)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Implement DataFrame|Series.squeeze ([#15244](https://github.com/rapidsai/cudf/pull/15244)) [@mroeschke](https://github.com/mroeschke)
-- Roll back ipow changes due to register pressure. ([#15242](https://github.com/rapidsai/cudf/pull/15242)) [@pmattione-nvidia](https://github.com/pmattione-nvidia)
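For the `DataFrame|Series.squeeze` entry above, a minimal sketch of the pandas-compatible behavior (illustrative):

```python
# Sketch only: one column squeezes to a Series, one cell to a scalar.
import cudf

df = cudf.DataFrame({"a": [1, 2, 3]})
print(type(df.squeeze()))                    # a Series
print(cudf.DataFrame({"a": [7]}).squeeze())  # 7
```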
-- Remove create_chars_child_column utility ([#15241](https://github.com/rapidsai/cudf/pull/15241)) [@davidwendt](https://github.com/davidwendt)
-- Update dlpack to version 0.8 ([#15237](https://github.com/rapidsai/cudf/pull/15237)) [@dantegd](https://github.com/dantegd)
-- Improve performance in JSON reader when `mixed_types_as_string` option is enabled ([#15236](https://github.com/rapidsai/cudf/pull/15236)) [@shrshi](https://github.com/shrshi)
-- Remove row conversion code from libcudf ([#15234](https://github.com/rapidsai/cudf/pull/15234)) [@ttnghia](https://github.com/ttnghia)
-- Use variable substitution for RAPIDS version in Doxyfile ([#15231](https://github.com/rapidsai/cudf/pull/15231)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Add ListColumns.to_pandas(arrow_type=) ([#15228](https://github.com/rapidsai/cudf/pull/15228)) [@mroeschke](https://github.com/mroeschke)
-- Treat dask-cudf CI artifacts as pure wheels ([#15223](https://github.com/rapidsai/cudf/pull/15223)) [@bdice](https://github.com/bdice)
-- Clean up usage of __CUDA_ARCH__ and other macros. ([#15218](https://github.com/rapidsai/cudf/pull/15218)) [@bdice](https://github.com/bdice)
-- DOC: use constants in performance-comparisons.ipynb ([#15215](https://github.com/rapidsai/cudf/pull/15215)) [@raybellwaves](https://github.com/raybellwaves)
-- Rewrite conversion in terms of column ([#15213](https://github.com/rapidsai/cudf/pull/15213)) [@vyasr](https://github.com/vyasr)
-- Switch `pytest-xdist` algo to `worksteal` ([#15207](https://github.com/rapidsai/cudf/pull/15207)) [@galipremsagar](https://github.com/galipremsagar)
-- Deprecate strings_column_view::offsets_begin() ([#15205](https://github.com/rapidsai/cudf/pull/15205)) [@davidwendt](https://github.com/davidwendt)
-- Add `get_upstream_resource` method to `stream_checking_resource_adaptor` ([#15203](https://github.com/rapidsai/cudf/pull/15203)) [@miscco](https://github.com/miscco)
-- Tune up row size estimation in the data generator ([#15202](https://github.com/rapidsai/cudf/pull/15202)) [@vuule](https://github.com/vuule)
-- Fix `offset` value for generating test data in `parquet_chunked_reader_test.cu` ([#15200](https://github.com/rapidsai/cudf/pull/15200)) [@ttnghia](https://github.com/ttnghia)
-- Change strings_column_view::char_size to return int64 ([#15197](https://github.com/rapidsai/cudf/pull/15197)) [@davidwendt](https://github.com/davidwendt)
-- Fix includes for row_operators.cuh ([#15194](https://github.com/rapidsai/cudf/pull/15194)) [@davidwendt](https://github.com/davidwendt)
-- Generalize GHA selectors for pure Python testing ([#15191](https://github.com/rapidsai/cudf/pull/15191)) [@bdice](https://github.com/bdice)
-- Improvements for `__cuda_array_interface__` tests ([#15188](https://github.com/rapidsai/cudf/pull/15188)) [@bdice](https://github.com/bdice)
-- Allow to_pandas to return pandas.ArrowDtype ([#15182](https://github.com/rapidsai/cudf/pull/15182)) [@mroeschke](https://github.com/mroeschke)
-- Ignore `byte_range` in `read_json` when the size is not smaller than the input data ([#15180](https://github.com/rapidsai/cudf/pull/15180)) [@vuule](https://github.com/vuule)
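For the `to_pandas` / `pandas.ArrowDtype` entries above, a minimal sketch of the `arrow_type=` keyword (illustrative):

```python
# Sketch only: request pyarrow-backed pandas data on conversion.
import cudf

s = cudf.Series([1, 2, 3])
ps = s.to_pandas(arrow_type=True)
print(ps.dtype)  # int64[pyarrow]
```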
-- Expose new stable_sort and finish stream_compaction in pylibcudf ([#15175](https://github.com/rapidsai/cudf/pull/15175)) [@wence-](https://github.com/wence-)
-- [ci] update matrix filters for dask-cudf builds ([#15174](https://github.com/rapidsai/cudf/pull/15174)) [@jameslamb](https://github.com/jameslamb)
-- Change make_strings_children to return uvector ([#15171](https://github.com/rapidsai/cudf/pull/15171)) [@davidwendt](https://github.com/davidwendt)
-- Don't override to_pandas for Datelike columns ([#15167](https://github.com/rapidsai/cudf/pull/15167)) [@mroeschke](https://github.com/mroeschke)
-- Drop python-snappy from dependencies. ([#15161](https://github.com/rapidsai/cudf/pull/15161)) [@bdice](https://github.com/bdice)
-- Add microkernels for fixed-width and fixed-width dictionary in Parquet decode ([#15159](https://github.com/rapidsai/cudf/pull/15159)) [@abellina](https://github.com/abellina)
-- Make HostColumnVector.DataType accessor methods public ([#15157](https://github.com/rapidsai/cudf/pull/15157)) [@jbrennan333](https://github.com/jbrennan333)
-- Java bindings for left outer distinct join ([#15154](https://github.com/rapidsai/cudf/pull/15154)) [@jlowe](https://github.com/jlowe)
-- Forward-merge branch-24.02 to branch-24.04 ([#15153](https://github.com/rapidsai/cudf/pull/15153)) [@bdice](https://github.com/bdice)
-- Enable pandas pytests for `cudf.pandas` ([#15147](https://github.com/rapidsai/cudf/pull/15147)) [@galipremsagar](https://github.com/galipremsagar)
-- Add java option to keep quotes for JSON reads ([#15146](https://github.com/rapidsai/cudf/pull/15146)) [@revans2](https://github.com/revans2)
-- Change cross-pandas-version testing in `cudf` ([#15145](https://github.com/rapidsai/cudf/pull/15145)) [@galipremsagar](https://github.com/galipremsagar)
-- Use `hostdevice_vector` in `kernel_error` to avoid the pageable copy ([#15140](https://github.com/rapidsai/cudf/pull/15140)) [@vuule](https://github.com/vuule)
-- Clean up Columns.astype & cudf.dtype ([#15125](https://github.com/rapidsai/cudf/pull/15125)) [@mroeschke](https://github.com/mroeschke)
-- Simplify some to_pandas implementations ([#15123](https://github.com/rapidsai/cudf/pull/15123)) [@mroeschke](https://github.com/mroeschke)
-- Java: Add leak tracking for Scalar instances ([#15121](https://github.com/rapidsai/cudf/pull/15121)) [@jlowe](https://github.com/jlowe)
-- Remove calls to strings_column_view::offsets_begin() ([#15112](https://github.com/rapidsai/cudf/pull/15112)) [@davidwendt](https://github.com/davidwendt)
-- Add support for Python 3.11, require NumPy 1.23+ ([#15111](https://github.com/rapidsai/cudf/pull/15111)) [@jameslamb](https://github.com/jameslamb)
-- Compile-time ipow computation with array lookup ([#15110](https://github.com/rapidsai/cudf/pull/15110)) [@pmattione-nvidia](https://github.com/pmattione-nvidia)
-- Upgrade to `arrow-14.0.2` ([#15108](https://github.com/rapidsai/cudf/pull/15108)) [@galipremsagar](https://github.com/galipremsagar)
-- Dynamically set version in RAPIDS doc builds ([#15101](https://github.com/rapidsai/cudf/pull/15101)) [@jakirkham](https://github.com/jakirkham)
-- Add support for `pandas-2.2` in `cudf` ([#15100](https://github.com/rapidsai/cudf/pull/15100)) [@galipremsagar](https://github.com/galipremsagar)
-- Update devcontainers to CUDA Toolkit 12.2 ([#15099](https://github.com/rapidsai/cudf/pull/15099)) [@trxcllnt](https://github.com/trxcllnt)
-- Fix `datetime` binop pytest failures in pandas-2.2 ([#15090](https://github.com/rapidsai/cudf/pull/15090)) [@galipremsagar](https://github.com/galipremsagar)
-- Validate types in pylibcudf Column/Table constructors ([#15088](https://github.com/rapidsai/cudf/pull/15088)) [@wence-](https://github.com/wence-)
-- xfail test_join_ordering_pandas_compat for pandas 2.2 ([#15080](https://github.com/rapidsai/cudf/pull/15080)) [@mroeschke](https://github.com/mroeschke)
-- Add general purpose host memory allocator reference to cuIO with a demo of pooled-pinned allocation. ([#15079](https://github.com/rapidsai/cudf/pull/15079)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Adjust test_binops for pandas 2.2 ([#15078](https://github.com/rapidsai/cudf/pull/15078)) [@mroeschke](https://github.com/mroeschke)
-- Remove offsets_begin() call from nvtext::generate_ngrams ([#15077](https://github.com/rapidsai/cudf/pull/15077)) [@davidwendt](https://github.com/davidwendt)
-- Use offsetalator in cudf::detail::has_nonempty_null_rows ([#15076](https://github.com/rapidsai/cudf/pull/15076)) [@davidwendt](https://github.com/davidwendt)
-- Deprecate cudf::hashing::spark_murmurhash3_x86_32 ([#15074](https://github.com/rapidsai/cudf/pull/15074)) [@davidwendt](https://github.com/davidwendt)
-- Fix cudf::test::to_host to handle both offset types for strings columns ([#15073](https://github.com/rapidsai/cudf/pull/15073)) [@davidwendt](https://github.com/davidwendt)
-- Add condition for test_groupby_nulls_basic in pandas 2.2 ([#15072](https://github.com/rapidsai/cudf/pull/15072)) [@mroeschke](https://github.com/mroeschke)
-- xfail tests in test_udf_masked_ops due to pandas 2.2 bug ([#15071](https://github.com/rapidsai/cudf/pull/15071)) [@mroeschke](https://github.com/mroeschke)
-- target branch-24.04 for GitHub Actions workflows ([#15069](https://github.com/rapidsai/cudf/pull/15069)) [@jameslamb](https://github.com/jameslamb)
-- Implement stable version of `cudf::sort` ([#15066](https://github.com/rapidsai/cudf/pull/15066)) [@wence-](https://github.com/wence-)
-- Fix ORC and JSON tests failures for pandas 2.2 ([#15062](https://github.com/rapidsai/cudf/pull/15062)) [@mroeschke](https://github.com/mroeschke)
-- Adjust test_joining for pandas 2.2 ([#15060](https://github.com/rapidsai/cudf/pull/15060)) [@mroeschke](https://github.com/mroeschke)
-- Align MultiIndex.get_indexer with pandas 2.2 change ([#15059](https://github.com/rapidsai/cudf/pull/15059)) [@mroeschke](https://github.com/mroeschke)
-- Fix test_resample index dtype checking for pandas 2.2 ([#15058](https://github.com/rapidsai/cudf/pull/15058)) [@mroeschke](https://github.com/mroeschke)
-- Split out strings/replace.cu and rework its gtests ([#15054](https://github.com/rapidsai/cudf/pull/15054)) [@davidwendt](https://github.com/davidwendt)
-- Avoid incompatible value type setting in test_rolling for pandas 2.2 ([#15050](https://github.com/rapidsai/cudf/pull/15050)) [@mroeschke](https://github.com/mroeschke)
-- Change chained replace inplace test to COW test for pandas 2.2 ([#15049](https://github.com/rapidsai/cudf/pull/15049)) [@mroeschke](https://github.com/mroeschke)
-- Deprecate datelike isin casting strings to dates to match pandas 2.2 ([#15046](https://github.com/rapidsai/cudf/pull/15046)) [@mroeschke](https://github.com/mroeschke)
-- Avoid chained indexing in test_indexing for pandas 2.2 ([#15045](https://github.com/rapidsai/cudf/pull/15045)) [@mroeschke](https://github.com/mroeschke)
-- Avoid pandas 2.2 `DeprecationWarning` in test_hdf ([#15044](https://github.com/rapidsai/cudf/pull/15044)) [@mroeschke](https://github.com/mroeschke)
-- Use appropriate make_offsets_child_column for building lists columns ([#15043](https://github.com/rapidsai/cudf/pull/15043)) [@davidwendt](https://github.com/davidwendt)
-- Factor out position-offsets logic from strings split_helper utility ([#15040](https://github.com/rapidsai/cudf/pull/15040)) [@davidwendt](https://github.com/davidwendt)
-- Forward-merge branch-24.02 to branch-24.04 ([#15039](https://github.com/rapidsai/cudf/pull/15039)) [@bdice](https://github.com/bdice)
-- Clean up nvtx macros ([#15038](https://github.com/rapidsai/cudf/pull/15038)) [@PointKernel](https://github.com/PointKernel)
-- Add xfailures for test_applymap for pandas 2.2 ([#15034](https://github.com/rapidsai/cudf/pull/15034)) [@mroeschke](https://github.com/mroeschke)
-- Expose libcudf filter expression in read_parquet ([#15028](https://github.com/rapidsai/cudf/pull/15028)) [@wence-](https://github.com/wence-)
-- Adjust tests in test_dataframe.py for pandas 2.2 ([#15023](https://github.com/rapidsai/cudf/pull/15023)) [@mroeschke](https://github.com/mroeschke)
-- Adjust test_datetime_infer_format for pandas 2.2 ([#15021](https://github.com/rapidsai/cudf/pull/15021)) [@mroeschke](https://github.com/mroeschke)
-- Performance optimizations for parquet sub-rowgroup reader. ([#15020](https://github.com/rapidsai/cudf/pull/15020)) [@nvdbaranec](https://github.com/nvdbaranec)
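For the `read_parquet` filter-expression entry above, a sketch using the pyarrow-style filter syntax (the exact accepted forms are an assumption; illustrative only):

```python
# Sketch only: push a row filter down into the Parquet read.
import cudf

cudf.DataFrame({"a": [1, 2, 3]}).to_parquet("t.parquet")
out = cudf.read_parquet("t.parquet", filters=[("a", ">", 1)])
print(out)
```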
-- JNI bindings for distinct_hash_join ([#15019](https://github.com/rapidsai/cudf/pull/15019)) [@jlowe](https://github.com/jlowe)
-- Change copy_if_safe to call thrust instead of the overload function ([#15018](https://github.com/rapidsai/cudf/pull/15018)) [@davidwendt](https://github.com/davidwendt)
-- Improve performance of copy_if_else for long strings ([#15017](https://github.com/rapidsai/cudf/pull/15017)) [@davidwendt](https://github.com/davidwendt)
-- Fix is_string_dtype test for pandas 2.2 ([#15012](https://github.com/rapidsai/cudf/pull/15012)) [@mroeschke](https://github.com/mroeschke)
-- Rework cudf::strings::detail::copy_range for offsetalator ([#15010](https://github.com/rapidsai/cudf/pull/15010)) [@davidwendt](https://github.com/davidwendt)
-- Use offsetalator in cudf::get_json_object() ([#15009](https://github.com/rapidsai/cudf/pull/15009)) [@davidwendt](https://github.com/davidwendt)
-- Align integral types in ORC to specs ([#15008](https://github.com/rapidsai/cudf/pull/15008)) [@vuule](https://github.com/vuule)
-- Clean up detail sequence header inclusion ([#15007](https://github.com/rapidsai/cudf/pull/15007)) [@PointKernel](https://github.com/PointKernel)
-- Add groupby.apply(include_groups=) to match pandas 2.2 deprecation ([#15006](https://github.com/rapidsai/cudf/pull/15006)) [@mroeschke](https://github.com/mroeschke)
-- Use offsetalator in cudf::interleave_columns() ([#15004](https://github.com/rapidsai/cudf/pull/15004)) [@davidwendt](https://github.com/davidwendt)
-- Use offsetalator in cudf::row_bit_count() ([#15003](https://github.com/rapidsai/cudf/pull/15003)) [@davidwendt](https://github.com/davidwendt)
-- Use offsetalator in cudf::strings::wrap() ([#15002](https://github.com/rapidsai/cudf/pull/15002)) [@davidwendt](https://github.com/davidwendt)
-- Use offsetalator in cudf::strings::reverse ([#15001](https://github.com/rapidsai/cudf/pull/15001)) [@davidwendt](https://github.com/davidwendt)
-- Deprecate groupby fillna ([#15000](https://github.com/rapidsai/cudf/pull/15000)) [@mroeschke](https://github.com/mroeschke)
-- Ensure to_* IO methods respect pandas 2.2 keyword only deprecation ([#14999](https://github.com/rapidsai/cudf/pull/14999)) [@mroeschke](https://github.com/mroeschke)
-- Remove unneeded calls to create_chars_child_column utility ([#14997](https://github.com/rapidsai/cudf/pull/14997)) [@davidwendt](https://github.com/davidwendt)
-- Add environment-agnostic scripts for running ctests and pytests ([#14992](https://github.com/rapidsai/cudf/pull/14992)) [@trxcllnt](https://github.com/trxcllnt)
-- Filter all `DeprecationWarning`'s by `ArrowTable.to_pandas()` ([#14989](https://github.com/rapidsai/cudf/pull/14989)) [@galipremsagar](https://github.com/galipremsagar)
-- Deprecate replace with categorical columns ([#14988](https://github.com/rapidsai/cudf/pull/14988)) [@mroeschke](https://github.com/mroeschke)
-- Deprecate delim_whitespace in read_csv for pandas 2.2 ([#14986](https://github.com/rapidsai/cudf/pull/14986)) [@mroeschke](https://github.com/mroeschke)
-- Deprecate parameters similar to pandas 2.2 ([#14984](https://github.com/rapidsai/cudf/pull/14984)) [@mroeschke](https://github.com/mroeschke)
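For the `groupby.apply(include_groups=)` entry above, a minimal sketch matching the pandas 2.2 deprecation (illustrative):

```python
# Sketch only: exclude the grouping key from the applied function's input.
import cudf

df = cudf.DataFrame({"k": [1, 1, 2], "v": [10, 20, 30]})
print(df.groupby("k").apply(lambda g: g.sum(), include_groups=False))
```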
-- Ensure that `ctest` is called with `--no-tests=error`. ([#14983](https://github.com/rapidsai/cudf/pull/14983)) [@bdice](https://github.com/bdice)
-- Deprecate non-integer `periods` in `date_range` and `interval_range` ([#14976](https://github.com/rapidsai/cudf/pull/14976)) [@galipremsagar](https://github.com/galipremsagar)
-- Update ops-bot.yaml ([#14974](https://github.com/rapidsai/cudf/pull/14974)) [@AyodeAwe](https://github.com/AyodeAwe)
-- Use page statistics in Parquet reader ([#14973](https://github.com/rapidsai/cudf/pull/14973)) [@etseidl](https://github.com/etseidl)
-- Use fused types for overloaded function signatures ([#14969](https://github.com/rapidsai/cudf/pull/14969)) [@vyasr](https://github.com/vyasr)
-- Deprecate certain frequency strings ([#14967](https://github.com/rapidsai/cudf/pull/14967)) [@galipremsagar](https://github.com/galipremsagar)
-- Update copyrights for 24.04. ([#14964](https://github.com/rapidsai/cudf/pull/14964)) [@bdice](https://github.com/bdice)
-- Add missing atomic operators, refactor atomic operators, move atomic operators to detail namespace. ([#14962](https://github.com/rapidsai/cudf/pull/14962)) [@bdice](https://github.com/bdice)
-- Introduce `GetJsonObjectOptions` in `getJSONObject` Java API ([#14956](https://github.com/rapidsai/cudf/pull/14956)) [@SurajAralihalli](https://github.com/SurajAralihalli)
-- JNI JSON read with DataSource and inferred schema, along with basic java nested Schema JSON reads ([#14954](https://github.com/rapidsai/cudf/pull/14954)) [@revans2](https://github.com/revans2)
-- Make codecov only informational (always pass). ([#14952](https://github.com/rapidsai/cudf/pull/14952)) [@bdice](https://github.com/bdice)
-- Replace legacy cudf and dask_cudf imports as (d)gd ([#14944](https://github.com/rapidsai/cudf/pull/14944)) [@mroeschke](https://github.com/mroeschke)
-- Replace _is_datetime64tz/interval_dtype with isinstance ([#14943](https://github.com/rapidsai/cudf/pull/14943)) [@mroeschke](https://github.com/mroeschke)
-- Update tests for pandas 2. ([#14941](https://github.com/rapidsai/cudf/pull/14941)) [@bdice](https://github.com/bdice)
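For the deprecation entries above (non-integer `periods` and frequency strings), a sketch of the pandas-2.2-style spellings (alias availability in cudf is an assumption; illustrative only):

```python
# Sketch only: integer periods and the "ME" month-end alias replace
# float periods and the legacy "M" alias.
import cudf

rng = cudf.date_range("2024-01-01", periods=3, freq="ME")
print(rng)
```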
-- Use more public pandas APIs ([#14929](https://github.com/rapidsai/cudf/pull/14929)) [@mroeschke](https://github.com/mroeschke)
-- Replace local copyright check with pre-commit-hooks verify-copyright ([#14917](https://github.com/rapidsai/cudf/pull/14917)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Add `pandas-2.x` support in `cudf` ([#14916](https://github.com/rapidsai/cudf/pull/14916)) [@galipremsagar](https://github.com/galipremsagar)
-- Use offsetalator in nvtext::byte_pair_encoding ([#14888](https://github.com/rapidsai/cudf/pull/14888)) [@davidwendt](https://github.com/davidwendt)
-- De-DOS line-endings ([#14880](https://github.com/rapidsai/cudf/pull/14880)) [@wence-](https://github.com/wence-)
-- Add detail `cuco_allocator` ([#14877](https://github.com/rapidsai/cudf/pull/14877)) [@PointKernel](https://github.com/PointKernel)
-- Move all core types to using enum class in Cython ([#14876](https://github.com/rapidsai/cudf/pull/14876)) [@vyasr](https://github.com/vyasr)
-- Read `cudf.__version__` in Sphinx build ([#14872](https://github.com/rapidsai/cudf/pull/14872)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Use int64 offset types for accessing code-points in nvtext::normalize ([#14868](https://github.com/rapidsai/cudf/pull/14868)) [@davidwendt](https://github.com/davidwendt)
-- Read version from VERSION file in CMake ([#14867](https://github.com/rapidsai/cudf/pull/14867)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Update conda-cpp-post-build-checks to branch-24.04. ([#14854](https://github.com/rapidsai/cudf/pull/14854)) [@bdice](https://github.com/bdice)
-- Update cudf for compatibility with the latest cuco ([#14849](https://github.com/rapidsai/cudf/pull/14849)) [@PointKernel](https://github.com/PointKernel)
-- Remove deprecated strings functions ([#14848](https://github.com/rapidsai/cudf/pull/14848)) [@davidwendt](https://github.com/davidwendt)
-- Fix CI workflows for pandas-tests and add test summary. ([#14847](https://github.com/rapidsai/cudf/pull/14847)) [@bdice](https://github.com/bdice)
-- Use offsetalator in cudf::strings::copy_slice ([#14844](https://github.com/rapidsai/cudf/pull/14844)) [@davidwendt](https://github.com/davidwendt)
-- Fix V2 Parquet page alignment for use with zStandard compression ([#14841](https://github.com/rapidsai/cudf/pull/14841)) [@etseidl](https://github.com/etseidl)
-- Fix calls to deprecated strings factory API in examples. ([#14838](https://github.com/rapidsai/cudf/pull/14838)) [@bdice](https://github.com/bdice)
-- Update pre-commit hooks ([#14837](https://github.com/rapidsai/cudf/pull/14837)) [@bdice](https://github.com/bdice)
-- Use `rapids_cuda_set_runtime` to determine cuda runtime usage by target ([#14833](https://github.com/rapidsai/cudf/pull/14833)) [@vyasr](https://github.com/vyasr)
-- Remove get_mem_info functions from custom memory resources ([#14832](https://github.com/rapidsai/cudf/pull/14832)) [@harrism](https://github.com/harrism)
-- Fix debug build by splitting row_operator_tests_utilities.cu ([#14826](https://github.com/rapidsai/cudf/pull/14826)) [@davidwendt](https://github.com/davidwendt)
-- Remove -DNVBench_ENABLE_CUPTI=OFF. ([#14820](https://github.com/rapidsai/cudf/pull/14820)) [@bdice](https://github.com/bdice)
-- Use cuco::static_set in the hash-based groupby ([#14813](https://github.com/rapidsai/cudf/pull/14813)) [@PointKernel](https://github.com/PointKernel)
-- Branch 24.04 merge branch 24.02 ([#14809](https://github.com/rapidsai/cudf/pull/14809)) [@vyasr](https://github.com/vyasr)
-- Branch 24.04 merge branch 24.02 ([#14806](https://github.com/rapidsai/cudf/pull/14806)) [@vyasr](https://github.com/vyasr)
-- Introduce basic "cudf" backend for Dask Expressions ([#14805](https://github.com/rapidsai/cudf/pull/14805)) [@rjzamora](https://github.com/rjzamora)
-- Remove `build_struct|list_column` ([#14786](https://github.com/rapidsai/cudf/pull/14786)) [@mroeschke](https://github.com/mroeschke)
-- Use offsetalator in nvtext tokenize functions ([#14783](https://github.com/rapidsai/cudf/pull/14783)) [@davidwendt](https://github.com/davidwendt)
-- Reduce execution time of Python ORC tests ([#14776](https://github.com/rapidsai/cudf/pull/14776)) [@vuule](https://github.com/vuule)
-- Use offsetalator in cudf::strings::split functions ([#14757](https://github.com/rapidsai/cudf/pull/14757)) [@davidwendt](https://github.com/davidwendt)
-- Use offsetalator in cudf::strings::findall ([#14745](https://github.com/rapidsai/cudf/pull/14745)) [@davidwendt](https://github.com/davidwendt)
-- Use offsetalator in cudf::strings::url_decode ([#14744](https://github.com/rapidsai/cudf/pull/14744)) [@davidwendt](https://github.com/davidwendt)
-- Use get_offset_value utility in strings shift function ([#14743](https://github.com/rapidsai/cudf/pull/14743)) [@davidwendt](https://github.com/davidwendt)
-- Use as_column instead of full ([#14698](https://github.com/rapidsai/cudf/pull/14698)) [@mroeschke](https://github.com/mroeschke)
-- List all notable breaking changes ([#13535](https://github.com/rapidsai/cudf/pull/13535)) [@galipremsagar](https://github.com/galipremsagar)
-
-# cuDF 24.02.00 (12 Feb 2024)
-
-## 🚨 Breaking Changes
-
-- Remove **kwargs from astype ([#14765](https://github.com/rapidsai/cudf/pull/14765)) [@mroeschke](https://github.com/mroeschke)
-- Remove mimesis as a testing dependency ([#14723](https://github.com/rapidsai/cudf/pull/14723)) [@mroeschke](https://github.com/mroeschke)
-- Update to Dask's `shuffle_method` kwarg ([#14708](https://github.com/rapidsai/cudf/pull/14708)) [@pentschev](https://github.com/pentschev)
-- Drop Pascal GPU support. ([#14630](https://github.com/rapidsai/cudf/pull/14630)) [@bdice](https://github.com/bdice)
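For the Dask Expressions "cudf" backend entry above, a sketch of backend selection through Dask's config (key name per Dask's backend dispatch; illustrative):

```python
# Sketch only: ask dask.dataframe to create cudf-backed collections.
import dask

dask.config.set({"dataframe.backend": "cudf"})

import dask.dataframe as dd  # noqa: E402

ddf = dd.from_dict({"a": [1, 2, 3]}, npartitions=1)
print(type(ddf.compute()))  # a cudf DataFrame
```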
-- Update to CCCL 2.2.0. ([#14576](https://github.com/rapidsai/cudf/pull/14576)) [@bdice](https://github.com/bdice)
-- Expunge as_frame conversions in Column algorithms ([#14491](https://github.com/rapidsai/cudf/pull/14491)) [@wence-](https://github.com/wence-)
-- Deprecate cudf::make_strings_column accepting typed offsets ([#14461](https://github.com/rapidsai/cudf/pull/14461)) [@davidwendt](https://github.com/davidwendt)
-- Remove deprecated nvtext::load_merge_pairs_file ([#14460](https://github.com/rapidsai/cudf/pull/14460)) [@davidwendt](https://github.com/davidwendt)
-- Include writer code and writerVersion in ORC files ([#14458](https://github.com/rapidsai/cudf/pull/14458)) [@vuule](https://github.com/vuule)
-- Remove null mask for zero nulls in json readers ([#14451](https://github.com/rapidsai/cudf/pull/14451)) [@karthikeyann](https://github.com/karthikeyann)
-- REF: Remove **kwargs from to_pandas, raise if nullable is not implemented ([#14438](https://github.com/rapidsai/cudf/pull/14438)) [@mroeschke](https://github.com/mroeschke)
-- Consolidate 1D pandas object handling in as_column ([#14394](https://github.com/rapidsai/cudf/pull/14394)) [@mroeschke](https://github.com/mroeschke)
-- Move chars column to parent data buffer in strings column ([#14202](https://github.com/rapidsai/cudf/pull/14202)) [@karthikeyann](https://github.com/karthikeyann)
-- Switch to scikit-build-core ([#13531](https://github.com/rapidsai/cudf/pull/13531)) [@vyasr](https://github.com/vyasr)
-
-## 🐛 Bug Fixes
-
-- Exclude tests from builds ([#14981](https://github.com/rapidsai/cudf/pull/14981)) [@vyasr](https://github.com/vyasr)
-- Fix the bounce buffer size in ORC writer ([#14947](https://github.com/rapidsai/cudf/pull/14947)) [@vuule](https://github.com/vuule)
-- Revert sum/product aggregation to always produce `int64_t` type ([#14907](https://github.com/rapidsai/cudf/pull/14907)) [@SurajAralihalli](https://github.com/SurajAralihalli)
-- Fixed an issue with output chunking computation stemming from input chunking. ([#14889](https://github.com/rapidsai/cudf/pull/14889)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix total_byte_size in Parquet row group metadata ([#14802](https://github.com/rapidsai/cudf/pull/14802)) [@etseidl](https://github.com/etseidl)
-- Fix index difference to follow the pandas format ([#14789](https://github.com/rapidsai/cudf/pull/14789)) [@amiralimi](https://github.com/amiralimi)
-- Fix shared-workflows repo name ([#14784](https://github.com/rapidsai/cudf/pull/14784)) [@raydouglass](https://github.com/raydouglass)
-- Remove unparseable attributes from all nodes ([#14780](https://github.com/rapidsai/cudf/pull/14780)) [@vyasr](https://github.com/vyasr)
-- Refactor and add validation to IntervalIndex.__init__ ([#14778](https://github.com/rapidsai/cudf/pull/14778)) [@mroeschke](https://github.com/mroeschke)
-- Work around incompatibilities between V2 page header handling and zStandard compression in Parquet writer ([#14772](https://github.com/rapidsai/cudf/pull/14772)) [@etseidl](https://github.com/etseidl)
-- Fix calls to deprecated strings factory API ([#14771](https://github.com/rapidsai/cudf/pull/14771)) [@davidwendt](https://github.com/davidwendt)
-- Fix ptx file discovery in editable installs ([#14767](https://github.com/rapidsai/cudf/pull/14767)) [@vyasr](https://github.com/vyasr)
-- Revise ``shuffle`` deprecation to align with dask/dask ([#14762](https://github.com/rapidsai/cudf/pull/14762)) [@rjzamora](https://github.com/rjzamora)
-- Enable intermediate proxies to be picklable ([#14752](https://github.com/rapidsai/cudf/pull/14752)) [@shwina](https://github.com/shwina)
-- Add CUDF_TEST_PROGRAM_MAIN macro to tests lacking it ([#14751](https://github.com/rapidsai/cudf/pull/14751)) [@etseidl](https://github.com/etseidl)
-- Fix CMake args ([#14746](https://github.com/rapidsai/cudf/pull/14746)) [@vyasr](https://github.com/vyasr)
-- Fix logic bug introduced in #14730 ([#14742](https://github.com/rapidsai/cudf/pull/14742)) [@wence-](https://github.com/wence-)
-- [Java] Choose The Correct RoundingMode For Checking Decimal OutOfBounds ([#14731](https://github.com/rapidsai/cudf/pull/14731)) [@razajafri](https://github.com/razajafri)
-- Fix ``Groupby.get_group`` ([#14728](https://github.com/rapidsai/cudf/pull/14728)) [@rjzamora](https://github.com/rjzamora)
-- Ensure that all CUDA kernels in cudf have hidden visibility. ([#14726](https://github.com/rapidsai/cudf/pull/14726)) [@robertmaynard](https://github.com/robertmaynard)
([#14726](https://github.com/rapidsai/cudf/pull/14726)) [@robertmaynard](https://github.com/robertmaynard) -- Split cuda versions for notebook testing ([#14722](https://github.com/rapidsai/cudf/pull/14722)) [@raydouglass](https://github.com/raydouglass) -- Fix to_numeric not preserving Series index and name ([#14718](https://github.com/rapidsai/cudf/pull/14718)) [@mroeschke](https://github.com/mroeschke) -- Update dask-cudf wheel name ([#14713](https://github.com/rapidsai/cudf/pull/14713)) [@raydouglass](https://github.com/raydouglass) -- Fix strings::contains matching end of string target ([#14711](https://github.com/rapidsai/cudf/pull/14711)) [@davidwendt](https://github.com/davidwendt) -- Update to Dask's `shuffle_method` kwarg ([#14708](https://github.com/rapidsai/cudf/pull/14708)) [@pentschev](https://github.com/pentschev) -- Write file-level statistics when writing ORC files with zero rows ([#14707](https://github.com/rapidsai/cudf/pull/14707)) [@vuule](https://github.com/vuule) -- Potential fix for peformance regression in #14415 ([#14706](https://github.com/rapidsai/cudf/pull/14706)) [@etseidl](https://github.com/etseidl) -- Ensure DataFrame column types are preserved during serialization ([#14705](https://github.com/rapidsai/cudf/pull/14705)) [@mroeschke](https://github.com/mroeschke) -- Skip numba test that fails on ARM ([#14702](https://github.com/rapidsai/cudf/pull/14702)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Allow Z in datetime string parsing in non pandas compat mode ([#14701](https://github.com/rapidsai/cudf/pull/14701)) [@mroeschke](https://github.com/mroeschke) -- Fix nan_as_null not being respected when passing arrow object ([#14688](https://github.com/rapidsai/cudf/pull/14688)) [@mroeschke](https://github.com/mroeschke) -- Fix constructing Series/Index from arrow array and dtype ([#14686](https://github.com/rapidsai/cudf/pull/14686)) [@mroeschke](https://github.com/mroeschke) -- Fix Aggregation Type Promotion: Ensure Unsigned Input Types Result in Unsigned Output for Sum and Multiply ([#14679](https://github.com/rapidsai/cudf/pull/14679)) [@SurajAralihalli](https://github.com/SurajAralihalli) -- Add BaseOffset as a final proxy type to pass instancechecks for offsets against `BaseOffset` ([#14678](https://github.com/rapidsai/cudf/pull/14678)) [@shwina](https://github.com/shwina) -- Add row conversion code from spark-rapids-jni ([#14664](https://github.com/rapidsai/cudf/pull/14664)) [@ttnghia](https://github.com/ttnghia) -- Unconditionally export the CCCL path ([#14656](https://github.com/rapidsai/cudf/pull/14656)) [@vyasr](https://github.com/vyasr) -- Ensure libcudf searches for our patched version of CCCL first ([#14655](https://github.com/rapidsai/cudf/pull/14655)) [@robertmaynard](https://github.com/robertmaynard) -- Constrain CUDA in notebook testing to prevent CUDA 12.1 usage until we have pynvjitlink ([#14648](https://github.com/rapidsai/cudf/pull/14648)) [@vyasr](https://github.com/vyasr) -- Fix invalid memory access in Parquet reader ([#14637](https://github.com/rapidsai/cudf/pull/14637)) [@etseidl](https://github.com/etseidl) -- Use column_empty over as_column([]) ([#14632](https://github.com/rapidsai/cudf/pull/14632)) 
[@mroeschke](https://github.com/mroeschke) -- Add (implicit) handling for torch tensors in is_scalar ([#14623](https://github.com/rapidsai/cudf/pull/14623)) [@wence-](https://github.com/wence-) -- Fix astype/fillna not maintaining column subclass and types ([#14615](https://github.com/rapidsai/cudf/pull/14615)) [@mroeschke](https://github.com/mroeschke) -- Remove non-empty nulls in cudf::get_json_object ([#14609](https://github.com/rapidsai/cudf/pull/14609)) [@davidwendt](https://github.com/davidwendt) -- Remove `cuda::proclaim_return_type` from nested lambda ([#14607](https://github.com/rapidsai/cudf/pull/14607)) [@ttnghia](https://github.com/ttnghia) -- Fix DataFrame.reindex when column reindexing to MultiIndex/RangeIndex ([#14605](https://github.com/rapidsai/cudf/pull/14605)) [@mroeschke](https://github.com/mroeschke) -- Address potential race conditions in Parquet reader ([#14602](https://github.com/rapidsai/cudf/pull/14602)) [@etseidl](https://github.com/etseidl) -- Fix DataFrame.reindex removing column name ([#14601](https://github.com/rapidsai/cudf/pull/14601)) [@mroeschke](https://github.com/mroeschke) -- Remove unsanitized input test data from copy gtests ([#14600](https://github.com/rapidsai/cudf/pull/14600)) [@davidwendt](https://github.com/davidwendt) -- Fix race detected in Parquet writer ([#14598](https://github.com/rapidsai/cudf/pull/14598)) [@etseidl](https://github.com/etseidl) -- Correct invalid or missing return types ([#14587](https://github.com/rapidsai/cudf/pull/14587)) [@robertmaynard](https://github.com/robertmaynard) -- Fix unsanitized nulls from strings segmented-reduce ([#14586](https://github.com/rapidsai/cudf/pull/14586)) [@davidwendt](https://github.com/davidwendt) -- Upgrade to nvCOMP 3.0.5 ([#14581](https://github.com/rapidsai/cudf/pull/14581)) [@davidwendt](https://github.com/davidwendt) -- Fix unsanitized nulls produced by `cudf::clamp` APIs ([#14580](https://github.com/rapidsai/cudf/pull/14580)) [@davidwendt](https://github.com/davidwendt) -- Fix unsanitized nulls produced by libcudf dictionary decode ([#14578](https://github.com/rapidsai/cudf/pull/14578)) [@davidwendt](https://github.com/davidwendt) -- Fixes a symbol group lookup table issue ([#14561](https://github.com/rapidsai/cudf/pull/14561)) [@elstehle](https://github.com/elstehle) -- Drop llvm16 from cuda118-conda devcontainer image ([#14526](https://github.com/rapidsai/cudf/pull/14526)) [@charlesbluca](https://github.com/charlesbluca) -- REF: Make DataFrame.from_pandas process by column ([#14483](https://github.com/rapidsai/cudf/pull/14483)) [@mroeschke](https://github.com/mroeschke) -- Improve memory footprint of isin by using contains ([#14478](https://github.com/rapidsai/cudf/pull/14478)) [@wence-](https://github.com/wence-) -- Move creation of env.yaml outside the current directory ([#14476](https://github.com/rapidsai/cudf/pull/14476)) [@davidwendt](https://github.com/davidwendt) -- Enable `pd.Timestamp` objects to be picklable when `cudf.pandas` is active ([#14474](https://github.com/rapidsai/cudf/pull/14474)) [@shwina](https://github.com/shwina) -- Correct dtype of count aggregations on empty dataframes ([#14473](https://github.com/rapidsai/cudf/pull/14473)) 
[@wence-](https://github.com/wence-) -- Avoid DataFrame conversion in `MultiIndex.from_pandas` ([#14470](https://github.com/rapidsai/cudf/pull/14470)) [@mroeschke](https://github.com/mroeschke) -- JSON writer: avoid default stream use in `string_scalar` constructors ([#14444](https://github.com/rapidsai/cudf/pull/14444)) [@vuule](https://github.com/vuule) -- Fix default stream use in the CSV reader ([#14443](https://github.com/rapidsai/cudf/pull/14443)) [@vuule](https://github.com/vuule) -- Preserve DataFrame(columns=).columns dtype during empty-like construction ([#14381](https://github.com/rapidsai/cudf/pull/14381)) [@mroeschke](https://github.com/mroeschke) -- Defer PTX file load to runtime ([#13690](https://github.com/rapidsai/cudf/pull/13690)) [@brandon-b-miller](https://github.com/brandon-b-miller) - -## 📖 Documentation - -- Disable parallel build ([#14796](https://github.com/rapidsai/cudf/pull/14796)) [@vyasr](https://github.com/vyasr) -- Add pylibcudf to the docs ([#14791](https://github.com/rapidsai/cudf/pull/14791)) [@vyasr](https://github.com/vyasr) -- Describe unpickling expectations when cudf.pandas is enabled ([#14693](https://github.com/rapidsai/cudf/pull/14693)) [@shwina](https://github.com/shwina) -- Update CONTRIBUTING for pyproject-only builds ([#14653](https://github.com/rapidsai/cudf/pull/14653)) [@vyasr](https://github.com/vyasr) -- More doxygen fixes ([#14639](https://github.com/rapidsai/cudf/pull/14639)) [@vyasr](https://github.com/vyasr) -- Enable doxygen XML generation and fix issues ([#14477](https://github.com/rapidsai/cudf/pull/14477)) [@vyasr](https://github.com/vyasr) -- Some doxygen improvements ([#14469](https://github.com/rapidsai/cudf/pull/14469)) [@vyasr](https://github.com/vyasr) -- Remove warning in dask-cudf docs ([#14454](https://github.com/rapidsai/cudf/pull/14454)) [@wence-](https://github.com/wence-) -- Update README links with redirects. 
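Two entries in the list above land related `cudf.pandas` pickling fixes (#14752, #14474). A minimal sketch of the now-working round trip, assuming a build that includes both fixes; `cudf.pandas.install()` is the documented programmatic way to enable the accelerator:

```python
# Sketch only: the pickling behavior fixed by #14474/#14752.
# cudf.pandas.install() must run before pandas is imported so that
# pandas objects are wrapped by the accelerator's proxy types.
import pickle

import cudf.pandas

cudf.pandas.install()

import pandas as pd  # imported deliberately after install()

ts = pd.Timestamp("2024-01-01")
restored = pickle.loads(pickle.dumps(ts))  # proxied Timestamp survives pickle
assert restored == ts
```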
-
-## 📖 Documentation
-
-- Disable parallel build ([#14796](https://github.com/rapidsai/cudf/pull/14796)) [@vyasr](https://github.com/vyasr)
-- Add pylibcudf to the docs ([#14791](https://github.com/rapidsai/cudf/pull/14791)) [@vyasr](https://github.com/vyasr)
-- Describe unpickling expectations when cudf.pandas is enabled ([#14693](https://github.com/rapidsai/cudf/pull/14693)) [@shwina](https://github.com/shwina)
-- Update CONTRIBUTING for pyproject-only builds ([#14653](https://github.com/rapidsai/cudf/pull/14653)) [@vyasr](https://github.com/vyasr)
-- More doxygen fixes ([#14639](https://github.com/rapidsai/cudf/pull/14639)) [@vyasr](https://github.com/vyasr)
-- Enable doxygen XML generation and fix issues ([#14477](https://github.com/rapidsai/cudf/pull/14477)) [@vyasr](https://github.com/vyasr)
-- Some doxygen improvements ([#14469](https://github.com/rapidsai/cudf/pull/14469)) [@vyasr](https://github.com/vyasr)
-- Remove warning in dask-cudf docs ([#14454](https://github.com/rapidsai/cudf/pull/14454)) [@wence-](https://github.com/wence-)
-- Update README links with redirects. ([#14378](https://github.com/rapidsai/cudf/pull/14378)) [@bdice](https://github.com/bdice)
-- Add pip install instructions to README ([#13677](https://github.com/rapidsai/cudf/pull/13677)) [@shwina](https://github.com/shwina)
-
-## 🚀 New Features
-
-- Add ci check for external kernels ([#14768](https://github.com/rapidsai/cudf/pull/14768)) [@robertmaynard](https://github.com/robertmaynard)
-- JSON single quote normalization API ([#14729](https://github.com/rapidsai/cudf/pull/14729)) [@shrshi](https://github.com/shrshi)
-- Write cuDF version in Parquet "created_by" metadata field ([#14721](https://github.com/rapidsai/cudf/pull/14721)) [@etseidl](https://github.com/etseidl)
-- Implement remaining copying APIs in pylibcudf along with required helper functions ([#14640](https://github.com/rapidsai/cudf/pull/14640)) [@vyasr](https://github.com/vyasr)
-- Don't constrain `numba<0.58` ([#14616](https://github.com/rapidsai/cudf/pull/14616)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add DELTA_LENGTH_BYTE_ARRAY encoder and decoder for Parquet ([#14590](https://github.com/rapidsai/cudf/pull/14590)) [@etseidl](https://github.com/etseidl)
-- JSON - Parse mixed types as string in JSON reader ([#14572](https://github.com/rapidsai/cudf/pull/14572)) [@karthikeyann](https://github.com/karthikeyann)
-- JSON quote normalization ([#14545](https://github.com/rapidsai/cudf/pull/14545)) [@shrshi](https://github.com/shrshi)
-- Make DefaultHostMemoryAllocator settable ([#14523](https://github.com/rapidsai/cudf/pull/14523)) [@gerashegalov](https://github.com/gerashegalov)
-- Implement more copying APIs in pylibcudf ([#14508](https://github.com/rapidsai/cudf/pull/14508)) [@vyasr](https://github.com/vyasr)
-- Include writer code and writerVersion in ORC files ([#14458](https://github.com/rapidsai/cudf/pull/14458)) [@vuule](https://github.com/vuule)
-- Parquet sub-rowgroup reading. ([#14360](https://github.com/rapidsai/cudf/pull/14360)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Move chars column to parent data buffer in strings column ([#14202](https://github.com/rapidsai/cudf/pull/14202)) [@karthikeyann](https://github.com/karthikeyann)
-- PARQUET-2261 Size Statistics ([#14000](https://github.com/rapidsai/cudf/pull/14000)) [@etseidl](https://github.com/etseidl)
-- Improve GroupBy JIT error handling ([#13854](https://github.com/rapidsai/cudf/pull/13854)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Generate unified Python/C++ docs ([#13846](https://github.com/rapidsai/cudf/pull/13846)) [@vyasr](https://github.com/vyasr)
-- Expand JIT groupby test suite ([#13813](https://github.com/rapidsai/cudf/pull/13813)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-
-## 🛠️ Improvements
-
-- Pin `pytest<8` ([#14920](https://github.com/rapidsai/cudf/pull/14920)) [@galipremsagar](https://github.com/galipremsagar)
-- Move cudf::char_utf8 definition from detail to public header ([#14779](https://github.com/rapidsai/cudf/pull/14779)) [@davidwendt](https://github.com/davidwendt)
-- Clean up `TimedeltaIndex.__init__` constructor ([#14775](https://github.com/rapidsai/cudf/pull/14775)) [@mroeschke](https://github.com/mroeschke)
-- Clean up `DatetimeIndex.__init__` constructor ([#14774](https://github.com/rapidsai/cudf/pull/14774)) [@mroeschke](https://github.com/mroeschke)
-- Some `frame.py` typing, move seldom used methods in `frame.py` ([#14766](https://github.com/rapidsai/cudf/pull/14766)) [@mroeschke](https://github.com/mroeschke)
-- Remove **kwargs from astype ([#14765](https://github.com/rapidsai/cudf/pull/14765)) [@mroeschke](https://github.com/mroeschke)
-- fix benchmarks compatibility with newer pytest-cases ([#14764](https://github.com/rapidsai/cudf/pull/14764)) [@jameslamb](https://github.com/jameslamb)
-- Add `pynvjitlink` as a dependency ([#14763](https://github.com/rapidsai/cudf/pull/14763)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Resolve degenerate performance in `create_structs_data` ([#14761](https://github.com/rapidsai/cudf/pull/14761)) [@SurajAralihalli](https://github.com/SurajAralihalli)
-- Simplify ColumnAccessor methods; avoid unnecessary validations ([#14758](https://github.com/rapidsai/cudf/pull/14758)) [@mroeschke](https://github.com/mroeschke)
-- Pin pytest-cases<3.8.2 ([#14756](https://github.com/rapidsai/cudf/pull/14756)) [@mroeschke](https://github.com/mroeschke)
-- Use _from_data instead of _from_columns for initializing Frame ([#14755](https://github.com/rapidsai/cudf/pull/14755)) [@mroeschke](https://github.com/mroeschke)
-- Consolidate cudf object handling in as_column ([#14754](https://github.com/rapidsai/cudf/pull/14754)) [@mroeschke](https://github.com/mroeschke)
-- Reduce execution time of Parquet C++ tests ([#14750](https://github.com/rapidsai/cudf/pull/14750)) [@vuule](https://github.com/vuule)
-- Implement to_datetime(..., utc=True) ([#14749](https://github.com/rapidsai/cudf/pull/14749)) [@mroeschke](https://github.com/mroeschke) (see the sketch after this section)
-- Remove usages of rapids-env-update ([#14748](https://github.com/rapidsai/cudf/pull/14748)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
-- Provide explicit pool size and avoid RMM detail APIs ([#14741](https://github.com/rapidsai/cudf/pull/14741)) [@harrism](https://github.com/harrism)
-- Implement `cudf.MultiIndex.from_arrays` ([#14740](https://github.com/rapidsai/cudf/pull/14740)) [@mroeschke](https://github.com/mroeschke) (see the sketch after this section)
-- Remove unused/single use methods ([#14739](https://github.com/rapidsai/cudf/pull/14739)) [@mroeschke](https://github.com/mroeschke)
-- refactor CUDA versions in dependencies.yaml ([#14733](https://github.com/rapidsai/cudf/pull/14733)) [@jameslamb](https://github.com/jameslamb)
-- Remove unneeded methods in Column ([#14730](https://github.com/rapidsai/cudf/pull/14730)) [@mroeschke](https://github.com/mroeschke)
-- Clean up base column methods ([#14725](https://github.com/rapidsai/cudf/pull/14725)) [@mroeschke](https://github.com/mroeschke)
-- Ensure column.fillna signatures are consistent ([#14724](https://github.com/rapidsai/cudf/pull/14724)) [@mroeschke](https://github.com/mroeschke)
-- Remove mimesis as a testing dependency ([#14723](https://github.com/rapidsai/cudf/pull/14723)) [@mroeschke](https://github.com/mroeschke)
-- Replace as_numerical with as_numerical_column/codes ([#14719](https://github.com/rapidsai/cudf/pull/14719)) [@mroeschke](https://github.com/mroeschke)
-- Use offsetalator in gather_chars ([#14700](https://github.com/rapidsai/cudf/pull/14700)) [@davidwendt](https://github.com/davidwendt)
-- Use make_strings_children for fill() specialization logic ([#14697](https://github.com/rapidsai/cudf/pull/14697)) [@davidwendt](https://github.com/davidwendt)
-- Change `io::detail::orc` namespace into `io::orc::detail` ([#14696](https://github.com/rapidsai/cudf/pull/14696)) [@ttnghia](https://github.com/ttnghia)
-- Fix call to deprecated factory function ([#14695](https://github.com/rapidsai/cudf/pull/14695)) [@davidwendt](https://github.com/davidwendt)
-- Use as_column instead of arange for range like inputs ([#14689](https://github.com/rapidsai/cudf/pull/14689)) [@mroeschke](https://github.com/mroeschke)
-- Reorganize ORC reader into multiple files and perform some small fixes to cuIO code ([#14665](https://github.com/rapidsai/cudf/pull/14665)) [@ttnghia](https://github.com/ttnghia)
-- Split parquet test into multiple files ([#14663](https://github.com/rapidsai/cudf/pull/14663)) [@etseidl](https://github.com/etseidl)
-- Custom error messages for IO with nonexistent files ([#14662](https://github.com/rapidsai/cudf/pull/14662)) [@vuule](https://github.com/vuule)
-- Explicitly pass .dtype into is_foo_dtype functions ([#14657](https://github.com/rapidsai/cudf/pull/14657)) [@mroeschke](https://github.com/mroeschke)
-- Basic validation in reader benchmarks ([#14647](https://github.com/rapidsai/cudf/pull/14647)) [@vuule](https://github.com/vuule)
-- Update dependencies.yaml to support CUDA 12.*. ([#14644](https://github.com/rapidsai/cudf/pull/14644)) [@bdice](https://github.com/bdice)
-- Consolidate memoryview handling in as_column ([#14643](https://github.com/rapidsai/cudf/pull/14643)) [@mroeschke](https://github.com/mroeschke)
-- Convert `FieldType` to scoped enum ([#14642](https://github.com/rapidsai/cudf/pull/14642)) [@vuule](https://github.com/vuule)
-- Use instance over is_foo_dtype ([#14641](https://github.com/rapidsai/cudf/pull/14641)) [@mroeschke](https://github.com/mroeschke)
-- Use isinstance over is_foo_dtype internally ([#14638](https://github.com/rapidsai/cudf/pull/14638)) [@mroeschke](https://github.com/mroeschke)
-- Remove unnecessary **kwargs in function signatures ([#14635](https://github.com/rapidsai/cudf/pull/14635)) [@mroeschke](https://github.com/mroeschke)
-- Drop nvbench patch for nvml. ([#14631](https://github.com/rapidsai/cudf/pull/14631)) [@bdice](https://github.com/bdice)
-- Drop Pascal GPU support. ([#14630](https://github.com/rapidsai/cudf/pull/14630)) [@bdice](https://github.com/bdice)
-- Add cpp/doxygen/xml to .gitignore ([#14613](https://github.com/rapidsai/cudf/pull/14613)) [@davidwendt](https://github.com/davidwendt)
-- Create strings-specific make_offsets_child_column for multiple offset types ([#14612](https://github.com/rapidsai/cudf/pull/14612)) [@davidwendt](https://github.com/davidwendt)
-- Use the offsetalator in cudf::concatenate for strings ([#14611](https://github.com/rapidsai/cudf/pull/14611)) [@davidwendt](https://github.com/davidwendt)
-- Make Parquet ColumnIndex null_counts optional ([#14596](https://github.com/rapidsai/cudf/pull/14596)) [@etseidl](https://github.com/etseidl)
-- Support `freq` in DatetimeIndex ([#14593](https://github.com/rapidsai/cudf/pull/14593)) [@shwina](https://github.com/shwina)
-- Remove legacy benchmarks for cuDF-python ([#14591](https://github.com/rapidsai/cudf/pull/14591)) [@osidekyle](https://github.com/osidekyle)
-- Remove WORKSPACE env var from cudf_test temp_directory class ([#14588](https://github.com/rapidsai/cudf/pull/14588)) [@davidwendt](https://github.com/davidwendt)
-- Use exceptions instead of return values to handle errors in `CompactProtocolReader` ([#14582](https://github.com/rapidsai/cudf/pull/14582)) [@vuule](https://github.com/vuule)
-- Use cuda::proclaim_return_type on device lambdas. ([#14577](https://github.com/rapidsai/cudf/pull/14577)) [@bdice](https://github.com/bdice)
-- Update to CCCL 2.2.0. ([#14576](https://github.com/rapidsai/cudf/pull/14576)) [@bdice](https://github.com/bdice)
-- Update dependencies.yaml to new pip index ([#14575](https://github.com/rapidsai/cudf/pull/14575)) [@vyasr](https://github.com/vyasr)
-- Simplify Python CMake ([#14565](https://github.com/rapidsai/cudf/pull/14565)) [@vyasr](https://github.com/vyasr)
-- Java expose parquet pass_read_limit ([#14564](https://github.com/rapidsai/cudf/pull/14564)) [@revans2](https://github.com/revans2)
-- Add column sanitization checks in `CUDF_TEST_EXPECT_COLUMN_*` macros ([#14559](https://github.com/rapidsai/cudf/pull/14559)) [@SurajAralihalli](https://github.com/SurajAralihalli)
-- Use cudf_test temp_directory class for nvtext::subword_tokenize gbenchmark ([#14558](https://github.com/rapidsai/cudf/pull/14558)) [@davidwendt](https://github.com/davidwendt)
-- Fix return type of prefix increment overloads ([#14544](https://github.com/rapidsai/cudf/pull/14544)) [@vuule](https://github.com/vuule)
-- Make bpe_merge_pairs_impl member private ([#14543](https://github.com/rapidsai/cudf/pull/14543)) [@davidwendt](https://github.com/davidwendt)
-- Small clean up in `io::statistics` ([#14542](https://github.com/rapidsai/cudf/pull/14542)) [@vuule](https://github.com/vuule)
-- Change json gtest environment variable to compile-time definition ([#14541](https://github.com/rapidsai/cudf/pull/14541)) [@davidwendt](https://github.com/davidwendt)
-- Remove extra total chars size calculation from cudf::concatenate ([#14540](https://github.com/rapidsai/cudf/pull/14540)) [@davidwendt](https://github.com/davidwendt)
-- Refactor IndexedFrame.hash_values to use cudf::hashing functions, add xxhash64 to cudf Python. ([#14538](https://github.com/rapidsai/cudf/pull/14538)) [@bdice](https://github.com/bdice) (see the sketch after this section)
-- Move non-templated inline function definitions from table_view.hpp to table_view.cpp ([#14535](https://github.com/rapidsai/cudf/pull/14535)) [@davidwendt](https://github.com/davidwendt)
-- Add JNI for strings::code_points ([#14533](https://github.com/rapidsai/cudf/pull/14533)) [@thirtiseven](https://github.com/thirtiseven)
-- Add a test for issue 12773 ([#14529](https://github.com/rapidsai/cudf/pull/14529)) [@vyasr](https://github.com/vyasr)
-- Split libarrow build dependencies. ([#14506](https://github.com/rapidsai/cudf/pull/14506)) [@bdice](https://github.com/bdice)
-- Implement `IndexedFrame.duplicated` with `distinct_indices` + `scatter` ([#14493](https://github.com/rapidsai/cudf/pull/14493)) [@wence-](https://github.com/wence-)
-- Expunge as_frame conversions in Column algorithms ([#14491](https://github.com/rapidsai/cudf/pull/14491)) [@wence-](https://github.com/wence-)
-- Remove unsanitized null from input strings column in rank_tests.cpp ([#14475](https://github.com/rapidsai/cudf/pull/14475)) [@davidwendt](https://github.com/davidwendt)
-- Refactor Parquet kernel_error ([#14464](https://github.com/rapidsai/cudf/pull/14464)) [@etseidl](https://github.com/etseidl)
-- Deprecate cudf::make_strings_column accepting typed offsets ([#14461](https://github.com/rapidsai/cudf/pull/14461)) [@davidwendt](https://github.com/davidwendt)
-- Remove deprecated nvtext::load_merge_pairs_file ([#14460](https://github.com/rapidsai/cudf/pull/14460)) [@davidwendt](https://github.com/davidwendt)
-- Introduce Comprehensive Pathological Unit Tests for Issue #14409 ([#14459](https://github.com/rapidsai/cudf/pull/14459)) [@aocsa](https://github.com/aocsa)
-- Expose stream parameter in public nvtext APIs ([#14456](https://github.com/rapidsai/cudf/pull/14456)) [@davidwendt](https://github.com/davidwendt)
-- Include encode type in the error message when unsupported Parquet encoding is detected ([#14453](https://github.com/rapidsai/cudf/pull/14453)) [@ZelboK](https://github.com/ZelboK)
-- Remove null mask for zero nulls in json readers ([#14451](https://github.com/rapidsai/cudf/pull/14451)) [@karthikeyann](https://github.com/karthikeyann)
-- Refactor cudf.Series.__init__ ([#14450](https://github.com/rapidsai/cudf/pull/14450)) [@mroeschke](https://github.com/mroeschke)
-- Remove the use of `volatile` in Parquet ([#14448](https://github.com/rapidsai/cudf/pull/14448)) [@vuule](https://github.com/vuule)
-- REF: Remove **kwargs from to_pandas, raise if nullable is not implemented ([#14438](https://github.com/rapidsai/cudf/pull/14438)) [@mroeschke](https://github.com/mroeschke)
-- Testing stream pool implementation ([#14437](https://github.com/rapidsai/cudf/pull/14437)) [@shrshi](https://github.com/shrshi)
-- Match pandas join ordering obligations in pandas-compatible mode ([#14428](https://github.com/rapidsai/cudf/pull/14428)) [@wence-](https://github.com/wence-)
-- Forward-merge branch-23.12 to branch-24.02 ([#14426](https://github.com/rapidsai/cudf/pull/14426)) [@bdice](https://github.com/bdice)
-- Use isinstance(..., cudf.IntervalDtype) instead of is_interval_dtype ([#14424](https://github.com/rapidsai/cudf/pull/14424)) [@mroeschke](https://github.com/mroeschke)
-- Use isinstance(..., cudf.CategoricalDtype) instead of is_categorical_dtype ([#14423](https://github.com/rapidsai/cudf/pull/14423)) [@mroeschke](https://github.com/mroeschke)
-- Forward-merge branch-23.12 to branch-24.02 ([#14422](https://github.com/rapidsai/cudf/pull/14422)) [@bdice](https://github.com/bdice)
-- REF: Remove instances of pd.core ([#14421](https://github.com/rapidsai/cudf/pull/14421)) [@mroeschke](https://github.com/mroeschke)
-- Expose streams in public filling APIs for label_bins ([#14401](https://github.com/rapidsai/cudf/pull/14401)) [@ZelboK](https://github.com/ZelboK)
-- Consolidate 1D pandas object handling in as_column ([#14394](https://github.com/rapidsai/cudf/pull/14394)) [@mroeschke](https://github.com/mroeschke)
-- Limit DELTA_BINARY_PACKED encoder to the same number of bits as the physical type being encoded ([#14392](https://github.com/rapidsai/cudf/pull/14392)) [@etseidl](https://github.com/etseidl)
-- Add SHA-1 and SHA-2 hash functions. ([#14391](https://github.com/rapidsai/cudf/pull/14391)) [@bdice](https://github.com/bdice)
-- Expose streams in Parquet reader and writer APIs ([#14359](https://github.com/rapidsai/cudf/pull/14359)) [@shrshi](https://github.com/shrshi)
-- Update to fmt 10.1.1 and spdlog 1.12.0. ([#14355](https://github.com/rapidsai/cudf/pull/14355)) [@bdice](https://github.com/bdice)
-- Replace default stream for scalars and column factories usages (because of defaulted arguments) ([#14354](https://github.com/rapidsai/cudf/pull/14354)) [@karthikeyann](https://github.com/karthikeyann)
-- Expose streams in ORC reader and writer APIs ([#14350](https://github.com/rapidsai/cudf/pull/14350)) [@shrshi](https://github.com/shrshi)
-- Convert compression and io to string axis type in IO benchmarks ([#14347](https://github.com/rapidsai/cudf/pull/14347)) [@SurajAralihalli](https://github.com/SurajAralihalli)
-- Add cuDF devcontainers ([#14015](https://github.com/rapidsai/cudf/pull/14015)) [@trxcllnt](https://github.com/trxcllnt)
-- Refactoring of Buffers (last step towards unifying COW and Spilling) ([#13801](https://github.com/rapidsai/cudf/pull/13801)) [@madsbk](https://github.com/madsbk)
-- Switch to scikit-build-core ([#13531](https://github.com/rapidsai/cudf/pull/13531)) [@vyasr](https://github.com/vyasr)
-- Simplify null count checking in column equality comparator ([#13312](https://github.com/rapidsai/cudf/pull/13312)) [@vyasr](https://github.com/vyasr)
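Several Improvements above add pandas-style Python APIs. A minimal sketch exercising three of them (#14749, #14740, #14538), assuming a 24.02-or-later build; the `method="xxhash64"` string is inferred from the entry title rather than quoted from the PR:

```python
# Sketch only: pandas-compatible additions from the 24.02 Improvements list.
import cudf

# #14749: to_datetime now accepts utc=True and returns UTC-localized values.
stamps = cudf.Series(["2024-01-01 00:00:00", "2024-06-01 12:30:00"])
utc_stamps = cudf.to_datetime(stamps, utc=True)

# #14740: MultiIndex.from_arrays mirrors the pandas constructor.
midx = cudf.MultiIndex.from_arrays(
    [[1, 1, 2], ["a", "b", "a"]], names=["num", "char"]
)

# #14538: hash_values gains an xxhash64 method alongside murmur3.
df = cudf.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
row_hashes = df.hash_values(method="xxhash64")
```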
-
-# cuDF 23.12.00 (6 Dec 2023)
-
-## 🚨 Breaking Changes
-
-- Raise error in `reindex` when `index` is not unique ([#14400](https://github.com/rapidsai/cudf/pull/14400)) [@galipremsagar](https://github.com/galipremsagar) (see the sketch after this list)
-- Expose stream parameter to get_json_object API ([#14297](https://github.com/rapidsai/cudf/pull/14297)) [@davidwendt](https://github.com/davidwendt)
-- Refactor cudf_kafka to use skbuild ([#14292](https://github.com/rapidsai/cudf/pull/14292)) [@jdye64](https://github.com/jdye64)
-- Expose stream parameter in public strings convert APIs ([#14255](https://github.com/rapidsai/cudf/pull/14255)) [@davidwendt](https://github.com/davidwendt)
-- Upgrade to nvCOMP 3.0.4 ([#13815](https://github.com/rapidsai/cudf/pull/13815)) [@vuule](https://github.com/vuule)
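A minimal sketch of the first breaking change above (#14400); the exact exception type is assumed to follow pandas (`ValueError`) rather than being quoted from the PR:

```python
# Sketch only: reindex on a non-unique index now raises (#14400).
import cudf

df = cudf.DataFrame({"a": [1, 2, 3]}, index=[0, 0, 1])
try:
    df.reindex(index=[0, 1])  # existing index labels are not unique
except ValueError as err:
    print("reindex rejected non-unique index:", err)
```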
-
-## 🐛 Bug Fixes
-
-- Update actions/labeler to v4 ([#14562](https://github.com/rapidsai/cudf/pull/14562)) [@raydouglass](https://github.com/raydouglass)
-- Fix data corruption when skipping rows ([#14557](https://github.com/rapidsai/cudf/pull/14557)) [@etseidl](https://github.com/etseidl)
-- Fix function name typo in `cudf.pandas` profiler ([#14514](https://github.com/rapidsai/cudf/pull/14514)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix intermediate type checking in expression parsing ([#14445](https://github.com/rapidsai/cudf/pull/14445)) [@vyasr](https://github.com/vyasr)
-- Forward merge `branch-23.10` into `branch-23.12` ([#14435](https://github.com/rapidsai/cudf/pull/14435)) [@raydouglass](https://github.com/raydouglass)
-- Remove needs: wheel-build-cudf. ([#14427](https://github.com/rapidsai/cudf/pull/14427)) [@bdice](https://github.com/bdice)
-- Fix dask dependency in custreamz ([#14420](https://github.com/rapidsai/cudf/pull/14420)) [@vyasr](https://github.com/vyasr)
-- Ensure nvbench initializes nvml context when built statically ([#14411](https://github.com/rapidsai/cudf/pull/14411)) [@robertmaynard](https://github.com/robertmaynard)
-- Support java AST String literal with desired encoding ([#14402](https://github.com/rapidsai/cudf/pull/14402)) [@winningsix](https://github.com/winningsix)
-- Raise error in `reindex` when `index` is not unique ([#14400](https://github.com/rapidsai/cudf/pull/14400)) [@galipremsagar](https://github.com/galipremsagar)
-- Always build nvbench statically so we don't need to package it ([#14399](https://github.com/rapidsai/cudf/pull/14399)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix token-count logic in nvtext::tokenize_with_vocabulary ([#14393](https://github.com/rapidsai/cudf/pull/14393)) [@davidwendt](https://github.com/davidwendt)
-- Fix as_column(pd.Timestamp/Timedelta, length=) not respecting length ([#14390](https://github.com/rapidsai/cudf/pull/14390)) [@mroeschke](https://github.com/mroeschke)
-- cudf.pandas: cuDF subpath checking in module `__getattr__` ([#14388](https://github.com/rapidsai/cudf/pull/14388)) [@shwina](https://github.com/shwina)
-- Fix and disable encoding for nanosecond statistics in ORC writer ([#14367](https://github.com/rapidsai/cudf/pull/14367)) [@vuule](https://github.com/vuule)
-- Add the new manylinux builds to the build job ([#14351](https://github.com/rapidsai/cudf/pull/14351)) [@vyasr](https://github.com/vyasr)
-- cudf jit parser now supports .pragma instructions with quotes ([#14348](https://github.com/rapidsai/cudf/pull/14348)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix overflow check in `cudf::merge` ([#14345](https://github.com/rapidsai/cudf/pull/14345)) [@divyegala](https://github.com/divyegala)
-- Add cramjam ([#14344](https://github.com/rapidsai/cudf/pull/14344)) [@vyasr](https://github.com/vyasr)
-- Enable `dask_cudf/io` pytests in CI ([#14338](https://github.com/rapidsai/cudf/pull/14338)) [@galipremsagar](https://github.com/galipremsagar)
-- Temporarily avoid the current build of pydata-sphinx-theme ([#14332](https://github.com/rapidsai/cudf/pull/14332)) [@vyasr](https://github.com/vyasr)
-- Fix host buffer access from device function in the Parquet reader ([#14328](https://github.com/rapidsai/cudf/pull/14328)) [@vuule](https://github.com/vuule)
-- Run IO tests for Dask-cuDF ([#14327](https://github.com/rapidsai/cudf/pull/14327)) [@rjzamora](https://github.com/rjzamora)
-- Fix logical type issues in the Parquet writer ([#14322](https://github.com/rapidsai/cudf/pull/14322)) [@vuule](https://github.com/vuule)
-- Remove aws-sdk-pinning and revert to arrow 12.0.1 ([#14319](https://github.com/rapidsai/cudf/pull/14319)) [@vyasr](https://github.com/vyasr)
-- test is_valid before reading column data ([#14318](https://github.com/rapidsai/cudf/pull/14318)) [@etseidl](https://github.com/etseidl)
-- Fix gtest validity setting for TextTokenizeTest.Vocabulary ([#14312](https://github.com/rapidsai/cudf/pull/14312)) [@davidwendt](https://github.com/davidwendt)
-- Fixes stack context for json lines format that recovers from invalid JSON lines ([#14309](https://github.com/rapidsai/cudf/pull/14309)) [@elstehle](https://github.com/elstehle)
-- Downgrade to Arrow 12.0.0 for aws-sdk-cpp and fix cudf_kafka builds for new CI containers ([#14296](https://github.com/rapidsai/cudf/pull/14296)) [@vyasr](https://github.com/vyasr)
-- fixing thread index overflow issue ([#14290](https://github.com/rapidsai/cudf/pull/14290)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Fix memset error in nvtext::edit_distance_matrix ([#14283](https://github.com/rapidsai/cudf/pull/14283)) [@davidwendt](https://github.com/davidwendt)
-- Changes JSON reader's recovery option's behaviour to ignore all characters after a valid JSON record ([#14279](https://github.com/rapidsai/cudf/pull/14279)) [@elstehle](https://github.com/elstehle)
-- Handle empty string correctly in Parquet statistics ([#14257](https://github.com/rapidsai/cudf/pull/14257)) [@etseidl](https://github.com/etseidl)
-- Fixes behaviour for incomplete lines when `recover_with_nulls` is enabled ([#14252](https://github.com/rapidsai/cudf/pull/14252)) [@elstehle](https://github.com/elstehle)
-- cudf::detail::pinned_allocator doesn't throw from `deallocate` ([#14251](https://github.com/rapidsai/cudf/pull/14251)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix strings replace for adjacent, identical multi-byte UTF-8 character targets ([#14235](https://github.com/rapidsai/cudf/pull/14235)) [@davidwendt](https://github.com/davidwendt)
-- Fix the precision when converting a decimal128 column to an arrow array ([#14230](https://github.com/rapidsai/cudf/pull/14230)) [@jihoonson](https://github.com/jihoonson)
-- Fixing parquet list of struct interpretation ([#13715](https://github.com/rapidsai/cudf/pull/13715)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-
-## 📖 Documentation
-
-- Fix io reference in docs. ([#14452](https://github.com/rapidsai/cudf/pull/14452)) [@bdice](https://github.com/bdice)
-- Update README ([#14374](https://github.com/rapidsai/cudf/pull/14374)) [@shwina](https://github.com/shwina)
-- Example code for blog on new row comparators ([#13795](https://github.com/rapidsai/cudf/pull/13795)) [@divyegala](https://github.com/divyegala)
-
-## 🚀 New Features
-
-- Expose streams in public unary APIs ([#14342](https://github.com/rapidsai/cudf/pull/14342)) [@vyasr](https://github.com/vyasr)
-- Add python tests for Parquet DELTA_BINARY_PACKED encoder ([#14316](https://github.com/rapidsai/cudf/pull/14316)) [@etseidl](https://github.com/etseidl)
-- Update rapids-cmake functions to non-deprecated signatures ([#14265](https://github.com/rapidsai/cudf/pull/14265)) [@robertmaynard](https://github.com/robertmaynard)
-- Expose streams in public null mask APIs ([#14263](https://github.com/rapidsai/cudf/pull/14263)) [@vyasr](https://github.com/vyasr)
-- Expose streams in binaryop APIs ([#14187](https://github.com/rapidsai/cudf/pull/14187)) [@vyasr](https://github.com/vyasr)
-- Add pylibcudf.Scalar that interoperates with Arrow scalars ([#14133](https://github.com/rapidsai/cudf/pull/14133)) [@vyasr](https://github.com/vyasr)
-- Add decoder for DELTA_BYTE_ARRAY to Parquet reader ([#14101](https://github.com/rapidsai/cudf/pull/14101)) [@etseidl](https://github.com/etseidl)
-- Add DELTA_BINARY_PACKED encoder for Parquet writer ([#14100](https://github.com/rapidsai/cudf/pull/14100)) [@etseidl](https://github.com/etseidl)
-- Add BytePairEncoder class to cuDF ([#13891](https://github.com/rapidsai/cudf/pull/13891)) [@davidwendt](https://github.com/davidwendt)
-- Upgrade to nvCOMP 3.0.4 ([#13815](https://github.com/rapidsai/cudf/pull/13815)) [@vuule](https://github.com/vuule)
-- Use `pynvjitlink` for CUDA 12+ MVC ([#13650](https://github.com/rapidsai/cudf/pull/13650)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-
-## 🛠️ Improvements
-
-- Build concurrency for nightly and merge triggers ([#14441](https://github.com/rapidsai/cudf/pull/14441)) [@bdice](https://github.com/bdice)
-- Cleanup remaining usages of dask dependencies ([#14407](https://github.com/rapidsai/cudf/pull/14407)) [@galipremsagar](https://github.com/galipremsagar)
-- Update to Arrow 14.0.1. ([#14387](https://github.com/rapidsai/cudf/pull/14387)) [@bdice](https://github.com/bdice)
-- Remove Cython libcpp wrappers ([#14382](https://github.com/rapidsai/cudf/pull/14382)) [@vyasr](https://github.com/vyasr)
-- Forward-merge branch-23.10 to branch-23.12 ([#14372](https://github.com/rapidsai/cudf/pull/14372)) [@bdice](https://github.com/bdice)
-- Upgrade to arrow 14 ([#14371](https://github.com/rapidsai/cudf/pull/14371)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix a pytest typo in `test_kurt_skew_error` ([#14368](https://github.com/rapidsai/cudf/pull/14368)) [@galipremsagar](https://github.com/galipremsagar)
-- Use new rapids-dask-dependency metapackage for managing dask versions ([#14364](https://github.com/rapidsai/cudf/pull/14364)) [@vyasr](https://github.com/vyasr)
-- Change `nullable()` to `has_nulls()` in `cudf::detail::gather` ([#14363](https://github.com/rapidsai/cudf/pull/14363)) [@divyegala](https://github.com/divyegala)
-- Split up scan_inclusive.cu to improve its compile time ([#14358](https://github.com/rapidsai/cudf/pull/14358)) [@davidwendt](https://github.com/davidwendt)
-- Implement user_datasource_wrapper is_empty() and is_device_read_preferred(). ([#14357](https://github.com/rapidsai/cudf/pull/14357)) [@tpn](https://github.com/tpn)
-- Added streams to CSV reader and writer api ([#14340](https://github.com/rapidsai/cudf/pull/14340)) [@shrshi](https://github.com/shrshi)
-- Upgrade wheels to use arrow 13 ([#14339](https://github.com/rapidsai/cudf/pull/14339)) [@vyasr](https://github.com/vyasr)
-- Rework nvtext::byte_pair_encoding API ([#14337](https://github.com/rapidsai/cudf/pull/14337)) [@davidwendt](https://github.com/davidwendt)
-- Improve performance of nvtext::tokenize_with_vocabulary for long strings ([#14336](https://github.com/rapidsai/cudf/pull/14336)) [@davidwendt](https://github.com/davidwendt)
-- Upgrade `arrow` to `13` ([#14330](https://github.com/rapidsai/cudf/pull/14330)) [@galipremsagar](https://github.com/galipremsagar)
-- Expose stream parameter in public nvtext replace APIs ([#14329](https://github.com/rapidsai/cudf/pull/14329)) [@davidwendt](https://github.com/davidwendt)
-- Drop `pyorc` dependency and use `pandas`/`pyarrow` instead ([#14323](https://github.com/rapidsai/cudf/pull/14323)) [@galipremsagar](https://github.com/galipremsagar)
-- Avoid `pyarrow.fs` import for local storage ([#14321](https://github.com/rapidsai/cudf/pull/14321)) [@rjzamora](https://github.com/rjzamora)
-- Unpin `dask` and `distributed` for `23.12` development ([#14320](https://github.com/rapidsai/cudf/pull/14320)) [@galipremsagar](https://github.com/galipremsagar)
-- Expose stream parameter in public nvtext tokenize APIs ([#14317](https://github.com/rapidsai/cudf/pull/14317)) [@davidwendt](https://github.com/davidwendt)
-- Added streams to JSON reader and writer api ([#14313](https://github.com/rapidsai/cudf/pull/14313)) [@shrshi](https://github.com/shrshi)
-- Minor improvements in `source_info` ([#14308](https://github.com/rapidsai/cudf/pull/14308)) [@vuule](https://github.com/vuule)
-- Forward-merge branch-23.10 to branch-23.12 ([#14307](https://github.com/rapidsai/cudf/pull/14307)) [@bdice](https://github.com/bdice)
-- Add stream parameter to Set Operations (Public List APIs) ([#14305](https://github.com/rapidsai/cudf/pull/14305)) [@SurajAralihalli](https://github.com/SurajAralihalli)
-- Expose stream parameter to get_json_object API ([#14297](https://github.com/rapidsai/cudf/pull/14297)) [@davidwendt](https://github.com/davidwendt)
-- Sort dictionary data alphabetically in the ORC writer ([#14295](https://github.com/rapidsai/cudf/pull/14295)) [@vuule](https://github.com/vuule)
-- Expose stream parameter in public strings filter APIs ([#14293](https://github.com/rapidsai/cudf/pull/14293)) [@davidwendt](https://github.com/davidwendt)
-- Refactor cudf_kafka to use skbuild ([#14292](https://github.com/rapidsai/cudf/pull/14292)) [@jdye64](https://github.com/jdye64)
-- Update `shared-action-workflows` references ([#14289](https://github.com/rapidsai/cudf/pull/14289)) [@AyodeAwe](https://github.com/AyodeAwe)
-- Register ``partd`` encode dispatch in ``dask_cudf`` ([#14287](https://github.com/rapidsai/cudf/pull/14287)) [@rjzamora](https://github.com/rjzamora)
-- Update versioning strategy ([#14285](https://github.com/rapidsai/cudf/pull/14285)) [@vyasr](https://github.com/vyasr)
-- Move and rename byte-pair-encoding source files ([#14284](https://github.com/rapidsai/cudf/pull/14284)) [@davidwendt](https://github.com/davidwendt)
-- Expose stream parameter in public strings combine APIs ([#14281](https://github.com/rapidsai/cudf/pull/14281)) [@davidwendt](https://github.com/davidwendt)
-- Expose stream parameter in public strings contains APIs ([#14280](https://github.com/rapidsai/cudf/pull/14280)) [@davidwendt](https://github.com/davidwendt)
-- Add stream parameter to List Sort and Filter APIs ([#14272](https://github.com/rapidsai/cudf/pull/14272)) [@SurajAralihalli](https://github.com/SurajAralihalli)
-- Use branch-23.12 workflows. ([#14271](https://github.com/rapidsai/cudf/pull/14271)) [@bdice](https://github.com/bdice)
-- Refactor LogicalType for Parquet ([#14264](https://github.com/rapidsai/cudf/pull/14264)) [@etseidl](https://github.com/etseidl)
-- Centralize chunked reading code in the parquet reader to reader_impl_chunking.cu ([#14262](https://github.com/rapidsai/cudf/pull/14262)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Expose stream parameter in public strings replace APIs ([#14261](https://github.com/rapidsai/cudf/pull/14261)) [@davidwendt](https://github.com/davidwendt)
-- Expose stream parameter in public strings APIs ([#14260](https://github.com/rapidsai/cudf/pull/14260)) [@davidwendt](https://github.com/davidwendt)
-- Cleanup of namespaces in parquet code. ([#14259](https://github.com/rapidsai/cudf/pull/14259)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Make parquet schema index type consistent ([#14256](https://github.com/rapidsai/cudf/pull/14256)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Expose stream parameter in public strings convert APIs ([#14255](https://github.com/rapidsai/cudf/pull/14255)) [@davidwendt](https://github.com/davidwendt)
-- Add in java bindings for DataSource ([#14254](https://github.com/rapidsai/cudf/pull/14254)) [@revans2](https://github.com/revans2)
-- Reimplement `cudf::merge` for nested types without using comparators ([#14250](https://github.com/rapidsai/cudf/pull/14250)) [@divyegala](https://github.com/divyegala)
-- Add stream parameter to List Manipulation and Operations APIs ([#14248](https://github.com/rapidsai/cudf/pull/14248)) [@SurajAralihalli](https://github.com/SurajAralihalli)
-- Expose stream parameter in public strings split/partition APIs ([#14247](https://github.com/rapidsai/cudf/pull/14247)) [@davidwendt](https://github.com/davidwendt)
-- Improve `contains_column` by invoking `contains_table` ([#14238](https://github.com/rapidsai/cudf/pull/14238)) [@PointKernel](https://github.com/PointKernel)
-- Detect and report errors in Parquet header parsing ([#14237](https://github.com/rapidsai/cudf/pull/14237)) [@etseidl](https://github.com/etseidl)
-- Normalizing offsets iterator ([#14234](https://github.com/rapidsai/cudf/pull/14234)) [@davidwendt](https://github.com/davidwendt)
-- Forward merge `23.10` into `23.12` ([#14231](https://github.com/rapidsai/cudf/pull/14231)) [@galipremsagar](https://github.com/galipremsagar)
-- Return error if BOOL8 column-type is used with integers-to-hex ([#14208](https://github.com/rapidsai/cudf/pull/14208)) [@davidwendt](https://github.com/davidwendt)
-- Enable indexalator for device code ([#14206](https://github.com/rapidsai/cudf/pull/14206)) [@davidwendt](https://github.com/davidwendt)
-- Marginally reduce memory footprint of joins ([#14197](https://github.com/rapidsai/cudf/pull/14197)) [@wence-](https://github.com/wence-)
-- Add nvtx annotations to spilling-based data movement ([#14196](https://github.com/rapidsai/cudf/pull/14196)) [@wence-](https://github.com/wence-)
-- Optimize ORC writer for decimal columns ([#14190](https://github.com/rapidsai/cudf/pull/14190)) [@vuule](https://github.com/vuule)
-- Remove the use of volatile in ORC ([#14175](https://github.com/rapidsai/cudf/pull/14175)) [@vuule](https://github.com/vuule)
-- Add `bytes_per_second` to distinct_count of stream_compaction nvbench. ([#14172](https://github.com/rapidsai/cudf/pull/14172)) [@Blonck](https://github.com/Blonck)
-- Add `bytes_per_second` to transpose benchmark ([#14170](https://github.com/rapidsai/cudf/pull/14170)) [@Blonck](https://github.com/Blonck)
-- cuDF: Build CUDA 12.0 ARM conda packages. ([#14112](https://github.com/rapidsai/cudf/pull/14112)) [@bdice](https://github.com/bdice)
-- Add `bytes_per_second` to shift benchmark ([#13950](https://github.com/rapidsai/cudf/pull/13950)) [@Blonck](https://github.com/Blonck)
-- Extract `debug_utilities.hpp/cu` from `column_utilities.hpp/cu` ([#13720](https://github.com/rapidsai/cudf/pull/13720)) [@ttnghia](https://github.com/ttnghia)
-
-# cuDF 23.10.00 (11 Oct 2023)
-
-## 🚨 Breaking Changes
-
-- Expose stream parameter in public nvtext ngram APIs ([#14061](https://github.com/rapidsai/cudf/pull/14061)) [@davidwendt](https://github.com/davidwendt)
-- Raise `MixedTypeError` when a column of mixed-dtype is being constructed ([#14050](https://github.com/rapidsai/cudf/pull/14050)) [@galipremsagar](https://github.com/galipremsagar) (see the sketch after this list)
-- Raise `NotImplementedError` for `MultiIndex.to_series` ([#14049](https://github.com/rapidsai/cudf/pull/14049)) [@galipremsagar](https://github.com/galipremsagar)
-- Create table_input_metadata from a table_metadata ([#13920](https://github.com/rapidsai/cudf/pull/13920)) [@etseidl](https://github.com/etseidl)
-- Enable RLE boolean encoding for v2 Parquet files ([#13886](https://github.com/rapidsai/cudf/pull/13886)) [@etseidl](https://github.com/etseidl)
-- Change `NA` to `NaT` for `datetime` and `timedelta` types ([#13868](https://github.com/rapidsai/cudf/pull/13868)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix `any`, `all` reduction behavior for `axis=None` and warn for other reductions ([#13831](https://github.com/rapidsai/cudf/pull/13831)) [@galipremsagar](https://github.com/galipremsagar)
-- Add minhash support for MurmurHash3_x64_128 ([#13796](https://github.com/rapidsai/cudf/pull/13796)) [@davidwendt](https://github.com/davidwendt)
-- Remove the libcudf cudf::offset_type type ([#13788](https://github.com/rapidsai/cudf/pull/13788)) [@davidwendt](https://github.com/davidwendt)
-- Raise error when trying to join `datetime` and `timedelta` types with other types ([#13786](https://github.com/rapidsai/cudf/pull/13786)) [@galipremsagar](https://github.com/galipremsagar)
-- Update to Cython 3.0.0 ([#13777](https://github.com/rapidsai/cudf/pull/13777)) [@vyasr](https://github.com/vyasr)
-- Raise error on constructing an array from mixed type inputs ([#13768](https://github.com/rapidsai/cudf/pull/13768)) [@galipremsagar](https://github.com/galipremsagar)
-- Enforce deprecations in `23.10` ([#13732](https://github.com/rapidsai/cudf/pull/13732)) [@galipremsagar](https://github.com/galipremsagar)
-- Upgrade to arrow 12 ([#13728](https://github.com/rapidsai/cudf/pull/13728)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove Arrow dependency from the `datasource.hpp` public header ([#13698](https://github.com/rapidsai/cudf/pull/13698)) [@vuule](https://github.com/vuule)
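A minimal sketch of the mixed-dtype breaking change above (#14050, with #13768 as its array-construction counterpart); the `cudf.errors.MixedTypeError` import path is an assumption based on the entry title, not quoted from the PR:

```python
# Sketch only: mixed-dtype column construction now raises (#14050/#13768).
import cudf
from cudf.errors import MixedTypeError  # assumed location of the new error

try:
    cudf.Series([1, "a", 3.0])  # int, str, and float cannot share a column
except MixedTypeError as err:
    print("mixed-type construction rejected:", err)
```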
-
-## 🐛 Bug Fixes
-
-- Fix inaccurate ceil/floor and inaccurate rescaling casts of fixed-point values. ([#14242](https://github.com/rapidsai/cudf/pull/14242)) [@bdice](https://github.com/bdice)
-- Fix inaccuracy in decimal128 rounding. ([#14233](https://github.com/rapidsai/cudf/pull/14233)) [@bdice](https://github.com/bdice)
-- Workaround for illegal instruction error in sm90 for warp intrinsics with mask ([#14201](https://github.com/rapidsai/cudf/pull/14201)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix pytorch related pytest ([#14198](https://github.com/rapidsai/cudf/pull/14198)) [@galipremsagar](https://github.com/galipremsagar)
-- Pin to `aws-sdk-cpp<1.11` ([#14173](https://github.com/rapidsai/cudf/pull/14173)) [@pentschev](https://github.com/pentschev)
-- Fix assert failure for range window functions ([#14168](https://github.com/rapidsai/cudf/pull/14168)) [@mythrocks](https://github.com/mythrocks)
-- Fix Memcheck error found in JSON_TEST JsonReaderTest.ErrorStrings ([#14164](https://github.com/rapidsai/cudf/pull/14164)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix calls to copy_bitmask to pass stream parameter ([#14158](https://github.com/rapidsai/cudf/pull/14158)) [@davidwendt](https://github.com/davidwendt)
-- Fix DataFrame from Series with different CategoricalIndexes ([#14157](https://github.com/rapidsai/cudf/pull/14157)) [@mroeschke](https://github.com/mroeschke)
-- Pin to numpy<1.25 and numba<0.58 to avoid errors and deprecation warnings-as-errors. ([#14156](https://github.com/rapidsai/cudf/pull/14156)) [@bdice](https://github.com/bdice)
-- Fix kernel launch error for cudf::io::orc::gpu::rowgroup_char_counts_kernel ([#14139](https://github.com/rapidsai/cudf/pull/14139)) [@davidwendt](https://github.com/davidwendt)
-- Don't sort columns for DataFrame init from list of Series ([#14136](https://github.com/rapidsai/cudf/pull/14136)) [@mroeschke](https://github.com/mroeschke)
-- Fix DataFrame.values with no columns but index ([#14134](https://github.com/rapidsai/cudf/pull/14134)) [@mroeschke](https://github.com/mroeschke)
-- Avoid circular cimports in _lib/cpp/reduce.pxd ([#14125](https://github.com/rapidsai/cudf/pull/14125)) [@vyasr](https://github.com/vyasr)
-- Add support for nested dict in `DataFrame` constructor ([#14119](https://github.com/rapidsai/cudf/pull/14119)) [@galipremsagar](https://github.com/galipremsagar)
-- Restrict iterables of `DataFrame`'s as input to `DataFrame` constructor ([#14118](https://github.com/rapidsai/cudf/pull/14118)) [@galipremsagar](https://github.com/galipremsagar)
-- Allow `numeric_only=True` for reduction operations on numeric types ([#14111](https://github.com/rapidsai/cudf/pull/14111)) [@galipremsagar](https://github.com/galipremsagar)
-- Preserve name of the column while initializing a `DataFrame` ([#14110](https://github.com/rapidsai/cudf/pull/14110)) [@galipremsagar](https://github.com/galipremsagar)
-- Correct numerous 20054-D: dynamic initialization errors found on arm+12.2 ([#14108](https://github.com/rapidsai/cudf/pull/14108)) [@robertmaynard](https://github.com/robertmaynard)
-- Drop `kwargs` from `Series.count` ([#14106](https://github.com/rapidsai/cudf/pull/14106)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix naming issues with `Index.to_frame` and `MultiIndex.to_frame` APIs ([#14105](https://github.com/rapidsai/cudf/pull/14105)) [@galipremsagar](https://github.com/galipremsagar)
-- Only use memory resources that haven't been freed ([#14103](https://github.com/rapidsai/cudf/pull/14103)) [@robertmaynard](https://github.com/robertmaynard)
-- Add support for `__round__` in `Series` and `DataFrame` ([#14099](https://github.com/rapidsai/cudf/pull/14099)) [@galipremsagar](https://github.com/galipremsagar)
-- Validate ignore_index type in drop_duplicates ([#14098](https://github.com/rapidsai/cudf/pull/14098)) [@mroeschke](https://github.com/mroeschke)
-- Fix renaming `Series` and `Index` ([#14080](https://github.com/rapidsai/cudf/pull/14080)) [@galipremsagar](https://github.com/galipremsagar)
-- Raise NotImplementedError in to_datetime if Z (or tz component) in string ([#14074](https://github.com/rapidsai/cudf/pull/14074)) [@mroeschke](https://github.com/mroeschke)
-- Raise NotImplementedError for datetime strings with UTC offset ([#14070](https://github.com/rapidsai/cudf/pull/14070)) [@mroeschke](https://github.com/mroeschke)
-- Update pyarrow-related dispatch logic in dask_cudf ([#14069](https://github.com/rapidsai/cudf/pull/14069)) [@rjzamora](https://github.com/rjzamora)
-- Use `conda mambabuild` rather than `mamba mambabuild` ([#14067](https://github.com/rapidsai/cudf/pull/14067)) [@wence-](https://github.com/wence-)
-- Raise NotImplementedError in to_datetime with dayfirst without infer_format ([#14058](https://github.com/rapidsai/cudf/pull/14058)) [@mroeschke](https://github.com/mroeschke)
-- Fix various issues in `Index.intersection` ([#14054](https://github.com/rapidsai/cudf/pull/14054)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix `Index.difference` to match with pandas ([#14053](https://github.com/rapidsai/cudf/pull/14053)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix empty string column construction ([#14052](https://github.com/rapidsai/cudf/pull/14052)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix `IntervalIndex.union` to preserve type-metadata ([#14051](https://github.com/rapidsai/cudf/pull/14051)) [@galipremsagar](https://github.com/galipremsagar)
-- Raise `MixedTypeError` when a column of mixed-dtype is being constructed ([#14050](https://github.com/rapidsai/cudf/pull/14050)) [@galipremsagar](https://github.com/galipremsagar)
-- Raise `NotImplementedError` for `MultiIndex.to_series` ([#14049](https://github.com/rapidsai/cudf/pull/14049)) [@galipremsagar](https://github.com/galipremsagar)
-- Ignore compile_commands.json ([#14048](https://github.com/rapidsai/cudf/pull/14048)) [@harrism](https://github.com/harrism)
-- Raise TypeError for any non-parseable argument in to_datetime ([#14044](https://github.com/rapidsai/cudf/pull/14044)) [@mroeschke](https://github.com/mroeschke)
-- Raise NotImplementedError for to_datetime with z format ([#14037](https://github.com/rapidsai/cudf/pull/14037)) [@mroeschke](https://github.com/mroeschke)
-- Implement `sort_remaining` for `sort_index` ([#14033](https://github.com/rapidsai/cudf/pull/14033)) [@wence-](https://github.com/wence-)
-- Raise NotImplementedError for Categoricals with timezones ([#14032](https://github.com/rapidsai/cudf/pull/14032)) [@mroeschke](https://github.com/mroeschke)
-- Temporary fix Parquet metadata with empty value string being ignored from writing ([#14026](https://github.com/rapidsai/cudf/pull/14026)) [@ttnghia](https://github.com/ttnghia)
-- Preserve types of scalar being returned when possible in `quantile` ([#14014](https://github.com/rapidsai/cudf/pull/14014)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix return type of `MultiIndex.difference` ([#14009](https://github.com/rapidsai/cudf/pull/14009)) [@galipremsagar](https://github.com/galipremsagar)
-- Raise an error when timezone subtypes are encountered in `pd.IntervalDtype` ([#14006](https://github.com/rapidsai/cudf/pull/14006)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix map column can not be non-nullable for java ([#14003](https://github.com/rapidsai/cudf/pull/14003)) [@res-life](https://github.com/res-life)
-- Fix `name` selection in `Index.difference` and `Index.intersection` ([#13986](https://github.com/rapidsai/cudf/pull/13986)) [@galipremsagar](https://github.com/galipremsagar)
-- Restore column type metadata with `dropna` to fix `factorize` API ([#13980](https://github.com/rapidsai/cudf/pull/13980)) [@galipremsagar](https://github.com/galipremsagar)
-- Use thread_index_type to avoid out of bounds accesses in conditional joins ([#13971](https://github.com/rapidsai/cudf/pull/13971)) [@vyasr](https://github.com/vyasr)
-- Fix `MultiIndex.to_numpy` to return numpy array with tuples ([#13966](https://github.com/rapidsai/cudf/pull/13966)) [@galipremsagar](https://github.com/galipremsagar)
-- Use cudf::thread_index_type in get_json_object and tdigest kernels ([#13962](https://github.com/rapidsai/cudf/pull/13962)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix an issue with `IntervalIndex.repr` when null values are present ([#13958](https://github.com/rapidsai/cudf/pull/13958)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix type metadata issue preservation with `Column.unique` ([#13957](https://github.com/rapidsai/cudf/pull/13957)) [@galipremsagar](https://github.com/galipremsagar)
-- Handle `Interval` scalars when passed in list-like inputs to `cudf.Index` ([#13956](https://github.com/rapidsai/cudf/pull/13956)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix setting of categories order when `dtype` is passed to a `CategoricalColumn` ([#13955](https://github.com/rapidsai/cudf/pull/13955)) [@galipremsagar](https://github.com/galipremsagar)
-- Handle `as_index` in `GroupBy.apply` ([#13951](https://github.com/rapidsai/cudf/pull/13951)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Raise error for string types in `nsmallest` and `nlargest` ([#13946](https://github.com/rapidsai/cudf/pull/13946)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix `index` of `Groupby.apply` results when it is performed on empty objects ([#13944](https://github.com/rapidsai/cudf/pull/13944)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix integer overflow in shim `device_sum` functions ([#13943](https://github.com/rapidsai/cudf/pull/13943)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Fix type mismatch in groupby reduction for empty objects ([#13942](https://github.com/rapidsai/cudf/pull/13942)) [@galipremsagar](https://github.com/galipremsagar)
-- Fixed processed bytes calculation in APPLY_BOOLEAN_MASK benchmark. ([#13937](https://github.com/rapidsai/cudf/pull/13937)) [@Blonck](https://github.com/Blonck)
-- Fix construction of `Grouping` objects ([#13932](https://github.com/rapidsai/cudf/pull/13932)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix an issue with `loc` when column names is `MultiIndex` ([#13929](https://github.com/rapidsai/cudf/pull/13929)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix handling of typecasting in `searchsorted` ([#13925](https://github.com/rapidsai/cudf/pull/13925)) [@galipremsagar](https://github.com/galipremsagar)
-- Preserve index `name` in `reindex` ([#13917](https://github.com/rapidsai/cudf/pull/13917)) [@galipremsagar](https://github.com/galipremsagar)
-- Use `cudf::thread_index_type` in cuIO to prevent overflow in row indexing ([#13910](https://github.com/rapidsai/cudf/pull/13910)) [@vuule](https://github.com/vuule)
-- Fix for encodings listed in the Parquet column chunk metadata ([#13907](https://github.com/rapidsai/cudf/pull/13907)) [@etseidl](https://github.com/etseidl)
-- Use cudf::thread_index_type in concatenate.cu. ([#13906](https://github.com/rapidsai/cudf/pull/13906)) [@bdice](https://github.com/bdice)
-- Use cudf::thread_index_type in replace.cu. ([#13905](https://github.com/rapidsai/cudf/pull/13905)) [@bdice](https://github.com/bdice)
-- Add noSanitizer tag to Java reduction tests failing with sanitizer in CUDA 12 ([#13904](https://github.com/rapidsai/cudf/pull/13904)) [@jlowe](https://github.com/jlowe)
-- Remove the internal use of the cudf's default stream in cuIO ([#13903](https://github.com/rapidsai/cudf/pull/13903)) [@vuule](https://github.com/vuule)
-- Use cuda-nvtx-dev CUDA 12 package.
([#13901](https://github.com/rapidsai/cudf/pull/13901)) [@bdice](https://github.com/bdice) -- Use `thread_index_type` to avoid index overflow in grid-stride loops ([#13895](https://github.com/rapidsai/cudf/pull/13895)) [@PointKernel](https://github.com/PointKernel) -- Fix memory access error in cudf::shift for sliced strings ([#13894](https://github.com/rapidsai/cudf/pull/13894)) [@davidwendt](https://github.com/davidwendt) -- Raise error when trying to construct a `DataFrame` with mixed types ([#13889](https://github.com/rapidsai/cudf/pull/13889)) [@galipremsagar](https://github.com/galipremsagar) -- Return `nan` when one variable to be correlated has zero variance in JIT GroupBy Apply ([#13884](https://github.com/rapidsai/cudf/pull/13884)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Correctly detect the BOM mark in `read_csv` with compressed input ([#13881](https://github.com/rapidsai/cudf/pull/13881)) [@vuule](https://github.com/vuule) -- Check for the presence of all values in `MultiIndex.isin` ([#13879](https://github.com/rapidsai/cudf/pull/13879)) [@galipremsagar](https://github.com/galipremsagar) -- Fix nvtext::generate_character_ngrams performance regression for longer strings ([#13874](https://github.com/rapidsai/cudf/pull/13874)) [@davidwendt](https://github.com/davidwendt) -- Fix return type of `MultiIndex.levels` ([#13870](https://github.com/rapidsai/cudf/pull/13870)) [@galipremsagar](https://github.com/galipremsagar) -- Fix List's missing children metadata in JSON writer ([#13869](https://github.com/rapidsai/cudf/pull/13869)) [@karthikeyann](https://github.com/karthikeyann) -- Disable construction of Index when `freq` is set in pandas-compatibility mode ([#13857](https://github.com/rapidsai/cudf/pull/13857)) [@galipremsagar](https://github.com/galipremsagar) -- Fix an issue with fetching `NA` from a `TimedeltaColumn` ([#13853](https://github.com/rapidsai/cudf/pull/13853)) [@galipremsagar](https://github.com/galipremsagar) -- Simplify implementation of interval_range() and fix behaviour for floating `freq` ([#13844](https://github.com/rapidsai/cudf/pull/13844)) [@shwina](https://github.com/shwina) -- Fix binary operations between `Series` and `Index` ([#13842](https://github.com/rapidsai/cudf/pull/13842)) [@galipremsagar](https://github.com/galipremsagar) -- Update make_lists_column_from_scalar to use make_offsets_child_column utility ([#13841](https://github.com/rapidsai/cudf/pull/13841)) [@davidwendt](https://github.com/davidwendt) -- Fix read out of bounds in string concatenate ([#13838](https://github.com/rapidsai/cudf/pull/13838)) [@pentschev](https://github.com/pentschev) -- Raise error for more cases when `timezone-aware` data is passed to `as_column` ([#13835](https://github.com/rapidsai/cudf/pull/13835)) [@galipremsagar](https://github.com/galipremsagar) -- Fix `any`, `all` reduction behavior for `axis=None` and warn for other reductions ([#13831](https://github.com/rapidsai/cudf/pull/13831)) [@galipremsagar](https://github.com/galipremsagar) -- Raise error when trying to construct time-zone aware timestamps ([#13830](https://github.com/rapidsai/cudf/pull/13830)) [@galipremsagar](https://github.com/galipremsagar) -- Fix cuFile I/O factories 
([#13829](https://github.com/rapidsai/cudf/pull/13829)) [@vuule](https://github.com/vuule) -- DataFrame with namedtuples uses ._field as column names ([#13824](https://github.com/rapidsai/cudf/pull/13824)) [@mroeschke](https://github.com/mroeschke) -- Branch 23.10 merge 23.08 ([#13822](https://github.com/rapidsai/cudf/pull/13822)) [@vyasr](https://github.com/vyasr) -- Return a Series from JIT GroupBy apply, rather than a DataFrame ([#13820](https://github.com/rapidsai/cudf/pull/13820)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- No need to dlsym EnsureS3Finalized we can call it directly ([#13819](https://github.com/rapidsai/cudf/pull/13819)) [@robertmaynard](https://github.com/robertmaynard) -- Raise error when mixed types are being constructed ([#13816](https://github.com/rapidsai/cudf/pull/13816)) [@galipremsagar](https://github.com/galipremsagar) -- Fix unbounded sequence issue in `DataFrame` constructor ([#13811](https://github.com/rapidsai/cudf/pull/13811)) [@galipremsagar](https://github.com/galipremsagar) -- Fix Byte-Pair-Encoding usage of cuco static-map for storing merge-pairs ([#13807](https://github.com/rapidsai/cudf/pull/13807)) [@davidwendt](https://github.com/davidwendt) -- Fix for Parquet writer when requested pages per row is smaller than fragment size ([#13806](https://github.com/rapidsai/cudf/pull/13806)) [@etseidl](https://github.com/etseidl) -- Remove hangs from trying to construct un-bounded sequences ([#13799](https://github.com/rapidsai/cudf/pull/13799)) [@galipremsagar](https://github.com/galipremsagar) -- Bug/update libcudf to handle arrow12 changes ([#13794](https://github.com/rapidsai/cudf/pull/13794)) [@robertmaynard](https://github.com/robertmaynard) -- Update get_arrow to arrows 12 CMake target name of arrow::xsimd ([#13790](https://github.com/rapidsai/cudf/pull/13790)) [@robertmaynard](https://github.com/robertmaynard) -- Raise error when trying to join `datetime` and `timedelta` types with other types ([#13786](https://github.com/rapidsai/cudf/pull/13786)) [@galipremsagar](https://github.com/galipremsagar) -- Fix negative unary operation for boolean type ([#13780](https://github.com/rapidsai/cudf/pull/13780)) [@galipremsagar](https://github.com/galipremsagar) -- Fix contains(`in`) method for `Series` ([#13779](https://github.com/rapidsai/cudf/pull/13779)) [@galipremsagar](https://github.com/galipremsagar) -- Fix binary operation column ordering and missing column issues ([#13778](https://github.com/rapidsai/cudf/pull/13778)) [@galipremsagar](https://github.com/galipremsagar) -- Cast only time of day to nanos to avoid an overflow in Parquet INT96 write ([#13776](https://github.com/rapidsai/cudf/pull/13776)) [@gerashegalov](https://github.com/gerashegalov) -- Preserve names of column object in various APIs ([#13772](https://github.com/rapidsai/cudf/pull/13772)) [@galipremsagar](https://github.com/galipremsagar) -- Raise error on constructing an array from mixed type inputs ([#13768](https://github.com/rapidsai/cudf/pull/13768)) [@galipremsagar](https://github.com/galipremsagar) -- Fix construction of DataFrames from dict when columns are provided ([#13766](https://github.com/rapidsai/cudf/pull/13766)) 
[@wence-](https://github.com/wence-) -- Provide our own Cython declaration for make_unique ([#13746](https://github.com/rapidsai/cudf/pull/13746)) [@wence-](https://github.com/wence-) - -## 📖 Documentation - -- Fix typo in docstring: metadata. ([#14025](https://github.com/rapidsai/cudf/pull/14025)) [@bdice](https://github.com/bdice) -- Fix typo in parquet/page_decode.cuh ([#13849](https://github.com/rapidsai/cudf/pull/13849)) [@XinyuZeng](https://github.com/XinyuZeng) -- Simplify Python doc configuration ([#13826](https://github.com/rapidsai/cudf/pull/13826)) [@vyasr](https://github.com/vyasr) -- Update documentation to reflect recent changes in JSON reader and writer ([#13791](https://github.com/rapidsai/cudf/pull/13791)) [@vuule](https://github.com/vuule) -- Fix all warnings in Python docs ([#13789](https://github.com/rapidsai/cudf/pull/13789)) [@vyasr](https://github.com/vyasr) - -## 🚀 New Features - -- [Java] Add JNI bindings for `integers_to_hex` ([#14205](https://github.com/rapidsai/cudf/pull/14205)) [@razajafri](https://github.com/razajafri) -- Propagate errors from Parquet reader kernels back to host ([#14167](https://github.com/rapidsai/cudf/pull/14167)) [@vuule](https://github.com/vuule) -- JNI for `HISTOGRAM` and `MERGE_HISTOGRAM` aggregations ([#14154](https://github.com/rapidsai/cudf/pull/14154)) [@ttnghia](https://github.com/ttnghia) -- Expose streams in all public sorting APIs ([#14146](https://github.com/rapidsai/cudf/pull/14146)) [@vyasr](https://github.com/vyasr) -- Enable direct ingestion and production of Arrow scalars ([#14121](https://github.com/rapidsai/cudf/pull/14121)) [@vyasr](https://github.com/vyasr) -- Implement `GroupBy.value_counts` to match pandas API ([#14114](https://github.com/rapidsai/cudf/pull/14114)) [@stmio](https://github.com/stmio) -- Refactor parquet thrift reader ([#14097](https://github.com/rapidsai/cudf/pull/14097)) [@etseidl](https://github.com/etseidl) -- Refactor `hash_reduce_by_row` ([#14095](https://github.com/rapidsai/cudf/pull/14095)) [@ttnghia](https://github.com/ttnghia) -- Support negative preceding/following for ROW window functions ([#14093](https://github.com/rapidsai/cudf/pull/14093)) [@mythrocks](https://github.com/mythrocks) -- Support for progressive parquet chunked reading. 
([#14079](https://github.com/rapidsai/cudf/pull/14079)) [@nvdbaranec](https://github.com/nvdbaranec) -- Implement `HISTOGRAM` and `MERGE_HISTOGRAM` aggregations ([#14045](https://github.com/rapidsai/cudf/pull/14045)) [@ttnghia](https://github.com/ttnghia) -- Expose streams in public search APIs ([#14034](https://github.com/rapidsai/cudf/pull/14034)) [@vyasr](https://github.com/vyasr) -- Expose streams in public replace APIs ([#14010](https://github.com/rapidsai/cudf/pull/14010)) [@vyasr](https://github.com/vyasr) -- Add stream parameter to public cudf::strings::split APIs ([#13997](https://github.com/rapidsai/cudf/pull/13997)) [@davidwendt](https://github.com/davidwendt) -- Expose streams in public filling APIs ([#13990](https://github.com/rapidsai/cudf/pull/13990)) [@vyasr](https://github.com/vyasr) -- Expose streams in public concatenate APIs ([#13987](https://github.com/rapidsai/cudf/pull/13987)) [@vyasr](https://github.com/vyasr) -- Use HostMemoryAllocator in jni::allocate_host_buffer ([#13975](https://github.com/rapidsai/cudf/pull/13975)) [@gerashegalov](https://github.com/gerashegalov) -- Enable fractional null probability for hashing benchmark ([#13967](https://github.com/rapidsai/cudf/pull/13967)) [@Blonck](https://github.com/Blonck) -- Switch pylibcudf-enabled types to use enum class in Cython ([#13931](https://github.com/rapidsai/cudf/pull/13931)) [@vyasr](https://github.com/vyasr) -- Add nvtext::tokenize_with_vocabulary API ([#13930](https://github.com/rapidsai/cudf/pull/13930)) [@davidwendt](https://github.com/davidwendt) -- Rewrite `DataFrame.stack` to support multi level column names ([#13927](https://github.com/rapidsai/cudf/pull/13927)) [@isVoid](https://github.com/isVoid) -- Add HostMemoryAllocator interface ([#13924](https://github.com/rapidsai/cudf/pull/13924)) [@gerashegalov](https://github.com/gerashegalov) -- Global stream pool ([#13922](https://github.com/rapidsai/cudf/pull/13922)) [@etseidl](https://github.com/etseidl) -- Create table_input_metadata from a table_metadata ([#13920](https://github.com/rapidsai/cudf/pull/13920)) [@etseidl](https://github.com/etseidl) -- Translate column size overflow exception to JNI ([#13911](https://github.com/rapidsai/cudf/pull/13911)) [@mythrocks](https://github.com/mythrocks) -- Enable RLE boolean encoding for v2 Parquet files ([#13886](https://github.com/rapidsai/cudf/pull/13886)) [@etseidl](https://github.com/etseidl) -- Exclude some tests from running with the compute sanitizer ([#13872](https://github.com/rapidsai/cudf/pull/13872)) [@firestarman](https://github.com/firestarman) -- Expand statistics support in ORC writer ([#13848](https://github.com/rapidsai/cudf/pull/13848)) [@vuule](https://github.com/vuule) -- Register the memory mapped buffer in `datasource` to improve H2D throughput ([#13814](https://github.com/rapidsai/cudf/pull/13814)) [@vuule](https://github.com/vuule) -- Add cudf::strings::find function with target per row ([#13808](https://github.com/rapidsai/cudf/pull/13808)) [@davidwendt](https://github.com/davidwendt) -- Add minhash support for MurmurHash3_x64_128 ([#13796](https://github.com/rapidsai/cudf/pull/13796)) [@davidwendt](https://github.com/davidwendt) -- 
Remove unnecessary pointer copying in JIT GroupBy Apply ([#13792](https://github.com/rapidsai/cudf/pull/13792)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Add 'poll' function to custreamz kafka consumer ([#13782](https://github.com/rapidsai/cudf/pull/13782)) [@jdye64](https://github.com/jdye64) -- Support `corr` in `GroupBy.apply` through the jit engine ([#13767](https://github.com/rapidsai/cudf/pull/13767)) [@shwina](https://github.com/shwina) -- Optionally write version 2 page headers in Parquet writer ([#13751](https://github.com/rapidsai/cudf/pull/13751)) [@etseidl](https://github.com/etseidl) -- Support more numeric types in `Groupby.apply` with `engine='jit'` ([#13729](https://github.com/rapidsai/cudf/pull/13729)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- [FEA] Add DELTA_BINARY_PACKED decoding support to Parquet reader ([#13637](https://github.com/rapidsai/cudf/pull/13637)) [@etseidl](https://github.com/etseidl) -- Read FIXED_LEN_BYTE_ARRAY as binary in parquet reader ([#13437](https://github.com/rapidsai/cudf/pull/13437)) [@PointKernel](https://github.com/PointKernel) - -## 🛠️ Improvements - -- Pin `dask` and `distributed` for `23.10` release ([#14225](https://github.com/rapidsai/cudf/pull/14225)) [@galipremsagar](https://github.com/galipremsagar) -- update rmm tag path ([#14195](https://github.com/rapidsai/cudf/pull/14195)) [@AyodeAwe](https://github.com/AyodeAwe) -- Disable `Recently Updated` Check ([#14193](https://github.com/rapidsai/cudf/pull/14193)) [@ajschmidt8](https://github.com/ajschmidt8) -- Move cpp/src/hash/hash_allocator.cuh to include/cudf/hashing/detail ([#14163](https://github.com/rapidsai/cudf/pull/14163)) [@davidwendt](https://github.com/davidwendt) -- Add Parquet reader benchmarks for row selection ([#14147](https://github.com/rapidsai/cudf/pull/14147)) [@vuule](https://github.com/vuule) -- Update image names ([#14145](https://github.com/rapidsai/cudf/pull/14145)) [@AyodeAwe](https://github.com/AyodeAwe) -- Support callables in DataFrame.assign ([#14142](https://github.com/rapidsai/cudf/pull/14142)) [@wence-](https://github.com/wence-) -- Reduce memory usage of as_categorical_column ([#14138](https://github.com/rapidsai/cudf/pull/14138)) [@wence-](https://github.com/wence-) -- Replace Python scalar conversions with libcudf ([#14124](https://github.com/rapidsai/cudf/pull/14124)) [@vyasr](https://github.com/vyasr) -- Update to clang 16.0.6. ([#14120](https://github.com/rapidsai/cudf/pull/14120)) [@bdice](https://github.com/bdice) -- Fix type of empty `Index` and raise warning in `Series` constructor ([#14116](https://github.com/rapidsai/cudf/pull/14116)) [@galipremsagar](https://github.com/galipremsagar) -- Add stream parameter to external dict APIs ([#14115](https://github.com/rapidsai/cudf/pull/14115)) [@SurajAralihalli](https://github.com/SurajAralihalli) -- Add fallback matrix for nvcomp. 
([#14082](https://github.com/rapidsai/cudf/pull/14082)) [@bdice](https://github.com/bdice) -- [Java] Add recoverWithNull to JSONOptions and pass to Table.readJSON ([#14078](https://github.com/rapidsai/cudf/pull/14078)) [@andygrove](https://github.com/andygrove) -- Remove header tests ([#14072](https://github.com/rapidsai/cudf/pull/14072)) [@ajschmidt8](https://github.com/ajschmidt8) -- Refactor `contains_table` with cuco::static_set ([#14064](https://github.com/rapidsai/cudf/pull/14064)) [@PointKernel](https://github.com/PointKernel) -- Remove debug print in a Parquet test ([#14063](https://github.com/rapidsai/cudf/pull/14063)) [@vuule](https://github.com/vuule) -- Expose stream parameter in public nvtext ngram APIs ([#14061](https://github.com/rapidsai/cudf/pull/14061)) [@davidwendt](https://github.com/davidwendt) -- Expose stream parameter in public strings find APIs ([#14060](https://github.com/rapidsai/cudf/pull/14060)) [@davidwendt](https://github.com/davidwendt) -- Update doxygen to 1.9.1 ([#14059](https://github.com/rapidsai/cudf/pull/14059)) [@vyasr](https://github.com/vyasr) -- Remove the mr from the base fixture ([#14057](https://github.com/rapidsai/cudf/pull/14057)) [@vyasr](https://github.com/vyasr) -- Expose streams in public strings case APIs ([#14056](https://github.com/rapidsai/cudf/pull/14056)) [@davidwendt](https://github.com/davidwendt) -- Refactor libcudf indexalator to typed normalator ([#14043](https://github.com/rapidsai/cudf/pull/14043)) [@davidwendt](https://github.com/davidwendt) -- Use cudf::make_empty_column instead of column_view constructor ([#14030](https://github.com/rapidsai/cudf/pull/14030)) [@davidwendt](https://github.com/davidwendt) -- Remove quadratic runtime due to accessing Frame._dtypes in loop ([#14028](https://github.com/rapidsai/cudf/pull/14028)) [@wence-](https://github.com/wence-) -- Explicitly depend on zlib in conda recipes ([#14018](https://github.com/rapidsai/cudf/pull/14018)) [@wence-](https://github.com/wence-) -- Use grid_stride for stride computations. 
([#13996](https://github.com/rapidsai/cudf/pull/13996)) [@bdice](https://github.com/bdice) -- Fix an issue where casting null-array to `object` dtype will result in a failure ([#13994](https://github.com/rapidsai/cudf/pull/13994)) [@galipremsagar](https://github.com/galipremsagar) -- Add tab as literal to cudf::test::to_string output ([#13993](https://github.com/rapidsai/cudf/pull/13993)) [@davidwendt](https://github.com/davidwendt) -- Enable `codes` dtype parity in pandas-compatibility mode for `factorize` API ([#13982](https://github.com/rapidsai/cudf/pull/13982)) [@galipremsagar](https://github.com/galipremsagar) -- Fix `CategoricalIndex` ordering in `Groupby.agg` when pandas-compatibility mode is enabled ([#13978](https://github.com/rapidsai/cudf/pull/13978)) [@galipremsagar](https://github.com/galipremsagar) -- Produce a fatal error if cudf is unable to find pyarrow include directory ([#13976](https://github.com/rapidsai/cudf/pull/13976)) [@cwharris](https://github.com/cwharris) -- Use `thread_index_type` in `partitioning.cu` ([#13973](https://github.com/rapidsai/cudf/pull/13973)) [@divyegala](https://github.com/divyegala) -- Use `cudf::thread_index_type` in `merge.cu` ([#13972](https://github.com/rapidsai/cudf/pull/13972)) [@divyegala](https://github.com/divyegala) -- Use `copy-pr-bot` ([#13970](https://github.com/rapidsai/cudf/pull/13970)) [@ajschmidt8](https://github.com/ajschmidt8) -- Use cudf::thread_index_type in strings custom kernels ([#13968](https://github.com/rapidsai/cudf/pull/13968)) [@davidwendt](https://github.com/davidwendt) -- Add `bytes_per_second` to hash_partition benchmark ([#13965](https://github.com/rapidsai/cudf/pull/13965)) [@Blonck](https://github.com/Blonck) -- Added pinned pool reservation API for java ([#13964](https://github.com/rapidsai/cudf/pull/13964)) [@revans2](https://github.com/revans2) -- Simplify wheel build scripts and allow alphas of RAPIDS dependencies ([#13963](https://github.com/rapidsai/cudf/pull/13963)) [@vyasr](https://github.com/vyasr) -- Add `bytes_per_second` to copy_if_else benchmark ([#13960](https://github.com/rapidsai/cudf/pull/13960)) [@Blonck](https://github.com/Blonck) -- Add pandas compatible output to `Series.unique` ([#13959](https://github.com/rapidsai/cudf/pull/13959)) [@galipremsagar](https://github.com/galipremsagar) -- Add `bytes_per_second` to compiled binaryop benchmark ([#13938](https://github.com/rapidsai/cudf/pull/13938)) [@Blonck](https://github.com/Blonck) -- Unpin `dask` and `distributed` for `23.10` development ([#13935](https://github.com/rapidsai/cudf/pull/13935)) [@galipremsagar](https://github.com/galipremsagar) -- Make HostColumnVector.getRefCount public ([#13934](https://github.com/rapidsai/cudf/pull/13934)) [@abellina](https://github.com/abellina) -- Use cuco::static_set in JSON tree algorithm ([#13928](https://github.com/rapidsai/cudf/pull/13928)) [@karthikeyann](https://github.com/karthikeyann) -- Add java API to get size of host memory needed to copy column view ([#13919](https://github.com/rapidsai/cudf/pull/13919)) [@revans2](https://github.com/revans2) -- Use cudf::size_type instead of int32 where appropriate in nvtext functions 
-- Enable hugepage for arrow host allocations ([#13914](https://github.com/rapidsai/cudf/pull/13914)) [@madsbk](https://github.com/madsbk)
-- Improve performance of nvtext::edit_distance ([#13912](https://github.com/rapidsai/cudf/pull/13912)) [@davidwendt](https://github.com/davidwendt)
-- Ensure cudf internals use pylibcudf in pure Python mode ([#13909](https://github.com/rapidsai/cudf/pull/13909)) [@vyasr](https://github.com/vyasr)
-- Use `empty()` instead of `size()` where possible ([#13908](https://github.com/rapidsai/cudf/pull/13908)) [@vuule](https://github.com/vuule)
-- [JNI] Adds HostColumnVector.EventHandler for spillability checks ([#13898](https://github.com/rapidsai/cudf/pull/13898)) [@abellina](https://github.com/abellina)
-- Return `Timestamp` & `Timedelta` for fetching scalars in `DatetimeIndex` & `TimedeltaIndex` ([#13896](https://github.com/rapidsai/cudf/pull/13896)) [@galipremsagar](https://github.com/galipremsagar)
-- Allow explicit `shuffle="p2p"` within dask-cudf API ([#13893](https://github.com/rapidsai/cudf/pull/13893)) [@rjzamora](https://github.com/rjzamora)
-- Disable creation of `DatetimeIndex` when `freq` is passed to `cudf.date_range` ([#13890](https://github.com/rapidsai/cudf/pull/13890)) [@galipremsagar](https://github.com/galipremsagar)
-- Bring parity with pandas for `datetime` & `timedelta` comparison operations ([#13877](https://github.com/rapidsai/cudf/pull/13877)) [@galipremsagar](https://github.com/galipremsagar)
-- Change `NA` to `NaT` for `datetime` and `timedelta` types ([#13868](https://github.com/rapidsai/cudf/pull/13868)) [@galipremsagar](https://github.com/galipremsagar)
-- Raise error when `astype(object)` is called in pandas compatibility mode ([#13862](https://github.com/rapidsai/cudf/pull/13862)) [@galipremsagar](https://github.com/galipremsagar)
-- Fixes a performance regression in FST ([#13850](https://github.com/rapidsai/cudf/pull/13850)) [@elstehle](https://github.com/elstehle)
-- Set native handles to null on close in Java wrapper classes ([#13818](https://github.com/rapidsai/cudf/pull/13818)) [@jlowe](https://github.com/jlowe)
-- Avoid use of CUDF_EXPECTS in libcudf unit tests outside of helper functions with return values ([#13812](https://github.com/rapidsai/cudf/pull/13812)) [@vuule](https://github.com/vuule)
-- Update `lists::contains` to experimental row comparator ([#13810](https://github.com/rapidsai/cudf/pull/13810)) [@divyegala](https://github.com/divyegala)
-- Reduce `lists::contains` dispatches for scalars ([#13805](https://github.com/rapidsai/cudf/pull/13805)) [@divyegala](https://github.com/divyegala)
-- Long string optimization for string column parsing in JSON reader ([#13803](https://github.com/rapidsai/cudf/pull/13803)) [@karthikeyann](https://github.com/karthikeyann)
-- Raise NotImplementedError for pd.SparseDtype ([#13798](https://github.com/rapidsai/cudf/pull/13798)) [@mroeschke](https://github.com/mroeschke)
-- Remove the libcudf cudf::offset_type type ([#13788](https://github.com/rapidsai/cudf/pull/13788)) [@davidwendt](https://github.com/davidwendt)
-- Move Spark-independent Table debug to cudf Java ([#13783](https://github.com/rapidsai/cudf/pull/13783)) [@gerashegalov](https://github.com/gerashegalov)
-- Update to Cython 3.0.0 ([#13777](https://github.com/rapidsai/cudf/pull/13777)) [@vyasr](https://github.com/vyasr)
-- Refactor Parquet reader handling of V2 page header info ([#13775](https://github.com/rapidsai/cudf/pull/13775)) [@etseidl](https://github.com/etseidl)
-- Branch 23.10 merge 23.08 ([#13773](https://github.com/rapidsai/cudf/pull/13773)) [@vyasr](https://github.com/vyasr)
-- Restructure JSON code to correctly reflect legacy/experimental status ([#13757](https://github.com/rapidsai/cudf/pull/13757)) [@vuule](https://github.com/vuule)
-- Branch 23.10 merge 23.08 ([#13753](https://github.com/rapidsai/cudf/pull/13753)) [@vyasr](https://github.com/vyasr)
-- Enforce deprecations in `23.10` ([#13732](https://github.com/rapidsai/cudf/pull/13732)) [@galipremsagar](https://github.com/galipremsagar)
-- Upgrade to arrow 12 ([#13728](https://github.com/rapidsai/cudf/pull/13728)) [@galipremsagar](https://github.com/galipremsagar)
-- Refactors JSON reader's pushdown automaton ([#13716](https://github.com/rapidsai/cudf/pull/13716)) [@elstehle](https://github.com/elstehle)
-- Remove Arrow dependency from the `datasource.hpp` public header ([#13698](https://github.com/rapidsai/cudf/pull/13698)) [@vuule](https://github.com/vuule)
-
-# cuDF 23.08.00 (9 Aug 2023)
-
-## 🚨 Breaking Changes
-
-- Enforce deprecations and add clarifications around existing deprecations ([#13710](https://github.com/rapidsai/cudf/pull/13710)) [@galipremsagar](https://github.com/galipremsagar)
-- Separate MurmurHash32 from hash_functions.cuh ([#13681](https://github.com/rapidsai/cudf/pull/13681)) [@davidwendt](https://github.com/davidwendt)
-- Avoid storing metadata in pointers in ORC and Parquet writers ([#13648](https://github.com/rapidsai/cudf/pull/13648)) [@vuule](https://github.com/vuule)
-- Expose streams in all public copying APIs ([#13629](https://github.com/rapidsai/cudf/pull/13629)) [@vyasr](https://github.com/vyasr)
-- Remove deprecated cudf::strings::slice_strings (by delimiter) functions ([#13628](https://github.com/rapidsai/cudf/pull/13628)) [@davidwendt](https://github.com/davidwendt)
-- Remove deprecated cudf.set_allocator. ([#13591](https://github.com/rapidsai/cudf/pull/13591)) [@bdice](https://github.com/bdice)
-- Change build.sh to use pip install instead of setup.py ([#13507](https://github.com/rapidsai/cudf/pull/13507)) [@vyasr](https://github.com/vyasr)
-- Remove unused max_rows_tensor parameter from subword tokenizer ([#13463](https://github.com/rapidsai/cudf/pull/13463)) [@davidwendt](https://github.com/davidwendt)
-- Fix decimal scale reductions in `_get_decimal_type` ([#13224](https://github.com/rapidsai/cudf/pull/13224)) [@charlesbluca](https://github.com/charlesbluca)
-
-## 🐛 Bug Fixes
-
-- Add CUDA version to cudf_kafka and libcudf-example build strings. ([#13769](https://github.com/rapidsai/cudf/pull/13769)) [@bdice](https://github.com/bdice)
-- Fix typo in wheels-test.yaml. ([#13763](https://github.com/rapidsai/cudf/pull/13763)) [@bdice](https://github.com/bdice)
-- Don't test strings shorter than the requested ngram size ([#13758](https://github.com/rapidsai/cudf/pull/13758)) [@vyasr](https://github.com/vyasr)
-- Add CUDA version to custreamz build string. ([#13754](https://github.com/rapidsai/cudf/pull/13754)) [@bdice](https://github.com/bdice)
-- Fix writing of ORC files with empty child string columns ([#13745](https://github.com/rapidsai/cudf/pull/13745)) [@vuule](https://github.com/vuule)
-- Remove the erroneous "empty level" short-circuit from ORC reader ([#13722](https://github.com/rapidsai/cudf/pull/13722)) [@vuule](https://github.com/vuule)
-- Fix character counting when writing sliced tables into ORC ([#13721](https://github.com/rapidsai/cudf/pull/13721)) [@vuule](https://github.com/vuule)
-- Parquet uses row group row count if missing from header ([#13712](https://github.com/rapidsai/cudf/pull/13712)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Fix reading of RLE encoded boolean data from parquet files with V2 page headers ([#13707](https://github.com/rapidsai/cudf/pull/13707)) [@etseidl](https://github.com/etseidl)
-- Fix a corner case of list lexicographic comparator ([#13701](https://github.com/rapidsai/cudf/pull/13701)) [@ttnghia](https://github.com/ttnghia)
-- Fix combined filtering and column projection in `dask_cudf.read_parquet` ([#13697](https://github.com/rapidsai/cudf/pull/13697)) [@rjzamora](https://github.com/rjzamora)
-- Revert fetch-rapids changes ([#13696](https://github.com/rapidsai/cudf/pull/13696)) [@vyasr](https://github.com/vyasr)
-- Data generator - include offsets in the size estimate of list elements ([#13688](https://github.com/rapidsai/cudf/pull/13688)) [@vuule](https://github.com/vuule)
-- Add `cuda-nvcc-impl` to `cudf` for `numba` CUDA 12 ([#13673](https://github.com/rapidsai/cudf/pull/13673)) [@jakirkham](https://github.com/jakirkham)
-- Fix combined filtering and column projection in `read_parquet` ([#13666](https://github.com/rapidsai/cudf/pull/13666)) [@rjzamora](https://github.com/rjzamora)
-- Use `thrust::identity` as hash functions for byte pair encoding ([#13665](https://github.com/rapidsai/cudf/pull/13665)) [@PointKernel](https://github.com/PointKernel)
-- Fix loc-getitem ordering when index contains duplicate labels ([#13659](https://github.com/rapidsai/cudf/pull/13659)) [@wence-](https://github.com/wence-)
-- [REVIEW] Introduce parity with pandas for `MultiIndex.loc` ordering & fix a bug in `Groupby` with `as_index` ([#13657](https://github.com/rapidsai/cudf/pull/13657)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix memcheck error found in nvtext tokenize functions ([#13649](https://github.com/rapidsai/cudf/pull/13649)) [@davidwendt](https://github.com/davidwendt)
-- Fix `has_nonempty_nulls` ignoring column offset ([#13647](https://github.com/rapidsai/cudf/pull/13647)) [@ttnghia](https://github.com/ttnghia)
-- [Java] Avoid double-free corruption in case of an Exception while creating a ColumnView ([#13645](https://github.com/rapidsai/cudf/pull/13645)) [@razajafri](https://github.com/razajafri)
-- Fix memcheck error in ORC reader call to cudf::io::copy_uncompressed_kernel ([#13643](https://github.com/rapidsai/cudf/pull/13643)) [@davidwendt](https://github.com/davidwendt)
-- Fix CUDA 12 conda environment to remove cubinlinker and ptxcompiler. ([#13636](https://github.com/rapidsai/cudf/pull/13636)) [@bdice](https://github.com/bdice)
-- Fix inf/NaN comparisons for FLOAT orderby in window functions ([#13635](https://github.com/rapidsai/cudf/pull/13635)) [@mythrocks](https://github.com/mythrocks)
-- Refactor `Index` search to simplify code and increase correctness ([#13625](https://github.com/rapidsai/cudf/pull/13625)) [@wence-](https://github.com/wence-)
-- Fix compile warning for unused variable in split_re.cu ([#13621](https://github.com/rapidsai/cudf/pull/13621)) [@davidwendt](https://github.com/davidwendt)
-- Fix tz_localize for dask_cudf Series ([#13610](https://github.com/rapidsai/cudf/pull/13610)) [@shwina](https://github.com/shwina)
-- Fix issue with no decompressed data in ORC reader ([#13609](https://github.com/rapidsai/cudf/pull/13609)) [@vuule](https://github.com/vuule)
-- Fix floating point window range extents. ([#13606](https://github.com/rapidsai/cudf/pull/13606)) [@mythrocks](https://github.com/mythrocks)
-- Fix `localize(None)` for timezone-naive columns ([#13603](https://github.com/rapidsai/cudf/pull/13603)) [@shwina](https://github.com/shwina)
-- Fixed a memory leak caused by Exception thrown while constructing a ColumnView ([#13597](https://github.com/rapidsai/cudf/pull/13597)) [@razajafri](https://github.com/razajafri)
-- Handle nullptr return value from bitmask_or in distinct_count ([#13590](https://github.com/rapidsai/cudf/pull/13590)) [@wence-](https://github.com/wence-)
-- Bring parity with pandas in Index.join ([#13589](https://github.com/rapidsai/cudf/pull/13589)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix cudf.melt when there are more than 255 columns ([#13588](https://github.com/rapidsai/cudf/pull/13588)) [@hcho3](https://github.com/hcho3)
-- Fix memory issues in cuIO due to removal of memory padding ([#13586](https://github.com/rapidsai/cudf/pull/13586)) [@ttnghia](https://github.com/ttnghia)
-- Fix Parquet multi-file reading ([#13584](https://github.com/rapidsai/cudf/pull/13584)) [@etseidl](https://github.com/etseidl)
-- Fix memcheck error found in LISTS_TEST ([#13579](https://github.com/rapidsai/cudf/pull/13579)) [@davidwendt](https://github.com/davidwendt)
-- Fix memcheck error found in STRINGS_TEST ([#13578](https://github.com/rapidsai/cudf/pull/13578)) [@davidwendt](https://github.com/davidwendt)
-- Fix memcheck error found in INTEROP_TEST ([#13577](https://github.com/rapidsai/cudf/pull/13577)) [@davidwendt](https://github.com/davidwendt)
-- Fix memcheck errors found in REDUCTION_TEST ([#13574](https://github.com/rapidsai/cudf/pull/13574)) [@davidwendt](https://github.com/davidwendt)
-- Preemptive fix for hive-partitioning change in dask ([#13564](https://github.com/rapidsai/cudf/pull/13564)) [@rjzamora](https://github.com/rjzamora)
-- Fix an issue with `dask_cudf.read_csv` when lines are needed to be skipped ([#13555](https://github.com/rapidsai/cudf/pull/13555)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix out-of-bounds memory write in cudf::dictionary::detail::concatenate ([#13554](https://github.com/rapidsai/cudf/pull/13554)) [@davidwendt](https://github.com/davidwendt)
-- Fix the null mask size in json reader ([#13537](https://github.com/rapidsai/cudf/pull/13537)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix cudf::strings::strip for all-empty input column ([#13533](https://github.com/rapidsai/cudf/pull/13533)) [@davidwendt](https://github.com/davidwendt)
-- Make sure to build without isolation or installing dependencies ([#13524](https://github.com/rapidsai/cudf/pull/13524)) [@vyasr](https://github.com/vyasr)
-- Remove preload lib from CMake for now ([#13519](https://github.com/rapidsai/cudf/pull/13519)) [@vyasr](https://github.com/vyasr)
-- Fix missing separator after null values in JSON writer ([#13503](https://github.com/rapidsai/cudf/pull/13503)) [@karthikeyann](https://github.com/karthikeyann)
-- Ensure `single_lane_block_sum_reduce` is safe to call in a loop ([#13488](https://github.com/rapidsai/cudf/pull/13488)) [@wence-](https://github.com/wence-)
-- Update all versions in pyproject.toml files. ([#13486](https://github.com/rapidsai/cudf/pull/13486)) [@bdice](https://github.com/bdice)
-- Remove applying nvbench that doesn't exist in 23.08 ([#13484](https://github.com/rapidsai/cudf/pull/13484)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix chunked Parquet reader benchmark ([#13482](https://github.com/rapidsai/cudf/pull/13482)) [@vuule](https://github.com/vuule)
-- Update JNI JSON reader column compatibility for Spark ([#13477](https://github.com/rapidsai/cudf/pull/13477)) [@revans2](https://github.com/revans2)
-- Fix unsanitized output of scan with strings ([#13455](https://github.com/rapidsai/cudf/pull/13455)) [@davidwendt](https://github.com/davidwendt)
-- Reject functions without bytecode from `_can_be_jitted` in GroupBy Apply ([#13429](https://github.com/rapidsai/cudf/pull/13429)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Fix decimal scale reductions in `_get_decimal_type` ([#13224](https://github.com/rapidsai/cudf/pull/13224)) [@charlesbluca](https://github.com/charlesbluca)
-
-## 📖 Documentation
-
-- Fix doxygen groups for io data sources and sinks ([#13718](https://github.com/rapidsai/cudf/pull/13718)) [@davidwendt](https://github.com/davidwendt)
-- Add pandas compatibility note to DataFrame.query docstring ([#13693](https://github.com/rapidsai/cudf/pull/13693)) [@beckernick](https://github.com/beckernick)
-- Add pylibcudf to developer guide ([#13639](https://github.com/rapidsai/cudf/pull/13639)) [@vyasr](https://github.com/vyasr)
-- Fix repeated words in doxygen text ([#13598](https://github.com/rapidsai/cudf/pull/13598)) [@karthikeyann](https://github.com/karthikeyann)
-- Update docs for top-level API. ([#13592](https://github.com/rapidsai/cudf/pull/13592)) [@bdice](https://github.com/bdice)
-- Fix the doxygen text for cudf::concatenate and other places ([#13561](https://github.com/rapidsai/cudf/pull/13561)) [@davidwendt](https://github.com/davidwendt)
-- Document stream validation approach used in testing ([#13556](https://github.com/rapidsai/cudf/pull/13556)) [@vyasr](https://github.com/vyasr)
-- Cleanup doc repetitions in libcudf ([#13470](https://github.com/rapidsai/cudf/pull/13470)) [@karthikeyann](https://github.com/karthikeyann)
-
-## 🚀 New Features
-
-- Support `min` and `max` aggregations for list type in groupby and reduction ([#13676](https://github.com/rapidsai/cudf/pull/13676)) [@ttnghia](https://github.com/ttnghia)
-- Add nvtext::jaccard_index API for strings columns ([#13669](https://github.com/rapidsai/cudf/pull/13669)) [@davidwendt](https://github.com/davidwendt)
-- Add read_parquet_metadata libcudf API ([#13663](https://github.com/rapidsai/cudf/pull/13663)) [@karthikeyann](https://github.com/karthikeyann)
-- Expose streams in all public copying APIs ([#13629](https://github.com/rapidsai/cudf/pull/13629)) [@vyasr](https://github.com/vyasr)
-- Add XXHash_64 hash function to cudf ([#13612](https://github.com/rapidsai/cudf/pull/13612)) [@davidwendt](https://github.com/davidwendt)
-- Java support: Floating point order-by columns for RANGE window functions ([#13595](https://github.com/rapidsai/cudf/pull/13595)) [@mythrocks](https://github.com/mythrocks)
-- Use `cuco::static_map` to build string dictionaries in ORC writer ([#13580](https://github.com/rapidsai/cudf/pull/13580)) [@vuule](https://github.com/vuule)
-- Add pylibcudf subpackage with gather implementation ([#13562](https://github.com/rapidsai/cudf/pull/13562)) [@vyasr](https://github.com/vyasr)
-- Add JNI for `lists::concatenate_list_elements` ([#13547](https://github.com/rapidsai/cudf/pull/13547)) [@ttnghia](https://github.com/ttnghia)
-- Enable nested types for `lists::concatenate_list_elements` ([#13545](https://github.com/rapidsai/cudf/pull/13545)) [@ttnghia](https://github.com/ttnghia)
-- Add unicode encoding for string columns in JSON writer ([#13539](https://github.com/rapidsai/cudf/pull/13539)) [@karthikeyann](https://github.com/karthikeyann)
-- Remove numba kernels from `find_index_of_val` ([#13517](https://github.com/rapidsai/cudf/pull/13517)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Floating point order-by columns for RANGE window functions ([#13512](https://github.com/rapidsai/cudf/pull/13512)) [@mythrocks](https://github.com/mythrocks)
-- Parse column chunk metadata statistics in parquet reader ([#13472](https://github.com/rapidsai/cudf/pull/13472)) [@karthikeyann](https://github.com/karthikeyann)
-- Add `abs` function to apply ([#13408](https://github.com/rapidsai/cudf/pull/13408)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- [FEA] AST filtering in parquet reader ([#13348](https://github.com/rapidsai/cudf/pull/13348)) [@karthikeyann](https://github.com/karthikeyann)
-- [FEA] Adds option to recover from invalid JSON lines in JSON tokenizer ([#13344](https://github.com/rapidsai/cudf/pull/13344)) [@elstehle](https://github.com/elstehle)
Ensure cccl packages don't clash with upstream version ([#13235](https://github.com/rapidsai/cudf/pull/13235)) [@robertmaynard](https://github.com/robertmaynard) -- Update `struct_minmax_util` to experimental row comparator ([#13069](https://github.com/rapidsai/cudf/pull/13069)) [@divyegala](https://github.com/divyegala) -- Add stream parameter to hashing APIs ([#12090](https://github.com/rapidsai/cudf/pull/12090)) [@vyasr](https://github.com/vyasr) - -## 🛠️ Improvements - -- Pin `dask` and `distributed` for `23.08` release ([#13802](https://github.com/rapidsai/cudf/pull/13802)) [@galipremsagar](https://github.com/galipremsagar) -- Relax protobuf pinnings. ([#13770](https://github.com/rapidsai/cudf/pull/13770)) [@bdice](https://github.com/bdice) -- Switch fully unbounded window functions to use aggregations ([#13727](https://github.com/rapidsai/cudf/pull/13727)) [@mythrocks](https://github.com/mythrocks) -- Switch to new wheel building pipeline ([#13723](https://github.com/rapidsai/cudf/pull/13723)) [@vyasr](https://github.com/vyasr) -- Revert CUDA 12.0 CI workflows to branch-23.08. ([#13719](https://github.com/rapidsai/cudf/pull/13719)) [@bdice](https://github.com/bdice) -- Adding identify minimum version requirement ([#13713](https://github.com/rapidsai/cudf/pull/13713)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Enforce deprecations and add clarifications around existing deprecations ([#13710](https://github.com/rapidsai/cudf/pull/13710)) [@galipremsagar](https://github.com/galipremsagar) -- Optimize ORC reader performance for list data ([#13708](https://github.com/rapidsai/cudf/pull/13708)) [@vyasr](https://github.com/vyasr) -- fix limit overflow message in a docstring ([#13703](https://github.com/rapidsai/cudf/pull/13703)) [@ahmet-uyar](https://github.com/ahmet-uyar) -- Alleviates JSON parser's need for multi-file sources to end with a newline ([#13702](https://github.com/rapidsai/cudf/pull/13702)) [@elstehle](https://github.com/elstehle) -- Update cython-lint and replace flake8 with ruff ([#13699](https://github.com/rapidsai/cudf/pull/13699)) [@vyasr](https://github.com/vyasr) -- Add `__dask_tokenize__` definitions to cudf classes ([#13695](https://github.com/rapidsai/cudf/pull/13695)) [@rjzamora](https://github.com/rjzamora) -- Convert libcudf hashing benchmarks to nvbench ([#13694](https://github.com/rapidsai/cudf/pull/13694)) [@davidwendt](https://github.com/davidwendt) -- Separate MurmurHash32 from hash_functions.cuh ([#13681](https://github.com/rapidsai/cudf/pull/13681)) [@davidwendt](https://github.com/davidwendt) -- Improve performance of cudf::strings::split on whitespace ([#13680](https://github.com/rapidsai/cudf/pull/13680)) [@davidwendt](https://github.com/davidwendt) -- Allow ORC and Parquet writers to write nullable columns without nulls as non-nullable ([#13675](https://github.com/rapidsai/cudf/pull/13675)) [@vuule](https://github.com/vuule) -- Raise a NotImplementedError in to_datetime when utc is passed ([#13670](https://github.com/rapidsai/cudf/pull/13670)) [@shwina](https://github.com/shwina) -- Add rmm_mode parameter to nvbench base fixture ([#13668](https://github.com/rapidsai/cudf/pull/13668)) 
[@davidwendt](https://github.com/davidwendt) -- Fix multiindex loc ordering in pandas-compat mode ([#13660](https://github.com/rapidsai/cudf/pull/13660)) [@wence-](https://github.com/wence-) -- Add nvtext hash_character_ngrams function ([#13654](https://github.com/rapidsai/cudf/pull/13654)) [@davidwendt](https://github.com/davidwendt) -- Avoid storing metadata in pointers in ORC and Parquet writers ([#13648](https://github.com/rapidsai/cudf/pull/13648)) [@vuule](https://github.com/vuule) -- Acquire spill lock in to/from_arrow ([#13646](https://github.com/rapidsai/cudf/pull/13646)) [@shwina](https://github.com/shwina) -- Expose stable versions of libcudf sort routines ([#13634](https://github.com/rapidsai/cudf/pull/13634)) [@wence-](https://github.com/wence-) -- Separate out hash_test.cpp source for each hash API ([#13633](https://github.com/rapidsai/cudf/pull/13633)) [@davidwendt](https://github.com/davidwendt) -- Remove deprecated cudf::strings::slice_strings (by delimiter) functions ([#13628](https://github.com/rapidsai/cudf/pull/13628)) [@davidwendt](https://github.com/davidwendt) -- Create separate libcudf hash APIs for each supported hash function ([#13626](https://github.com/rapidsai/cudf/pull/13626)) [@davidwendt](https://github.com/davidwendt) -- Add convert_dtypes API ([#13623](https://github.com/rapidsai/cudf/pull/13623)) [@shwina](https://github.com/shwina) -- Clean up cupy in dependencies.yaml. ([#13617](https://github.com/rapidsai/cudf/pull/13617)) [@bdice](https://github.com/bdice) -- Use cuda-version to constrain cudatoolkit. ([#13615](https://github.com/rapidsai/cudf/pull/13615)) [@bdice](https://github.com/bdice) -- Add murmurhash3_x64_128 function to libcudf ([#13604](https://github.com/rapidsai/cudf/pull/13604)) [@davidwendt](https://github.com/davidwendt) -- Performance improvement for cudf::strings::like ([#13594](https://github.com/rapidsai/cudf/pull/13594)) [@davidwendt](https://github.com/davidwendt) -- Remove deprecated cudf.set_allocator. ([#13591](https://github.com/rapidsai/cudf/pull/13591)) [@bdice](https://github.com/bdice) -- Clean up cudf device atomic with `cuda::atomic_ref` ([#13583](https://github.com/rapidsai/cudf/pull/13583)) [@PointKernel](https://github.com/PointKernel) -- Add java bindings for distinct count ([#13573](https://github.com/rapidsai/cudf/pull/13573)) [@revans2](https://github.com/revans2) -- Use nvcomp conda package. 
([#13566](https://github.com/rapidsai/cudf/pull/13566)) [@bdice](https://github.com/bdice) -- Add exception to string_scalar if input string exceeds size_type ([#13560](https://github.com/rapidsai/cudf/pull/13560)) [@davidwendt](https://github.com/davidwendt) -- Add dispatch for `cudf.Dataframe` to/from `pyarrow.Table` conversion ([#13558](https://github.com/rapidsai/cudf/pull/13558)) [@rjzamora](https://github.com/rjzamora) -- Get rid of `cuco::pair_type` aliases ([#13553](https://github.com/rapidsai/cudf/pull/13553)) [@PointKernel](https://github.com/PointKernel) -- Introduce parity with pandas when `sort=False` in `Groupby` ([#13551](https://github.com/rapidsai/cudf/pull/13551)) [@galipremsagar](https://github.com/galipremsagar) -- Update CMake in docker to 3.26.4 ([#13550](https://github.com/rapidsai/cudf/pull/13550)) [@NvTimLiu](https://github.com/NvTimLiu) -- Clarify source of error message in stream testing. ([#13541](https://github.com/rapidsai/cudf/pull/13541)) [@bdice](https://github.com/bdice) -- Deprecate `strings_to_categorical` in `cudf.read_parquet` ([#13540](https://github.com/rapidsai/cudf/pull/13540)) [@galipremsagar](https://github.com/galipremsagar) -- Update to CMake 3.26.4 ([#13538](https://github.com/rapidsai/cudf/pull/13538)) [@vyasr](https://github.com/vyasr) -- s3 folder naming fix ([#13536](https://github.com/rapidsai/cudf/pull/13536)) [@AyodeAwe](https://github.com/AyodeAwe) -- Implement iloc-getitem using parse-don't-validate approach ([#13534](https://github.com/rapidsai/cudf/pull/13534)) [@wence-](https://github.com/wence-) -- Make synchronization explicit in the names of `hostdevice_*` copying APIs ([#13530](https://github.com/rapidsai/cudf/pull/13530)) [@ttnghia](https://github.com/ttnghia) -- Add benchmark (Google Benchmark) dependency to conda packages. ([#13528](https://github.com/rapidsai/cudf/pull/13528)) [@bdice](https://github.com/bdice) -- Add libcufile to dependencies.yaml. 
([#13523](https://github.com/rapidsai/cudf/pull/13523)) [@bdice](https://github.com/bdice) -- Fix some memoization logic in groupby/sort/sort_helper.cu ([#13521](https://github.com/rapidsai/cudf/pull/13521)) [@davidwendt](https://github.com/davidwendt) -- Use sizes_to_offsets_iterator in cudf::gather for strings ([#13520](https://github.com/rapidsai/cudf/pull/13520)) [@davidwendt](https://github.com/davidwendt) -- use rapids-upload-docs script ([#13518](https://github.com/rapidsai/cudf/pull/13518)) [@AyodeAwe](https://github.com/AyodeAwe) -- Support UTF-8 BOM in CSV reader ([#13516](https://github.com/rapidsai/cudf/pull/13516)) [@davidwendt](https://github.com/davidwendt) -- Move stream-related test configuration to CMake ([#13513](https://github.com/rapidsai/cudf/pull/13513)) [@vyasr](https://github.com/vyasr) -- Implement `cudf.option_context` ([#13511](https://github.com/rapidsai/cudf/pull/13511)) [@galipremsagar](https://github.com/galipremsagar) -- Unpin `dask` and `distributed` for development ([#13508](https://github.com/rapidsai/cudf/pull/13508)) [@galipremsagar](https://github.com/galipremsagar) -- Change build.sh to use pip install instead of setup.py ([#13507](https://github.com/rapidsai/cudf/pull/13507)) [@vyasr](https://github.com/vyasr) -- Use test default stream ([#13506](https://github.com/rapidsai/cudf/pull/13506)) [@vyasr](https://github.com/vyasr) -- Remove documentation build scripts for Jenkins ([#13495](https://github.com/rapidsai/cudf/pull/13495)) [@ajschmidt8](https://github.com/ajschmidt8) -- Use east const in include files ([#13494](https://github.com/rapidsai/cudf/pull/13494)) [@karthikeyann](https://github.com/karthikeyann) -- Use east const in src files ([#13493](https://github.com/rapidsai/cudf/pull/13493)) [@karthikeyann](https://github.com/karthikeyann) -- Use east const in tests files ([#13492](https://github.com/rapidsai/cudf/pull/13492)) [@karthikeyann](https://github.com/karthikeyann) -- Use east const in benchmarks files ([#13491](https://github.com/rapidsai/cudf/pull/13491)) [@karthikeyann](https://github.com/karthikeyann) -- Performance improvement for nvtext tokenize/token functions ([#13480](https://github.com/rapidsai/cudf/pull/13480)) [@davidwendt](https://github.com/davidwendt) -- Add pd.Float*Dtype to Avro and ORC mappings ([#13475](https://github.com/rapidsai/cudf/pull/13475)) [@mroeschke](https://github.com/mroeschke) -- Use pandas public APIs where available ([#13467](https://github.com/rapidsai/cudf/pull/13467)) [@mroeschke](https://github.com/mroeschke) -- Allow pd.ArrowDtype in cudf.from_pandas ([#13465](https://github.com/rapidsai/cudf/pull/13465)) [@mroeschke](https://github.com/mroeschke) -- Rework libcudf regex benchmarks with nvbench ([#13464](https://github.com/rapidsai/cudf/pull/13464)) [@davidwendt](https://github.com/davidwendt) -- Remove unused max_rows_tensor parameter from subword tokenizer ([#13463](https://github.com/rapidsai/cudf/pull/13463)) [@davidwendt](https://github.com/davidwendt) -- Separate io-text and nvtext pytests into different files ([#13435](https://github.com/rapidsai/cudf/pull/13435)) [@davidwendt](https://github.com/davidwendt) -- Add a move_to function to 
-- Allow newer scikit-build ([#13424](https://github.com/rapidsai/cudf/pull/13424)) [@vyasr](https://github.com/vyasr)
-- Refactor sort_by_values to sort_values, drop indices from return values. ([#13419](https://github.com/rapidsai/cudf/pull/13419)) [@bdice](https://github.com/bdice)
-- Inline Cython exception handler ([#13411](https://github.com/rapidsai/cudf/pull/13411)) [@vyasr](https://github.com/vyasr)
-- Init JNI version 23.08.0-SNAPSHOT ([#13401](https://github.com/rapidsai/cudf/pull/13401)) [@pxLi](https://github.com/pxLi)
-- Refactor ORC reader ([#13396](https://github.com/rapidsai/cudf/pull/13396)) [@ttnghia](https://github.com/ttnghia)
-- JNI: Remove cleaned objects in memory cleaner ([#13378](https://github.com/rapidsai/cudf/pull/13378)) [@res-life](https://github.com/res-life)
-- Add tests of currently unsupported indexing ([#13338](https://github.com/rapidsai/cudf/pull/13338)) [@wence-](https://github.com/wence-)
-- Performance improvement for some libcudf regex functions for long strings ([#13322](https://github.com/rapidsai/cudf/pull/13322)) [@davidwendt](https://github.com/davidwendt)
-- Exposure Tracked Buffer (first step towards unifying copy-on-write and spilling) ([#13307](https://github.com/rapidsai/cudf/pull/13307)) [@madsbk](https://github.com/madsbk)
-- Write string data directly to column_buffer in Parquet reader ([#13302](https://github.com/rapidsai/cudf/pull/13302)) [@etseidl](https://github.com/etseidl)
-- Add stacktrace into cudf exception types ([#13298](https://github.com/rapidsai/cudf/pull/13298)) [@ttnghia](https://github.com/ttnghia)
-- cuDF: Build CUDA 12 packages ([#12922](https://github.com/rapidsai/cudf/pull/12922)) [@bdice](https://github.com/bdice)
-
-# cuDF 23.06.00 (7 Jun 2023)
-
-## 🚨 Breaking Changes
-
-- Fix batch processing for parquet writer ([#13438](https://github.com/rapidsai/cudf/pull/13438)) [@ttnghia](https://github.com/ttnghia)
-- Use <NA> instead of null to match pandas. ([#13415](https://github.com/rapidsai/cudf/pull/13415)) [@bdice](https://github.com/bdice)
-- Remove UNKNOWN_NULL_COUNT ([#13372](https://github.com/rapidsai/cudf/pull/13372)) [@vyasr](https://github.com/vyasr)
-- Remove default UNKNOWN_NULL_COUNT from cudf::column member functions ([#13341](https://github.com/rapidsai/cudf/pull/13341)) [@davidwendt](https://github.com/davidwendt)
-- Use std::overflow_error when output would exceed column size limit ([#13323](https://github.com/rapidsai/cudf/pull/13323)) [@davidwendt](https://github.com/davidwendt)
-- Remove null mask and null count from column_view constructors ([#13311](https://github.com/rapidsai/cudf/pull/13311)) [@vyasr](https://github.com/vyasr)
-- Change default value of the `observed=` argument in groupby to `True` to reflect the actual behaviour ([#13296](https://github.com/rapidsai/cudf/pull/13296)) [@shwina](https://github.com/shwina)
-- Throw error if UNINITIALIZED is passed to cudf::state_null_count ([#13292](https://github.com/rapidsai/cudf/pull/13292)) [@davidwendt](https://github.com/davidwendt)
-- Remove default null-count parameter from cudf::make_strings_column factory ([#13227](https://github.com/rapidsai/cudf/pull/13227)) [@davidwendt](https://github.com/davidwendt)
-- Remove UNKNOWN_NULL_COUNT where it can be easily computed ([#13205](https://github.com/rapidsai/cudf/pull/13205)) [@vyasr](https://github.com/vyasr)
-- Update minimum Python version to Python 3.9 ([#13196](https://github.com/rapidsai/cudf/pull/13196)) [@shwina](https://github.com/shwina)
-- Refactor contiguous_split API into contiguous_split.hpp ([#13186](https://github.com/rapidsai/cudf/pull/13186)) [@abellina](https://github.com/abellina)
-- Cleanup Parquet chunked writer ([#13094](https://github.com/rapidsai/cudf/pull/13094)) [@ttnghia](https://github.com/ttnghia)
-- Cleanup ORC chunked writer ([#13091](https://github.com/rapidsai/cudf/pull/13091)) [@ttnghia](https://github.com/ttnghia)
-- Raise `NotImplementedError` when attempting to construct cuDF objects from timezone-aware datetimes ([#13086](https://github.com/rapidsai/cudf/pull/13086)) [@shwina](https://github.com/shwina)
-- Remove deprecated regex functions from libcudf ([#13067](https://github.com/rapidsai/cudf/pull/13067)) [@davidwendt](https://github.com/davidwendt)
-- [REVIEW] Upgrade to `arrow-11` ([#12757](https://github.com/rapidsai/cudf/pull/12757)) [@galipremsagar](https://github.com/galipremsagar)
-- Implement Python drop_duplicates with cudf::stable_distinct. ([#11656](https://github.com/rapidsai/cudf/pull/11656)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-
-## 🐛 Bug Fixes
-
-- Fix valid count computation in offset_bitmask_binop kernel ([#13489](https://github.com/rapidsai/cudf/pull/13489)) [@davidwendt](https://github.com/davidwendt)
-- Fix writing of ORC files with empty rowgroups ([#13466](https://github.com/rapidsai/cudf/pull/13466)) [@vuule](https://github.com/vuule)
-- Fix cudf::repeat logic when count is zero ([#13459](https://github.com/rapidsai/cudf/pull/13459)) [@davidwendt](https://github.com/davidwendt)
-- Fix batch processing for parquet writer ([#13438](https://github.com/rapidsai/cudf/pull/13438)) [@ttnghia](https://github.com/ttnghia)
-- Fix invalid use of std::exclusive_scan in Parquet writer ([#13434](https://github.com/rapidsai/cudf/pull/13434)) [@etseidl](https://github.com/etseidl)
-- Patch numba if it is imported first to ensure minor version compatibility works. ([#13433](https://github.com/rapidsai/cudf/pull/13433)) [@bdice](https://github.com/bdice)
-- Fix cudf::strings::replace_with_backrefs hang on empty match result ([#13418](https://github.com/rapidsai/cudf/pull/13418)) [@davidwendt](https://github.com/davidwendt)
-- Use <NA> instead of null to match pandas. ([#13415](https://github.com/rapidsai/cudf/pull/13415)) [@bdice](https://github.com/bdice)
-- Fix tokenize with non-space delimiter ([#13403](https://github.com/rapidsai/cudf/pull/13403)) [@shwina](https://github.com/shwina)
-- Fix groupby head/tail for empty dataframe ([#13398](https://github.com/rapidsai/cudf/pull/13398)) [@shwina](https://github.com/shwina)
-- Default to closed="right" in `IntervalIndex` constructor ([#13394](https://github.com/rapidsai/cudf/pull/13394)) [@shwina](https://github.com/shwina)
-- Correctly reorder and reindex scan groupbys with null keys ([#13389](https://github.com/rapidsai/cudf/pull/13389)) [@wence-](https://github.com/wence-)
-- Fix unused argument errors in nvcc 11.5 ([#13387](https://github.com/rapidsai/cudf/pull/13387)) [@abellina](https://github.com/abellina)
-- Updates needed to work with jitify that leverages libcudacxx ([#13383](https://github.com/rapidsai/cudf/pull/13383)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix unused parameter warning/error in parquet/page_data.cu ([#13367](https://github.com/rapidsai/cudf/pull/13367)) [@davidwendt](https://github.com/davidwendt)
-- Fix page size estimation in Parquet writer ([#13364](https://github.com/rapidsai/cudf/pull/13364)) [@etseidl](https://github.com/etseidl)
-- Fix subword_tokenize error when input contains no tokens ([#13320](https://github.com/rapidsai/cudf/pull/13320)) [@davidwendt](https://github.com/davidwendt)
-- Support gcc 12 as the C++ compiler ([#13316](https://github.com/rapidsai/cudf/pull/13316)) [@robertmaynard](https://github.com/robertmaynard)
-- Correctly set bitmask size in `from_column_view` ([#13315](https://github.com/rapidsai/cudf/pull/13315)) [@wence-](https://github.com/wence-)
-- Fix approach to detecting assignment for gte/lte operators ([#13285](https://github.com/rapidsai/cudf/pull/13285)) [@vyasr](https://github.com/vyasr)
-- Fix parquet schema interpretation issue ([#13277](https://github.com/rapidsai/cudf/pull/13277)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Fix 64bit shift bug in avro reader ([#13276](https://github.com/rapidsai/cudf/pull/13276)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix unused variables/parameters in parquet/writer_impl.cu ([#13263](https://github.com/rapidsai/cudf/pull/13263)) [@davidwendt](https://github.com/davidwendt)
-- Clean up buffers in case AssertionError ([#13262](https://github.com/rapidsai/cudf/pull/13262)) [@razajafri](https://github.com/razajafri)
-- Allow empty input table in ast `compute_column` ([#13245](https://github.com/rapidsai/cudf/pull/13245)) [@wence-](https://github.com/wence-)
-- Fix structs_column_wrapper constructors to copy input column wrappers ([#13243](https://github.com/rapidsai/cudf/pull/13243)) [@davidwendt](https://github.com/davidwendt)
-- Fix the row index stream order in ORC reader ([#13242](https://github.com/rapidsai/cudf/pull/13242)) [@vuule](https://github.com/vuule)
-- Make `is_decompression_disabled` and `is_compression_disabled` thread-safe ([#13240](https://github.com/rapidsai/cudf/pull/13240)) [@vuule](https://github.com/vuule)
-- Add [[maybe_unused]] to nvbench environment. ([#13219](https://github.com/rapidsai/cudf/pull/13219)) [@bdice](https://github.com/bdice)
-- Fix race in ORC string dictionary creation ([#13214](https://github.com/rapidsai/cudf/pull/13214)) [@revans2](https://github.com/revans2)
-- Add scalar argtypes to udf cache keys ([#13194](https://github.com/rapidsai/cudf/pull/13194)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Fix unused parameter warning/error in grouped_rolling.cu ([#13192](https://github.com/rapidsai/cudf/pull/13192)) [@davidwendt](https://github.com/davidwendt)
-- Avoid skbuild 0.17.2 which affected the cmake -DPython_LIBRARY string ([#13188](https://github.com/rapidsai/cudf/pull/13188)) [@sevagh](https://github.com/sevagh)
-- Fix `hostdevice_vector::subspan` ([#13187](https://github.com/rapidsai/cudf/pull/13187)) [@ttnghia](https://github.com/ttnghia)
-- Use custom nvbench entry point to ensure `cudf::nvbench_base_fixture` usage ([#13183](https://github.com/rapidsai/cudf/pull/13183)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix slice_strings to return empty strings for stop < start indices ([#13178](https://github.com/rapidsai/cudf/pull/13178)) [@davidwendt](https://github.com/davidwendt)
-- Allow compilation with any GTest version 1.11+ ([#13153](https://github.com/rapidsai/cudf/pull/13153)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix a few clang-format style check errors ([#13146](https://github.com/rapidsai/cudf/pull/13146)) [@davidwendt](https://github.com/davidwendt)
-- [REVIEW] Fix `Series` and `DataFrame` constructors to validate index lengths ([#13122](https://github.com/rapidsai/cudf/pull/13122)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix hash join when the input tables have nulls on only one side ([#13120](https://github.com/rapidsai/cudf/pull/13120)) [@ttnghia](https://github.com/ttnghia)
-- Fix GPU_ARCHS setting in Java CMake build and CMAKE_CUDA_ARCHITECTURES in Python package build. ([#13117](https://github.com/rapidsai/cudf/pull/13117)) [@davidwendt](https://github.com/davidwendt)
-- Adds checks to make sure json reader won't overflow ([#13115](https://github.com/rapidsai/cudf/pull/13115)) [@elstehle](https://github.com/elstehle)
-- Fix `null_count` of columns returned by `chunked_parquet_reader` ([#13111](https://github.com/rapidsai/cudf/pull/13111)) [@vuule](https://github.com/vuule)
-- Fixes sliced list and struct column bug in JSON chunked writer ([#13108](https://github.com/rapidsai/cudf/pull/13108)) [@karthikeyann](https://github.com/karthikeyann)
-- [REVIEW] Fix missing confluent kafka version ([#13101](https://github.com/rapidsai/cudf/pull/13101)) [@galipremsagar](https://github.com/galipremsagar)
-- Use make_empty_lists_column instead of make_empty_column(type_id::LIST) ([#13099](https://github.com/rapidsai/cudf/pull/13099)) [@davidwendt](https://github.com/davidwendt)
-- Raise `NotImplementedError` when attempting to construct cuDF objects from timezone-aware datetimes ([#13086](https://github.com/rapidsai/cudf/pull/13086)) [@shwina](https://github.com/shwina)
-- Fix column selection `read_parquet` benchmarks ([#13082](https://github.com/rapidsai/cudf/pull/13082)) [@vuule](https://github.com/vuule)
-- Fix bugs in iterative groupby apply algorithm ([#13078](https://github.com/rapidsai/cudf/pull/13078)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add algorithm include in data_sink.hpp ([#13068](https://github.com/rapidsai/cudf/pull/13068)) [@ahendriksen](https://github.com/ahendriksen)
-- Fix tests/identify_stream_usage.cpp ([#13066](https://github.com/rapidsai/cudf/pull/13066)) [@ahendriksen](https://github.com/ahendriksen)
-- Prevent overflow with `skip_rows` in ORC and Parquet readers ([#13063](https://github.com/rapidsai/cudf/pull/13063)) [@vuule](https://github.com/vuule)
-- Add except declaration in Cython interface for regex_program::create ([#13054](https://github.com/rapidsai/cudf/pull/13054)) [@davidwendt](https://github.com/davidwendt)
-- [REVIEW] Fix branch version in CI scripts ([#13029](https://github.com/rapidsai/cudf/pull/13029)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix OOB memory access in CSV reader when reading without NA values ([#13011](https://github.com/rapidsai/cudf/pull/13011)) [@vuule](https://github.com/vuule)
-- Fix read_avro() skip_rows and num_rows. ([#12912](https://github.com/rapidsai/cudf/pull/12912)) [@tpn](https://github.com/tpn)
-- Purge nonempty nulls from byte_cast list outputs. ([#11971](https://github.com/rapidsai/cudf/pull/11971)) [@bdice](https://github.com/bdice)
-- Fix consumption of CPU-backed interchange protocol dataframes ([#11392](https://github.com/rapidsai/cudf/pull/11392)) [@shwina](https://github.com/shwina)
-
-## 🚀 New Features
-
-- Remove numba JIT kernel usage from dataframe copy tests ([#13385](https://github.com/rapidsai/cudf/pull/13385)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add JNI for ORC/Parquet writer compression statistics ([#13376](https://github.com/rapidsai/cudf/pull/13376)) [@ttnghia](https://github.com/ttnghia)
-- Use _compile_or_get in JIT groupby apply ([#13350](https://github.com/rapidsai/cudf/pull/13350)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- cuDF numba cuda 12 updates ([#13337](https://github.com/rapidsai/cudf/pull/13337)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add tz_convert method to convert between timestamps ([#13328](https://github.com/rapidsai/cudf/pull/13328)) [@shwina](https://github.com/shwina)
-- Optionally return compression statistics from ORC and Parquet writers ([#13294](https://github.com/rapidsai/cudf/pull/13294)) [@vuule](https://github.com/vuule)
-- Support the case=False argument to str.contains ([#13290](https://github.com/rapidsai/cudf/pull/13290)) [@shwina](https://github.com/shwina)
-- Add an event handler for ColumnVector.close ([#13279](https://github.com/rapidsai/cudf/pull/13279)) [@abellina](https://github.com/abellina)
-- JNI api for cudf::chunked_pack ([#13278](https://github.com/rapidsai/cudf/pull/13278)) [@abellina](https://github.com/abellina)
-- Implement a chunked_pack API ([#13260](https://github.com/rapidsai/cudf/pull/13260)) [@abellina](https://github.com/abellina)
-- Update cudf recipes to use GTest version to >=1.13 ([#13207](https://github.com/rapidsai/cudf/pull/13207)) [@robertmaynard](https://github.com/robertmaynard)
-- JNI changes for range-extents in window functions. ([#13199](https://github.com/rapidsai/cudf/pull/13199)) [@mythrocks](https://github.com/mythrocks)
-- Add support for DatetimeTZDtype and tz_localize ([#13163](https://github.com/rapidsai/cudf/pull/13163)) [@shwina](https://github.com/shwina)
-- Add IS_NULL operator to AST ([#13145](https://github.com/rapidsai/cudf/pull/13145)) [@karthikeyann](https://github.com/karthikeyann)
-- STRING order-by column for RANGE window functions ([#13143](https://github.com/rapidsai/cudf/pull/13143)) [@mythrocks](https://github.com/mythrocks)
-- Update `contains_table` to experimental row hasher and equality comparator ([#13119](https://github.com/rapidsai/cudf/pull/13119)) [@divyegala](https://github.com/divyegala)
-- Automatically select `GroupBy.apply` algorithm based on if the UDF is jittable ([#13113](https://github.com/rapidsai/cudf/pull/13113)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Refactor Parquet chunked writer ([#13076](https://github.com/rapidsai/cudf/pull/13076)) [@ttnghia](https://github.com/ttnghia)
-- Add Python bindings for string literal support in AST ([#13073](https://github.com/rapidsai/cudf/pull/13073)) [@karthikeyann](https://github.com/karthikeyann)
-- Add Java bindings for string literal support in AST ([#13072](https://github.com/rapidsai/cudf/pull/13072)) [@karthikeyann](https://github.com/karthikeyann)
-- Add string scalar support in AST ([#13061](https://github.com/rapidsai/cudf/pull/13061)) [@karthikeyann](https://github.com/karthikeyann)
-- Log cuIO warnings using the libcudf logger ([#13043](https://github.com/rapidsai/cudf/pull/13043)) [@vuule](https://github.com/vuule)
-- Update `mixed_join` to use experimental row hasher and comparator ([#13028](https://github.com/rapidsai/cudf/pull/13028)) [@divyegala](https://github.com/divyegala)
-- Support structs of lists in row lexicographic comparator ([#13005](https://github.com/rapidsai/cudf/pull/13005)) [@ttnghia](https://github.com/ttnghia)
-- Adding `hostdevice_span` that is a span createable from `hostdevice_vector` ([#12981](https://github.com/rapidsai/cudf/pull/12981)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Add nvtext::minhash function ([#12961](https://github.com/rapidsai/cudf/pull/12961)) [@davidwendt](https://github.com/davidwendt)
-- Support lists of structs in row lexicographic comparator ([#12953](https://github.com/rapidsai/cudf/pull/12953)) [@ttnghia](https://github.com/ttnghia)
-- Update `join` to use experimental row hasher and comparator ([#12787](https://github.com/rapidsai/cudf/pull/12787)) [@divyegala](https://github.com/divyegala)
-- Implement Python drop_duplicates with cudf::stable_distinct. ([#11656](https://github.com/rapidsai/cudf/pull/11656)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-
-## 🛠️ Improvements
-
-- Drop extraneous dependencies from cudf conda recipe. ([#13406](https://github.com/rapidsai/cudf/pull/13406)) [@bdice](https://github.com/bdice)
-- Handle some corner-cases in indexing with boolean masks ([#13402](https://github.com/rapidsai/cudf/pull/13402)) [@wence-](https://github.com/wence-)
-- Add cudf::stable_distinct public API, tests, and benchmarks. ([#13392](https://github.com/rapidsai/cudf/pull/13392)) [@bdice](https://github.com/bdice)
-- [JNI] Pass this ColumnVector to the onClosed event handler ([#13386](https://github.com/rapidsai/cudf/pull/13386)) [@abellina](https://github.com/abellina)
-- Fix JNI method with mismatched parameter list ([#13384](https://github.com/rapidsai/cudf/pull/13384)) [@ttnghia](https://github.com/ttnghia)
-- Split up experimental_row_operator_tests.cu to improve its compile time ([#13382](https://github.com/rapidsai/cudf/pull/13382)) [@davidwendt](https://github.com/davidwendt)
-- Deprecate cudf::strings::slice_strings APIs that accept delimiters ([#13373](https://github.com/rapidsai/cudf/pull/13373)) [@davidwendt](https://github.com/davidwendt)
-- Remove UNKNOWN_NULL_COUNT ([#13372](https://github.com/rapidsai/cudf/pull/13372)) [@vyasr](https://github.com/vyasr)
-- Move some nvtext benchmarks to nvbench ([#13368](https://github.com/rapidsai/cudf/pull/13368)) [@davidwendt](https://github.com/davidwendt)
-- run docs nightly too ([#13366](https://github.com/rapidsai/cudf/pull/13366)) [@AyodeAwe](https://github.com/AyodeAwe)
-- Add warning for default `dtype` parameter in `get_dummies` ([#13365](https://github.com/rapidsai/cudf/pull/13365)) [@galipremsagar](https://github.com/galipremsagar)
-- Add log messages about kvikIO compatibility mode ([#13363](https://github.com/rapidsai/cudf/pull/13363)) [@vuule](https://github.com/vuule)
-- Switch back to using primary shared-action-workflows branch ([#13362](https://github.com/rapidsai/cudf/pull/13362)) [@vyasr](https://github.com/vyasr)
-- Deprecate `StringIndex` and use `Index` instead ([#13361](https://github.com/rapidsai/cudf/pull/13361)) [@galipremsagar](https://github.com/galipremsagar)
-- Ensure columns have valid null counts in CUDF JNI. ([#13355](https://github.com/rapidsai/cudf/pull/13355)) [@mythrocks](https://github.com/mythrocks)
-- Expunge most uses of `TypeVar(bound="Foo")` ([#13346](https://github.com/rapidsai/cudf/pull/13346)) [@wence-](https://github.com/wence-)
-- Remove all references to UNKNOWN_NULL_COUNT in Python ([#13345](https://github.com/rapidsai/cudf/pull/13345)) [@vyasr](https://github.com/vyasr)
-- Improve `distinct_count` with `cuco::static_set` ([#13343](https://github.com/rapidsai/cudf/pull/13343)) [@PointKernel](https://github.com/PointKernel)
-- Fix `contiguous_split` performance ([#13342](https://github.com/rapidsai/cudf/pull/13342)) [@ttnghia](https://github.com/ttnghia)
-- Remove default UNKNOWN_NULL_COUNT from cudf::column member functions ([#13341](https://github.com/rapidsai/cudf/pull/13341)) [@davidwendt](https://github.com/davidwendt)
-- Update mypy to 1.3 ([#13340](https://github.com/rapidsai/cudf/pull/13340)) [@wence-](https://github.com/wence-)
-- [Java] Purge non-empty nulls when setting validity ([#13335](https://github.com/rapidsai/cudf/pull/13335)) [@razajafri](https://github.com/razajafri)
-- Add row-wise filtering step to `read_parquet` ([#13334](https://github.com/rapidsai/cudf/pull/13334)) [@rjzamora](https://github.com/rjzamora)
-- Performance improvement for nvtext::minhash ([#13333](https://github.com/rapidsai/cudf/pull/13333)) [@davidwendt](https://github.com/davidwendt)
-- Fix some libcudf functions to set the null count on returning columns ([#13331](https://github.com/rapidsai/cudf/pull/13331)) [@davidwendt](https://github.com/davidwendt)
-- Change cudf::detail::concatenate_masks to return null-count ([#13330](https://github.com/rapidsai/cudf/pull/13330)) [@davidwendt](https://github.com/davidwendt)
-- Move `meta` calculation in `dask_cudf.read_parquet` ([#13327](https://github.com/rapidsai/cudf/pull/13327)) [@rjzamora](https://github.com/rjzamora)
-- Changes to support Numpy >= 1.24 ([#13325](https://github.com/rapidsai/cudf/pull/13325)) [@shwina](https://github.com/shwina)
-- Use std::overflow_error when output would exceed column size limit ([#13323](https://github.com/rapidsai/cudf/pull/13323)) [@davidwendt](https://github.com/davidwendt)
-- Clean up `distinct_count` benchmark ([#13321](https://github.com/rapidsai/cudf/pull/13321)) [@PointKernel](https://github.com/PointKernel)
-- Fix gtest pinning to 1.13.0. ([#13319](https://github.com/rapidsai/cudf/pull/13319)) [@bdice](https://github.com/bdice)
-- Remove null mask and null count from column_view constructors ([#13311](https://github.com/rapidsai/cudf/pull/13311)) [@vyasr](https://github.com/vyasr)
-- Address feedback from 13289 ([#13306](https://github.com/rapidsai/cudf/pull/13306)) [@vyasr](https://github.com/vyasr)
-- Change default value of the `observed=` argument in groupby to `True` to reflect the actual behaviour ([#13296](https://github.com/rapidsai/cudf/pull/13296)) [@shwina](https://github.com/shwina)
-- First check for `BaseDtype` when inferring the data type of an arbitrary object ([#13295](https://github.com/rapidsai/cudf/pull/13295)) [@shwina](https://github.com/shwina)
-- Throw error if UNINITIALIZED is passed to cudf::state_null_count ([#13292](https://github.com/rapidsai/cudf/pull/13292)) [@davidwendt](https://github.com/davidwendt)
-- Support CUDA 12.0 for pip wheels ([#13289](https://github.com/rapidsai/cudf/pull/13289)) [@divyegala](https://github.com/divyegala)
-- Refactor `transform_lists_of_structs` in `row_operators.cu` ([#13288](https://github.com/rapidsai/cudf/pull/13288)) [@ttnghia](https://github.com/ttnghia)
-- Branch 23.06 merge 23.04 ([#13286](https://github.com/rapidsai/cudf/pull/13286)) [@vyasr](https://github.com/vyasr)
-- Update cupy dependency ([#13284](https://github.com/rapidsai/cudf/pull/13284)) [@vyasr](https://github.com/vyasr)
-- Performance improvement in cudf::strings::join_strings for long strings ([#13283](https://github.com/rapidsai/cudf/pull/13283)) [@davidwendt](https://github.com/davidwendt)
-- Fix unused variables and functions ([#13275](https://github.com/rapidsai/cudf/pull/13275)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix integer overflow in `partition` `scatter_map` construction ([#13272](https://github.com/rapidsai/cudf/pull/13272)) [@wence-](https://github.com/wence-)
-- Numba 0.57 compatibility fixes ([#13271](https://github.com/rapidsai/cudf/pull/13271)) [@gmarkall](https://github.com/gmarkall)
-- Performance improvement in cudf::strings::all_characters_of_type ([#13259](https://github.com/rapidsai/cudf/pull/13259)) [@davidwendt](https://github.com/davidwendt)
-- Remove default null-count parameter from some libcudf factory functions ([#13258](https://github.com/rapidsai/cudf/pull/13258)) [@davidwendt](https://github.com/davidwendt)
-- Roll our own generate_string() because mimesis' has gone away ([#13257](https://github.com/rapidsai/cudf/pull/13257)) [@shwina](https://github.com/shwina)
-- Build wheels using new single image workflow ([#13249](https://github.com/rapidsai/cudf/pull/13249)) [@vyasr](https://github.com/vyasr)
-- Enable sccache hits from local builds ([#13248](https://github.com/rapidsai/cudf/pull/13248)) [@AyodeAwe](https://github.com/AyodeAwe)
-- Revert to branch-23.06 for shared-action-workflows ([#13247](https://github.com/rapidsai/cudf/pull/13247)) [@shwina](https://github.com/shwina)
-- Introduce `pandas_compatible` option in `cudf` ([#13241](https://github.com/rapidsai/cudf/pull/13241)) [@galipremsagar](https://github.com/galipremsagar)
-- Add metadata_builder helper class ([#13232](https://github.com/rapidsai/cudf/pull/13232)) [@abellina](https://github.com/abellina)
-- Use libkvikio conda packages in libcudf, add explicit libcufile dependency. ([#13231](https://github.com/rapidsai/cudf/pull/13231)) [@bdice](https://github.com/bdice)
-- Remove default null-count parameter from cudf::make_strings_column factory ([#13227](https://github.com/rapidsai/cudf/pull/13227)) [@davidwendt](https://github.com/davidwendt)
-- Performance improvement in cudf::strings::find/rfind for long strings ([#13226](https://github.com/rapidsai/cudf/pull/13226)) [@davidwendt](https://github.com/davidwendt)
-- Add chunked reader benchmark ([#13223](https://github.com/rapidsai/cudf/pull/13223)) [@SrikarVanavasam](https://github.com/SrikarVanavasam)
-- Set the null count in output columns in the CSV reader ([#13221](https://github.com/rapidsai/cudf/pull/13221)) [@vuule](https://github.com/vuule)
-- Skip Non-Empty nulls tests for the nightly build just like we skip CuFileTest and CudaFatalTest ([#13213](https://github.com/rapidsai/cudf/pull/13213)) [@razajafri](https://github.com/razajafri)
-- Fix string_scalar stream usage in write_json.cu ([#13212](https://github.com/rapidsai/cudf/pull/13212)) [@davidwendt](https://github.com/davidwendt)
-- Use canonicalized name for dlopen'd libraries (libcufile) ([#13210](https://github.com/rapidsai/cudf/pull/13210)) [@shwina](https://github.com/shwina)
-- Refactor pinned memory vector and ORC+Parquet writers ([#13206](https://github.com/rapidsai/cudf/pull/13206)) [@ttnghia](https://github.com/ttnghia)
-- Remove UNKNOWN_NULL_COUNT where it can be easily computed ([#13205](https://github.com/rapidsai/cudf/pull/13205)) [@vyasr](https://github.com/vyasr)
-- Optimization to decoding of parquet level streams ([#13203](https://github.com/rapidsai/cudf/pull/13203)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Clean up and simplify `gpuDecideCompression` ([#13202](https://github.com/rapidsai/cudf/pull/13202)) [@vuule](https://github.com/vuule)
-- Use std::array for a statically sized vector in `create_serialized_trie` ([#13201](https://github.com/rapidsai/cudf/pull/13201)) [@vuule](https://github.com/vuule)
-- Update minimum Python version to Python 3.9 ([#13196](https://github.com/rapidsai/cudf/pull/13196)) [@shwina](https://github.com/shwina)
-- Refactor contiguous_split API into contiguous_split.hpp ([#13186](https://github.com/rapidsai/cudf/pull/13186)) [@abellina](https://github.com/abellina)
-- Remove usage of rapids-get-rapids-version-from-git ([#13184](https://github.com/rapidsai/cudf/pull/13184)) [@jjacobelli](https://github.com/jjacobelli)
-- Enable mixed-dtype decimal/scalar binary operations ([#13171](https://github.com/rapidsai/cudf/pull/13171)) [@shwina](https://github.com/shwina)
-- Split up unique_count.cu to improve build time ([#13169](https://github.com/rapidsai/cudf/pull/13169)) [@davidwendt](https://github.com/davidwendt)
-- Use nvtx3 includes in string examples. ([#13165](https://github.com/rapidsai/cudf/pull/13165)) [@bdice](https://github.com/bdice)
-- Change some .cu gtest files to .cpp ([#13155](https://github.com/rapidsai/cudf/pull/13155)) [@davidwendt](https://github.com/davidwendt)
-- Remove wheel pytest verbosity ([#13151](https://github.com/rapidsai/cudf/pull/13151)) [@sevagh](https://github.com/sevagh)
-- Fix libcudf to always pass null-count to set_null_mask ([#13149](https://github.com/rapidsai/cudf/pull/13149)) [@davidwendt](https://github.com/davidwendt)
-- Fix gtests to always pass null-count to set_null_mask calls ([#13148](https://github.com/rapidsai/cudf/pull/13148)) [@davidwendt](https://github.com/davidwendt)
-- Optimize JSON writer ([#13144](https://github.com/rapidsai/cudf/pull/13144)) [@karthikeyann](https://github.com/karthikeyann)
-- Performance improvement for libcudf upper/lower conversion for long strings ([#13142](https://github.com/rapidsai/cudf/pull/13142)) [@davidwendt](https://github.com/davidwendt)
-- [REVIEW] Deprecate `pad` and `backfill` methods ([#13140](https://github.com/rapidsai/cudf/pull/13140)) [@galipremsagar](https://github.com/galipremsagar)
-- Use CTAD instead of functions in ProtobufReader ([#13135](https://github.com/rapidsai/cudf/pull/13135)) [@vuule](https://github.com/vuule)
-- Remove more instances of `UNKNOWN_NULL_COUNT` ([#13134](https://github.com/rapidsai/cudf/pull/13134)) [@vyasr](https://github.com/vyasr)
-- Update clang-format to 16.0.1. ([#13133](https://github.com/rapidsai/cudf/pull/13133)) [@bdice](https://github.com/bdice)
-- Add log messages about cuIO's nvCOMP and cuFile use ([#13132](https://github.com/rapidsai/cudf/pull/13132)) [@vuule](https://github.com/vuule)
-- Branch 23.06 merge 23.04 ([#13131](https://github.com/rapidsai/cudf/pull/13131)) [@vyasr](https://github.com/vyasr)
-- Compute null-count in cudf::detail::slice ([#13124](https://github.com/rapidsai/cudf/pull/13124)) [@davidwendt](https://github.com/davidwendt)
-- Use ARC V2 self-hosted runners for GPU jobs ([#13123](https://github.com/rapidsai/cudf/pull/13123)) [@jjacobelli](https://github.com/jjacobelli)
-- Set null-count in linked_column_view conversion operator ([#13121](https://github.com/rapidsai/cudf/pull/13121)) [@davidwendt](https://github.com/davidwendt)
-- Adding ifdefs around nvcc-specific pragmas ([#13110](https://github.com/rapidsai/cudf/pull/13110)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Add null-count parameter to json experimental parse_data utility ([#13107](https://github.com/rapidsai/cudf/pull/13107)) [@davidwendt](https://github.com/davidwendt)
-- Remove uses-setup-env-vars ([#13105](https://github.com/rapidsai/cudf/pull/13105)) [@vyasr](https://github.com/vyasr)
-- Explicitly compute null count in concatenate APIs ([#13104](https://github.com/rapidsai/cudf/pull/13104)) [@vyasr](https://github.com/vyasr)
-- Replace unnecessary uses of `UNKNOWN_NULL_COUNT` ([#13102](https://github.com/rapidsai/cudf/pull/13102)) [@vyasr](https://github.com/vyasr)
-- Performance improvement for cudf::string_view::find functions ([#13100](https://github.com/rapidsai/cudf/pull/13100)) [@davidwendt](https://github.com/davidwendt)
-- Use `.element()` instead of `.data()` for window range calculations ([#13095](https://github.com/rapidsai/cudf/pull/13095)) [@mythrocks](https://github.com/mythrocks)
-- Cleanup Parquet chunked writer ([#13094](https://github.com/rapidsai/cudf/pull/13094)) [@ttnghia](https://github.com/ttnghia)
-- Fix unused variable error/warning in page_data.cu ([#13093](https://github.com/rapidsai/cudf/pull/13093)) [@davidwendt](https://github.com/davidwendt)
-- Cleanup ORC chunked writer ([#13091](https://github.com/rapidsai/cudf/pull/13091)) [@ttnghia](https://github.com/ttnghia)
-- Remove using namespace cudf; from libcudf gtests source ([#13089](https://github.com/rapidsai/cudf/pull/13089)) [@davidwendt](https://github.com/davidwendt)
-- Change cudf::test::make_null_mask to also return null-count ([#13081](https://github.com/rapidsai/cudf/pull/13081)) [@davidwendt](https://github.com/davidwendt)
-- Resolved automerger from `branch-23.04` to `branch-23.06` ([#13080](https://github.com/rapidsai/cudf/pull/13080)) [@galipremsagar](https://github.com/galipremsagar)
-- Assert for non-empty nulls ([#13071](https://github.com/rapidsai/cudf/pull/13071)) [@razajafri](https://github.com/razajafri)
-- Remove deprecated regex functions from libcudf ([#13067](https://github.com/rapidsai/cudf/pull/13067)) [@davidwendt](https://github.com/davidwendt)
-- Refactor `cudf::detail::sorted_order` ([#13062](https://github.com/rapidsai/cudf/pull/13062)) [@ttnghia](https://github.com/ttnghia)
-- Improve performance of slice_strings for long strings ([#13057](https://github.com/rapidsai/cudf/pull/13057)) [@davidwendt](https://github.com/davidwendt)
-- Reduce shared memory usage in gpuComputePageSizes by 50% ([#13047](https://github.com/rapidsai/cudf/pull/13047)) [@nvdbaranec](https://github.com/nvdbaranec)
-- [REVIEW] Add notes to performance comparisons notebook ([#13044](https://github.com/rapidsai/cudf/pull/13044)) [@galipremsagar](https://github.com/galipremsagar)
-- Enable binary operations between scalars and columns of differing decimal types ([#13034](https://github.com/rapidsai/cudf/pull/13034)) [@shwina](https://github.com/shwina)
-- Remove console output from some libcudf gtests ([#13027](https://github.com/rapidsai/cudf/pull/13027)) [@davidwendt](https://github.com/davidwendt)
-- Remove underscore in build string. ([#13025](https://github.com/rapidsai/cudf/pull/13025)) [@bdice](https://github.com/bdice)
-- Bump up JNI version 23.06.0-SNAPSHOT ([#13021](https://github.com/rapidsai/cudf/pull/13021)) [@pxLi](https://github.com/pxLi)
-- Fix auto merger from `branch-23.04` to `branch-23.06` ([#13009](https://github.com/rapidsai/cudf/pull/13009)) [@galipremsagar](https://github.com/galipremsagar)
-- Reduce peak memory use when writing compressed ORC files. ([#12963](https://github.com/rapidsai/cudf/pull/12963)) [@vuule](https://github.com/vuule)
-- Add nvtx annotations to groupby methods ([#12941](https://github.com/rapidsai/cudf/pull/12941)) [@wence-](https://github.com/wence-)
-- Compute column sizes in Parquet preprocess with single kernel ([#12931](https://github.com/rapidsai/cudf/pull/12931)) [@SrikarVanavasam](https://github.com/SrikarVanavasam)
-- Add Python bindings for time zone data (TZiF) reader ([#12826](https://github.com/rapidsai/cudf/pull/12826)) [@shwina](https://github.com/shwina)
-- Optimize set-like operations ([#12769](https://github.com/rapidsai/cudf/pull/12769)) [@ttnghia](https://github.com/ttnghia)
-- [REVIEW] Upgrade to `arrow-11` ([#12757](https://github.com/rapidsai/cudf/pull/12757)) [@galipremsagar](https://github.com/galipremsagar)
-- Add empty test files for test reorganization ([#12288](https://github.com/rapidsai/cudf/pull/12288)) [@shwina](https://github.com/shwina)
-
-# cuDF 23.04.00 (6 Apr 2023)
-
-## 🚨 Breaking Changes
-
-- Pin `dask` and `distributed` for release ([#13070](https://github.com/rapidsai/cudf/pull/13070)) [@galipremsagar](https://github.com/galipremsagar)
-- Declare a different name for nan_equality.UNEQUAL to prevent Cython warnings. ([#12947](https://github.com/rapidsai/cudf/pull/12947)) [@bdice](https://github.com/bdice)
-- Update minimum `pandas` and `numpy` pinnings ([#12887](https://github.com/rapidsai/cudf/pull/12887)) [@galipremsagar](https://github.com/galipremsagar)
-- Deprecate `names` & `dtype` in `Index.copy` ([#12825](https://github.com/rapidsai/cudf/pull/12825)) [@galipremsagar](https://github.com/galipremsagar)
-- Deprecate `Index.is_*` methods ([#12820](https://github.com/rapidsai/cudf/pull/12820)) [@galipremsagar](https://github.com/galipremsagar)
-- Deprecate `datetime_is_numeric` from `describe` ([#12818](https://github.com/rapidsai/cudf/pull/12818)) [@galipremsagar](https://github.com/galipremsagar)
-- Deprecate `na_sentinel` in `factorize` ([#12817](https://github.com/rapidsai/cudf/pull/12817)) [@galipremsagar](https://github.com/galipremsagar)
-- Make string methods return a Series with a useful Index ([#12814](https://github.com/rapidsai/cudf/pull/12814)) [@shwina](https://github.com/shwina)
-- Produce useful guidance on overflow error in `to_csv` ([#12705](https://github.com/rapidsai/cudf/pull/12705)) [@wence-](https://github.com/wence-)
-- Move `strings_udf` code into cuDF ([#12669](https://github.com/rapidsai/cudf/pull/12669)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Remove cudf::strings::repeat_strings_output_sizes and optional parameter from cudf::strings::repeat_strings ([#12609](https://github.com/rapidsai/cudf/pull/12609)) [@davidwendt](https://github.com/davidwendt)
-- Replace message parsing with throwing more specific exceptions ([#12426](https://github.com/rapidsai/cudf/pull/12426)) [@vyasr](https://github.com/vyasr)
-
-## 🐛 Bug Fixes
-
-- Fix memcheck script to execute only _TEST files found in bin/gtests/libcudf ([#13006](https://github.com/rapidsai/cudf/pull/13006)) [@davidwendt](https://github.com/davidwendt)
-- Fix `DataFrame` constructor to broadcast scalar inputs properly ([#12997](https://github.com/rapidsai/cudf/pull/12997)) [@galipremsagar](https://github.com/galipremsagar)
-- Drop `force_nullable_schema` from chunked parquet writer ([#12996](https://github.com/rapidsai/cudf/pull/12996)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix gtest column utility comparator diff reporting ([#12995](https://github.com/rapidsai/cudf/pull/12995)) [@davidwendt](https://github.com/davidwendt)
-- Handle index names while performing `groupby` ([#12992](https://github.com/rapidsai/cudf/pull/12992)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix `__setitem__` on string columns when the scalar value ends in a null byte ([#12991](https://github.com/rapidsai/cudf/pull/12991)) [@wence-](https://github.com/wence-)
-- Fix `sort_values` when column is all empty strings ([#12988](https://github.com/rapidsai/cudf/pull/12988)) [@eriknw](https://github.com/eriknw)
-- Remove unused variable and fix memory issue in ORC writer ([#12984](https://github.com/rapidsai/cudf/pull/12984)) [@ttnghia](https://github.com/ttnghia)
-- Pre-emptive fix for upstream `dask.dataframe.read_parquet` changes ([#12983](https://github.com/rapidsai/cudf/pull/12983)) [@rjzamora](https://github.com/rjzamora)
-- Remove MANIFEST.in use auto-generated one for sdists and package_data for wheels ([#12960](https://github.com/rapidsai/cudf/pull/12960)) [@vyasr](https://github.com/vyasr)
-- Update to use rapids-export(COMPONENTS) feature. ([#12959](https://github.com/rapidsai/cudf/pull/12959)) [@robertmaynard](https://github.com/robertmaynard)
-- cudftestutil supports static gtest dependencies ([#12957](https://github.com/rapidsai/cudf/pull/12957)) [@robertmaynard](https://github.com/robertmaynard)
-- Include gtest in build environment. ([#12956](https://github.com/rapidsai/cudf/pull/12956)) [@vyasr](https://github.com/vyasr)
-- Correctly handle scalar indices in `Index.__getitem__` ([#12955](https://github.com/rapidsai/cudf/pull/12955)) [@wence-](https://github.com/wence-)
-- Avoid building cython twice ([#12945](https://github.com/rapidsai/cudf/pull/12945)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix set index error for Series rolling window operations ([#12942](https://github.com/rapidsai/cudf/pull/12942)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix calculation of null counts for Parquet statistics ([#12938](https://github.com/rapidsai/cudf/pull/12938)) [@etseidl](https://github.com/etseidl)
-- Preserve integer dtype of hive-partitioned column containing nulls ([#12930](https://github.com/rapidsai/cudf/pull/12930)) [@rjzamora](https://github.com/rjzamora)
-- Use get_current_device_resource for intermediate allocations in COLLECT_LIST window code ([#12927](https://github.com/rapidsai/cudf/pull/12927)) [@karthikeyann](https://github.com/karthikeyann)
-- Mark dlpack tensor deleter as noexcept to match PyCapsule_Destructor signature. ([#12921](https://github.com/rapidsai/cudf/pull/12921)) [@bdice](https://github.com/bdice)
-- Fix conda recipe post-link.sh typo ([#12916](https://github.com/rapidsai/cudf/pull/12916)) [@pentschev](https://github.com/pentschev)
-- min_rows and num_rows are swapped in ComputePageSizes declaration in Parquet reader ([#12886](https://github.com/rapidsai/cudf/pull/12886)) [@etseidl](https://github.com/etseidl)
-- Expect cupy to now support bool arrays for dlpack. ([#12883](https://github.com/rapidsai/cudf/pull/12883)) [@vyasr](https://github.com/vyasr)
-- Use python -m pytest for nightly wheel tests ([#12871](https://github.com/rapidsai/cudf/pull/12871)) [@bdice](https://github.com/bdice)
-- Parquet writer column_size() should return a size_t ([#12870](https://github.com/rapidsai/cudf/pull/12870)) [@etseidl](https://github.com/etseidl)
-- Fix cudf::hash_partition kernel launch error with decimal128 types ([#12863](https://github.com/rapidsai/cudf/pull/12863)) [@davidwendt](https://github.com/davidwendt)
-- Fix an issue with parquet chunked reader undercounting string lengths. ([#12859](https://github.com/rapidsai/cudf/pull/12859)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Remove tokenizers pre-install pinning. ([#12854](https://github.com/rapidsai/cudf/pull/12854)) [@vyasr](https://github.com/vyasr)
-- Fix parquet `RangeIndex` bug ([#12838](https://github.com/rapidsai/cudf/pull/12838)) [@rjzamora](https://github.com/rjzamora)
-- Remove KAFKA_HOST_TEST from compute-sanitizer check ([#12831](https://github.com/rapidsai/cudf/pull/12831)) [@davidwendt](https://github.com/davidwendt)
-- Make string methods return a Series with a useful Index ([#12814](https://github.com/rapidsai/cudf/pull/12814)) [@shwina](https://github.com/shwina)
-- Tell cudf_kafka to use header-only fmt ([#12796](https://github.com/rapidsai/cudf/pull/12796)) [@vyasr](https://github.com/vyasr)
-- Add `GroupBy.dtypes` ([#12783](https://github.com/rapidsai/cudf/pull/12783)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix a leak in a test and clarify some test names ([#12781](https://github.com/rapidsai/cudf/pull/12781)) [@revans2](https://github.com/revans2)
-- Fix bug in all-null list due to join_list_elements special handling ([#12767](https://github.com/rapidsai/cudf/pull/12767)) [@karthikeyann](https://github.com/karthikeyann)
-- Add try/except for expected null-schema error in read_parquet ([#12756](https://github.com/rapidsai/cudf/pull/12756)) [@rjzamora](https://github.com/rjzamora)
-- Throw an exception if an unsupported page encoding is detected in Parquet reader ([#12754](https://github.com/rapidsai/cudf/pull/12754)) [@etseidl](https://github.com/etseidl)
-- Fix a bug with `num_keys` in `_scatter_by_slice` ([#12749](https://github.com/rapidsai/cudf/pull/12749)) [@thomcom](https://github.com/thomcom)
-- Bump pinned rapids wheel deps to 23.4 ([#12735](https://github.com/rapidsai/cudf/pull/12735)) [@sevagh](https://github.com/sevagh)
-- Rework logic in cudf::strings::split_record to improve performance ([#12729](https://github.com/rapidsai/cudf/pull/12729)) [@davidwendt](https://github.com/davidwendt)
-- Add `always_nullable` flag to Dremel encoding ([#12727](https://github.com/rapidsai/cudf/pull/12727)) [@divyegala](https://github.com/divyegala)
-- Fix memcheck read error in compound segmented reduce ([#12722](https://github.com/rapidsai/cudf/pull/12722)) [@davidwendt](https://github.com/davidwendt)
-- Fix faulty conditional logic in JIT `GroupBy.apply` ([#12706](https://github.com/rapidsai/cudf/pull/12706)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Produce useful guidance on overflow error in `to_csv` ([#12705](https://github.com/rapidsai/cudf/pull/12705)) [@wence-](https://github.com/wence-)
-- Handle parquet list data corner case ([#12698](https://github.com/rapidsai/cudf/pull/12698)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix missing trailing comma in json writer ([#12688](https://github.com/rapidsai/cudf/pull/12688)) [@karthikeyann](https://github.com/karthikeyann)
-- Remove child from newCudaAsyncMemoryResource ([#12681](https://github.com/rapidsai/cudf/pull/12681)) [@abellina](https://github.com/abellina)
-- Handle bool types in `round` API ([#12670](https://github.com/rapidsai/cudf/pull/12670)) [@galipremsagar](https://github.com/galipremsagar)
-- Ensure all of device bitmask is initialized in from_arrow ([#12668](https://github.com/rapidsai/cudf/pull/12668)) [@wence-](https://github.com/wence-)
-- Fix `from_arrow` to load a sliced arrow table ([#12665](https://github.com/rapidsai/cudf/pull/12665)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix dask-cudf read_parquet bug for multi-file aggregation ([#12663](https://github.com/rapidsai/cudf/pull/12663)) [@rjzamora](https://github.com/rjzamora)
-- Fix AllocateLikeTest gtests reading uninitialized null-mask ([#12643](https://github.com/rapidsai/cudf/pull/12643)) [@davidwendt](https://github.com/davidwendt)
-- Fix `find_common_dtype` and `values` to handle complex dtypes ([#12537](https://github.com/rapidsai/cudf/pull/12537)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix fetching of MultiIndex values when a label is passed ([#12521](https://github.com/rapidsai/cudf/pull/12521)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix `Series` comparison vs scalars ([#12519](https://github.com/rapidsai/cudf/pull/12519)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Allow casting from `UDFString` back to `StringView` to call methods in `strings_udf` ([#12363](https://github.com/rapidsai/cudf/pull/12363)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-
-## 📖 Documentation
-
-- Fix `GroupBy.apply` doc examples rendering ([#12994](https://github.com/rapidsai/cudf/pull/12994)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- add sphinx building and s3 uploading for dask-cudf docs ([#12982](https://github.com/rapidsai/cudf/pull/12982)) [@quasiben](https://github.com/quasiben)
-- Add developer documentation forbidding default parameters in detail APIs ([#12978](https://github.com/rapidsai/cudf/pull/12978)) [@vyasr](https://github.com/vyasr)
-- Add README symlink for dask-cudf. ([#12946](https://github.com/rapidsai/cudf/pull/12946)) [@bdice](https://github.com/bdice)
-- Remove return type from @return doxygen tags ([#12908](https://github.com/rapidsai/cudf/pull/12908)) [@davidwendt](https://github.com/davidwendt)
-- Fix docs build to be `pydata-sphinx-theme=0.13.0` compatible ([#12874](https://github.com/rapidsai/cudf/pull/12874)) [@galipremsagar](https://github.com/galipremsagar)
-- Add skeleton API and prose documentation for dask-cudf ([#12725](https://github.com/rapidsai/cudf/pull/12725)) [@wence-](https://github.com/wence-)
-- Enable doctests for GroupBy methods ([#12658](https://github.com/rapidsai/cudf/pull/12658)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add comment about CUB patch for SegmentedSortInt.Bool gtest ([#12611](https://github.com/rapidsai/cudf/pull/12611)) [@davidwendt](https://github.com/davidwendt)
-
-## 🚀 New Features
-
-- Add JNI method for strings::replace multi variety ([#12979](https://github.com/rapidsai/cudf/pull/12979)) [@NVnavkumar](https://github.com/NVnavkumar)
-- Add nunique aggregation support for cudf::segmented_reduce ([#12972](https://github.com/rapidsai/cudf/pull/12972)) [@davidwendt](https://github.com/davidwendt)
-- Refactor orc chunked writer ([#12949](https://github.com/rapidsai/cudf/pull/12949)) [@ttnghia](https://github.com/ttnghia)
-- Make Parquet writer `nullable` option applicable to single table writes ([#12933](https://github.com/rapidsai/cudf/pull/12933)) [@vuule](https://github.com/vuule)
-- Refactor `io::orc::ProtobufWriter` ([#12877](https://github.com/rapidsai/cudf/pull/12877)) [@ttnghia](https://github.com/ttnghia)
-- Make timezone table independent from ORC ([#12805](https://github.com/rapidsai/cudf/pull/12805)) [@vuule](https://github.com/vuule)
-- Cache JIT `GroupBy.apply` functions ([#12802](https://github.com/rapidsai/cudf/pull/12802)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Implement initial support for avro logical types ([#6482](https://github.com/rapidsai/cudf/pull/6482)) ([#12788](https://github.com/rapidsai/cudf/pull/12788)) [@tpn](https://github.com/tpn)
-- Update `tests/column_utilities` to use `experimental::equality` row comparator ([#12777](https://github.com/rapidsai/cudf/pull/12777)) [@divyegala](https://github.com/divyegala)
-- Update `distinct/unique_count` to `experimental::row` hasher/comparator ([#12776](https://github.com/rapidsai/cudf/pull/12776)) [@divyegala](https://github.com/divyegala)
-- Update `hash_partition` to use `experimental::row::row_hasher` ([#12761](https://github.com/rapidsai/cudf/pull/12761)) [@divyegala](https://github.com/divyegala)
-- Update `is_sorted` to use `experimental::row::lexicographic` ([#12752](https://github.com/rapidsai/cudf/pull/12752)) [@divyegala](https://github.com/divyegala)
-- Update default data source in cuio reader benchmarks ([#12740](https://github.com/rapidsai/cudf/pull/12740)) [@PointKernel](https://github.com/PointKernel)
-- Reenable stream identification library in CI ([#12714](https://github.com/rapidsai/cudf/pull/12714)) [@vyasr](https://github.com/vyasr)
-- Add `regex_program` strings splitting java APIs and tests ([#12713](https://github.com/rapidsai/cudf/pull/12713)) [@cindyyuanjiang](https://github.com/cindyyuanjiang)
-- Add `regex_program` strings replacing java APIs and tests ([#12701](https://github.com/rapidsai/cudf/pull/12701)) [@cindyyuanjiang](https://github.com/cindyyuanjiang)
-- Add `regex_program` strings extract java APIs and tests ([#12699](https://github.com/rapidsai/cudf/pull/12699)) [@cindyyuanjiang](https://github.com/cindyyuanjiang)
-- Variable fragment sizes for Parquet writer ([#12685](https://github.com/rapidsai/cudf/pull/12685)) [@etseidl](https://github.com/etseidl)
-- Add segmented reduction support for fixed-point types ([#12680](https://github.com/rapidsai/cudf/pull/12680)) [@davidwendt](https://github.com/davidwendt)
-- Move `strings_udf` code into cuDF ([#12669](https://github.com/rapidsai/cudf/pull/12669)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add `regex_program` searching APIs and related java classes ([#12666](https://github.com/rapidsai/cudf/pull/12666)) [@cindyyuanjiang](https://github.com/cindyyuanjiang)
-- Add logging to libcudf ([#12637](https://github.com/rapidsai/cudf/pull/12637)) [@vuule](https://github.com/vuule)
-- Add compound aggregations to cudf::segmented_reduce ([#12573](https://github.com/rapidsai/cudf/pull/12573)) [@davidwendt](https://github.com/davidwendt)
-- Convert `rank` to use experimental row comparators ([#12481](https://github.com/rapidsai/cudf/pull/12481)) [@divyegala](https://github.com/divyegala)
-- Use rapids-cmake parallel testing feature ([#12451](https://github.com/rapidsai/cudf/pull/12451)) [@robertmaynard](https://github.com/robertmaynard)
-- Enable detection of undesired stream usage ([#12089](https://github.com/rapidsai/cudf/pull/12089)) [@vyasr](https://github.com/vyasr)
-
-## 🛠️ Improvements
-
-- Pin `dask` and `distributed` for release ([#13070](https://github.com/rapidsai/cudf/pull/13070)) [@galipremsagar](https://github.com/galipremsagar)
-- Pin cupy in wheel tests to supported versions ([#13041](https://github.com/rapidsai/cudf/pull/13041)) [@vyasr](https://github.com/vyasr)
-- Pin numba version ([#13001](https://github.com/rapidsai/cudf/pull/13001)) [@vyasr](https://github.com/vyasr)
-- Rework gtests SequenceTest to remove using namespace cudf ([#12985](https://github.com/rapidsai/cudf/pull/12985)) [@davidwendt](https://github.com/davidwendt)
-- Stop setting package version attribute in wheels ([#12977](https://github.com/rapidsai/cudf/pull/12977)) [@vyasr](https://github.com/vyasr)
-- Move detail reduction functions to cudf::reduction::detail namespace ([#12971](https://github.com/rapidsai/cudf/pull/12971)) [@davidwendt](https://github.com/davidwendt)
-- Remove default detail mrs: part7 ([#12970](https://github.com/rapidsai/cudf/pull/12970)) [@vyasr](https://github.com/vyasr)
-- Remove default detail mrs: part6 ([#12969](https://github.com/rapidsai/cudf/pull/12969)) [@vyasr](https://github.com/vyasr)
-- Remove default detail mrs: part5 ([#12968](https://github.com/rapidsai/cudf/pull/12968)) [@vyasr](https://github.com/vyasr)
-- Remove default detail mrs: part4 ([#12967](https://github.com/rapidsai/cudf/pull/12967)) [@vyasr](https://github.com/vyasr)
-- Remove default detail mrs: part3 ([#12966](https://github.com/rapidsai/cudf/pull/12966)) [@vyasr](https://github.com/vyasr)
- Remove default detail mrs: part3 ([#12966](https://github.com/rapidsai/cudf/pull/12966)) [@vyasr](https://github.com/vyasr)
- Remove default detail mrs: part2 ([#12965](https://github.com/rapidsai/cudf/pull/12965)) [@vyasr](https://github.com/vyasr)
- Remove default detail mrs: part1 ([#12964](https://github.com/rapidsai/cudf/pull/12964)) [@vyasr](https://github.com/vyasr)
- Add `force_nullable_schema` parameter to Parquet writer. ([#12952](https://github.com/rapidsai/cudf/pull/12952)) [@galipremsagar](https://github.com/galipremsagar)
- Declare a different name for nan_equality.UNEQUAL to prevent Cython warnings. ([#12947](https://github.com/rapidsai/cudf/pull/12947)) [@bdice](https://github.com/bdice)
- Remove remaining default stream parameters ([#12943](https://github.com/rapidsai/cudf/pull/12943)) [@vyasr](https://github.com/vyasr)
- Fix cudf::segmented_reduce gtest for ANY aggregation ([#12940](https://github.com/rapidsai/cudf/pull/12940)) [@davidwendt](https://github.com/davidwendt)
- Implement `groupby.head` and `groupby.tail` ([#12939](https://github.com/rapidsai/cudf/pull/12939)) [@wence-](https://github.com/wence-) (see the sketch after this list)
- Fix libcudf gtests to pass null-count=0 for empty validity masks ([#12923](https://github.com/rapidsai/cudf/pull/12923)) [@davidwendt](https://github.com/davidwendt)
- Migrate parquet encoding to use experimental row operators ([#12918](https://github.com/rapidsai/cudf/pull/12918)) [@PointKernel](https://github.com/PointKernel)
- Fix benchmarks coded in namespace cudf and using namespace cudf ([#12915](https://github.com/rapidsai/cudf/pull/12915)) [@karthikeyann](https://github.com/karthikeyann)
- Fix io/text gtests coded in namespace cudf::test ([#12914](https://github.com/rapidsai/cudf/pull/12914)) [@karthikeyann](https://github.com/karthikeyann)
- Pass `SCCACHE_S3_USE_SSL` to conda builds ([#12910](https://github.com/rapidsai/cudf/pull/12910)) [@ajschmidt8](https://github.com/ajschmidt8)
- Fix FST, JSON gtests & benchmarks coded in namespace cudf::test ([#12907](https://github.com/rapidsai/cudf/pull/12907)) [@karthikeyann](https://github.com/karthikeyann)
- Generate pyproject dependencies using dfg ([#12906](https://github.com/rapidsai/cudf/pull/12906)) [@vyasr](https://github.com/vyasr)
- Update libcudf counting functions to specify cudf::size_type ([#12904](https://github.com/rapidsai/cudf/pull/12904)) [@davidwendt](https://github.com/davidwendt)
- Fix `moto` env vars & pass `AWS_SESSION_TOKEN` to conda builds ([#12902](https://github.com/rapidsai/cudf/pull/12902)) [@ajschmidt8](https://github.com/ajschmidt8)
- Rewrite CSV writer benchmark with nvbench ([#12901](https://github.com/rapidsai/cudf/pull/12901)) [@PointKernel](https://github.com/PointKernel)
- Rework some code logic to reduce iterator and comparator inlining to improve compile time ([#12900](https://github.com/rapidsai/cudf/pull/12900)) [@davidwendt](https://github.com/davidwendt)
- Deprecate `line_terminator` in favor of `lineterminator` in `to_csv` ([#12896](https://github.com/rapidsai/cudf/pull/12896)) [@wence-](https://github.com/wence-)
- Add `stream` and `mr` parameters for `structs::detail::flatten_nested_columns` ([#12892](https://github.com/rapidsai/cudf/pull/12892)) [@ttnghia](https://github.com/ttnghia)
- Deprecate libcudf regex APIs accepting pattern strings directly ([#12891](https://github.com/rapidsai/cudf/pull/12891)) [@davidwendt](https://github.com/davidwendt)
- Remove default parameters from detail headers in include ([#12888](https://github.com/rapidsai/cudf/pull/12888)) [@vyasr](https://github.com/vyasr)
- Update minimum `pandas` and `numpy` pinnings ([#12887](https://github.com/rapidsai/cudf/pull/12887)) [@galipremsagar](https://github.com/galipremsagar)
- Implement `groupby.sample` ([#12882](https://github.com/rapidsai/cudf/pull/12882)) [@wence-](https://github.com/wence-)
- Update JNI build ENV default to gcc 11 ([#12881](https://github.com/rapidsai/cudf/pull/12881)) [@pxLi](https://github.com/pxLi)
- Change return type of `cudf::structs::detail::flatten_nested_columns` to smart pointer ([#12878](https://github.com/rapidsai/cudf/pull/12878)) [@ttnghia](https://github.com/ttnghia)
- Fix passing seed parameter to MurmurHash3_32 in cudf::hash() function ([#12875](https://github.com/rapidsai/cudf/pull/12875)) [@davidwendt](https://github.com/davidwendt)
- Remove manual artifact upload step in CI ([#12869](https://github.com/rapidsai/cudf/pull/12869)) [@ajschmidt8](https://github.com/ajschmidt8)
- Update to GCC 11 ([#12868](https://github.com/rapidsai/cudf/pull/12868)) [@bdice](https://github.com/bdice)
- Fix null hive-partition behavior in dask-cudf parquet ([#12866](https://github.com/rapidsai/cudf/pull/12866)) [@rjzamora](https://github.com/rjzamora)
- Update to protobuf>=4.21.6,<4.22. ([#12864](https://github.com/rapidsai/cudf/pull/12864)) [@bdice](https://github.com/bdice)
- Update RMM allocators ([#12861](https://github.com/rapidsai/cudf/pull/12861)) [@pentschev](https://github.com/pentschev)
- Improve performance for replace-multi for long strings ([#12858](https://github.com/rapidsai/cudf/pull/12858)) [@davidwendt](https://github.com/davidwendt)
- Drop Python 3.7 handling for pickle protocol 4 ([#12857](https://github.com/rapidsai/cudf/pull/12857)) [@jakirkham](https://github.com/jakirkham)
- Migrate as much as possible to pyproject.toml ([#12850](https://github.com/rapidsai/cudf/pull/12850)) [@vyasr](https://github.com/vyasr)
- Enable nbqa pre-commit hooks for isort and black. ([#12848](https://github.com/rapidsai/cudf/pull/12848)) [@bdice](https://github.com/bdice)
- Setting a threshold for KvikIO IO ([#12841](https://github.com/rapidsai/cudf/pull/12841)) [@madsbk](https://github.com/madsbk)
- Update datasets download URL ([#12840](https://github.com/rapidsai/cudf/pull/12840)) [@jjacobelli](https://github.com/jjacobelli)
- Make docs builds less verbose ([#12836](https://github.com/rapidsai/cudf/pull/12836)) [@AyodeAwe](https://github.com/AyodeAwe)
- Consolidate linter configs into pyproject.toml ([#12834](https://github.com/rapidsai/cudf/pull/12834)) [@vyasr](https://github.com/vyasr)
- Deprecate `names` & `dtype` in `Index.copy` ([#12825](https://github.com/rapidsai/cudf/pull/12825)) [@galipremsagar](https://github.com/galipremsagar)
- Deprecate `inplace` parameters in categorical methods ([#12824](https://github.com/rapidsai/cudf/pull/12824)) [@galipremsagar](https://github.com/galipremsagar)
- Add optional text file support to ninja-log utility ([#12823](https://github.com/rapidsai/cudf/pull/12823)) [@davidwendt](https://github.com/davidwendt)
- Deprecate `Index.is_*` methods ([#12820](https://github.com/rapidsai/cudf/pull/12820)) [@galipremsagar](https://github.com/galipremsagar)
- Add dfg as a pre-commit hook ([#12819](https://github.com/rapidsai/cudf/pull/12819)) [@vyasr](https://github.com/vyasr)
- Deprecate `datetime_is_numeric` from `describe` ([#12818](https://github.com/rapidsai/cudf/pull/12818)) [@galipremsagar](https://github.com/galipremsagar)
- Deprecate `na_sentinel` in `factorize` ([#12817](https://github.com/rapidsai/cudf/pull/12817)) [@galipremsagar](https://github.com/galipremsagar)
- Shuffling read into a sub function in parquet read ([#12809](https://github.com/rapidsai/cudf/pull/12809)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- Fixing parquet coalescing of reads ([#12808](https://github.com/rapidsai/cudf/pull/12808)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- CI: Remove specification of manual stage for check_style.sh script. ([#12803](https://github.com/rapidsai/cudf/pull/12803)) [@csadorf](https://github.com/csadorf)
- Add compute-sanitizer github workflow action to nightly tests ([#12800](https://github.com/rapidsai/cudf/pull/12800)) [@davidwendt](https://github.com/davidwendt)
- Enable groupby std and variance aggregation types in libcudf Debug build ([#12799](https://github.com/rapidsai/cudf/pull/12799)) [@davidwendt](https://github.com/davidwendt)
- Expose seed argument to hash_values ([#12795](https://github.com/rapidsai/cudf/pull/12795)) [@ayushdg](https://github.com/ayushdg)
- Fix groupby gtests coded in namespace cudf::test ([#12784](https://github.com/rapidsai/cudf/pull/12784)) [@davidwendt](https://github.com/davidwendt)
- Improve performance for cudf::strings::count_characters for long strings ([#12779](https://github.com/rapidsai/cudf/pull/12779)) [@davidwendt](https://github.com/davidwendt)
- Deallocate encoded data in ORC writer immediately after compression ([#12770](https://github.com/rapidsai/cudf/pull/12770)) [@vuule](https://github.com/vuule)
- Stop force pulling fmt in nvbench. ([#12768](https://github.com/rapidsai/cudf/pull/12768)) [@vyasr](https://github.com/vyasr)
- Remove now redundant cuda initialization ([#12758](https://github.com/rapidsai/cudf/pull/12758)) [@vyasr](https://github.com/vyasr)
- Adds JSON reader, writer io benchmark ([#12753](https://github.com/rapidsai/cudf/pull/12753)) [@karthikeyann](https://github.com/karthikeyann)
- Use test paths relative to package directory. ([#12751](https://github.com/rapidsai/cudf/pull/12751)) [@bdice](https://github.com/bdice)
- Add build metrics report as artifact to cpp-build workflow ([#12750](https://github.com/rapidsai/cudf/pull/12750)) [@davidwendt](https://github.com/davidwendt)
- Add JNI methods for detecting and purging non-empty nulls from LIST and STRUCT ([#12742](https://github.com/rapidsai/cudf/pull/12742)) [@razajafri](https://github.com/razajafri)
- Stop using versioneer to manage versions ([#12741](https://github.com/rapidsai/cudf/pull/12741)) [@vyasr](https://github.com/vyasr)
- Reduce error handling verbosity in CI tests scripts ([#12738](https://github.com/rapidsai/cudf/pull/12738)) [@AjayThorve](https://github.com/AjayThorve)
- Reduce the number of test cases in multibyte_split benchmark ([#12737](https://github.com/rapidsai/cudf/pull/12737)) [@PointKernel](https://github.com/PointKernel)
- Update shared workflow branches ([#12733](https://github.com/rapidsai/cudf/pull/12733)) [@ajschmidt8](https://github.com/ajschmidt8)
- JNI switches to nested JSON reader ([#12732](https://github.com/rapidsai/cudf/pull/12732)) [@res-life](https://github.com/res-life)
- Changing `cudf::io::source_info` to use `cudf::host_span<std::byte>` in a non-breaking form ([#12730](https://github.com/rapidsai/cudf/pull/12730)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- Add nvbench environment class for initializing RMM in benchmarks ([#12728](https://github.com/rapidsai/cudf/pull/12728)) [@davidwendt](https://github.com/davidwendt)
- Split C++ and Python build dependencies into separate lists. ([#12724](https://github.com/rapidsai/cudf/pull/12724)) [@bdice](https://github.com/bdice)
- Add build dependencies to Java tests. ([#12723](https://github.com/rapidsai/cudf/pull/12723)) [@bdice](https://github.com/bdice)
- Allow setting the seed argument for hash partition ([#12715](https://github.com/rapidsai/cudf/pull/12715)) [@firestarman](https://github.com/firestarman)
- Remove gpuCI scripts. ([#12712](https://github.com/rapidsai/cudf/pull/12712)) [@bdice](https://github.com/bdice)
- Unpin `dask` and `distributed` for development ([#12710](https://github.com/rapidsai/cudf/pull/12710)) [@galipremsagar](https://github.com/galipremsagar)
- `partition_by_hash()`: use `_split()` ([#12704](https://github.com/rapidsai/cudf/pull/12704)) [@madsbk](https://github.com/madsbk)
- Remove DataFrame.quantiles from docs. ([#12684](https://github.com/rapidsai/cudf/pull/12684)) [@bdice](https://github.com/bdice)
- Fast path for `experimental::row::equality` ([#12676](https://github.com/rapidsai/cudf/pull/12676)) [@divyegala](https://github.com/divyegala)
- Move date to build string in `conda` recipe ([#12661](https://github.com/rapidsai/cudf/pull/12661)) [@ajschmidt8](https://github.com/ajschmidt8)
- Refactor reduction logic for fixed-point types ([#12652](https://github.com/rapidsai/cudf/pull/12652)) [@davidwendt](https://github.com/davidwendt)
- Pay off some JNI RMM API tech debt ([#12632](https://github.com/rapidsai/cudf/pull/12632)) [@revans2](https://github.com/revans2)
- Merge `copy-on-write` feature branch into `branch-23.04` ([#12619](https://github.com/rapidsai/cudf/pull/12619)) [@galipremsagar](https://github.com/galipremsagar)
- Remove cudf::strings::repeat_strings_output_sizes and optional parameter from cudf::strings::repeat_strings ([#12609](https://github.com/rapidsai/cudf/pull/12609)) [@davidwendt](https://github.com/davidwendt)
- Pin cuda-nvrtc. ([#12606](https://github.com/rapidsai/cudf/pull/12606)) [@bdice](https://github.com/bdice)
- Remove cudf::test::print calls from libcudf gtests ([#12604](https://github.com/rapidsai/cudf/pull/12604)) [@davidwendt](https://github.com/davidwendt)
- Init JNI version 23.04.0-SNAPSHOT ([#12599](https://github.com/rapidsai/cudf/pull/12599)) [@pxLi](https://github.com/pxLi)
- Add performance benchmarks to user facing docs ([#12595](https://github.com/rapidsai/cudf/pull/12595)) [@galipremsagar](https://github.com/galipremsagar)
- Add docs build job ([#12592](https://github.com/rapidsai/cudf/pull/12592)) [@AyodeAwe](https://github.com/AyodeAwe)
- Replace message parsing with throwing more specific exceptions ([#12426](https://github.com/rapidsai/cudf/pull/12426)) [@vyasr](https://github.com/vyasr)
- Support conversion to/from cudf in dask.dataframe.core.to_backend ([#12380](https://github.com/rapidsai/cudf/pull/12380)) [@rjzamora](https://github.com/rjzamora)
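
The `groupby.head`/`groupby.tail` entry above ([#12939]) adds a pandas-style API; the following is a minimal sketch of how it behaves (the frame and column names are made up for the example):

```python
import cudf

df = cudf.DataFrame({"g": [1, 1, 1, 2, 2], "v": [10, 20, 30, 40, 50]})

# First two rows of each group, then the last row of each group,
# following the pandas GroupBy.head/GroupBy.tail semantics.
print(df.groupby("g").head(2))
print(df.groupby("g").tail(1))
```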

# cuDF 23.02.00 (9 Feb 2023)

## 🚨 Breaking Changes

- Pin `dask` and `distributed` for release ([#12695](https://github.com/rapidsai/cudf/pull/12695)) [@galipremsagar](https://github.com/galipremsagar)
- Change ways to access `ptr` in `Buffer` ([#12587](https://github.com/rapidsai/cudf/pull/12587)) [@galipremsagar](https://github.com/galipremsagar)
- Remove column names ([#12578](https://github.com/rapidsai/cudf/pull/12578)) [@vuule](https://github.com/vuule)
- Default `cudf::io::read_json` to nested JSON parser ([#12544](https://github.com/rapidsai/cudf/pull/12544)) [@vuule](https://github.com/vuule)
- Switch `engine=cudf` to the new `JSON` reader ([#12509](https://github.com/rapidsai/cudf/pull/12509)) [@galipremsagar](https://github.com/galipremsagar)
- Add trailing comma support for nested JSON reader ([#12448](https://github.com/rapidsai/cudf/pull/12448)) [@karthikeyann](https://github.com/karthikeyann)
- Upgrade to `arrow-10.0.1` ([#12327](https://github.com/rapidsai/cudf/pull/12327)) [@galipremsagar](https://github.com/galipremsagar)
- Fail loudly to avoid data corruption with unsupported input in `read_orc` ([#12325](https://github.com/rapidsai/cudf/pull/12325)) [@vuule](https://github.com/vuule)
- CSV, JSON reader to infer integer column with nulls as int64 instead of float64 ([#12309](https://github.com/rapidsai/cudf/pull/12309)) [@karthikeyann](https://github.com/karthikeyann) (see the sketch after this list)
- Remove deprecated code for 23.02 ([#12281](https://github.com/rapidsai/cudf/pull/12281)) [@vyasr](https://github.com/vyasr)
- Null element for parsing error in numeric types in JSON, CSV reader ([#12272](https://github.com/rapidsai/cudf/pull/12272)) [@karthikeyann](https://github.com/karthikeyann)
- Purge non-empty nulls for `superimpose_nulls` and `push_down_nulls` ([#12239](https://github.com/rapidsai/cudf/pull/12239)) [@ttnghia](https://github.com/ttnghia)
- Rename `cudf::structs::detail::superimpose_parent_nulls` APIs ([#12230](https://github.com/rapidsai/cudf/pull/12230)) [@ttnghia](https://github.com/ttnghia)
- Remove JIT type names, refactor id_to_type. ([#12158](https://github.com/rapidsai/cudf/pull/12158)) [@bdice](https://github.com/bdice)
- Floor division uses integer division for integral arguments ([#12131](https://github.com/rapidsai/cudf/pull/12131)) [@wence-](https://github.com/wence-)
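
A minimal sketch of the inference change from [#12309] above, assuming a cuDF build that includes it (the inline CSV sample is invented for the example):

```python
import io

import cudf

# Column "a" has one present integer and one missing value. With #12309,
# it is inferred as int64 with a null rather than float64 with NaN.
df = cudf.read_csv(io.StringIO("a,b\n1,x\n,y\n"))
print(df["a"].dtype)  # expected: int64
```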

## 🐛 Bug Fixes

- Fix a mask data corruption in UDF ([#12647](https://github.com/rapidsai/cudf/pull/12647)) [@galipremsagar](https://github.com/galipremsagar)
- pre-commit: Update isort version to 5.12.0 ([#12645](https://github.com/rapidsai/cudf/pull/12645)) [@wence-](https://github.com/wence-)
- tests: Skip cuInit tests if cuda-gdb is not found or not working ([#12644](https://github.com/rapidsai/cudf/pull/12644)) [@wence-](https://github.com/wence-)
- Revert regex program java APIs and tests ([#12639](https://github.com/rapidsai/cudf/pull/12639)) [@cindyyuanjiang](https://github.com/cindyyuanjiang)
- Fix leaks in ColumnVectorTest ([#12625](https://github.com/rapidsai/cudf/pull/12625)) [@jlowe](https://github.com/jlowe)
- Handle when spillable buffers own each other ([#12607](https://github.com/rapidsai/cudf/pull/12607)) [@madsbk](https://github.com/madsbk)
- Fix incorrect null counts for sliced columns in JCudfSerialization ([#12589](https://github.com/rapidsai/cudf/pull/12589)) [@jlowe](https://github.com/jlowe)
- lists: Transfer dtypes correctly through list.get ([#12586](https://github.com/rapidsai/cudf/pull/12586)) [@wence-](https://github.com/wence-)
- timedelta: Don't go via float intermediates for floordiv ([#12585](https://github.com/rapidsai/cudf/pull/12585)) [@wence-](https://github.com/wence-)
- Fixing BUG, `get_next_chunk()` should use the blocking function `device_read()` ([#12584](https://github.com/rapidsai/cudf/pull/12584)) [@madsbk](https://github.com/madsbk)
- Make JNI QuoteStyle accessible outside ai.rapids.cudf ([#12572](https://github.com/rapidsai/cudf/pull/12572)) [@mythrocks](https://github.com/mythrocks)
- `partition_by_hash()`: support index ([#12554](https://github.com/rapidsai/cudf/pull/12554)) [@madsbk](https://github.com/madsbk)
- Mixed Join benchmark bug due to wrong conditional column ([#12553](https://github.com/rapidsai/cudf/pull/12553)) [@divyegala](https://github.com/divyegala)
- Update List Lexicographical Comparator ([#12538](https://github.com/rapidsai/cudf/pull/12538)) [@divyegala](https://github.com/divyegala)
- Dynamically read PTX version ([#12534](https://github.com/rapidsai/cudf/pull/12534)) [@brandon-b-miller](https://github.com/brandon-b-miller)
- build.sh switch to use `RAPIDS` magic value ([#12525](https://github.com/rapidsai/cudf/pull/12525)) [@robertmaynard](https://github.com/robertmaynard)
- Loosen runtime arrow pinning ([#12522](https://github.com/rapidsai/cudf/pull/12522)) [@vyasr](https://github.com/vyasr)
- Enable metadata transfer for complex types in transpose ([#12491](https://github.com/rapidsai/cudf/pull/12491)) [@galipremsagar](https://github.com/galipremsagar)
- Fix issues with parquet chunked reader ([#12488](https://github.com/rapidsai/cudf/pull/12488)) [@nvdbaranec](https://github.com/nvdbaranec)
- Fix missing metadata transfer in concat for `ListColumn` ([#12487](https://github.com/rapidsai/cudf/pull/12487)) [@galipremsagar](https://github.com/galipremsagar)
- Rename libcudf substring source files to slice ([#12484](https://github.com/rapidsai/cudf/pull/12484)) [@davidwendt](https://github.com/davidwendt)
- Fix compile issue with arrow 10 ([#12465](https://github.com/rapidsai/cudf/pull/12465)) [@ttnghia](https://github.com/ttnghia)
- Fix List offsets bug in mixed type list column in nested JSON reader ([#12447](https://github.com/rapidsai/cudf/pull/12447)) [@karthikeyann](https://github.com/karthikeyann)
- Fix xfail incompatibilities ([#12423](https://github.com/rapidsai/cudf/pull/12423)) [@vyasr](https://github.com/vyasr)
- Fix bug in Parquet column index encoding ([#12404](https://github.com/rapidsai/cudf/pull/12404)) [@etseidl](https://github.com/etseidl)
- When building Arrow shared look for a shared OpenSSL ([#12396](https://github.com/rapidsai/cudf/pull/12396)) [@robertmaynard](https://github.com/robertmaynard)
- Fix get_json_object to return empty column on empty input ([#12384](https://github.com/rapidsai/cudf/pull/12384)) [@davidwendt](https://github.com/davidwendt)
- Pin arrow 9 in testing dependencies to prevent conda solve issues ([#12377](https://github.com/rapidsai/cudf/pull/12377)) [@vyasr](https://github.com/vyasr)
- Fix reductions any/all return value for empty input ([#12374](https://github.com/rapidsai/cudf/pull/12374)) [@davidwendt](https://github.com/davidwendt)
- Fix debug compile errors in parquet.hpp ([#12372](https://github.com/rapidsai/cudf/pull/12372)) [@davidwendt](https://github.com/davidwendt)
- Purge non-empty nulls in `cudf::make_lists_column` ([#12370](https://github.com/rapidsai/cudf/pull/12370)) [@ttnghia](https://github.com/ttnghia)
- Use correct memory resource in io::make_column ([#12364](https://github.com/rapidsai/cudf/pull/12364)) [@vyasr](https://github.com/vyasr)
- Add code to detect possible malformed page data in parquet files. ([#12360](https://github.com/rapidsai/cudf/pull/12360)) [@nvdbaranec](https://github.com/nvdbaranec)
- Fail loudly to avoid data corruption with unsupported input in `read_orc` ([#12325](https://github.com/rapidsai/cudf/pull/12325)) [@vuule](https://github.com/vuule)
- Fix NumericPairIteratorTest for float values ([#12306](https://github.com/rapidsai/cudf/pull/12306)) [@davidwendt](https://github.com/davidwendt)
- Fixes memory allocation in nested JSON tokenizer ([#12300](https://github.com/rapidsai/cudf/pull/12300)) [@elstehle](https://github.com/elstehle)
- Reconstruct dtypes correctly for list aggs of struct columns ([#12290](https://github.com/rapidsai/cudf/pull/12290)) [@wence-](https://github.com/wence-)
- Fix regex \A and \Z to strictly match string begin/end ([#12282](https://github.com/rapidsai/cudf/pull/12282)) [@davidwendt](https://github.com/davidwendt)
- Fix compile issue in `json_chunked_reader.cpp` ([#12280](https://github.com/rapidsai/cudf/pull/12280)) [@ttnghia](https://github.com/ttnghia)
- Change reductions any/all to return valid values for empty input ([#12279](https://github.com/rapidsai/cudf/pull/12279)) [@davidwendt](https://github.com/davidwendt)
- Only exclude join keys that are indices from key columns ([#12271](https://github.com/rapidsai/cudf/pull/12271)) [@wence-](https://github.com/wence-)
- Fix spill to device limit ([#12252](https://github.com/rapidsai/cudf/pull/12252)) [@madsbk](https://github.com/madsbk)
- Correct behaviour of sort in `concat` for singleton concatenations ([#12247](https://github.com/rapidsai/cudf/pull/12247)) [@wence-](https://github.com/wence-)
- Purge non-empty nulls for `superimpose_nulls` and `push_down_nulls` ([#12239](https://github.com/rapidsai/cudf/pull/12239)) [@ttnghia](https://github.com/ttnghia)
- Patch CUB DeviceSegmentedSort and remove workaround ([#12234](https://github.com/rapidsai/cudf/pull/12234)) [@davidwendt](https://github.com/davidwendt)
- Fix memory leak in udf_string::assign(&&) function ([#12206](https://github.com/rapidsai/cudf/pull/12206)) [@davidwendt](https://github.com/davidwendt)
- Workaround thrust-copy-if limit in json get_tree_representation ([#12190](https://github.com/rapidsai/cudf/pull/12190)) [@davidwendt](https://github.com/davidwendt)
- Fix page size calculation in Parquet writer ([#12182](https://github.com/rapidsai/cudf/pull/12182)) [@etseidl](https://github.com/etseidl)
- Add cudf::detail::sizes_to_offsets_iterator to allow checking overflow in offsets ([#12180](https://github.com/rapidsai/cudf/pull/12180)) [@davidwendt](https://github.com/davidwendt)
- Workaround thrust-copy-if limit in wordpiece-tokenizer ([#12168](https://github.com/rapidsai/cudf/pull/12168)) [@davidwendt](https://github.com/davidwendt)
- Floor division uses integer division for integral arguments ([#12131](https://github.com/rapidsai/cudf/pull/12131)) [@wence-](https://github.com/wence-)

## 📖 Documentation

- Fix link to NVTX ([#12598](https://github.com/rapidsai/cudf/pull/12598)) [@sameerz](https://github.com/sameerz)
- Include missing groupby functions in documentation ([#12580](https://github.com/rapidsai/cudf/pull/12580)) [@quasiben](https://github.com/quasiben)
- Fix documentation author ([#12527](https://github.com/rapidsai/cudf/pull/12527)) [@bdice](https://github.com/bdice)
- Update libcudf reduction docs for casting output types ([#12526](https://github.com/rapidsai/cudf/pull/12526)) [@davidwendt](https://github.com/davidwendt)
- Add JSON reader page in user guide ([#12499](https://github.com/rapidsai/cudf/pull/12499)) [@GregoryKimball](https://github.com/GregoryKimball)
- Link unsupported iteration API docstrings ([#12482](https://github.com/rapidsai/cudf/pull/12482)) [@galipremsagar](https://github.com/galipremsagar)
- `strings_udf` doc update ([#12469](https://github.com/rapidsai/cudf/pull/12469)) [@brandon-b-miller](https://github.com/brandon-b-miller)
- Update cudf_assert docs with correct NDEBUG behavior ([#12464](https://github.com/rapidsai/cudf/pull/12464)) [@robertmaynard](https://github.com/robertmaynard)
- Update pre-commit hooks guide ([#12395](https://github.com/rapidsai/cudf/pull/12395)) [@bdice](https://github.com/bdice)
- Update test docs to not use detail comparison utilities ([#12332](https://github.com/rapidsai/cudf/pull/12332)) [@PointKernel](https://github.com/PointKernel)
- Fix doxygen description for regex_program::compute_working_memory_size ([#12329](https://github.com/rapidsai/cudf/pull/12329)) [@davidwendt](https://github.com/davidwendt)
- Add eval to docs. ([#12322](https://github.com/rapidsai/cudf/pull/12322)) [@vyasr](https://github.com/vyasr)
- Turn on xfail_strict=true ([#12244](https://github.com/rapidsai/cudf/pull/12244)) [@wence-](https://github.com/wence-)
- Update 10 minutes to cuDF ([#12114](https://github.com/rapidsai/cudf/pull/12114)) [@wence-](https://github.com/wence-)

## 🚀 New Features

- Use kvikIO as the default IO backend ([#12574](https://github.com/rapidsai/cudf/pull/12574)) [@vuule](https://github.com/vuule)
- Use `has_nonempty_nulls` instead of `may_contain_non_empty_nulls` in `superimpose_nulls` and `push_down_nulls` ([#12560](https://github.com/rapidsai/cudf/pull/12560)) [@ttnghia](https://github.com/ttnghia)
- Add strings methods removeprefix and removesuffix ([#12557](https://github.com/rapidsai/cudf/pull/12557)) [@davidwendt](https://github.com/davidwendt) (see the sketch after this list)
- Add `regex_program` java APIs and unit tests ([#12548](https://github.com/rapidsai/cudf/pull/12548)) [@cindyyuanjiang](https://github.com/cindyyuanjiang)
- Default `cudf::io::read_json` to nested JSON parser ([#12544](https://github.com/rapidsai/cudf/pull/12544)) [@vuule](https://github.com/vuule)
- Make string quoting optional on CSV write ([#12539](https://github.com/rapidsai/cudf/pull/12539)) [@mythrocks](https://github.com/mythrocks)
- Use new nvCOMP API to optimize the compression temp memory size ([#12533](https://github.com/rapidsai/cudf/pull/12533)) [@vuule](https://github.com/vuule)
- Support "values" orient (array of arrays) in Nested JSON reader ([#12498](https://github.com/rapidsai/cudf/pull/12498)) [@karthikeyann](https://github.com/karthikeyann)
- `one_hot_encode` to use experimental row comparators ([#12478](https://github.com/rapidsai/cudf/pull/12478)) [@divyegala](https://github.com/divyegala)
- Support %W and %w format specifiers in cudf::strings::to_timestamps ([#12475](https://github.com/rapidsai/cudf/pull/12475)) [@davidwendt](https://github.com/davidwendt)
- Add JSON Writer ([#12474](https://github.com/rapidsai/cudf/pull/12474)) [@karthikeyann](https://github.com/karthikeyann)
- Refactor `thrust_copy_if` into `cudf::detail::copy_if_safe` ([#12455](https://github.com/rapidsai/cudf/pull/12455)) [@ttnghia](https://github.com/ttnghia)
- Add trailing comma support for nested JSON reader ([#12448](https://github.com/rapidsai/cudf/pull/12448)) [@karthikeyann](https://github.com/karthikeyann)
- Extract `tokenize_json.hpp` detail header from `src/io/json/nested_json.hpp` ([#12432](https://github.com/rapidsai/cudf/pull/12432)) [@ttnghia](https://github.com/ttnghia)
- JNI bindings to write CSV ([#12425](https://github.com/rapidsai/cudf/pull/12425)) [@mythrocks](https://github.com/mythrocks)
- Nested JSON depth benchmark ([#12371](https://github.com/rapidsai/cudf/pull/12371)) [@karthikeyann](https://github.com/karthikeyann)
- Implement `lists::reverse` ([#12336](https://github.com/rapidsai/cudf/pull/12336)) [@ttnghia](https://github.com/ttnghia)
- Use `device_read` in experimental `read_json` ([#12314](https://github.com/rapidsai/cudf/pull/12314)) [@vuule](https://github.com/vuule)
- Implement JNI for `strings::reverse` ([#12283](https://github.com/rapidsai/cudf/pull/12283)) [@ttnghia](https://github.com/ttnghia)
- Null element for parsing error in numeric types in JSON, CSV reader ([#12272](https://github.com/rapidsai/cudf/pull/12272)) [@karthikeyann](https://github.com/karthikeyann)
- Add cudf::strings::like function with multiple patterns ([#12269](https://github.com/rapidsai/cudf/pull/12269)) [@davidwendt](https://github.com/davidwendt)
- Add environment variable to control host memory allocation in `hostdevice_vector` ([#12251](https://github.com/rapidsai/cudf/pull/12251)) [@vuule](https://github.com/vuule)
- Add cudf::strings::reverse function ([#12227](https://github.com/rapidsai/cudf/pull/12227)) [@davidwendt](https://github.com/davidwendt)
- Selectively use dictionary encoding in Parquet writer ([#12211](https://github.com/rapidsai/cudf/pull/12211)) [@etseidl](https://github.com/etseidl)
- Support `replace` in `strings_udf` ([#12207](https://github.com/rapidsai/cudf/pull/12207)) [@brandon-b-miller](https://github.com/brandon-b-miller)
- Add support to read binary encoded decimals in parquet ([#12205](https://github.com/rapidsai/cudf/pull/12205)) [@PointKernel](https://github.com/PointKernel)
- Support regex EOL where the string ends with a new-line character ([#12181](https://github.com/rapidsai/cudf/pull/12181)) [@davidwendt](https://github.com/davidwendt)
- Updating `stream_compaction/unique` to use new row comparators ([#12159](https://github.com/rapidsai/cudf/pull/12159)) [@divyegala](https://github.com/divyegala)
- Add device buffer datasource ([#12024](https://github.com/rapidsai/cudf/pull/12024)) [@PointKernel](https://github.com/PointKernel)
- Implement groupby apply with JIT ([#11452](https://github.com/rapidsai/cudf/pull/11452)) [@bwyogatama](https://github.com/bwyogatama)
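
A minimal sketch of the string methods from [#12557] above (the sample data is invented for the example):

```python
import cudf

s = cudf.Series(["sw_engine", "sw_wheel", "brake"])

# The prefix is stripped only where present, matching Python's
# str.removeprefix semantics.
print(s.str.removeprefix("sw_"))  # expected: ["engine", "wheel", "brake"]
```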

## 🛠️ Improvements

- Update shared workflow branches ([#12696](https://github.com/rapidsai/cudf/pull/12696)) [@ajschmidt8](https://github.com/ajschmidt8)
- Pin `dask` and `distributed` for release ([#12695](https://github.com/rapidsai/cudf/pull/12695)) [@galipremsagar](https://github.com/galipremsagar)
- Don't upload `libcudf-example` to Anaconda.org ([#12671](https://github.com/rapidsai/cudf/pull/12671)) [@ajschmidt8](https://github.com/ajschmidt8)
- Pin wheel dependencies to same RAPIDS release ([#12659](https://github.com/rapidsai/cudf/pull/12659)) [@sevagh](https://github.com/sevagh)
- Use CTK 118/cp310 branch of wheel workflows ([#12602](https://github.com/rapidsai/cudf/pull/12602)) [@sevagh](https://github.com/sevagh)
- Change ways to access `ptr` in `Buffer` ([#12587](https://github.com/rapidsai/cudf/pull/12587)) [@galipremsagar](https://github.com/galipremsagar)
- Version a parquet writer xfail ([#12579](https://github.com/rapidsai/cudf/pull/12579)) [@galipremsagar](https://github.com/galipremsagar)
- Remove column names ([#12578](https://github.com/rapidsai/cudf/pull/12578)) [@vuule](https://github.com/vuule)
- Parquet reader optimization to address V100 regression. ([#12577](https://github.com/rapidsai/cudf/pull/12577)) [@nvdbaranec](https://github.com/nvdbaranec)
- Add support for `category` dtypes in CSV reader ([#12571](https://github.com/rapidsai/cudf/pull/12571)) [@galipremsagar](https://github.com/galipremsagar)
- Remove `spill_lock` parameter from `SpillableBuffer.get_ptr()` ([#12564](https://github.com/rapidsai/cudf/pull/12564)) [@madsbk](https://github.com/madsbk)
- Optimize `cudf::make_lists_column` ([#12547](https://github.com/rapidsai/cudf/pull/12547)) [@ttnghia](https://github.com/ttnghia)
- Remove `cudf::strings::repeat_strings_output_sizes` from Java and JNI ([#12546](https://github.com/rapidsai/cudf/pull/12546)) [@ttnghia](https://github.com/ttnghia)
- Test that cuInit is not called when RAPIDS_NO_INITIALIZE is set ([#12545](https://github.com/rapidsai/cudf/pull/12545)) [@wence-](https://github.com/wence-)
- Rework repeat_strings to use sizes-to-offsets utility ([#12543](https://github.com/rapidsai/cudf/pull/12543)) [@davidwendt](https://github.com/davidwendt)
- Replace exclusive_scan with sizes_to_offsets in cudf::lists::sequences ([#12541](https://github.com/rapidsai/cudf/pull/12541)) [@davidwendt](https://github.com/davidwendt)
- Rework nvtext::ngrams_tokenize to use sizes-to-offsets utility ([#12540](https://github.com/rapidsai/cudf/pull/12540)) [@davidwendt](https://github.com/davidwendt)
- Fix binary-ops gtests coded in namespace cudf::test ([#12536](https://github.com/rapidsai/cudf/pull/12536)) [@davidwendt](https://github.com/davidwendt)
- More `@acquire_spill_lock()` and `as_buffer(..., exposed=False)` ([#12535](https://github.com/rapidsai/cudf/pull/12535)) [@madsbk](https://github.com/madsbk)
- Guard CUDA runtime APIs with error checking ([#12531](https://github.com/rapidsai/cudf/pull/12531)) [@PointKernel](https://github.com/PointKernel)
- Update TODOs from issue 10432. ([#12528](https://github.com/rapidsai/cudf/pull/12528)) [@bdice](https://github.com/bdice)
- Update rapids-cmake definitions version in GitHub Actions style checks. ([#12511](https://github.com/rapidsai/cudf/pull/12511)) [@bdice](https://github.com/bdice)
- Switch `engine=cudf` to the new `JSON` reader ([#12509](https://github.com/rapidsai/cudf/pull/12509)) [@galipremsagar](https://github.com/galipremsagar)
- Fix SUM/MEAN aggregation type support. ([#12503](https://github.com/rapidsai/cudf/pull/12503)) [@bdice](https://github.com/bdice)
- Stop using pandas._testing ([#12492](https://github.com/rapidsai/cudf/pull/12492)) [@vyasr](https://github.com/vyasr)
- Fix ROLLING_TEST gtests coded in namespace cudf::test ([#12490](https://github.com/rapidsai/cudf/pull/12490)) [@davidwendt](https://github.com/davidwendt)
- Fix erroneously skipped ORC ZSTD test ([#12486](https://github.com/rapidsai/cudf/pull/12486)) [@vuule](https://github.com/vuule)
- Rework nvtext::generate_character_ngrams to use make_strings_children ([#12480](https://github.com/rapidsai/cudf/pull/12480)) [@davidwendt](https://github.com/davidwendt)
- Raise warnings as errors in the test suite ([#12468](https://github.com/rapidsai/cudf/pull/12468)) [@vyasr](https://github.com/vyasr)
- Remove `int32` hard-coding in python ([#12467](https://github.com/rapidsai/cudf/pull/12467)) [@galipremsagar](https://github.com/galipremsagar)
- Use cudaMemcpyDefault. ([#12466](https://github.com/rapidsai/cudf/pull/12466)) [@bdice](https://github.com/bdice)
- Update workflows for nightly tests ([#12462](https://github.com/rapidsai/cudf/pull/12462)) [@ajschmidt8](https://github.com/ajschmidt8)
- Build CUDA `11.8` and Python `3.10` Packages ([#12457](https://github.com/rapidsai/cudf/pull/12457)) [@ajschmidt8](https://github.com/ajschmidt8)
- JNI build image default as cuda11.8 ([#12441](https://github.com/rapidsai/cudf/pull/12441)) [@pxLi](https://github.com/pxLi)
- Re-enable `Recently Updated` Check ([#12435](https://github.com/rapidsai/cudf/pull/12435)) [@ajschmidt8](https://github.com/ajschmidt8)
- Rework remaining cudf::strings::from_xyz functions to use make_strings_children ([#12434](https://github.com/rapidsai/cudf/pull/12434)) [@vuule](https://github.com/vuule)
- Build wheels alongside conda CI ([#12427](https://github.com/rapidsai/cudf/pull/12427)) [@sevagh](https://github.com/sevagh)
- Remove arguments for checking exception messages in Python ([#12424](https://github.com/rapidsai/cudf/pull/12424)) [@vyasr](https://github.com/vyasr)
- Clean up cuco usage ([#12421](https://github.com/rapidsai/cudf/pull/12421)) [@PointKernel](https://github.com/PointKernel)
- Fix warnings in remaining modules ([#12406](https://github.com/rapidsai/cudf/pull/12406)) [@vyasr](https://github.com/vyasr)
- Update `ops-bot.yaml` ([#12402](https://github.com/rapidsai/cudf/pull/12402)) [@ajschmidt8](https://github.com/ajschmidt8)
- Rework cudf::strings::integers_to_ipv4 to use make_strings_children utility ([#12401](https://github.com/rapidsai/cudf/pull/12401)) [@davidwendt](https://github.com/davidwendt)
- Use `numpy.empty()` instead of `bytearray` to allocate host memory for spilling ([#12399](https://github.com/rapidsai/cudf/pull/12399)) [@madsbk](https://github.com/madsbk)
- Deprecate chunksize from dask_cudf.read_csv ([#12394](https://github.com/rapidsai/cudf/pull/12394)) [@rjzamora](https://github.com/rjzamora)
- Expose the RMM pool size in JNI ([#12390](https://github.com/rapidsai/cudf/pull/12390)) [@revans2](https://github.com/revans2)
- Fix COPYING_TEST: gtests coded in namespace cudf::test ([#12387](https://github.com/rapidsai/cudf/pull/12387)) [@davidwendt](https://github.com/davidwendt)
- Rework cudf::strings::url_encode to use make_strings_children utility ([#12385](https://github.com/rapidsai/cudf/pull/12385)) [@davidwendt](https://github.com/davidwendt)
- Use make_strings_children in parse_data nested json reader ([#12382](https://github.com/rapidsai/cudf/pull/12382)) [@karthikeyann](https://github.com/karthikeyann)
- Fix warnings in test_datetime.py ([#12381](https://github.com/rapidsai/cudf/pull/12381)) [@vyasr](https://github.com/vyasr)
- Mixed Join Benchmarks ([#12375](https://github.com/rapidsai/cudf/pull/12375)) [@divyegala](https://github.com/divyegala)
- Fix warnings in dataframe.py ([#12369](https://github.com/rapidsai/cudf/pull/12369)) [@vyasr](https://github.com/vyasr)
- Update conda recipes. ([#12368](https://github.com/rapidsai/cudf/pull/12368)) [@bdice](https://github.com/bdice)
- Use gpu-latest-1 runner tag ([#12366](https://github.com/rapidsai/cudf/pull/12366)) [@bdice](https://github.com/bdice)
- Rework cudf::strings::from_booleans to use make_strings_children ([#12365](https://github.com/rapidsai/cudf/pull/12365)) [@vuule](https://github.com/vuule)
- Fix warnings in test modules up to test_dataframe.py ([#12355](https://github.com/rapidsai/cudf/pull/12355)) [@vyasr](https://github.com/vyasr)
- JSON column performance optimization - struct column nulls ([#12354](https://github.com/rapidsai/cudf/pull/12354)) [@karthikeyann](https://github.com/karthikeyann)
- Accelerate stable-segmented-sort with CUB segmented sort ([#12347](https://github.com/rapidsai/cudf/pull/12347)) [@davidwendt](https://github.com/davidwendt)
- Add size check to make_offsets_child_column utility ([#12345](https://github.com/rapidsai/cudf/pull/12345)) [@davidwendt](https://github.com/davidwendt)
- Enable max compression ratio small block optimization for ZSTD ([#12338](https://github.com/rapidsai/cudf/pull/12338)) [@vuule](https://github.com/vuule)
- Fix warnings in test_monotonic.py ([#12334](https://github.com/rapidsai/cudf/pull/12334)) [@vyasr](https://github.com/vyasr)
- Improve JSON column creation performance (list offsets) ([#12330](https://github.com/rapidsai/cudf/pull/12330)) [@karthikeyann](https://github.com/karthikeyann)
- Upgrade to `arrow-10.0.1` ([#12327](https://github.com/rapidsai/cudf/pull/12327)) [@galipremsagar](https://github.com/galipremsagar)
- Fix warnings in test_orc.py ([#12326](https://github.com/rapidsai/cudf/pull/12326)) [@vyasr](https://github.com/vyasr)
- Fix warnings in test_groupby.py ([#12324](https://github.com/rapidsai/cudf/pull/12324)) [@vyasr](https://github.com/vyasr)
- Fix `test_notebooks.sh` ([#12323](https://github.com/rapidsai/cudf/pull/12323)) [@ajschmidt8](https://github.com/ajschmidt8)
- Fix transform gtests coded in namespace cudf::test ([#12321](https://github.com/rapidsai/cudf/pull/12321)) [@davidwendt](https://github.com/davidwendt)
- Fix `check_style.sh` script ([#12320](https://github.com/rapidsai/cudf/pull/12320)) [@ajschmidt8](https://github.com/ajschmidt8)
- Rework cudf::strings::from_timestamps to use make_strings_children ([#12317](https://github.com/rapidsai/cudf/pull/12317)) [@davidwendt](https://github.com/davidwendt)
- Fix warnings in test_index.py ([#12313](https://github.com/rapidsai/cudf/pull/12313)) [@vyasr](https://github.com/vyasr)
- Fix warnings in test_multiindex.py ([#12310](https://github.com/rapidsai/cudf/pull/12310)) [@vyasr](https://github.com/vyasr)
- CSV, JSON reader to infer integer column with nulls as int64 instead of float64 ([#12309](https://github.com/rapidsai/cudf/pull/12309)) [@karthikeyann](https://github.com/karthikeyann)
- Fix warnings in test_indexing.py ([#12305](https://github.com/rapidsai/cudf/pull/12305)) [@vyasr](https://github.com/vyasr)
- Fix warnings in test_joining.py ([#12304](https://github.com/rapidsai/cudf/pull/12304)) [@vyasr](https://github.com/vyasr)
- Unpin `dask` and `distributed` for development ([#12302](https://github.com/rapidsai/cudf/pull/12302)) [@galipremsagar](https://github.com/galipremsagar)
- Re-enable `sccache` for Jenkins builds ([#12297](https://github.com/rapidsai/cudf/pull/12297)) [@ajschmidt8](https://github.com/ajschmidt8)
- Define needs for pr-builder workflow. ([#12296](https://github.com/rapidsai/cudf/pull/12296)) [@bdice](https://github.com/bdice)
- Forward merge 22.12 into 23.02 ([#12294](https://github.com/rapidsai/cudf/pull/12294)) [@vyasr](https://github.com/vyasr)
- Fix warnings in test_stats.py ([#12293](https://github.com/rapidsai/cudf/pull/12293)) [@vyasr](https://github.com/vyasr)
- Fix table gtests coded in namespace cudf::test ([#12292](https://github.com/rapidsai/cudf/pull/12292)) [@davidwendt](https://github.com/davidwendt)
- Change cython for regex calls to use cudf::strings::regex_program ([#12289](https://github.com/rapidsai/cudf/pull/12289)) [@davidwendt](https://github.com/davidwendt)
- Improved error reporting when reading multiple JSON files ([#12285](https://github.com/rapidsai/cudf/pull/12285)) [@vuule](https://github.com/vuule)
- Deprecate Frame.sum_of_squares ([#12284](https://github.com/rapidsai/cudf/pull/12284)) [@vyasr](https://github.com/vyasr)
- Remove deprecated code for 23.02 ([#12281](https://github.com/rapidsai/cudf/pull/12281)) [@vyasr](https://github.com/vyasr)
- Clean up handling of max_page_size_bytes in Parquet writer ([#12277](https://github.com/rapidsai/cudf/pull/12277)) [@etseidl](https://github.com/etseidl)
- Fix replace gtests coded in namespace cudf::test ([#12270](https://github.com/rapidsai/cudf/pull/12270)) [@davidwendt](https://github.com/davidwendt)
- Add pandas nullable type support in `Index.to_pandas` ([#12268](https://github.com/rapidsai/cudf/pull/12268)) [@galipremsagar](https://github.com/galipremsagar)
- Rework nvtext::detokenize to use indexalator for row indices ([#12267](https://github.com/rapidsai/cudf/pull/12267)) [@davidwendt](https://github.com/davidwendt)
- Fix reduction gtests coded in namespace cudf::test ([#12257](https://github.com/rapidsai/cudf/pull/12257)) [@davidwendt](https://github.com/davidwendt)
- Remove default parameters from cudf::detail::sort function declarations ([#12254](https://github.com/rapidsai/cudf/pull/12254)) [@davidwendt](https://github.com/davidwendt)
- Add `duplicated` support for `Series`, `DataFrame` and `Index` ([#12246](https://github.com/rapidsai/cudf/pull/12246)) [@galipremsagar](https://github.com/galipremsagar)
- Replace column/table test utilities with macros ([#12242](https://github.com/rapidsai/cudf/pull/12242)) [@PointKernel](https://github.com/PointKernel)
- Rework cudf::strings::pad and zfill to use make_strings_children ([#12238](https://github.com/rapidsai/cudf/pull/12238)) [@davidwendt](https://github.com/davidwendt)
- Fix sort gtests coded in namespace cudf::test ([#12237](https://github.com/rapidsai/cudf/pull/12237)) [@davidwendt](https://github.com/davidwendt)
- Wrapping concat and file writes in `@acquire_spill_lock()` ([#12232](https://github.com/rapidsai/cudf/pull/12232)) [@madsbk](https://github.com/madsbk)
- Rename `cudf::structs::detail::superimpose_parent_nulls` APIs ([#12230](https://github.com/rapidsai/cudf/pull/12230)) [@ttnghia](https://github.com/ttnghia)
- Cover parsing to decimal types in `read_json` tests ([#12229](https://github.com/rapidsai/cudf/pull/12229)) [@vuule](https://github.com/vuule)
- Spill Statistics ([#12223](https://github.com/rapidsai/cudf/pull/12223)) [@madsbk](https://github.com/madsbk)
- Use CUDF_JNI_ENABLE_PROFILING to conditionally enable profiling support. ([#12221](https://github.com/rapidsai/cudf/pull/12221)) [@bdice](https://github.com/bdice)
- Clean up of `test_spilling.py` ([#12220](https://github.com/rapidsai/cudf/pull/12220)) [@madsbk](https://github.com/madsbk)
- Simplify repetitive boolean logic ([#12218](https://github.com/rapidsai/cudf/pull/12218)) [@vuule](https://github.com/vuule)
- Add `Series.hasnans` and `Index.hasnans` ([#12214](https://github.com/rapidsai/cudf/pull/12214)) [@galipremsagar](https://github.com/galipremsagar)
- Add cudf::strings::udf::replace function ([#12210](https://github.com/rapidsai/cudf/pull/12210)) [@davidwendt](https://github.com/davidwendt)
- Adds in new java APIs for appending byte arrays to host columnar data ([#12208](https://github.com/rapidsai/cudf/pull/12208)) [@revans2](https://github.com/revans2)
- Remove Python dependencies from Java CI. ([#12193](https://github.com/rapidsai/cudf/pull/12193)) [@bdice](https://github.com/bdice)
- Fix null order in sort-based groupby and improve groupby tests ([#12191](https://github.com/rapidsai/cudf/pull/12191)) [@divyegala](https://github.com/divyegala)
- Move strings children functions from cudf/strings/detail/utilities.cuh to new header ([#12185](https://github.com/rapidsai/cudf/pull/12185)) [@davidwendt](https://github.com/davidwendt)
- Clean up existing JNI scalar to column code ([#12173](https://github.com/rapidsai/cudf/pull/12173)) [@revans2](https://github.com/revans2)
- Remove JIT type names, refactor id_to_type. ([#12158](https://github.com/rapidsai/cudf/pull/12158)) [@bdice](https://github.com/bdice)
- Update JNI version to 23.02.0-SNAPSHOT ([#12129](https://github.com/rapidsai/cudf/pull/12129)) [@pxLi](https://github.com/pxLi)
- Minor refactor of cpp/src/io/parquet/page_data.cu ([#12126](https://github.com/rapidsai/cudf/pull/12126)) [@etseidl](https://github.com/etseidl)
- Add codespell as a linter ([#12097](https://github.com/rapidsai/cudf/pull/12097)) [@benfred](https://github.com/benfred)
- Enable specifying exceptions in error macros ([#12078](https://github.com/rapidsai/cudf/pull/12078)) [@vyasr](https://github.com/vyasr)
- Move `_label_encoding` from Series to Column ([#12040](https://github.com/rapidsai/cudf/pull/12040)) [@shwina](https://github.com/shwina)
- Add GitHub Actions Workflows ([#12002](https://github.com/rapidsai/cudf/pull/12002)) [@ajschmidt8](https://github.com/ajschmidt8)
- Consolidate dask-cudf `groupby_agg` calls in one place ([#10835](https://github.com/rapidsai/cudf/pull/10835)) [@charlesbluca](https://github.com/charlesbluca)

# cuDF 22.12.00 (8 Dec 2022)

## 🚨 Breaking Changes
- Add JNI for `substring` without 'end' parameter. ([#12113](https://github.com/rapidsai/cudf/pull/12113)) [@firestarman](https://github.com/firestarman)
- Refactor `purge_nonempty_nulls` ([#12111](https://github.com/rapidsai/cudf/pull/12111)) [@ttnghia](https://github.com/ttnghia)
- Create an `int8` column in `read_csv` when all elements are missing ([#12110](https://github.com/rapidsai/cudf/pull/12110)) [@vuule](https://github.com/vuule)
- Throw an error when libcudf is built without cuFile and `LIBCUDF_CUFILE_POLICY` is set to `"ALWAYS"` ([#12080](https://github.com/rapidsai/cudf/pull/12080)) [@vuule](https://github.com/vuule)
- Fix type promotion edge cases in numerical binops ([#12074](https://github.com/rapidsai/cudf/pull/12074)) [@wence-](https://github.com/wence-)
- Reduce/Remove reliance on `**kwargs` and `*args` in `IO` readers & writers ([#12025](https://github.com/rapidsai/cudf/pull/12025)) [@galipremsagar](https://github.com/galipremsagar)
- Rollback of `DeviceBufferLike` ([#12009](https://github.com/rapidsai/cudf/pull/12009)) [@madsbk](https://github.com/madsbk)
- Remove unused `managed_allocator` ([#12005](https://github.com/rapidsai/cudf/pull/12005)) [@vyasr](https://github.com/vyasr)
- Pass column names to `write_csv` instead of `table_metadata` pointer ([#11972](https://github.com/rapidsai/cudf/pull/11972)) [@vuule](https://github.com/vuule)
- Accept const refs instead of const unique_ptr refs in reduce and scan APIs. ([#11960](https://github.com/rapidsai/cudf/pull/11960)) [@vyasr](https://github.com/vyasr)
- Default to equal NaNs in make_merge_sets_aggregation. ([#11952](https://github.com/rapidsai/cudf/pull/11952)) [@bdice](https://github.com/bdice)
- Remove validation that requires introspection ([#11938](https://github.com/rapidsai/cudf/pull/11938)) [@vyasr](https://github.com/vyasr)
- Trim quotes for non-string values in nested json parsing ([#11898](https://github.com/rapidsai/cudf/pull/11898)) [@karthikeyann](https://github.com/karthikeyann)
- Add tests ensuring that cudf's default stream is always used ([#11875](https://github.com/rapidsai/cudf/pull/11875)) [@vyasr](https://github.com/vyasr)
- Support nested types as groupby keys in libcudf ([#11792](https://github.com/rapidsai/cudf/pull/11792)) [@PointKernel](https://github.com/PointKernel)
- Default to equal NaNs in make_collect_set_aggregation. ([#11621](https://github.com/rapidsai/cudf/pull/11621)) [@bdice](https://github.com/bdice)
- Removing int8 column option from parquet byte_array writing ([#11539](https://github.com/rapidsai/cudf/pull/11539)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- part1: Simplify BaseIndex to an abstract class ([#10389](https://github.com/rapidsai/cudf/pull/10389)) [@skirui-source](https://github.com/skirui-source)

## 🐛 Bug Fixes

- Fix include line for IO Cython modules ([#12250](https://github.com/rapidsai/cudf/pull/12250)) [@vyasr](https://github.com/vyasr)
- Make dask pinning looser ([#12231](https://github.com/rapidsai/cudf/pull/12231)) [@vyasr](https://github.com/vyasr)
- Workaround for CUB segmented-sort bug with boolean keys ([#12217](https://github.com/rapidsai/cudf/pull/12217)) [@davidwendt](https://github.com/davidwendt)
- Fix `from_dict` backend dispatch to match upstream `dask` ([#12203](https://github.com/rapidsai/cudf/pull/12203)) [@galipremsagar](https://github.com/galipremsagar)
- Merge branch-22.10 into branch-22.12 ([#12198](https://github.com/rapidsai/cudf/pull/12198)) [@davidwendt](https://github.com/davidwendt)
- Fix compression in ORC writer ([#12194](https://github.com/rapidsai/cudf/pull/12194)) [@vuule](https://github.com/vuule)
- Don't use CMake 3.25.0 as it has a show stopping FindCUDAToolkit bug ([#12188](https://github.com/rapidsai/cudf/pull/12188)) [@robertmaynard](https://github.com/robertmaynard)
- Fix data corruption when reading ORC files with empty stripes ([#12160](https://github.com/rapidsai/cudf/pull/12160)) [@vuule](https://github.com/vuule)
- Fix decimal binary operations ([#12142](https://github.com/rapidsai/cudf/pull/12142)) [@galipremsagar](https://github.com/galipremsagar)
- Ensure dlpack include is provided to cudf interop lib ([#12139](https://github.com/rapidsai/cudf/pull/12139)) [@robertmaynard](https://github.com/robertmaynard)
- Safely allocate `udf_string` pointers in `strings_udf` ([#12138](https://github.com/rapidsai/cudf/pull/12138)) [@brandon-b-miller](https://github.com/brandon-b-miller)
- Fix/disable jitify lto ([#12122](https://github.com/rapidsai/cudf/pull/12122)) [@robertmaynard](https://github.com/robertmaynard)
- Fix conditional_full_join benchmark ([#12121](https://github.com/rapidsai/cudf/pull/12121)) [@GregoryKimball](https://github.com/GregoryKimball)
- Fix regex working-memory-size refactor error ([#12119](https://github.com/rapidsai/cudf/pull/12119)) [@davidwendt](https://github.com/davidwendt)
- Add in negative size checks for columns ([#12118](https://github.com/rapidsai/cudf/pull/12118)) [@revans2](https://github.com/revans2)
([#12113](https://github.com/rapidsai/cudf/pull/12113)) [@firestarman](https://github.com/firestarman) -- Fix reading of CSV files with blank second row ([#12098](https://github.com/rapidsai/cudf/pull/12098)) [@vuule](https://github.com/vuule) -- Fix an error in IO with `GzipFile` type ([#12085](https://github.com/rapidsai/cudf/pull/12085)) [@galipremsagar](https://github.com/galipremsagar) -- Workaround groupby aggregate thrust::copy_if overflow ([#12079](https://github.com/rapidsai/cudf/pull/12079)) [@davidwendt](https://github.com/davidwendt) -- Fix alignment of compressed blocks in ORC writer ([#12077](https://github.com/rapidsai/cudf/pull/12077)) [@vuule](https://github.com/vuule) -- Fix singleton-range `__setitem__` edge case ([#12075](https://github.com/rapidsai/cudf/pull/12075)) [@wence-](https://github.com/wence-) -- Fix type promotion edge cases in numerical binops ([#12074](https://github.com/rapidsai/cudf/pull/12074)) [@wence-](https://github.com/wence-) -- Force using old fmt in nvbench. ([#12067](https://github.com/rapidsai/cudf/pull/12067)) [@vyasr](https://github.com/vyasr) -- Fixes List offset bug in Nested JSON reader ([#12060](https://github.com/rapidsai/cudf/pull/12060)) [@karthikeyann](https://github.com/karthikeyann) -- Allow falling back to `shim_60.ptx` by default in `strings_udf` ([#12056](https://github.com/rapidsai/cudf/pull/12056)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Force black exclusions for pre-commit. ([#12036](https://github.com/rapidsai/cudf/pull/12036)) [@bdice](https://github.com/bdice) -- Add `memory_usage` & `items` implementation for `Struct` column & dtype ([#12033](https://github.com/rapidsai/cudf/pull/12033)) [@galipremsagar](https://github.com/galipremsagar) -- Reduce/Remove reliance on `**kwargs` and `*args` in `IO` readers & writers ([#12025](https://github.com/rapidsai/cudf/pull/12025)) [@galipremsagar](https://github.com/galipremsagar) -- Fixes bug in csv_reader_options construction in cython ([#12021](https://github.com/rapidsai/cudf/pull/12021)) [@karthikeyann](https://github.com/karthikeyann) -- Fix issues when both `usecols` and `names` options are used in `read_csv` ([#12018](https://github.com/rapidsai/cudf/pull/12018)) [@vuule](https://github.com/vuule) -- Port thrust's pinned_allocator to cudf, since Thrust 1.17 removes the type ([#12004](https://github.com/rapidsai/cudf/pull/12004)) [@robertmaynard](https://github.com/robertmaynard) -- Revert "Replace most of preprocessor usage in nvcomp adapter with `constexpr`" ([#11999](https://github.com/rapidsai/cudf/pull/11999)) [@vuule](https://github.com/vuule) -- Fix bug where `df.loc` resulting in single row could give wrong index ([#11998](https://github.com/rapidsai/cudf/pull/11998)) [@eriknw](https://github.com/eriknw) -- Switch to DISABLE_DEPRECATION_WARNINGS to match other RAPIDS projects ([#11989](https://github.com/rapidsai/cudf/pull/11989)) [@robertmaynard](https://github.com/robertmaynard) -- Fix maximum page size estimate in Parquet writer ([#11962](https://github.com/rapidsai/cudf/pull/11962)) [@vuule](https://github.com/vuule) -- Fix local offset handling in bgzip reader ([#11918](https://github.com/rapidsai/cudf/pull/11918)) 
[@upsj](https://github.com/upsj) -- Fix an issue reading struct-of-list types in Parquet. ([#11910](https://github.com/rapidsai/cudf/pull/11910)) [@nvdbaranec](https://github.com/nvdbaranec) -- Fix memcheck error in TypeInference.Timestamp gtest ([#11905](https://github.com/rapidsai/cudf/pull/11905)) [@davidwendt](https://github.com/davidwendt) -- Fix type casting in Series.__setitem__ ([#11904](https://github.com/rapidsai/cudf/pull/11904)) [@wence-](https://github.com/wence-) -- Fix memcheck error in get_dremel_data ([#11903](https://github.com/rapidsai/cudf/pull/11903)) [@davidwendt](https://github.com/davidwendt) -- Fixes Unsupported column type error due to empty list columns in Nested JSON reader ([#11897](https://github.com/rapidsai/cudf/pull/11897)) [@karthikeyann](https://github.com/karthikeyann) -- Fix segmented-sort to ignore indices outside the offsets ([#11888](https://github.com/rapidsai/cudf/pull/11888)) [@davidwendt](https://github.com/davidwendt) -- Fix cudf::stable_sorted_order for NaN and -NaN in FLOAT64 columns ([#11874](https://github.com/rapidsai/cudf/pull/11874)) [@davidwendt](https://github.com/davidwendt) -- Fix writing of Parquet files with many fragments ([#11869](https://github.com/rapidsai/cudf/pull/11869)) [@etseidl](https://github.com/etseidl) -- Fix RangeIndex unary operators. ([#11868](https://github.com/rapidsai/cudf/pull/11868)) [@vyasr](https://github.com/vyasr) -- JNI Avoid NPE for reading host binary data ([#11865](https://github.com/rapidsai/cudf/pull/11865)) [@revans2](https://github.com/revans2) -- Fix decimal benchmark input data generation ([#11863](https://github.com/rapidsai/cudf/pull/11863)) [@karthikeyann](https://github.com/karthikeyann) -- Fix pre-commit copyright check ([#11860](https://github.com/rapidsai/cudf/pull/11860)) [@galipremsagar](https://github.com/galipremsagar) -- Fix Parquet support for seconds and milliseconds duration types ([#11854](https://github.com/rapidsai/cudf/pull/11854)) [@vuule](https://github.com/vuule) -- Ensure better compiler cache results between cudf cal-ver branches ([#11835](https://github.com/rapidsai/cudf/pull/11835)) [@robertmaynard](https://github.com/robertmaynard) -- Fix make_column_from_scalar for all-null strings column ([#11807](https://github.com/rapidsai/cudf/pull/11807)) [@davidwendt](https://github.com/davidwendt) -- Tell jitify_preprocess where to search for libnvrtc ([#11787](https://github.com/rapidsai/cudf/pull/11787)) [@robertmaynard](https://github.com/robertmaynard) -- add V2 page header support to parquet reader ([#11778](https://github.com/rapidsai/cudf/pull/11778)) [@etseidl](https://github.com/etseidl) -- Parquet reader: bug fix for a num_rows/skip_rows corner case, w/optimization for nested preprocessing ([#11752](https://github.com/rapidsai/cudf/pull/11752)) [@nvdbaranec](https://github.com/nvdbaranec) -- Determine if Arrow has S3 support at runtime in unit test. ([#11560](https://github.com/rapidsai/cudf/pull/11560)) [@bdice](https://github.com/bdice) - -## 📖 Documentation - -- Use rapidsai CODE_OF_CONDUCT.md ([#12166](https://github.com/rapidsai/cudf/pull/12166)) [@bdice](https://github.com/bdice) -- Add symlinks to notebooks. 
([#12128](https://github.com/rapidsai/cudf/pull/12128)) [@bdice](https://github.com/bdice) -- Add `truncate` API to python doc pages ([#12109](https://github.com/rapidsai/cudf/pull/12109)) [@galipremsagar](https://github.com/galipremsagar) -- Update Numba docs links. ([#12107](https://github.com/rapidsai/cudf/pull/12107)) [@bdice](https://github.com/bdice) -- Remove "Multi-GPU with Dask-cuDF" notebook. ([#12095](https://github.com/rapidsai/cudf/pull/12095)) [@bdice](https://github.com/bdice) -- Fix link to c++ developer guide from `CONTRIBUTING.md` ([#12084](https://github.com/rapidsai/cudf/pull/12084)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Add pivot_table and crosstab to docs. ([#12014](https://github.com/rapidsai/cudf/pull/12014)) [@bdice](https://github.com/bdice) -- Fix doxygen text for cudf::dictionary::encode ([#11991](https://github.com/rapidsai/cudf/pull/11991)) [@davidwendt](https://github.com/davidwendt) -- Replace default_stream_value with get_default_stream in docs. ([#11985](https://github.com/rapidsai/cudf/pull/11985)) [@vyasr](https://github.com/vyasr) -- Add dtype docs pages and docstrings for `cudf` specific dtypes ([#11974](https://github.com/rapidsai/cudf/pull/11974)) [@galipremsagar](https://github.com/galipremsagar) -- Update Unit Testing in libcudf guidelines to code tests outside the cudf::test namespace ([#11959](https://github.com/rapidsai/cudf/pull/11959)) [@davidwendt](https://github.com/davidwendt) -- Rename libcudf++ to libcudf. ([#11953](https://github.com/rapidsai/cudf/pull/11953)) [@bdice](https://github.com/bdice) -- Fix documentation referring to removed as_gpu_matrix method. ([#11937](https://github.com/rapidsai/cudf/pull/11937)) [@bdice](https://github.com/bdice) -- Remove "experimental" warning for struct columns in ORC reader and writer ([#11880](https://github.com/rapidsai/cudf/pull/11880)) [@vuule](https://github.com/vuule) -- Initial draft of policies and guidelines for libcudf usage. 
([#11853](https://github.com/rapidsai/cudf/pull/11853)) [@vyasr](https://github.com/vyasr) -- Add clear indication of non-GPU accelerated parameters in read_json docstring ([#11825](https://github.com/rapidsai/cudf/pull/11825)) [@GregoryKimball](https://github.com/GregoryKimball) -- Add developer docs for writing tests ([#11199](https://github.com/rapidsai/cudf/pull/11199)) [@vyasr](https://github.com/vyasr) - -## 🚀 New Features - -- Adds an EventHandler to Java MemoryBuffer to be invoked on close ([#12125](https://github.com/rapidsai/cudf/pull/12125)) [@abellina](https://github.com/abellina) -- Support `+` in `strings_udf` ([#12117](https://github.com/rapidsai/cudf/pull/12117)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Support `upper` and `lower` in `strings_udf` ([#12099](https://github.com/rapidsai/cudf/pull/12099)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Add wheel builds ([#12096](https://github.com/rapidsai/cudf/pull/12096)) [@vyasr](https://github.com/vyasr) -- Allow setting malloc heap size in string udfs ([#12094](https://github.com/rapidsai/cudf/pull/12094)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Support `strip`, `lstrip`, and `rstrip` in `strings_udf` ([#12091](https://github.com/rapidsai/cudf/pull/12091)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Mark nvcomp zstd compression stable ([#12059](https://github.com/rapidsai/cudf/pull/12059)) [@jbrennan333](https://github.com/jbrennan333) -- Add debug-only onAllocated/onDeallocated to RmmEventHandler ([#12054](https://github.com/rapidsai/cudf/pull/12054)) [@abellina](https://github.com/abellina) -- Enable building against the libarrow contained in pyarrow ([#12034](https://github.com/rapidsai/cudf/pull/12034)) [@vyasr](https://github.com/vyasr) -- Add strings `like` jni and native method ([#12032](https://github.com/rapidsai/cudf/pull/12032)) [@cindyyuanjiang](https://github.com/cindyyuanjiang) -- Cleanup common parsing code in JSON, CSV reader ([#12022](https://github.com/rapidsai/cudf/pull/12022)) [@karthikeyann](https://github.com/karthikeyann) -- byte_range support for JSON Lines format ([#12017](https://github.com/rapidsai/cudf/pull/12017)) [@karthikeyann](https://github.com/karthikeyann) -- Minor cleanup of root CMakeLists.txt for better organization ([#11988](https://github.com/rapidsai/cudf/pull/11988)) [@robertmaynard](https://github.com/robertmaynard) -- Add inplace arithmetic operators to `MaskedType` ([#11987](https://github.com/rapidsai/cudf/pull/11987)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Implement JNI for chunked Parquet reader ([#11961](https://github.com/rapidsai/cudf/pull/11961)) [@ttnghia](https://github.com/ttnghia) -- Add method argument to DataFrame.quantile ([#11957](https://github.com/rapidsai/cudf/pull/11957)) [@rjzamora](https://github.com/rjzamora) -- Add gpu memory watermark apis to JNI ([#11950](https://github.com/rapidsai/cudf/pull/11950)) [@abellina](https://github.com/abellina) -- Adds retryCount to RmmEventHandler.onAllocFailure ([#11940](https://github.com/rapidsai/cudf/pull/11940)) [@abellina](https://github.com/abellina) -- Enable returning string data from UDFs used 
through `apply` ([#11933](https://github.com/rapidsai/cudf/pull/11933)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Switch over to rapids-cmake patches for thrust ([#11921](https://github.com/rapidsai/cudf/pull/11921)) [@robertmaynard](https://github.com/robertmaynard) -- Add strings udf C++ classes and functions for phase II ([#11912](https://github.com/rapidsai/cudf/pull/11912)) [@davidwendt](https://github.com/davidwendt) -- Trim quotes for non-string values in nested json parsing ([#11898](https://github.com/rapidsai/cudf/pull/11898)) [@karthikeyann](https://github.com/karthikeyann) -- Enable CEC for `strings_udf` ([#11884](https://github.com/rapidsai/cudf/pull/11884)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- ArrowIPCTableWriter writes en empty batch in the case of an empty table. ([#11883](https://github.com/rapidsai/cudf/pull/11883)) [@firestarman](https://github.com/firestarman) -- Implement chunked Parquet reader ([#11867](https://github.com/rapidsai/cudf/pull/11867)) [@ttnghia](https://github.com/ttnghia) -- Add `read_orc_metadata` to libcudf ([#11815](https://github.com/rapidsai/cudf/pull/11815)) [@vuule](https://github.com/vuule) -- Support nested types as groupby keys in libcudf ([#11792](https://github.com/rapidsai/cudf/pull/11792)) [@PointKernel](https://github.com/PointKernel) -- Adding feature Truncate to DataFrame and Series ([#11435](https://github.com/rapidsai/cudf/pull/11435)) [@VamsiTallam95](https://github.com/VamsiTallam95) - -## 🛠️ Improvements - -- Reduce number of tests marked `spilling` ([#12197](https://github.com/rapidsai/cudf/pull/12197)) [@madsbk](https://github.com/madsbk) -- Pin `dask` and `distributed` for release ([#12165](https://github.com/rapidsai/cudf/pull/12165)) [@galipremsagar](https://github.com/galipremsagar) -- Don't rely on GNU find in headers_test.sh ([#12164](https://github.com/rapidsai/cudf/pull/12164)) [@wence-](https://github.com/wence-) -- Update cp.clip call ([#12148](https://github.com/rapidsai/cudf/pull/12148)) [@quasiben](https://github.com/quasiben) -- Enable automatic column projection in groupby().agg ([#12124](https://github.com/rapidsai/cudf/pull/12124)) [@rjzamora](https://github.com/rjzamora) -- Refactor `purge_nonempty_nulls` ([#12111](https://github.com/rapidsai/cudf/pull/12111)) [@ttnghia](https://github.com/ttnghia) -- Create an `int8` column in `read_csv` when all elements are missing ([#12110](https://github.com/rapidsai/cudf/pull/12110)) [@vuule](https://github.com/vuule) -- Spilling to host memory ([#12106](https://github.com/rapidsai/cudf/pull/12106)) [@madsbk](https://github.com/madsbk) -- First pass of `pd.read_orc` changes in tests ([#12103](https://github.com/rapidsai/cudf/pull/12103)) [@galipremsagar](https://github.com/galipremsagar) -- Expose engine argument in dask_cudf.read_json ([#12101](https://github.com/rapidsai/cudf/pull/12101)) [@rjzamora](https://github.com/rjzamora) -- Remove CUDA 10 compatibility code. 
([#12088](https://github.com/rapidsai/cudf/pull/12088)) [@bdice](https://github.com/bdice) -- Move and update `dask` nigthly install in CI ([#12082](https://github.com/rapidsai/cudf/pull/12082)) [@galipremsagar](https://github.com/galipremsagar) -- Throw an error when libcudf is built without cuFile and `LIBCUDF_CUFILE_POLICY` is set to `"ALWAYS"` ([#12080](https://github.com/rapidsai/cudf/pull/12080)) [@vuule](https://github.com/vuule) -- Remove macros that inspect the contents of exceptions ([#12076](https://github.com/rapidsai/cudf/pull/12076)) [@vyasr](https://github.com/vyasr) -- Fix ingest_raw_data performance issue in Nested JSON reader due to RVO ([#12070](https://github.com/rapidsai/cudf/pull/12070)) [@karthikeyann](https://github.com/karthikeyann) -- Remove overflow error during decimal binops ([#12063](https://github.com/rapidsai/cudf/pull/12063)) [@galipremsagar](https://github.com/galipremsagar) -- Change cudf::detail::tdigest to cudf::tdigest::detail ([#12050](https://github.com/rapidsai/cudf/pull/12050)) [@davidwendt](https://github.com/davidwendt) -- Fix quantile gtests coded in namespace cudf::test ([#12049](https://github.com/rapidsai/cudf/pull/12049)) [@davidwendt](https://github.com/davidwendt) -- Add support for `DataFrame.from_dict`\`to_dict` and `Series.to_dict` ([#12048](https://github.com/rapidsai/cudf/pull/12048)) [@galipremsagar](https://github.com/galipremsagar) -- Refactor Parquet reader ([#12046](https://github.com/rapidsai/cudf/pull/12046)) [@ttnghia](https://github.com/ttnghia) -- Forward merge 22.10 into 22.12 ([#12045](https://github.com/rapidsai/cudf/pull/12045)) [@vyasr](https://github.com/vyasr) -- Standardize newlines at ends of files. ([#12042](https://github.com/rapidsai/cudf/pull/12042)) [@bdice](https://github.com/bdice) -- Trim trailing whitespace from all files. ([#12041](https://github.com/rapidsai/cudf/pull/12041)) [@bdice](https://github.com/bdice) -- Use nosync policy in gather and scatter implementations. ([#12038](https://github.com/rapidsai/cudf/pull/12038)) [@bdice](https://github.com/bdice) -- Remove smart quotes from all docstrings. ([#12035](https://github.com/rapidsai/cudf/pull/12035)) [@bdice](https://github.com/bdice) -- Update cuda-python dependency to 11.7.1 ([#12030](https://github.com/rapidsai/cudf/pull/12030)) [@galipremsagar](https://github.com/galipremsagar) -- Add cython-lint to pre-commit checks. ([#12020](https://github.com/rapidsai/cudf/pull/12020)) [@bdice](https://github.com/bdice) -- Use pragma once ([#12019](https://github.com/rapidsai/cudf/pull/12019)) [@bdice](https://github.com/bdice) -- New GHA to add issues/prs to project board ([#12016](https://github.com/rapidsai/cudf/pull/12016)) [@jarmak-nv](https://github.com/jarmak-nv) -- Add DataFrame.pivot_table. 
([#12015](https://github.com/rapidsai/cudf/pull/12015)) [@bdice](https://github.com/bdice) -- Rollback of `DeviceBufferLike` ([#12009](https://github.com/rapidsai/cudf/pull/12009)) [@madsbk](https://github.com/madsbk) -- Remove default parameters for nvtext::detail functions ([#12007](https://github.com/rapidsai/cudf/pull/12007)) [@davidwendt](https://github.com/davidwendt) -- Remove default parameters for cudf::dictionary::detail functions ([#12006](https://github.com/rapidsai/cudf/pull/12006)) [@davidwendt](https://github.com/davidwendt) -- Remove unused `managed_allocator` ([#12005](https://github.com/rapidsai/cudf/pull/12005)) [@vyasr](https://github.com/vyasr) -- Remove default parameters for cudf::strings::detail functions ([#12003](https://github.com/rapidsai/cudf/pull/12003)) [@davidwendt](https://github.com/davidwendt) -- Remove unnecessary code from dask-cudf _Frame ([#12001](https://github.com/rapidsai/cudf/pull/12001)) [@rjzamora](https://github.com/rjzamora) -- Ignore python docs build artifacts ([#12000](https://github.com/rapidsai/cudf/pull/12000)) [@galipremsagar](https://github.com/galipremsagar) -- Use rapids-cmake for google benchmark. ([#11997](https://github.com/rapidsai/cudf/pull/11997)) [@vyasr](https://github.com/vyasr) -- Leverage rapids_cython for more automated RPATH handling ([#11996](https://github.com/rapidsai/cudf/pull/11996)) [@vyasr](https://github.com/vyasr) -- Remove stale labeler ([#11995](https://github.com/rapidsai/cudf/pull/11995)) [@raydouglass](https://github.com/raydouglass) -- Move protobuf compilation to CMake ([#11986](https://github.com/rapidsai/cudf/pull/11986)) [@vyasr](https://github.com/vyasr) -- Replace most of preprocessor usage in nvcomp adapter with `constexpr` ([#11980](https://github.com/rapidsai/cudf/pull/11980)) [@vuule](https://github.com/vuule) -- Add missing noexcepts to column_in_metadata methods ([#11973](https://github.com/rapidsai/cudf/pull/11973)) [@vyasr](https://github.com/vyasr) -- Pass column names to `write_csv` instead of `table_metadata` pointer ([#11972](https://github.com/rapidsai/cudf/pull/11972)) [@vuule](https://github.com/vuule) -- Accelerate libcudf segmented sort with CUB segmented sort ([#11969](https://github.com/rapidsai/cudf/pull/11969)) [@davidwendt](https://github.com/davidwendt) -- Feature/remove default streams ([#11967](https://github.com/rapidsai/cudf/pull/11967)) [@vyasr](https://github.com/vyasr) -- Add pool memory resource to libcudf basic example ([#11966](https://github.com/rapidsai/cudf/pull/11966)) [@davidwendt](https://github.com/davidwendt) -- Fix some libcudf calls to cudf::detail::gather ([#11963](https://github.com/rapidsai/cudf/pull/11963)) [@davidwendt](https://github.com/davidwendt) -- Accept const refs instead of const unique_ptr refs in reduce and scan APIs. ([#11960](https://github.com/rapidsai/cudf/pull/11960)) [@vyasr](https://github.com/vyasr) -- Add deprecation warning for set_allocator. 
([#11958](https://github.com/rapidsai/cudf/pull/11958)) [@vyasr](https://github.com/vyasr) -- Fix lists and structs gtests coded in namespace cudf::test ([#11956](https://github.com/rapidsai/cudf/pull/11956)) [@davidwendt](https://github.com/davidwendt) -- Add full page indexes to Parquet writer benchmarks ([#11955](https://github.com/rapidsai/cudf/pull/11955)) [@etseidl](https://github.com/etseidl) -- Use gather-based strings factory in cudf::strings::strip ([#11954](https://github.com/rapidsai/cudf/pull/11954)) [@davidwendt](https://github.com/davidwendt) -- Default to equal NaNs in make_merge_sets_aggregation. ([#11952](https://github.com/rapidsai/cudf/pull/11952)) [@bdice](https://github.com/bdice) -- Add `strip_delimiters` option to `read_text` ([#11946](https://github.com/rapidsai/cudf/pull/11946)) [@upsj](https://github.com/upsj) -- Refactor multibyte_split `output_builder` ([#11945](https://github.com/rapidsai/cudf/pull/11945)) [@upsj](https://github.com/upsj) -- Remove validation that requires introspection ([#11938](https://github.com/rapidsai/cudf/pull/11938)) [@vyasr](https://github.com/vyasr) -- Add `.str.find_multiple` API ([#11928](https://github.com/rapidsai/cudf/pull/11928)) [@galipremsagar](https://github.com/galipremsagar) -- Add regex_program class for use with all regex APIs ([#11927](https://github.com/rapidsai/cudf/pull/11927)) [@davidwendt](https://github.com/davidwendt) -- Enable backend dispatching for Dask-DataFrame creation ([#11920](https://github.com/rapidsai/cudf/pull/11920)) [@rjzamora](https://github.com/rjzamora) -- Performance improvement in JSON Tree traversal ([#11919](https://github.com/rapidsai/cudf/pull/11919)) [@karthikeyann](https://github.com/karthikeyann) -- Fix some gtests incorrectly coded in namespace cudf::test (part I) ([#11917](https://github.com/rapidsai/cudf/pull/11917)) [@davidwendt](https://github.com/davidwendt) -- Refactor pad/zfill functions for reuse with strings udf ([#11914](https://github.com/rapidsai/cudf/pull/11914)) [@davidwendt](https://github.com/davidwendt) -- Add `nanosecond` & `microsecond` to `DatetimeProperties` ([#11911](https://github.com/rapidsai/cudf/pull/11911)) [@galipremsagar](https://github.com/galipremsagar) -- Pin mimesis version in setup.py. ([#11906](https://github.com/rapidsai/cudf/pull/11906)) [@bdice](https://github.com/bdice) -- Error on `ListColumn` or any new unsupported column in `cudf.Index` ([#11902](https://github.com/rapidsai/cudf/pull/11902)) [@galipremsagar](https://github.com/galipremsagar) -- Add thrust output iterator fix (1805) to thrust.patch ([#11900](https://github.com/rapidsai/cudf/pull/11900)) [@davidwendt](https://github.com/davidwendt) -- Relax `codecov` threshold diff ([#11899](https://github.com/rapidsai/cudf/pull/11899)) [@galipremsagar](https://github.com/galipremsagar) -- Use public APIs in STREAM_COMPACTION_NVBENCH ([#11892](https://github.com/rapidsai/cudf/pull/11892)) [@GregoryKimball](https://github.com/GregoryKimball) -- Add coverage for string UDF tests. 
([#11891](https://github.com/rapidsai/cudf/pull/11891)) [@vyasr](https://github.com/vyasr) -- Provide `data_chunk_source` wrapper for `datasource` ([#11886](https://github.com/rapidsai/cudf/pull/11886)) [@upsj](https://github.com/upsj) -- Handle `multibyte_split` byte_range out-of-bounds offsets on host ([#11885](https://github.com/rapidsai/cudf/pull/11885)) [@upsj](https://github.com/upsj) -- Add tests ensuring that cudf's default stream is always used ([#11875](https://github.com/rapidsai/cudf/pull/11875)) [@vyasr](https://github.com/vyasr) -- Change expect_strings_empty into expect_column_empty libcudf test utility ([#11873](https://github.com/rapidsai/cudf/pull/11873)) [@davidwendt](https://github.com/davidwendt) -- Add ngroup ([#11871](https://github.com/rapidsai/cudf/pull/11871)) [@shwina](https://github.com/shwina) -- Reduce memory usage in nested JSON parser - tree generation ([#11864](https://github.com/rapidsai/cudf/pull/11864)) [@karthikeyann](https://github.com/karthikeyann) -- Unpin `dask` and `distributed` for development ([#11859](https://github.com/rapidsai/cudf/pull/11859)) [@galipremsagar](https://github.com/galipremsagar) -- Remove unused includes for table/row_operators ([#11857](https://github.com/rapidsai/cudf/pull/11857)) [@GregoryKimball](https://github.com/GregoryKimball) -- Use conda-forge's `pyorc` ([#11855](https://github.com/rapidsai/cudf/pull/11855)) [@jakirkham](https://github.com/jakirkham) -- Add libcudf strings examples ([#11849](https://github.com/rapidsai/cudf/pull/11849)) [@davidwendt](https://github.com/davidwendt) -- Remove `cudf_io` namespace alias ([#11827](https://github.com/rapidsai/cudf/pull/11827)) [@vuule](https://github.com/vuule) -- Test/remove thrust vector usage ([#11813](https://github.com/rapidsai/cudf/pull/11813)) [@vyasr](https://github.com/vyasr) -- Add BGZIP reader to python `read_text` ([#11802](https://github.com/rapidsai/cudf/pull/11802)) [@upsj](https://github.com/upsj) -- Merge branch-22.10 into branch-22.12 ([#11801](https://github.com/rapidsai/cudf/pull/11801)) [@davidwendt](https://github.com/davidwendt) -- Fix compile warning from CUDF_FUNC_RANGE in a member function ([#11798](https://github.com/rapidsai/cudf/pull/11798)) [@davidwendt](https://github.com/davidwendt) -- Update cudf JNI version to 22.12.0-SNAPSHOT ([#11764](https://github.com/rapidsai/cudf/pull/11764)) [@pxLi](https://github.com/pxLi) -- Update flake8 to 5.0.4 and use flake8-force to check Cython. ([#11736](https://github.com/rapidsai/cudf/pull/11736)) [@bdice](https://github.com/bdice) -- Add BGZIP multibyte_split benchmark ([#11723](https://github.com/rapidsai/cudf/pull/11723)) [@upsj](https://github.com/upsj) -- Bifurcate Dependency Lists ([#11674](https://github.com/rapidsai/cudf/pull/11674)) [@bdice](https://github.com/bdice) -- Default to equal NaNs in make_collect_set_aggregation. 
([#11621](https://github.com/rapidsai/cudf/pull/11621)) [@bdice](https://github.com/bdice) -- Conform "bench_isin" to match generator column names ([#11549](https://github.com/rapidsai/cudf/pull/11549)) [@GregoryKimball](https://github.com/GregoryKimball) -- Removing int8 column option from parquet byte_array writing ([#11539](https://github.com/rapidsai/cudf/pull/11539)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Add checks for HLG layers in dask-cudf groupby tests ([#10853](https://github.com/rapidsai/cudf/pull/10853)) [@charlesbluca](https://github.com/charlesbluca) -- part1: Simplify BaseIndex to an abstract class ([#10389](https://github.com/rapidsai/cudf/pull/10389)) [@skirui-source](https://github.com/skirui-source) -- Make all `nvcc` warnings into errors ([#8916](https://github.com/rapidsai/cudf/pull/8916)) [@trxcllnt](https://github.com/trxcllnt) - -# cuDF 22.10.00 (12 Oct 2022) - -## 🚨 Breaking Changes - -- Disable Zstandard decompression on nvCOMP 2.4 and Pascal GPus ([#11856](https://github.com/rapidsai/cudf/pull/11856)) [@vuule](https://github.com/vuule) -- Disable nvCOMP DEFLATE integration ([#11811](https://github.com/rapidsai/cudf/pull/11811)) [@vuule](https://github.com/vuule) -- Fix return type of `Index.isna` & `Index.notna` ([#11769](https://github.com/rapidsai/cudf/pull/11769)) [@galipremsagar](https://github.com/galipremsagar) -- Remove `kwargs` in `read_csv` & `to_csv` ([#11762](https://github.com/rapidsai/cudf/pull/11762)) [@galipremsagar](https://github.com/galipremsagar) -- Fix `cudf::partition*` APIs that do not return offsets for empty output table ([#11709](https://github.com/rapidsai/cudf/pull/11709)) [@ttnghia](https://github.com/ttnghia) -- Fix regex negated classes to not automatically include new-lines ([#11644](https://github.com/rapidsai/cudf/pull/11644)) [@davidwendt](https://github.com/davidwendt) -- Update zfill to match Python output ([#11634](https://github.com/rapidsai/cudf/pull/11634)) [@davidwendt](https://github.com/davidwendt) -- Upgrade `pandas` to `1.5` ([#11617](https://github.com/rapidsai/cudf/pull/11617)) [@galipremsagar](https://github.com/galipremsagar) -- Change default value of `ordered` to `False` in `CategoricalDtype` ([#11604](https://github.com/rapidsai/cudf/pull/11604)) [@galipremsagar](https://github.com/galipremsagar) -- Move cudf::strings::findall_record to cudf::strings::findall ([#11575](https://github.com/rapidsai/cudf/pull/11575)) [@davidwendt](https://github.com/davidwendt) -- Adding optional parquet reader schema ([#11524](https://github.com/rapidsai/cudf/pull/11524)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Deprecate `skiprows` and `num_rows` in `read_orc` ([#11522](https://github.com/rapidsai/cudf/pull/11522)) [@galipremsagar](https://github.com/galipremsagar) -- Remove support for skip_rows / num_rows options in the parquet reader. ([#11503](https://github.com/rapidsai/cudf/pull/11503)) [@nvdbaranec](https://github.com/nvdbaranec) -- Drop support for `skiprows` and `num_rows` in `cudf.read_parquet` ([#11480](https://github.com/rapidsai/cudf/pull/11480)) [@galipremsagar](https://github.com/galipremsagar) -- Disable Arrow S3 support by default. 
([#11470](https://github.com/rapidsai/cudf/pull/11470)) [@bdice](https://github.com/bdice) -- Convert thrust::optional usages to std::optional ([#11455](https://github.com/rapidsai/cudf/pull/11455)) [@robertmaynard](https://github.com/robertmaynard) -- Remove unused is_struct trait. ([#11450](https://github.com/rapidsai/cudf/pull/11450)) [@bdice](https://github.com/bdice) -- Refactor the `Buffer` class ([#11447](https://github.com/rapidsai/cudf/pull/11447)) [@madsbk](https://github.com/madsbk) -- Return empty dataframe when reading an ORC file using empty `columns` option ([#11446](https://github.com/rapidsai/cudf/pull/11446)) [@vuule](https://github.com/vuule) -- Refactor pad_side and strip_type enums into side_type enum ([#11438](https://github.com/rapidsai/cudf/pull/11438)) [@davidwendt](https://github.com/davidwendt) -- Remove HASH_SERIAL_MURMUR3 / serial32BitMurmurHash3 ([#11383](https://github.com/rapidsai/cudf/pull/11383)) [@bdice](https://github.com/bdice) -- Use the new JSON parser when the experimental reader is selected ([#11364](https://github.com/rapidsai/cudf/pull/11364)) [@vuule](https://github.com/vuule) -- Remove deprecated Series.applymap. ([#11031](https://github.com/rapidsai/cudf/pull/11031)) [@bdice](https://github.com/bdice) -- Remove deprecated expand parameter from str.findall. ([#11030](https://github.com/rapidsai/cudf/pull/11030)) [@bdice](https://github.com/bdice) - -## 🐛 Bug Fixes - -- Fixes bug in temporary decompression space estimation before calling nvcomp ([#11879](https://github.com/rapidsai/cudf/pull/11879)) [@abellina](https://github.com/abellina) -- Handle `ptx` file paths during `strings_udf` import ([#11862](https://github.com/rapidsai/cudf/pull/11862)) [@galipremsagar](https://github.com/galipremsagar) -- Disable Zstandard decompression on nvCOMP 2.4 and Pascal GPus ([#11856](https://github.com/rapidsai/cudf/pull/11856)) [@vuule](https://github.com/vuule) -- Reset `strings_udf` CEC and solve several related issues ([#11846](https://github.com/rapidsai/cudf/pull/11846)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Fix bug in new shuffle-based groupby implementation ([#11836](https://github.com/rapidsai/cudf/pull/11836)) [@rjzamora](https://github.com/rjzamora) -- Fix `is_valid` checks in `Scalar._binaryop` ([#11818](https://github.com/rapidsai/cudf/pull/11818)) [@wence-](https://github.com/wence-) -- Fix operator `NotImplemented` issue with `numpy` ([#11816](https://github.com/rapidsai/cudf/pull/11816)) [@galipremsagar](https://github.com/galipremsagar) -- Disable nvCOMP DEFLATE integration ([#11811](https://github.com/rapidsai/cudf/pull/11811)) [@vuule](https://github.com/vuule) -- Build `strings_udf` package with other python packages in nightlies ([#11808](https://github.com/rapidsai/cudf/pull/11808)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Revert problematic shuffle=explicit-comms changes ([#11803](https://github.com/rapidsai/cudf/pull/11803)) [@rjzamora](https://github.com/rjzamora) -- Fix regex out-of-bounds write in strided rows logic ([#11797](https://github.com/rapidsai/cudf/pull/11797)) [@davidwendt](https://github.com/davidwendt) -- Build `cudf` locally before building 
`strings_udf` conda packages in CI ([#11785](https://github.com/rapidsai/cudf/pull/11785)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Fix an issue in cudf::row_bit_count involving structs and lists at multiple levels. ([#11779](https://github.com/rapidsai/cudf/pull/11779)) [@nvdbaranec](https://github.com/nvdbaranec) -- Fix return type of `Index.isna` & `Index.notna` ([#11769](https://github.com/rapidsai/cudf/pull/11769)) [@galipremsagar](https://github.com/galipremsagar) -- Fix issue with set-item in case of `list` and `struct` types ([#11760](https://github.com/rapidsai/cudf/pull/11760)) [@galipremsagar](https://github.com/galipremsagar) -- Ensure all libcudf APIs run on cudf's default stream ([#11759](https://github.com/rapidsai/cudf/pull/11759)) [@vyasr](https://github.com/vyasr) -- Resolve dask_cudf failures caused by upstream groupby changes ([#11755](https://github.com/rapidsai/cudf/pull/11755)) [@rjzamora](https://github.com/rjzamora) -- Fix ORC string sum statistics ([#11740](https://github.com/rapidsai/cudf/pull/11740)) [@vuule](https://github.com/vuule) -- Add `strings_udf` package for python 3.9 ([#11730](https://github.com/rapidsai/cudf/pull/11730)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Ensure that all tests launch kernels on cudf's default stream ([#11726](https://github.com/rapidsai/cudf/pull/11726)) [@vyasr](https://github.com/vyasr) -- Don't assume stream is a compile-time constant expression ([#11725](https://github.com/rapidsai/cudf/pull/11725)) [@vyasr](https://github.com/vyasr) -- Fix get_thrust.cmake format at patch command ([#11715](https://github.com/rapidsai/cudf/pull/11715)) [@davidwendt](https://github.com/davidwendt) -- Fix `cudf::partition*` APIs that do not return offsets for empty output table ([#11709](https://github.com/rapidsai/cudf/pull/11709)) [@ttnghia](https://github.com/ttnghia) -- Fix cudf::lists::sort_lists for NaN and Infinity values ([#11703](https://github.com/rapidsai/cudf/pull/11703)) [@davidwendt](https://github.com/davidwendt) -- Modify ORC reader timestamp parsing to match the apache reader behavior ([#11699](https://github.com/rapidsai/cudf/pull/11699)) [@vuule](https://github.com/vuule) -- Fix `DataFrame.from_arrow` to preserve type metadata ([#11698](https://github.com/rapidsai/cudf/pull/11698)) [@galipremsagar](https://github.com/galipremsagar) -- Fix compile error due to missing header ([#11697](https://github.com/rapidsai/cudf/pull/11697)) [@ttnghia](https://github.com/ttnghia) -- Default to Snappy compression in `to_orc` when using cuDF or Dask ([#11690](https://github.com/rapidsai/cudf/pull/11690)) [@vuule](https://github.com/vuule) -- Fix an issue related to `Multindex` when `group_keys=True` ([#11689](https://github.com/rapidsai/cudf/pull/11689)) [@galipremsagar](https://github.com/galipremsagar) -- Transfer correct dtype to exploded column ([#11687](https://github.com/rapidsai/cudf/pull/11687)) [@wence-](https://github.com/wence-) -- Ignore protobuf generated files in `mypy` checks ([#11685](https://github.com/rapidsai/cudf/pull/11685)) [@galipremsagar](https://github.com/galipremsagar) -- Maintain the index name after `.loc` 
([#11677](https://github.com/rapidsai/cudf/pull/11677)) [@shwina](https://github.com/shwina) -- Fix issue with extracting nested column data & dtype preservation ([#11671](https://github.com/rapidsai/cudf/pull/11671)) [@galipremsagar](https://github.com/galipremsagar) -- Ensure that all cudf tests and benchmarks are conda env aware ([#11666](https://github.com/rapidsai/cudf/pull/11666)) [@robertmaynard](https://github.com/robertmaynard) -- Update to Thrust 1.17.2 to fix cub ODR issues ([#11665](https://github.com/rapidsai/cudf/pull/11665)) [@robertmaynard](https://github.com/robertmaynard) -- Fix multi-file remote datasource bug ([#11655](https://github.com/rapidsai/cudf/pull/11655)) [@rjzamora](https://github.com/rjzamora) -- Fix invalid regex quantifier check to not include alternation ([#11654](https://github.com/rapidsai/cudf/pull/11654)) [@davidwendt](https://github.com/davidwendt) -- Fix bug in `device_write()`: it uses an incorrect size ([#11651](https://github.com/rapidsai/cudf/pull/11651)) [@madsbk](https://github.com/madsbk) -- fixes overflows in benchmarks ([#11649](https://github.com/rapidsai/cudf/pull/11649)) [@elstehle](https://github.com/elstehle) -- Fix regex negated classes to not automatically include new-lines ([#11644](https://github.com/rapidsai/cudf/pull/11644)) [@davidwendt](https://github.com/davidwendt) -- Fix compile error in benchmark nested_json.cpp ([#11637](https://github.com/rapidsai/cudf/pull/11637)) [@davidwendt](https://github.com/davidwendt) -- Update zfill to match Python output ([#11634](https://github.com/rapidsai/cudf/pull/11634)) [@davidwendt](https://github.com/davidwendt) -- Removed converted type for INT32 and INT64 since they do not convert ([#11627](https://github.com/rapidsai/cudf/pull/11627)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Fix host scalars construction of nested types ([#11612](https://github.com/rapidsai/cudf/pull/11612)) [@galipremsagar](https://github.com/galipremsagar) -- Fix compile warning in nested_json_gpu.cu ([#11607](https://github.com/rapidsai/cudf/pull/11607)) [@davidwendt](https://github.com/davidwendt) -- Change default value of `ordered` to `False` in `CategoricalDtype` ([#11604](https://github.com/rapidsai/cudf/pull/11604)) [@galipremsagar](https://github.com/galipremsagar) -- Preserve order if necessary when deduping categoricals internally ([#11597](https://github.com/rapidsai/cudf/pull/11597)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Add is_timestamp test for leap second (60) ([#11594](https://github.com/rapidsai/cudf/pull/11594)) [@davidwendt](https://github.com/davidwendt) -- Fix an issue with `to_arrow` when column name type is not a string ([#11590](https://github.com/rapidsai/cudf/pull/11590)) [@galipremsagar](https://github.com/galipremsagar) -- Fix exception in segmented-reduce benchmark ([#11588](https://github.com/rapidsai/cudf/pull/11588)) [@davidwendt](https://github.com/davidwendt) -- Fix encode/decode of negative timestamps in ORC reader/writer ([#11586](https://github.com/rapidsai/cudf/pull/11586)) [@vuule](https://github.com/vuule) -- Correct distribution data type in `quantiles` benchmark ([#11584](https://github.com/rapidsai/cudf/pull/11584)) 
[@vuule](https://github.com/vuule) -- Fix multibyte_split benchmark for host buffers ([#11583](https://github.com/rapidsai/cudf/pull/11583)) [@upsj](https://github.com/upsj) -- xfail custreamz display test for now ([#11567](https://github.com/rapidsai/cudf/pull/11567)) [@shwina](https://github.com/shwina) -- Fix JNI for TableWithMeta to use schema_info instead of column_names ([#11566](https://github.com/rapidsai/cudf/pull/11566)) [@jlowe](https://github.com/jlowe) -- Reduce code duplication for `dask` & `distributed` nightly/stable installs ([#11565](https://github.com/rapidsai/cudf/pull/11565)) [@galipremsagar](https://github.com/galipremsagar) -- Fix groupby failures in dask_cudf CI ([#11561](https://github.com/rapidsai/cudf/pull/11561)) [@rjzamora](https://github.com/rjzamora) -- Fix for pivot: error when 'values' is a multicharacter string ([#11538](https://github.com/rapidsai/cudf/pull/11538)) [@shaswat-indian](https://github.com/shaswat-indian) -- find_package(cudf) + arrow9 usable with cudf build directory ([#11535](https://github.com/rapidsai/cudf/pull/11535)) [@robertmaynard](https://github.com/robertmaynard) -- Fixing crash when writing binary nested data in parquet ([#11526](https://github.com/rapidsai/cudf/pull/11526)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Fix for: error when assigning a value to an empty series ([#11523](https://github.com/rapidsai/cudf/pull/11523)) [@shaswat-indian](https://github.com/shaswat-indian) -- Fix invalid results from conditional-left-anti-join in debug build ([#11517](https://github.com/rapidsai/cudf/pull/11517)) [@davidwendt](https://github.com/davidwendt) -- Fix cmake error after upgrading to Arrow 9 ([#11513](https://github.com/rapidsai/cudf/pull/11513)) [@ttnghia](https://github.com/ttnghia) -- Fix reverse binary operators acting on a host value and cudf.Scalar ([#11512](https://github.com/rapidsai/cudf/pull/11512)) [@bdice](https://github.com/bdice) -- Update parquet fuzz tests to drop support for `skiprows` & `num_rows` ([#11505](https://github.com/rapidsai/cudf/pull/11505)) [@galipremsagar](https://github.com/galipremsagar) -- Use rapids-cmake 22.10 best practice for RAPIDS.cmake location ([#11493](https://github.com/rapidsai/cudf/pull/11493)) [@robertmaynard](https://github.com/robertmaynard) -- Handle some zero-sized corner cases in dlpack interop ([#11449](https://github.com/rapidsai/cudf/pull/11449)) [@wence-](https://github.com/wence-) -- Return empty dataframe when reading an ORC file using empty `columns` option ([#11446](https://github.com/rapidsai/cudf/pull/11446)) [@vuule](https://github.com/vuule) -- libcudf c++ example updated to CPM version 0.35.3 ([#11417](https://github.com/rapidsai/cudf/pull/11417)) [@robertmaynard](https://github.com/robertmaynard) -- Fix regex quantifier check to include capture groups ([#11373](https://github.com/rapidsai/cudf/pull/11373)) [@davidwendt](https://github.com/davidwendt) -- Fix read_text when byte_range is aligned with field ([#11371](https://github.com/rapidsai/cudf/pull/11371)) [@upsj](https://github.com/upsj) -- Fix to_timestamps truncated subsecond calculation ([#11367](https://github.com/rapidsai/cudf/pull/11367)) 
[@davidwendt](https://github.com/davidwendt) -- column: calculate null_count before release()ing the cudf::column ([#11365](https://github.com/rapidsai/cudf/pull/11365)) [@wence-](https://github.com/wence-) - -## 📖 Documentation - -- Update `guide-to-udfs` notebook ([#11861](https://github.com/rapidsai/cudf/pull/11861)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Update docstring for cudf.read_text ([#11799](https://github.com/rapidsai/cudf/pull/11799)) [@GregoryKimball](https://github.com/GregoryKimball) -- Add doc section for `list` & `struct` handling ([#11770](https://github.com/rapidsai/cudf/pull/11770)) [@galipremsagar](https://github.com/galipremsagar) -- Document that minimum required CMake version is now 3.23.1 ([#11751](https://github.com/rapidsai/cudf/pull/11751)) [@robertmaynard](https://github.com/robertmaynard) -- Update libcudf documentation build command in DOCUMENTATION.md ([#11735](https://github.com/rapidsai/cudf/pull/11735)) [@davidwendt](https://github.com/davidwendt) -- Add docs for use of string data to `DataFrame.apply` and `Series.apply` and update guide to UDFs notebook ([#11733](https://github.com/rapidsai/cudf/pull/11733)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Enable more Pydocstyle rules ([#11582](https://github.com/rapidsai/cudf/pull/11582)) [@bdice](https://github.com/bdice) -- Remove unused cpp/img folder ([#11554](https://github.com/rapidsai/cudf/pull/11554)) [@davidwendt](https://github.com/davidwendt) -- Publish C++ developer docs ([#11475](https://github.com/rapidsai/cudf/pull/11475)) [@vyasr](https://github.com/vyasr) -- Fix a misalignment in `cudf.get_dummies` docstring ([#11443](https://github.com/rapidsai/cudf/pull/11443)) [@galipremsagar](https://github.com/galipremsagar) -- Update contributing doc to include links to the developer guides ([#11390](https://github.com/rapidsai/cudf/pull/11390)) [@davidwendt](https://github.com/davidwendt) -- Fix table_view_base doxygen format ([#11340](https://github.com/rapidsai/cudf/pull/11340)) [@davidwendt](https://github.com/davidwendt) -- Create main developer guide for Python ([#11235](https://github.com/rapidsai/cudf/pull/11235)) [@vyasr](https://github.com/vyasr) -- Add developer documentation for benchmarking ([#11122](https://github.com/rapidsai/cudf/pull/11122)) [@vyasr](https://github.com/vyasr) -- cuDF error handling document ([#7917](https://github.com/rapidsai/cudf/pull/7917)) [@isVoid](https://github.com/isVoid) - -## 🚀 New Features - -- Add hasNull statistic reading ability to ORC ([#11747](https://github.com/rapidsai/cudf/pull/11747)) [@devavret](https://github.com/devavret) -- Add `istitle` to string UDFs ([#11738](https://github.com/rapidsai/cudf/pull/11738)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- JSON Column creation in GPU ([#11714](https://github.com/rapidsai/cudf/pull/11714)) [@karthikeyann](https://github.com/karthikeyann) -- Adds option to take explicit nested schema for nested JSON reader ([#11682](https://github.com/rapidsai/cudf/pull/11682)) [@elstehle](https://github.com/elstehle) -- Add BGZIP `data_chunk_reader` ([#11652](https://github.com/rapidsai/cudf/pull/11652)) 
[@upsj](https://github.com/upsj) -- Support DECIMAL order-by for RANGE window functions ([#11645](https://github.com/rapidsai/cudf/pull/11645)) [@mythrocks](https://github.com/mythrocks) -- changing version of cmake to 3.23.3 ([#11619](https://github.com/rapidsai/cudf/pull/11619)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Generate unique keys table in java JNI `contiguousSplitGroups` ([#11614](https://github.com/rapidsai/cudf/pull/11614)) [@res-life](https://github.com/res-life) -- Generic type casting to support the new nested JSON reader ([#11613](https://github.com/rapidsai/cudf/pull/11613)) [@elstehle](https://github.com/elstehle) -- JSON tree traversal ([#11610](https://github.com/rapidsai/cudf/pull/11610)) [@karthikeyann](https://github.com/karthikeyann) -- Add casting operators to masked UDFs ([#11578](https://github.com/rapidsai/cudf/pull/11578)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Adds type inference and type conversion for leaf-columns to the nested JSON parser ([#11574](https://github.com/rapidsai/cudf/pull/11574)) [@elstehle](https://github.com/elstehle) -- Add strings 'like' function ([#11558](https://github.com/rapidsai/cudf/pull/11558)) [@davidwendt](https://github.com/davidwendt) -- Handle hyphen as literal for regex cclass when incomplete range ([#11557](https://github.com/rapidsai/cudf/pull/11557)) [@davidwendt](https://github.com/davidwendt) -- Enable ZSTD compression in ORC and Parquet writers ([#11551](https://github.com/rapidsai/cudf/pull/11551)) [@vuule](https://github.com/vuule) -- Adds support for json lines format to the nested JSON reader ([#11534](https://github.com/rapidsai/cudf/pull/11534)) [@elstehle](https://github.com/elstehle) -- Adding optional parquet reader schema ([#11524](https://github.com/rapidsai/cudf/pull/11524)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Adds GPU implementation of JSON-token-stream to JSON-tree ([#11518](https://github.com/rapidsai/cudf/pull/11518)) [@karthikeyann](https://github.com/karthikeyann) -- Add `gdb` pretty-printers for simple types ([#11499](https://github.com/rapidsai/cudf/pull/11499)) [@upsj](https://github.com/upsj) -- Add `create_random_column` function to the data generator ([#11490](https://github.com/rapidsai/cudf/pull/11490)) [@vuule](https://github.com/vuule) -- Add fluent API builder to `data_profile` ([#11479](https://github.com/rapidsai/cudf/pull/11479)) [@vuule](https://github.com/vuule) -- Adds Nested Json benchmark ([#11466](https://github.com/rapidsai/cudf/pull/11466)) [@karthikeyann](https://github.com/karthikeyann) -- Convert thrust::optional usages to std::optional ([#11455](https://github.com/rapidsai/cudf/pull/11455)) [@robertmaynard](https://github.com/robertmaynard) -- Python API for the future experimental JSON reader ([#11426](https://github.com/rapidsai/cudf/pull/11426)) [@vuule](https://github.com/vuule) -- Return schema info from JSON reader ([#11419](https://github.com/rapidsai/cudf/pull/11419)) [@vuule](https://github.com/vuule) -- Add regex ASCII flag support for matching builtin character classes ([#11404](https://github.com/rapidsai/cudf/pull/11404)) [@davidwendt](https://github.com/davidwendt) -- 
Truncate parquet column indexes ([#11403](https://github.com/rapidsai/cudf/pull/11403)) [@etseidl](https://github.com/etseidl) -- Adds the end-to-end JSON parser implementation ([#11388](https://github.com/rapidsai/cudf/pull/11388)) [@elstehle](https://github.com/elstehle) -- Use the new JSON parser when the experimental reader is selected ([#11364](https://github.com/rapidsai/cudf/pull/11364)) [@vuule](https://github.com/vuule) -- Add placeholder for the experimental JSON reader ([#11334](https://github.com/rapidsai/cudf/pull/11334)) [@vuule](https://github.com/vuule) -- Add read-only functions on string dtypes to `DataFrame.apply` and `Series.apply` ([#11319](https://github.com/rapidsai/cudf/pull/11319)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Added 'crosstab' and 'pivot_table' features ([#11314](https://github.com/rapidsai/cudf/pull/11314)) [@shaswat-indian](https://github.com/shaswat-indian) -- Quickly error out when trying to build with unsupported nvcc versions ([#11297](https://github.com/rapidsai/cudf/pull/11297)) [@robertmaynard](https://github.com/robertmaynard) -- Adds JSON tokenizer ([#11264](https://github.com/rapidsai/cudf/pull/11264)) [@elstehle](https://github.com/elstehle) -- List lexicographic comparator ([#11129](https://github.com/rapidsai/cudf/pull/11129)) [@devavret](https://github.com/devavret) -- Add generic type inference for cuIO ([#11121](https://github.com/rapidsai/cudf/pull/11121)) [@PointKernel](https://github.com/PointKernel) -- Fully support nested types in `cudf::contains` ([#10656](https://github.com/rapidsai/cudf/pull/10656)) [@ttnghia](https://github.com/ttnghia) -- Support nested types in `lists::contains` ([#10548](https://github.com/rapidsai/cudf/pull/10548)) [@ttnghia](https://github.com/ttnghia) - -## 🛠️ Improvements - -- Pin `dask` and `distributed` for release ([#11822](https://github.com/rapidsai/cudf/pull/11822)) [@galipremsagar](https://github.com/galipremsagar) -- Add examples for Nested JSON reader ([#11814](https://github.com/rapidsai/cudf/pull/11814)) [@GregoryKimball](https://github.com/GregoryKimball) -- Support shuffle-based groupby aggregations in dask_cudf ([#11800](https://github.com/rapidsai/cudf/pull/11800)) [@rjzamora](https://github.com/rjzamora) -- Update strings udf version updater script ([#11772](https://github.com/rapidsai/cudf/pull/11772)) [@galipremsagar](https://github.com/galipremsagar) -- Remove `kwargs` in `read_csv` & `to_csv` ([#11762](https://github.com/rapidsai/cudf/pull/11762)) [@galipremsagar](https://github.com/galipremsagar) -- Pass `dtype` param to avoid `pd.Series` warnings ([#11761](https://github.com/rapidsai/cudf/pull/11761)) [@galipremsagar](https://github.com/galipremsagar) -- Enable `schema_element` & `keep_quotes` support in json reader ([#11746](https://github.com/rapidsai/cudf/pull/11746)) [@galipremsagar](https://github.com/galipremsagar) -- Add ability to construct `ListColumn` when size is `None` ([#11745](https://github.com/rapidsai/cudf/pull/11745)) [@galipremsagar](https://github.com/galipremsagar) -- Reduces memory requirements in JSON parser and adds bytes/s and peak memory usage to benchmarks ([#11732](https://github.com/rapidsai/cudf/pull/11732)) 

## 🛠️ Improvements

- Pin `dask` and `distributed` for release ([#11822](https://github.com/rapidsai/cudf/pull/11822)) [@galipremsagar](https://github.com/galipremsagar)
- Add examples for Nested JSON reader ([#11814](https://github.com/rapidsai/cudf/pull/11814)) [@GregoryKimball](https://github.com/GregoryKimball)
- Support shuffle-based groupby aggregations in dask_cudf ([#11800](https://github.com/rapidsai/cudf/pull/11800)) [@rjzamora](https://github.com/rjzamora)
- Update strings udf version updater script ([#11772](https://github.com/rapidsai/cudf/pull/11772)) [@galipremsagar](https://github.com/galipremsagar)
- Remove `kwargs` in `read_csv` & `to_csv` ([#11762](https://github.com/rapidsai/cudf/pull/11762)) [@galipremsagar](https://github.com/galipremsagar)
- Pass `dtype` param to avoid `pd.Series` warnings ([#11761](https://github.com/rapidsai/cudf/pull/11761)) [@galipremsagar](https://github.com/galipremsagar)
- Enable `schema_element` & `keep_quotes` support in json reader ([#11746](https://github.com/rapidsai/cudf/pull/11746)) [@galipremsagar](https://github.com/galipremsagar)
- Add ability to construct `ListColumn` when size is `None` ([#11745](https://github.com/rapidsai/cudf/pull/11745)) [@galipremsagar](https://github.com/galipremsagar)
- Reduces memory requirements in JSON parser and adds bytes/s and peak memory usage to benchmarks ([#11732](https://github.com/rapidsai/cudf/pull/11732)) [@elstehle](https://github.com/elstehle)
- Add missing copyright headers. ([#11712](https://github.com/rapidsai/cudf/pull/11712)) [@bdice](https://github.com/bdice)
- Fix copyright check issues in pre-commit ([#11711](https://github.com/rapidsai/cudf/pull/11711)) [@bdice](https://github.com/bdice)
- Include decimal in supported types for range window order-by columns ([#11710](https://github.com/rapidsai/cudf/pull/11710)) [@mythrocks](https://github.com/mythrocks)
- Disable very large column gtest for contiguous-split ([#11706](https://github.com/rapidsai/cudf/pull/11706)) [@davidwendt](https://github.com/davidwendt)
- Drop split_out=None test from groupby.agg ([#11704](https://github.com/rapidsai/cudf/pull/11704)) [@wence-](https://github.com/wence-)
- Use CubinLinker for CUDA Minor Version Compatibility ([#11701](https://github.com/rapidsai/cudf/pull/11701)) [@gmarkall](https://github.com/gmarkall)
- Add regex capture-group parameter to auto convert to non-capture groups ([#11695](https://github.com/rapidsai/cudf/pull/11695)) [@davidwendt](https://github.com/davidwendt)
- Add a `__dataframe__` method to the protocol dataframe object ([#11692](https://github.com/rapidsai/cudf/pull/11692)) [@rgommers](https://github.com/rgommers)
- Special-case multibyte_split for single-byte delimiter ([#11681](https://github.com/rapidsai/cudf/pull/11681)) [@upsj](https://github.com/upsj)
- Remove isort exclusions ([#11680](https://github.com/rapidsai/cudf/pull/11680)) [@bdice](https://github.com/bdice)
- Refactor CSV reader benchmarks with nvbench ([#11678](https://github.com/rapidsai/cudf/pull/11678)) [@PointKernel](https://github.com/PointKernel)
- Check conda recipe headers with pre-commit ([#11669](https://github.com/rapidsai/cudf/pull/11669)) [@bdice](https://github.com/bdice)
- Remove redundant style check for clang-format. ([#11668](https://github.com/rapidsai/cudf/pull/11668)) [@bdice](https://github.com/bdice)
- Add support for `group_keys` in `groupby` ([#11659](https://github.com/rapidsai/cudf/pull/11659)) [@galipremsagar](https://github.com/galipremsagar) *(example below)*
- Fix pandoc pinning. ([#11658](https://github.com/rapidsai/cudf/pull/11658)) [@bdice](https://github.com/bdice)
- Revert removal of skip_rows / num_rows options from the Parquet reader. ([#11657](https://github.com/rapidsai/cudf/pull/11657)) [@nvdbaranec](https://github.com/nvdbaranec)
- Update git metadata ([#11647](https://github.com/rapidsai/cudf/pull/11647)) [@bdice](https://github.com/bdice)
- Call set_null_count on a returning column if null-count is known ([#11646](https://github.com/rapidsai/cudf/pull/11646)) [@davidwendt](https://github.com/davidwendt)
- Fix some libcudf detail calls not passing the stream variable ([#11642](https://github.com/rapidsai/cudf/pull/11642)) [@davidwendt](https://github.com/davidwendt)
- Update to mypy 0.971 ([#11640](https://github.com/rapidsai/cudf/pull/11640)) [@wence-](https://github.com/wence-)
- Refactor strings strip functor to details header ([#11635](https://github.com/rapidsai/cudf/pull/11635)) [@davidwendt](https://github.com/davidwendt)
- Fix incorrect `nullCount` in `get_json_object` ([#11633](https://github.com/rapidsai/cudf/pull/11633)) [@trxcllnt](https://github.com/trxcllnt)
- Simplify `hostdevice_vector` ([#11631](https://github.com/rapidsai/cudf/pull/11631)) [@upsj](https://github.com/upsj)
- Refactor parquet writer benchmarks with nvbench ([#11623](https://github.com/rapidsai/cudf/pull/11623)) [@PointKernel](https://github.com/PointKernel)
- Rework contains_scalar to check nulls at runtime ([#11622](https://github.com/rapidsai/cudf/pull/11622)) [@davidwendt](https://github.com/davidwendt)
- Fix incorrect memory resource used in rolling temp columns ([#11618](https://github.com/rapidsai/cudf/pull/11618)) [@mythrocks](https://github.com/mythrocks)
- Upgrade `pandas` to `1.5` ([#11617](https://github.com/rapidsai/cudf/pull/11617)) [@galipremsagar](https://github.com/galipremsagar)
- Move type-dispatcher calls from traits.hpp to traits.cpp ([#11616](https://github.com/rapidsai/cudf/pull/11616)) [@davidwendt](https://github.com/davidwendt)
- Refactor parquet reader benchmarks with nvbench ([#11611](https://github.com/rapidsai/cudf/pull/11611)) [@PointKernel](https://github.com/PointKernel)
- Forward-merge branch-22.08 to branch-22.10 ([#11608](https://github.com/rapidsai/cudf/pull/11608)) [@bdice](https://github.com/bdice)
- Use stream in Java API. ([#11601](https://github.com/rapidsai/cudf/pull/11601)) [@bdice](https://github.com/bdice)
- Refactors of public/detail APIs, CUDF_FUNC_RANGE, stream handling. ([#11600](https://github.com/rapidsai/cudf/pull/11600)) [@bdice](https://github.com/bdice)
- Improve ORC writer benchmark with nvbench ([#11598](https://github.com/rapidsai/cudf/pull/11598)) [@PointKernel](https://github.com/PointKernel)
- Tune multibyte_split kernel ([#11587](https://github.com/rapidsai/cudf/pull/11587)) [@upsj](https://github.com/upsj)
- Move split_utils.cuh to strings/detail ([#11585](https://github.com/rapidsai/cudf/pull/11585)) [@davidwendt](https://github.com/davidwendt)
- Fix warnings due to compiler regression with `if constexpr` ([#11581](https://github.com/rapidsai/cudf/pull/11581)) [@ttnghia](https://github.com/ttnghia)
- Add full 24-bit dictionary support to Parquet writer ([#11580](https://github.com/rapidsai/cudf/pull/11580)) [@etseidl](https://github.com/etseidl)
- Expose "explicit-comms" option in shuffle-based dask_cudf functions ([#11576](https://github.com/rapidsai/cudf/pull/11576)) [@rjzamora](https://github.com/rjzamora)
- Move cudf::strings::findall_record to cudf::strings::findall ([#11575](https://github.com/rapidsai/cudf/pull/11575)) [@davidwendt](https://github.com/davidwendt)
- Refactor dask_cudf groupby to use apply_concat_apply ([#11571](https://github.com/rapidsai/cudf/pull/11571)) [@rjzamora](https://github.com/rjzamora)
- Add ability to write `list(struct)` columns as `map` type in orc writer ([#11568](https://github.com/rapidsai/cudf/pull/11568)) [@galipremsagar](https://github.com/galipremsagar)
- Add byte_range to multibyte_split benchmark + NVBench refactor ([#11562](https://github.com/rapidsai/cudf/pull/11562)) [@upsj](https://github.com/upsj)
- JNI support for writing binary columns in parquet ([#11556](https://github.com/rapidsai/cudf/pull/11556)) [@revans2](https://github.com/revans2)
- Support additional dictionary bit widths in Parquet writer ([#11547](https://github.com/rapidsai/cudf/pull/11547)) [@etseidl](https://github.com/etseidl)
- Refactor string/numeric conversion utilities ([#11545](https://github.com/rapidsai/cudf/pull/11545)) [@davidwendt](https://github.com/davidwendt)
- Removing unnecessary asserts in parquet tests ([#11544](https://github.com/rapidsai/cudf/pull/11544)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- Clean up ORC reader benchmarks with NVBench ([#11543](https://github.com/rapidsai/cudf/pull/11543)) [@PointKernel](https://github.com/PointKernel)
- Reuse MurmurHash3_32 in Parquet page data. ([#11528](https://github.com/rapidsai/cudf/pull/11528)) [@bdice](https://github.com/bdice)
- Add hexadecimal value separators ([#11527](https://github.com/rapidsai/cudf/pull/11527)) [@bdice](https://github.com/bdice)
- Deprecate `skiprows` and `num_rows` in `read_orc` ([#11522](https://github.com/rapidsai/cudf/pull/11522)) [@galipremsagar](https://github.com/galipremsagar)
- Struct support for `NULL_EQUALS` binary operation ([#11520](https://github.com/rapidsai/cudf/pull/11520)) [@rwlee](https://github.com/rwlee)
- Bump hadoop-common from 3.2.3 to 3.2.4 in /java ([#11516](https://github.com/rapidsai/cudf/pull/11516)) [@dependabot[bot]](https://github.com/dependabot[bot])
- Fix Feather test warning. ([#11511](https://github.com/rapidsai/cudf/pull/11511)) [@bdice](https://github.com/bdice)
- copy_range ballot_syncs to have no execution dependency ([#11508](https://github.com/rapidsai/cudf/pull/11508)) [@robertmaynard](https://github.com/robertmaynard)
- Upgrade to `arrow-9.x` ([#11507](https://github.com/rapidsai/cudf/pull/11507)) [@galipremsagar](https://github.com/galipremsagar)
- Remove support for skip_rows / num_rows options in the parquet reader. ([#11503](https://github.com/rapidsai/cudf/pull/11503)) [@nvdbaranec](https://github.com/nvdbaranec)
- Single-pass `multibyte_split` ([#11500](https://github.com/rapidsai/cudf/pull/11500)) [@upsj](https://github.com/upsj)
- Sanitize percentile_approx() output for empty input ([#11498](https://github.com/rapidsai/cudf/pull/11498)) [@SrikarVanavasam](https://github.com/SrikarVanavasam)
- Unpin `dask` and `distributed` for development ([#11492](https://github.com/rapidsai/cudf/pull/11492)) [@galipremsagar](https://github.com/galipremsagar)
- Move SparkMurmurHash3_32 functor. ([#11489](https://github.com/rapidsai/cudf/pull/11489)) [@bdice](https://github.com/bdice)
- Refactor group_nunique.cu to use nullate::DYNAMIC for reduce-by-key functor ([#11482](https://github.com/rapidsai/cudf/pull/11482)) [@davidwendt](https://github.com/davidwendt)
- Drop support for `skiprows` and `num_rows` in `cudf.read_parquet` ([#11480](https://github.com/rapidsai/cudf/pull/11480)) [@galipremsagar](https://github.com/galipremsagar)
- Add reduction `distinct_count` benchmark ([#11473](https://github.com/rapidsai/cudf/pull/11473)) [@ttnghia](https://github.com/ttnghia)
- Add groupby `nunique` aggregation benchmark ([#11472](https://github.com/rapidsai/cudf/pull/11472)) [@ttnghia](https://github.com/ttnghia)
- Disable Arrow S3 support by default. ([#11470](https://github.com/rapidsai/cudf/pull/11470)) [@bdice](https://github.com/bdice)
- Add groupby `max` aggregation benchmark ([#11464](https://github.com/rapidsai/cudf/pull/11464)) [@ttnghia](https://github.com/ttnghia)
- Extract Dremel encoding code from Parquet ([#11461](https://github.com/rapidsai/cudf/pull/11461)) [@vyasr](https://github.com/vyasr)
- Add missing Thrust #includes. ([#11457](https://github.com/rapidsai/cudf/pull/11457)) [@bdice](https://github.com/bdice)
- Make CMake hooks verbose ([#11456](https://github.com/rapidsai/cudf/pull/11456)) [@vyasr](https://github.com/vyasr)
- Control Parquet page size through Python API ([#11454](https://github.com/rapidsai/cudf/pull/11454)) [@etseidl](https://github.com/etseidl)
- Add control of Parquet column index creation to python ([#11453](https://github.com/rapidsai/cudf/pull/11453)) [@etseidl](https://github.com/etseidl)
- Remove unused is_struct trait. ([#11450](https://github.com/rapidsai/cudf/pull/11450)) [@bdice](https://github.com/bdice)
- Refactor the `Buffer` class ([#11447](https://github.com/rapidsai/cudf/pull/11447)) [@madsbk](https://github.com/madsbk)
- Refactor pad_side and strip_type enums into side_type enum ([#11438](https://github.com/rapidsai/cudf/pull/11438)) [@davidwendt](https://github.com/davidwendt)
- Update to Thrust 1.17.0 ([#11437](https://github.com/rapidsai/cudf/pull/11437)) [@bdice](https://github.com/bdice)
- Add in JNI for parsing JSON data and getting the metadata back too. ([#11431](https://github.com/rapidsai/cudf/pull/11431)) [@revans2](https://github.com/revans2)
- Convert byte_array_view to use std::byte ([#11424](https://github.com/rapidsai/cudf/pull/11424)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- Deprecate unflatten_nested_columns ([#11421](https://github.com/rapidsai/cudf/pull/11421)) [@SrikarVanavasam](https://github.com/SrikarVanavasam)
- Remove HASH_SERIAL_MURMUR3 / serial32BitMurmurHash3 ([#11383](https://github.com/rapidsai/cudf/pull/11383)) [@bdice](https://github.com/bdice)
- Add Spark list hashing Java tests ([#11379](https://github.com/rapidsai/cudf/pull/11379)) [@bdice](https://github.com/bdice)
- Move cmake to the build section. ([#11376](https://github.com/rapidsai/cudf/pull/11376)) [@vyasr](https://github.com/vyasr)
- Remove use of CUDA driver API calls from libcudf ([#11370](https://github.com/rapidsai/cudf/pull/11370)) [@shwina](https://github.com/shwina)
- Add column constructor from device_uvector&& ([#11356](https://github.com/rapidsai/cudf/pull/11356)) [@SrikarVanavasam](https://github.com/SrikarVanavasam)
- Remove unused custreamz thirdparty directory ([#11343](https://github.com/rapidsai/cudf/pull/11343)) [@vyasr](https://github.com/vyasr)
- Update jni version to 22.10.0-SNAPSHOT ([#11338](https://github.com/rapidsai/cudf/pull/11338)) [@pxLi](https://github.com/pxLi)
- Enable using upstream jitify2 ([#11287](https://github.com/rapidsai/cudf/pull/11287)) [@shwina](https://github.com/shwina)
- Cache cudf.Scalar ([#11246](https://github.com/rapidsai/cudf/pull/11246)) [@shwina](https://github.com/shwina)
- Remove deprecated Series.applymap. ([#11031](https://github.com/rapidsai/cudf/pull/11031)) [@bdice](https://github.com/bdice)
- Remove deprecated expand parameter from str.findall. ([#11030](https://github.com/rapidsai/cudf/pull/11030)) [@bdice](https://github.com/bdice)
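A minimal sketch of the `group_keys` support from [#11659](https://github.com/rapidsai/cudf/pull/11659) above; the data is made up, and the semantics are assumed to mirror pandas:

```python
# Minimal sketch of groupby(group_keys=...) from #11659 (toy data; semantics
# assumed to follow pandas).
import cudf

df = cudf.DataFrame({"k": ["a", "a", "b"], "v": [1, 2, 3]})

# group_keys=True prepends the group label to the index of apply() results;
# group_keys=False leaves the original index untouched.
print(df.groupby("k", group_keys=True).apply(lambda g: g["v"].cumsum()))
print(df.groupby("k", group_keys=False).apply(lambda g: g["v"].cumsum()))
```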

# cuDF 22.08.00 (17 Aug 2022)

## 🚨 Breaking Changes

- Remove legacy join APIs ([#11274](https://github.com/rapidsai/cudf/pull/11274)) [@vyasr](https://github.com/vyasr)
- Remove `lists::drop_list_duplicates` ([#11236](https://github.com/rapidsai/cudf/pull/11236)) [@ttnghia](https://github.com/ttnghia)
- Remove Index.replace API ([#11131](https://github.com/rapidsai/cudf/pull/11131)) [@vyasr](https://github.com/vyasr)
- Remove deprecated Index methods from Frame ([#11073](https://github.com/rapidsai/cudf/pull/11073)) [@vyasr](https://github.com/vyasr)
- Remove public API of cudf.merge_sorted. ([#11032](https://github.com/rapidsai/cudf/pull/11032)) [@bdice](https://github.com/bdice)
- Drop python `3.7` in code-base ([#11029](https://github.com/rapidsai/cudf/pull/11029)) [@galipremsagar](https://github.com/galipremsagar)
- Return empty dataframe when reading a Parquet file using empty `columns` option ([#11018](https://github.com/rapidsai/cudf/pull/11018)) [@vuule](https://github.com/vuule) *(example below)*
- Remove Arrow CUDA IPC code ([#10995](https://github.com/rapidsai/cudf/pull/10995)) [@shwina](https://github.com/shwina)
- Buffer: make `.ptr` read-only ([#10872](https://github.com/rapidsai/cudf/pull/10872)) [@madsbk](https://github.com/madsbk)
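A minimal sketch of the behavior change in [#11018](https://github.com/rapidsai/cudf/pull/11018) above; the file path is hypothetical:

```python
# Minimal sketch of #11018: an empty columns selection now returns an empty
# DataFrame rather than the full table ("t.parquet" is made up).
import cudf

cudf.DataFrame({"a": [1, 2]}).to_parquet("t.parquet")

empty = cudf.read_parquet("t.parquet", columns=[])
print(len(empty.columns))  # 0
```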

## 🐛 Bug Fixes

- Fix `distributed` error related to `loop_in_thread` ([#11428](https://github.com/rapidsai/cudf/pull/11428)) [@galipremsagar](https://github.com/galipremsagar)
- Relax arrow pinning to just 8.x and remove cuda build dependency from cudf recipe ([#11412](https://github.com/rapidsai/cudf/pull/11412)) [@kkraus14](https://github.com/kkraus14)
- Revert "Allow CuPy 11" ([#11409](https://github.com/rapidsai/cudf/pull/11409)) [@jakirkham](https://github.com/jakirkham)
- Fix `moto` timeouts ([#11369](https://github.com/rapidsai/cudf/pull/11369)) [@galipremsagar](https://github.com/galipremsagar)
- Set `+/-infinity` as the `identity` values for floating-point numbers in device operators `min` and `max` ([#11357](https://github.com/rapidsai/cudf/pull/11357)) [@ttnghia](https://github.com/ttnghia)
- Fix memory_usage() for `ListSeries` ([#11355](https://github.com/rapidsai/cudf/pull/11355)) [@thomcom](https://github.com/thomcom)
- Fix constructing Column from column_view with expired mask ([#11354](https://github.com/rapidsai/cudf/pull/11354)) [@shwina](https://github.com/shwina)
- Handle parquet corner case: Columns with more rows than are in the row group. ([#11353](https://github.com/rapidsai/cudf/pull/11353)) [@nvdbaranec](https://github.com/nvdbaranec)
- Fix `DatetimeIndex` & `TimedeltaIndex` constructors ([#11342](https://github.com/rapidsai/cudf/pull/11342)) [@galipremsagar](https://github.com/galipremsagar)
- Fix unsigned-compare compile warning in IntPow binops ([#11339](https://github.com/rapidsai/cudf/pull/11339)) [@davidwendt](https://github.com/davidwendt)
- Fix performance issue and add a new code path to `cudf::detail::contains` ([#11330](https://github.com/rapidsai/cudf/pull/11330)) [@ttnghia](https://github.com/ttnghia)
- Pin `pytorch` to temporarily unblock from `libcupti` errors ([#11289](https://github.com/rapidsai/cudf/pull/11289)) [@galipremsagar](https://github.com/galipremsagar)
- Workaround for nvcomp zstd overwriting blocks for orc due to underestimate of sizes ([#11288](https://github.com/rapidsai/cudf/pull/11288)) [@jbrennan333](https://github.com/jbrennan333)
- Fix inconsistency when hashing two tables in `cudf::detail::contains` ([#11284](https://github.com/rapidsai/cudf/pull/11284)) [@ttnghia](https://github.com/ttnghia)
- Fix issue related to numpy array and `category` dtype ([#11282](https://github.com/rapidsai/cudf/pull/11282)) [@galipremsagar](https://github.com/galipremsagar)
- Add NotImplementedError when on is specified in DataFrame.join. ([#11275](https://github.com/rapidsai/cudf/pull/11275)) [@vyasr](https://github.com/vyasr)
- Fix invalid allocate_like() and empty_like() tests. ([#11268](https://github.com/rapidsai/cudf/pull/11268)) [@nvdbaranec](https://github.com/nvdbaranec)
- Returns DataFrame When Concatenating Along Axis 1 ([#11263](https://github.com/rapidsai/cudf/pull/11263)) [@isVoid](https://github.com/isVoid)
- Fix compile error due to missing header ([#11257](https://github.com/rapidsai/cudf/pull/11257)) [@ttnghia](https://github.com/ttnghia)
- Fix a memory aliasing/crash issue in scatter for lists. ([#11254](https://github.com/rapidsai/cudf/pull/11254)) [@nvdbaranec](https://github.com/nvdbaranec)
- Fix `tests/rolling/empty_input_test` ([#11238](https://github.com/rapidsai/cudf/pull/11238)) [@ttnghia](https://github.com/ttnghia)
- Fix const qualifier when using `host_span<bitmask_type const*>` ([#11220](https://github.com/rapidsai/cudf/pull/11220)) [@ttnghia](https://github.com/ttnghia)
- Avoid using `nvcompBatchedDeflateDecompressGetTempSizeEx` in cuIO ([#11213](https://github.com/rapidsai/cudf/pull/11213)) [@vuule](https://github.com/vuule)
- Generate benchmark data with correct run length regardless of cardinality ([#11205](https://github.com/rapidsai/cudf/pull/11205)) [@vuule](https://github.com/vuule)
- Fix cumulative count index behavior ([#11188](https://github.com/rapidsai/cudf/pull/11188)) [@brandon-b-miller](https://github.com/brandon-b-miller)
- Fix assertion in dask_cudf test_struct_explode ([#11170](https://github.com/rapidsai/cudf/pull/11170)) [@rjzamora](https://github.com/rjzamora)
- Provides a method for the user to remove the hook and re-register the hook in a custom shutdown hook manager ([#11161](https://github.com/rapidsai/cudf/pull/11161)) [@res-life](https://github.com/res-life)
- Fix compatibility issues with pandas 1.4.3 ([#11152](https://github.com/rapidsai/cudf/pull/11152)) [@vyasr](https://github.com/vyasr)
- Ensure cuco export set is installed in cmake build ([#11147](https://github.com/rapidsai/cudf/pull/11147)) [@jlowe](https://github.com/jlowe)
- Avoid redundant deepcopy in `cudf.from_pandas` ([#11142](https://github.com/rapidsai/cudf/pull/11142)) [@galipremsagar](https://github.com/galipremsagar)
- Fix compile error due to missing header ([#11126](https://github.com/rapidsai/cudf/pull/11126)) [@ttnghia](https://github.com/ttnghia)
- Fix `__cuda_array_interface__` failures ([#11113](https://github.com/rapidsai/cudf/pull/11113)) [@galipremsagar](https://github.com/galipremsagar)
- Support octal and hex within regex character class pattern ([#11112](https://github.com/rapidsai/cudf/pull/11112)) [@davidwendt](https://github.com/davidwendt)
- Fix split_re matching logic for word boundaries ([#11106](https://github.com/rapidsai/cudf/pull/11106)) [@davidwendt](https://github.com/davidwendt)
- Handle multiple files metadata in `read_parquet` ([#11105](https://github.com/rapidsai/cudf/pull/11105)) [@galipremsagar](https://github.com/galipremsagar)
- Fix index alignment for Series objects with repeated index ([#11103](https://github.com/rapidsai/cudf/pull/11103)) [@shwina](https://github.com/shwina)
- FindcuFile now searches in the current CUDA Toolkit location ([#11101](https://github.com/rapidsai/cudf/pull/11101)) [@robertmaynard](https://github.com/robertmaynard)
- Fix regex word boundary logic to include underline ([#11099](https://github.com/rapidsai/cudf/pull/11099)) [@davidwendt](https://github.com/davidwendt)
- Exclude CudaFatalTest when selecting all Java tests ([#11083](https://github.com/rapidsai/cudf/pull/11083)) [@jlowe](https://github.com/jlowe)
- Fix duplicate `cudatoolkit` pinning issue ([#11070](https://github.com/rapidsai/cudf/pull/11070)) [@galipremsagar](https://github.com/galipremsagar)
- Maintain the input index in the result of a groupby-transform ([#11068](https://github.com/rapidsai/cudf/pull/11068)) [@shwina](https://github.com/shwina) *(example below)*
- Fix bug with row count comparison for expect_columns_equivalent(). ([#11059](https://github.com/rapidsai/cudf/pull/11059)) [@nvdbaranec](https://github.com/nvdbaranec)
- Fix BPE uninitialized size value for null and empty input strings ([#11054](https://github.com/rapidsai/cudf/pull/11054)) [@davidwendt](https://github.com/davidwendt)
- Include missing header for usage of `get_current_device_resource()` ([#11047](https://github.com/rapidsai/cudf/pull/11047)) [@AtlantaPepsi](https://github.com/AtlantaPepsi)
- Fix warn_unused_result error in parquet test ([#11026](https://github.com/rapidsai/cudf/pull/11026)) [@karthikeyann](https://github.com/karthikeyann)
- Return empty dataframe when reading a Parquet file using empty `columns` option ([#11018](https://github.com/rapidsai/cudf/pull/11018)) [@vuule](https://github.com/vuule)
- Fix small error in page row count limiting ([#10991](https://github.com/rapidsai/cudf/pull/10991)) [@etseidl](https://github.com/etseidl)
- Fix a row index entry error in ORC writer issue ([#10989](https://github.com/rapidsai/cudf/pull/10989)) [@vuule](https://github.com/vuule)
- Fix grouped covariance to require both values to be convertible to double. ([#10891](https://github.com/rapidsai/cudf/pull/10891)) [@bdice](https://github.com/bdice)
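A minimal sketch of the fix in [#11068](https://github.com/rapidsai/cudf/pull/11068) above; the data is made up, and `"max"` is assumed to be a supported transform aggregation:

```python
# Minimal sketch of #11068: a groupby-transform result is now aligned with
# the input index (toy data).
import cudf

df = cudf.DataFrame(
    {"k": ["x", "y", "x"], "v": [1, 2, 3]},
    index=[10, 20, 30],
)

out = df.groupby("k")["v"].transform("max")
print(out.index)  # [10, 20, 30]: the input index is maintained
```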

## 📖 Documentation

- Fix issues with day & night modes in python docs ([#11400](https://github.com/rapidsai/cudf/pull/11400)) [@galipremsagar](https://github.com/galipremsagar)
- Update missing data handling APIs in docs ([#11345](https://github.com/rapidsai/cudf/pull/11345)) [@galipremsagar](https://github.com/galipremsagar)
- Add lists filtering APIs to doxygen group. ([#11336](https://github.com/rapidsai/cudf/pull/11336)) [@bdice](https://github.com/bdice)
- Remove unused import in README sample ([#11318](https://github.com/rapidsai/cudf/pull/11318)) [@vyasr](https://github.com/vyasr)
- Note null behavior in `where` docs ([#11276](https://github.com/rapidsai/cudf/pull/11276)) [@brandon-b-miller](https://github.com/brandon-b-miller) *(example below)*
- Update docstring for spans in `get_row_data_range` ([#11271](https://github.com/rapidsai/cudf/pull/11271)) [@vyasr](https://github.com/vyasr)
- Update nvCOMP integration table ([#11231](https://github.com/rapidsai/cudf/pull/11231)) [@vuule](https://github.com/vuule)
- Add dev docs for documentation writing ([#11217](https://github.com/rapidsai/cudf/pull/11217)) [@vyasr](https://github.com/vyasr)
- Documentation fix for concatenate ([#11187](https://github.com/rapidsai/cudf/pull/11187)) [@dagardner-nv](https://github.com/dagardner-nv)
- Fix unresolved links in markdown ([#11173](https://github.com/rapidsai/cudf/pull/11173)) [@karthikeyann](https://github.com/karthikeyann)
- Fix cudf version in README.md install commands ([#11164](https://github.com/rapidsai/cudf/pull/11164)) [@jvanstraten](https://github.com/jvanstraten)
- Switch `language` from `None` to `"en"` in docs build ([#11133](https://github.com/rapidsai/cudf/pull/11133)) [@galipremsagar](https://github.com/galipremsagar)
- Remove docs mentioning scalar_view since no such class exists. ([#11132](https://github.com/rapidsai/cudf/pull/11132)) [@bdice](https://github.com/bdice)
- Add docstring entry for `DataFrame.value_counts` ([#11039](https://github.com/rapidsai/cudf/pull/11039)) [@galipremsagar](https://github.com/galipremsagar)
- Add docs to rolling var, std, count. ([#11035](https://github.com/rapidsai/cudf/pull/11035)) [@bdice](https://github.com/bdice)
- Fix docs for Numba UDFs. ([#11020](https://github.com/rapidsai/cudf/pull/11020)) [@bdice](https://github.com/bdice)
- Replace column comparison utilities functions with macros ([#11007](https://github.com/rapidsai/cudf/pull/11007)) [@karthikeyann](https://github.com/karthikeyann)
- Fix Doxygen warnings in multiple headers files ([#11003](https://github.com/rapidsai/cudf/pull/11003)) [@karthikeyann](https://github.com/karthikeyann)
- Fix doxygen warnings in utilities/ headers ([#10974](https://github.com/rapidsai/cudf/pull/10974)) [@karthikeyann](https://github.com/karthikeyann)
- Fix Doxygen warnings in table header files ([#10964](https://github.com/rapidsai/cudf/pull/10964)) [@karthikeyann](https://github.com/karthikeyann)
- Fix Doxygen warnings in column header files ([#10963](https://github.com/rapidsai/cudf/pull/10963)) [@karthikeyann](https://github.com/karthikeyann)
- Fix Doxygen warnings in strings / header files ([#10937](https://github.com/rapidsai/cudf/pull/10937)) [@karthikeyann](https://github.com/karthikeyann)
- Generate Doxygen Tag File for Libcudf ([#10932](https://github.com/rapidsai/cudf/pull/10932)) [@isVoid](https://github.com/isVoid)
- Fix doxygen warnings in structs, lists headers ([#10923](https://github.com/rapidsai/cudf/pull/10923)) [@karthikeyann](https://github.com/karthikeyann)
- Fix doxygen warnings in fixed_point.hpp ([#10922](https://github.com/rapidsai/cudf/pull/10922)) [@karthikeyann](https://github.com/karthikeyann)
- Fix doxygen warnings in ast/, rolling, tdigest/, wrappers/, dictionary/ headers ([#10921](https://github.com/rapidsai/cudf/pull/10921)) [@karthikeyann](https://github.com/karthikeyann)
- fix doxygen warnings in cudf/io/types.hpp, other header files ([#10913](https://github.com/rapidsai/cudf/pull/10913)) [@karthikeyann](https://github.com/karthikeyann)
- fix doxygen warnings in cudf/io/ avro, csv, json, orc, parquet header files ([#10912](https://github.com/rapidsai/cudf/pull/10912)) [@karthikeyann](https://github.com/karthikeyann)
- Fix doxygen warnings in cudf/*.hpp ([#10896](https://github.com/rapidsai/cudf/pull/10896)) [@karthikeyann](https://github.com/karthikeyann)
- Add missing documentation in aggregation.hpp ([#10887](https://github.com/rapidsai/cudf/pull/10887)) [@karthikeyann](https://github.com/karthikeyann)
- Revise PR template. ([#10774](https://github.com/rapidsai/cudf/pull/10774)) [@bdice](https://github.com/bdice)
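A small sketch of the null behavior documented in [#11276](https://github.com/rapidsai/cudf/pull/11276) above (toy data):

```python
# Small sketch of the behavior #11276 documents: where() without a
# replacement value fills the non-matching rows with nulls.
import cudf

s = cudf.Series([1, 2, 3])
print(s.where(s > 1))  # [<NA>, 2, 3]
```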

## 🚀 New Features

- Change cmake to allow controlling Arrow version via cmake variable ([#11429](https://github.com/rapidsai/cudf/pull/11429)) [@kkraus14](https://github.com/kkraus14)
- Adding support for list<int8> columns to be written as byte arrays in parquet ([#11328](https://github.com/rapidsai/cudf/pull/11328)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- Adding byte array view structure ([#11322](https://github.com/rapidsai/cudf/pull/11322)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- Adding byte_array statistics ([#11303](https://github.com/rapidsai/cudf/pull/11303)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- Add column indexes to Parquet writer ([#11302](https://github.com/rapidsai/cudf/pull/11302)) [@etseidl](https://github.com/etseidl)
- Provide an Option for Default Integer and Floating Bitwidth ([#11272](https://github.com/rapidsai/cudf/pull/11272)) [@isVoid](https://github.com/isVoid)
- FST benchmark ([#11243](https://github.com/rapidsai/cudf/pull/11243)) [@karthikeyann](https://github.com/karthikeyann)
- Adds the Finite-State Transducer algorithm ([#11242](https://github.com/rapidsai/cudf/pull/11242)) [@elstehle](https://github.com/elstehle)
- Refactor `collect_set` to use `cudf::distinct` and `cudf::lists::distinct` ([#11228](https://github.com/rapidsai/cudf/pull/11228)) [@ttnghia](https://github.com/ttnghia)
- Treat zstd as stable in nvcomp releases 2.3.2 and later ([#11226](https://github.com/rapidsai/cudf/pull/11226)) [@jbrennan333](https://github.com/jbrennan333)
- Add 24 bit dictionary support to Parquet writer ([#11216](https://github.com/rapidsai/cudf/pull/11216)) [@devavret](https://github.com/devavret)
- Enable positive group indices for extractAllRecord on JNI ([#11215](https://github.com/rapidsai/cudf/pull/11215)) [@anthony-chang](https://github.com/anthony-chang)
- JNI bindings for NTH_ELEMENT window aggregation ([#11201](https://github.com/rapidsai/cudf/pull/11201)) [@mythrocks](https://github.com/mythrocks)
- Add JNI bindings for extractAllRecord ([#11196](https://github.com/rapidsai/cudf/pull/11196)) [@anthony-chang](https://github.com/anthony-chang)
- Add `cudf.options` ([#11193](https://github.com/rapidsai/cudf/pull/11193)) [@isVoid](https://github.com/isVoid) *(example below)*
- Add thrift support for parquet column and offset indexes ([#11178](https://github.com/rapidsai/cudf/pull/11178)) [@etseidl](https://github.com/etseidl)
- Adding binary read/write as options for parquet ([#11160](https://github.com/rapidsai/cudf/pull/11160)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- Support `nth_element` for window functions ([#11158](https://github.com/rapidsai/cudf/pull/11158)) [@mythrocks](https://github.com/mythrocks)
- Implement `lists::distinct` and `cudf::detail::stable_distinct` ([#11149](https://github.com/rapidsai/cudf/pull/11149)) [@ttnghia](https://github.com/ttnghia)
- Implement Groupby pct_change ([#11144](https://github.com/rapidsai/cudf/pull/11144)) [@skirui-source](https://github.com/skirui-source)
- Add JNI for set operations ([#11143](https://github.com/rapidsai/cudf/pull/11143)) [@ttnghia](https://github.com/ttnghia)
- Remove deprecated PER_THREAD_DEFAULT_STREAM ([#11134](https://github.com/rapidsai/cudf/pull/11134)) [@jbrennan333](https://github.com/jbrennan333)
- Added a Java method to check the existence of a list of keys in a map ([#11128](https://github.com/rapidsai/cudf/pull/11128)) [@razajafri](https://github.com/razajafri)
- Feature/python benchmarking ([#11125](https://github.com/rapidsai/cudf/pull/11125)) [@vyasr](https://github.com/vyasr)
- Support `nan_equality` in `cudf::distinct` ([#11118](https://github.com/rapidsai/cudf/pull/11118)) [@ttnghia](https://github.com/ttnghia)
- Added JNI for getMapValueForKeys ([#11104](https://github.com/rapidsai/cudf/pull/11104)) [@razajafri](https://github.com/razajafri)
- Refactor `semi_anti_join` ([#11100](https://github.com/rapidsai/cudf/pull/11100)) [@ttnghia](https://github.com/ttnghia)
- Replace remaining instances of rmm::cuda_stream_default with cudf::default_stream_value ([#11082](https://github.com/rapidsai/cudf/pull/11082)) [@jbrennan333](https://github.com/jbrennan333)
- Adds the Logical Stack algorithm ([#11078](https://github.com/rapidsai/cudf/pull/11078)) [@elstehle](https://github.com/elstehle)
- Add doxygen-check pre-commit hook ([#11076](https://github.com/rapidsai/cudf/pull/11076)) [@karthikeyann](https://github.com/karthikeyann)
- Use new nvCOMP API to optimize the decompression temp memory size ([#11064](https://github.com/rapidsai/cudf/pull/11064)) [@vuule](https://github.com/vuule)
- Add Doxygen CI check ([#11057](https://github.com/rapidsai/cudf/pull/11057)) [@karthikeyann](https://github.com/karthikeyann)
- Support `duplicate_keep_option` in `cudf::distinct` ([#11052](https://github.com/rapidsai/cudf/pull/11052)) [@ttnghia](https://github.com/ttnghia)
- Support set operations ([#11043](https://github.com/rapidsai/cudf/pull/11043)) [@ttnghia](https://github.com/ttnghia)
- Support for ZLIB compression in ORC writer ([#11036](https://github.com/rapidsai/cudf/pull/11036)) [@vuule](https://github.com/vuule)
- Adding feature swaplevels ([#11027](https://github.com/rapidsai/cudf/pull/11027)) [@VamsiTallam95](https://github.com/VamsiTallam95)
- Use nvCOMP for ZLIB decompression in ORC reader ([#11024](https://github.com/rapidsai/cudf/pull/11024)) [@vuule](https://github.com/vuule)
- Function for bfill, ffill #9591 ([#11022](https://github.com/rapidsai/cudf/pull/11022)) [@Sreekiran096](https://github.com/Sreekiran096)
- Generate group offsets from element labels ([#11017](https://github.com/rapidsai/cudf/pull/11017)) [@ttnghia](https://github.com/ttnghia)
- Feature axes ([#10979](https://github.com/rapidsai/cudf/pull/10979)) [@VamsiTallam95](https://github.com/VamsiTallam95)
- Generate group labels from offsets ([#10945](https://github.com/rapidsai/cudf/pull/10945)) [@ttnghia](https://github.com/ttnghia)
- Add missing cuIO benchmark coverage for duration types ([#10933](https://github.com/rapidsai/cudf/pull/10933)) [@vuule](https://github.com/vuule)
- Dask-cuDF cumulative groupby ops ([#10889](https://github.com/rapidsai/cudf/pull/10889)) [@brandon-b-miller](https://github.com/brandon-b-miller)
- Reindex Improvements ([#10815](https://github.com/rapidsai/cudf/pull/10815)) [@brandon-b-miller](https://github.com/brandon-b-miller)
- Implement value_counts for DataFrame ([#10813](https://github.com/rapidsai/cudf/pull/10813)) [@martinfalisse](https://github.com/martinfalisse)
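A minimal sketch of the options API from [#11193](https://github.com/rapidsai/cudf/pull/11193) above, together with the default bitwidth option from [#11272](https://github.com/rapidsai/cudf/pull/11272); check `cudf.describe_option()` on your version before relying on the option name:

```python
# Minimal sketch of cudf.options (#11193) and default_integer_bitwidth
# (#11272); the option name is the one these PRs discuss.
import cudf

cudf.set_option("default_integer_bitwidth", 32)
print(cudf.get_option("default_integer_bitwidth"))  # 32

# Integer columns are now constructed as int32 rather than int64 wherever
# the option is honored (e.g. Series construction and readers).
print(cudf.Series([1, 2, 3]).dtype)
```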

## 🛠️ Improvements

- Pin `dask` & `distributed` for release ([#11433](https://github.com/rapidsai/cudf/pull/11433)) [@galipremsagar](https://github.com/galipremsagar)
- Use documented header template for `doxygen` ([#11430](https://github.com/rapidsai/cudf/pull/11430)) [@galipremsagar](https://github.com/galipremsagar)
- Relax arrow version in dev env ([#11418](https://github.com/rapidsai/cudf/pull/11418)) [@galipremsagar](https://github.com/galipremsagar)
- Allow CuPy 11 ([#11393](https://github.com/rapidsai/cudf/pull/11393)) [@jakirkham](https://github.com/jakirkham)
- Improve multibyte_split performance ([#11347](https://github.com/rapidsai/cudf/pull/11347)) [@cwharris](https://github.com/cwharris)
- Switch death test to use explicit trap. ([#11326](https://github.com/rapidsai/cudf/pull/11326)) [@vyasr](https://github.com/vyasr)
- Add --output-on-failure to ctest args. ([#11321](https://github.com/rapidsai/cudf/pull/11321)) [@vyasr](https://github.com/vyasr)
- Consolidate remaining DataFrame/Series APIs ([#11315](https://github.com/rapidsai/cudf/pull/11315)) [@vyasr](https://github.com/vyasr)
- Add JNI support for the join_strings API ([#11309](https://github.com/rapidsai/cudf/pull/11309)) [@revans2](https://github.com/revans2)
- Add cupy version to setup.py install_requires ([#11306](https://github.com/rapidsai/cudf/pull/11306)) [@vyasr](https://github.com/vyasr)
- removing some unused code ([#11305](https://github.com/rapidsai/cudf/pull/11305)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- Add test of wildcard selection ([#11300](https://github.com/rapidsai/cudf/pull/11300)) [@vyasr](https://github.com/vyasr)
- Update parquet reader to take stream parameter ([#11294](https://github.com/rapidsai/cudf/pull/11294)) [@PointKernel](https://github.com/PointKernel)
- Spark list hashing ([#11292](https://github.com/rapidsai/cudf/pull/11292)) [@bdice](https://github.com/bdice)
- Remove legacy join APIs ([#11274](https://github.com/rapidsai/cudf/pull/11274)) [@vyasr](https://github.com/vyasr)
- Fix `cudf` recipes syntax ([#11273](https://github.com/rapidsai/cudf/pull/11273)) [@ajschmidt8](https://github.com/ajschmidt8)
- Fix `cudf` recipe ([#11267](https://github.com/rapidsai/cudf/pull/11267)) [@ajschmidt8](https://github.com/ajschmidt8)
- Cleanup config files ([#11266](https://github.com/rapidsai/cudf/pull/11266)) [@vyasr](https://github.com/vyasr)
- Run mypy on all packages ([#11265](https://github.com/rapidsai/cudf/pull/11265)) [@vyasr](https://github.com/vyasr)
- Update to isort 5.10.1. ([#11262](https://github.com/rapidsai/cudf/pull/11262)) [@vyasr](https://github.com/vyasr)
- Consolidate flake8 and pydocstyle configuration ([#11260](https://github.com/rapidsai/cudf/pull/11260)) [@vyasr](https://github.com/vyasr)
- Remove redundant black config specifications. ([#11258](https://github.com/rapidsai/cudf/pull/11258)) [@vyasr](https://github.com/vyasr)
- Ensure DeprecationWarnings are not introduced via pre-commit ([#11255](https://github.com/rapidsai/cudf/pull/11255)) [@wence-](https://github.com/wence-)
- Optimization to gpu::PreprocessColumnData in parquet reader. ([#11252](https://github.com/rapidsai/cudf/pull/11252)) [@nvdbaranec](https://github.com/nvdbaranec)
- Move rolling impl details to detail/ directory. ([#11250](https://github.com/rapidsai/cudf/pull/11250)) [@mythrocks](https://github.com/mythrocks)
- Remove `lists::drop_list_duplicates` ([#11236](https://github.com/rapidsai/cudf/pull/11236)) [@ttnghia](https://github.com/ttnghia)
- Use `cudf::lists::distinct` in Python binding ([#11234](https://github.com/rapidsai/cudf/pull/11234)) [@ttnghia](https://github.com/ttnghia)
- Use `cudf::lists::distinct` in Java binding ([#11233](https://github.com/rapidsai/cudf/pull/11233)) [@ttnghia](https://github.com/ttnghia)
- Use `cudf::distinct` in Java binding ([#11232](https://github.com/rapidsai/cudf/pull/11232)) [@ttnghia](https://github.com/ttnghia)
- Pin `dask-cuda` in dev environment ([#11229](https://github.com/rapidsai/cudf/pull/11229)) [@galipremsagar](https://github.com/galipremsagar)
- Remove cruft in map_lookup ([#11221](https://github.com/rapidsai/cudf/pull/11221)) [@mythrocks](https://github.com/mythrocks)
- Deprecate `skiprows` & `num_rows` in parquet reader ([#11218](https://github.com/rapidsai/cudf/pull/11218)) [@galipremsagar](https://github.com/galipremsagar)
- Remove Frame._index ([#11210](https://github.com/rapidsai/cudf/pull/11210)) [@vyasr](https://github.com/vyasr)
- Improve performance for `cudf::contains` when searching for a scalar ([#11202](https://github.com/rapidsai/cudf/pull/11202)) [@ttnghia](https://github.com/ttnghia)
- Document why Development component is needing for CMake. ([#11200](https://github.com/rapidsai/cudf/pull/11200)) [@vyasr](https://github.com/vyasr)
- cleanup unused code in rolling_test.hpp ([#11195](https://github.com/rapidsai/cudf/pull/11195)) [@karthikeyann](https://github.com/karthikeyann)
- Standardize join internals around DataFrame ([#11184](https://github.com/rapidsai/cudf/pull/11184)) [@vyasr](https://github.com/vyasr)
- Move character case table declarations from src to detail ([#11183](https://github.com/rapidsai/cudf/pull/11183)) [@davidwendt](https://github.com/davidwendt)
- Remove usage of Frame in StringMethods ([#11181](https://github.com/rapidsai/cudf/pull/11181)) [@vyasr](https://github.com/vyasr)
- Expose get_json_object_options to Python ([#11180](https://github.com/rapidsai/cudf/pull/11180)) [@SrikarVanavasam](https://github.com/SrikarVanavasam)
- Fix decimal128 stats in parquet writer ([#11179](https://github.com/rapidsai/cudf/pull/11179)) [@etseidl](https://github.com/etseidl)
- Modify CheckPageRows in parquet_test to use datasources ([#11177](https://github.com/rapidsai/cudf/pull/11177)) [@etseidl](https://github.com/etseidl)
- Pin max version of `cuda-python` to `11.7.0` ([#11174](https://github.com/rapidsai/cudf/pull/11174)) [@Ethyling](https://github.com/Ethyling)
- Refactor and optimize Frame.where ([#11168](https://github.com/rapidsai/cudf/pull/11168)) [@vyasr](https://github.com/vyasr)
- Add npos const static member to cudf::string_view ([#11166](https://github.com/rapidsai/cudf/pull/11166)) [@davidwendt](https://github.com/davidwendt)
- Move _drop_rows_by_label from Frame to IndexedFrame ([#11157](https://github.com/rapidsai/cudf/pull/11157)) [@vyasr](https://github.com/vyasr)
- Clean up _copy_type_metadata ([#11156](https://github.com/rapidsai/cudf/pull/11156)) [@vyasr](https://github.com/vyasr)
- Add `nvcc` conda package in dev environment ([#11154](https://github.com/rapidsai/cudf/pull/11154)) [@galipremsagar](https://github.com/galipremsagar)
- Struct binary comparison op functionality for spark rapids ([#11153](https://github.com/rapidsai/cudf/pull/11153)) [@rwlee](https://github.com/rwlee)
- Refactor inline conditionals. ([#11151](https://github.com/rapidsai/cudf/pull/11151)) [@bdice](https://github.com/bdice)
- Refactor Spark hashing tests ([#11145](https://github.com/rapidsai/cudf/pull/11145)) [@bdice](https://github.com/bdice)
- Add new `_from_data_like_self` factory ([#11140](https://github.com/rapidsai/cudf/pull/11140)) [@vyasr](https://github.com/vyasr)
- Update get_cucollections to use rapids-cmake ([#11139](https://github.com/rapidsai/cudf/pull/11139)) [@vyasr](https://github.com/vyasr)
- Remove unnecessary extra function for libcudacxx detection ([#11138](https://github.com/rapidsai/cudf/pull/11138)) [@vyasr](https://github.com/vyasr)
- Allow initial value for cudf::reduce and cudf::segmented_reduce. ([#11137](https://github.com/rapidsai/cudf/pull/11137)) [@SrikarVanavasam](https://github.com/SrikarVanavasam)
- Remove Index.replace API ([#11131](https://github.com/rapidsai/cudf/pull/11131)) [@vyasr](https://github.com/vyasr)
- Move char-type table function declarations from src to detail ([#11127](https://github.com/rapidsai/cudf/pull/11127)) [@davidwendt](https://github.com/davidwendt)
- Clean up repo root ([#11124](https://github.com/rapidsai/cudf/pull/11124)) [@bdice](https://github.com/bdice)
- Improve print formatting of strings containing newline characters. ([#11108](https://github.com/rapidsai/cudf/pull/11108)) [@nvdbaranec](https://github.com/nvdbaranec)
- Fix cudf::string_view::find() to return pos for empty string argument ([#11107](https://github.com/rapidsai/cudf/pull/11107)) [@davidwendt](https://github.com/davidwendt)
- Forward-merge branch-22.06 to branch-22.08 ([#11086](https://github.com/rapidsai/cudf/pull/11086)) [@bdice](https://github.com/bdice)
- Take iterators by value in clamp.cu. ([#11084](https://github.com/rapidsai/cudf/pull/11084)) [@bdice](https://github.com/bdice)
- Performance improvements for row to column conversions ([#11075](https://github.com/rapidsai/cudf/pull/11075)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- Remove deprecated Index methods from Frame ([#11073](https://github.com/rapidsai/cudf/pull/11073)) [@vyasr](https://github.com/vyasr)
- Use per-page max compressed size estimate for compression ([#11066](https://github.com/rapidsai/cudf/pull/11066)) [@devavret](https://github.com/devavret)
- column to row refactor for performance ([#11063](https://github.com/rapidsai/cudf/pull/11063)) [@hyperbolic2346](https://github.com/hyperbolic2346)
- Include `skbuild` directory into `build.sh` `clean` operation ([#11060](https://github.com/rapidsai/cudf/pull/11060)) [@galipremsagar](https://github.com/galipremsagar)
- Unpin `dask` & `distributed` for development ([#11058](https://github.com/rapidsai/cudf/pull/11058)) [@galipremsagar](https://github.com/galipremsagar)
- Add support for `Series.between` ([#11051](https://github.com/rapidsai/cudf/pull/11051)) [@galipremsagar](https://github.com/galipremsagar) *(example below)*
- Fix groupby include ([#11046](https://github.com/rapidsai/cudf/pull/11046)) [@bwyogatama](https://github.com/bwyogatama)
- Regex cleanup internal reclass and reclass_device classes ([#11045](https://github.com/rapidsai/cudf/pull/11045)) [@davidwendt](https://github.com/davidwendt)
- Remove public API of cudf.merge_sorted. ([#11032](https://github.com/rapidsai/cudf/pull/11032)) [@bdice](https://github.com/bdice)
- Drop python `3.7` in code-base ([#11029](https://github.com/rapidsai/cudf/pull/11029)) [@galipremsagar](https://github.com/galipremsagar)
- Addition & integration of the integer power operator ([#11025](https://github.com/rapidsai/cudf/pull/11025)) [@AtlantaPepsi](https://github.com/AtlantaPepsi)
- Refactor `lists::contains` ([#11019](https://github.com/rapidsai/cudf/pull/11019)) [@ttnghia](https://github.com/ttnghia)
- Change build.sh to find C++ library by default and avoid shadowing CMAKE_ARGS ([#11013](https://github.com/rapidsai/cudf/pull/11013)) [@vyasr](https://github.com/vyasr)
- Clean up parquet unit test ([#11005](https://github.com/rapidsai/cudf/pull/11005)) [@PointKernel](https://github.com/PointKernel)
- Add missing #pragma once to header files ([#11004](https://github.com/rapidsai/cudf/pull/11004)) [@karthikeyann](https://github.com/karthikeyann)
- Cleanup `iterator.cuh` and add fixed point support for `scalar_optional_accessor` ([#10999](https://github.com/rapidsai/cudf/pull/10999)) [@ttnghia](https://github.com/ttnghia)
- Refactor `cudf::contains` ([#10997](https://github.com/rapidsai/cudf/pull/10997)) [@ttnghia](https://github.com/ttnghia)
- Remove Arrow CUDA IPC code ([#10995](https://github.com/rapidsai/cudf/pull/10995)) [@shwina](https://github.com/shwina)
- Change file extension for groupby benchmark ([#10985](https://github.com/rapidsai/cudf/pull/10985)) [@ttnghia](https://github.com/ttnghia)
- Sort recipe include checks. ([#10984](https://github.com/rapidsai/cudf/pull/10984)) [@bdice](https://github.com/bdice)
- Update cuCollections for thrust upgrade ([#10983](https://github.com/rapidsai/cudf/pull/10983)) [@PointKernel](https://github.com/PointKernel)
- Expose row-group size options in cudf ParquetWriter ([#10980](https://github.com/rapidsai/cudf/pull/10980)) [@rjzamora](https://github.com/rjzamora)
- Cleanup cudf::strings::detail::regex_parser class source ([#10975](https://github.com/rapidsai/cudf/pull/10975)) [@davidwendt](https://github.com/davidwendt)
- Handle missing fields as nulls in get_json_object() ([#10970](https://github.com/rapidsai/cudf/pull/10970)) [@SrikarVanavasam](https://github.com/SrikarVanavasam)
- Fix license families to match all-caps expected by conda-verify. ([#10931](https://github.com/rapidsai/cudf/pull/10931)) [@bdice](https://github.com/bdice)
- Include <optional> for GCC 11 compatibility. ([#10927](https://github.com/rapidsai/cudf/pull/10927)) [@bdice](https://github.com/bdice)
- Enable builds with scikit-build ([#10919](https://github.com/rapidsai/cudf/pull/10919)) [@vyasr](https://github.com/vyasr)
- Improve `distinct` by using `cuco::static_map::retrieve_all` ([#10916](https://github.com/rapidsai/cudf/pull/10916)) [@PointKernel](https://github.com/PointKernel)
- update cudfjni to 22.08.0-SNAPSHOT ([#10910](https://github.com/rapidsai/cudf/pull/10910)) [@pxLi](https://github.com/pxLi)
- Improve the capture of fatal cuda error ([#10884](https://github.com/rapidsai/cudf/pull/10884)) [@sperlingxx](https://github.com/sperlingxx)
- Cleanup regex compiler operators and operands source ([#10879](https://github.com/rapidsai/cudf/pull/10879)) [@davidwendt](https://github.com/davidwendt)
- Buffer: make `.ptr` read-only ([#10872](https://github.com/rapidsai/cudf/pull/10872)) [@madsbk](https://github.com/madsbk)
- Configurable NaN handling in device_row_comparators ([#10870](https://github.com/rapidsai/cudf/pull/10870)) [@rwlee](https://github.com/rwlee)
- Register `cudf.core.groupby.Grouper` objects to dask `grouper_dispatch` ([#10838](https://github.com/rapidsai/cudf/pull/10838)) [@brandon-b-miller](https://github.com/brandon-b-miller)
- Upgrade to `arrow-8` ([#10816](https://github.com/rapidsai/cudf/pull/10816)) [@galipremsagar](https://github.com/galipremsagar)
- Remove _getattr_ method in RangeIndex class ([#10538](https://github.com/rapidsai/cudf/pull/10538)) [@skirui-source](https://github.com/skirui-source)
- Adding bins to value counts ([#8247](https://github.com/rapidsai/cudf/pull/8247)) [@marlenezw](https://github.com/marlenezw)
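A minimal sketch of `Series.between` from [#11051](https://github.com/rapidsai/cudf/pull/11051) above; toy data, and the `inclusive` keyword is assumed to mirror the pandas signature:

```python
# Minimal sketch of Series.between (#11051); inclusive values assumed to
# follow pandas ("both", "neither", "left", "right").
import cudf

s = cudf.Series([1, 4, 7, 10])
print(s.between(4, 10, inclusive="both"))     # [False, True, True, True]
print(s.between(4, 10, inclusive="neither"))  # [False, False, True, False]
```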

# cuDF 22.06.00 (7 Jun 2022)

## 🚨 Breaking Changes

- Enable Zstandard decompression only when all nvcomp integrations are enabled ([#10944](https://github.com/rapidsai/cudf/pull/10944)) [@vuule](https://github.com/vuule)
- Rename `sliced_child` to `get_sliced_child`. ([#10885](https://github.com/rapidsai/cudf/pull/10885)) [@bdice](https://github.com/bdice)
- Add parameters to control page size in Parquet writer ([#10882](https://github.com/rapidsai/cudf/pull/10882)) [@etseidl](https://github.com/etseidl)
- Make cudf::test::expect_columns_equal() to fail when comparing unsanitary lists. ([#10880](https://github.com/rapidsai/cudf/pull/10880)) [@nvdbaranec](https://github.com/nvdbaranec)
- Cleanup regex compiler fixed quantifiers source ([#10843](https://github.com/rapidsai/cudf/pull/10843)) [@davidwendt](https://github.com/davidwendt)
- Refactor `cudf::contains`, renaming and switching parameters role ([#10802](https://github.com/rapidsai/cudf/pull/10802)) [@ttnghia](https://github.com/ttnghia)
- Generic serialization of all column types ([#10784](https://github.com/rapidsai/cudf/pull/10784)) [@wence-](https://github.com/wence-)
- Return per-file metadata from readers ([#10782](https://github.com/rapidsai/cudf/pull/10782)) [@vuule](https://github.com/vuule)
- HostColumnVectoreCore#isNull should return true for out-of-range rows ([#10779](https://github.com/rapidsai/cudf/pull/10779)) [@gerashegalov](https://github.com/gerashegalov)
- Update `groupby::hash` to use new row operators for keys ([#10770](https://github.com/rapidsai/cudf/pull/10770)) [@PointKernel](https://github.com/PointKernel)
- update mangle_dupe_cols behavior in csv reader to match pandas 1.4.0 behavior ([#10749](https://github.com/rapidsai/cudf/pull/10749)) [@karthikeyann](https://github.com/karthikeyann)
- Rename CUDA_TRY macro to CUDF_CUDA_TRY, rename CHECK_CUDA macro to CUDF_CHECK_CUDA. ([#10589](https://github.com/rapidsai/cudf/pull/10589)) [@bdice](https://github.com/bdice)
- Upgrade `cudf` to support `pandas` 1.4.x versions ([#10584](https://github.com/rapidsai/cudf/pull/10584)) [@galipremsagar](https://github.com/galipremsagar)
- Move binop methods from Frame to IndexedFrame and standardize the docstring ([#10576](https://github.com/rapidsai/cudf/pull/10576)) [@vyasr](https://github.com/vyasr)
- Add default= kwarg to .list.get() accessor method ([#10547](https://github.com/rapidsai/cudf/pull/10547)) [@shwina](https://github.com/shwina) *(example below)*
- Remove deprecated `decimal_cols_as_float` in the ORC reader ([#10515](https://github.com/rapidsai/cudf/pull/10515)) [@vuule](https://github.com/vuule)
- Support nvComp 2.3 if local, otherwise use nvcomp 2.2 ([#10513](https://github.com/rapidsai/cudf/pull/10513)) [@robertmaynard](https://github.com/robertmaynard)
- Fix findall_record to return empty list for no matches ([#10491](https://github.com/rapidsai/cudf/pull/10491)) [@davidwendt](https://github.com/davidwendt)
- Namespace/Docstring Fixes for Reduction ([#10471](https://github.com/rapidsai/cudf/pull/10471)) [@isVoid](https://github.com/isVoid)
- Additional refactoring of hash functions ([#10462](https://github.com/rapidsai/cudf/pull/10462)) [@bdice](https://github.com/bdice)
- Fix default value of str.split expand parameter. ([#10457](https://github.com/rapidsai/cudf/pull/10457)) [@bdice](https://github.com/bdice)
- Remove deprecated code. ([#10450](https://github.com/rapidsai/cudf/pull/10450)) [@vyasr](https://github.com/vyasr)
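A minimal sketch of the `default=` kwarg added to the list accessor in [#10547](https://github.com/rapidsai/cudf/pull/10547) above (toy data):

```python
# Minimal sketch of list.get(default=...) from #10547: out-of-bounds indices
# yield the default value instead of an error.
import cudf

s = cudf.Series([[1, 2], [3], []])
print(s.list.get(1, default=-1))  # [2, -1, -1]
```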
([#10750](https://github.com/rapidsai/cudf/pull/10750)) [@nvdbaranec](https://github.com/nvdbaranec) -- update mangle_dupe_cols behavior in csv reader to match pandas 1.4.0 behavior ([#10749](https://github.com/rapidsai/cudf/pull/10749)) [@karthikeyann](https://github.com/karthikeyann) -- Fix `cupy` function in notebook ([#10737](https://github.com/rapidsai/cudf/pull/10737)) [@ajschmidt8](https://github.com/ajschmidt8) -- Fix `fillna` to retain `columns` when it is `MultiIndex` ([#10729](https://github.com/rapidsai/cudf/pull/10729)) [@galipremsagar](https://github.com/galipremsagar) -- Fix scatter for all-empty-string column case ([#10724](https://github.com/rapidsai/cudf/pull/10724)) [@davidwendt](https://github.com/davidwendt) -- Retain series name in `Series.apply` ([#10716](https://github.com/rapidsai/cudf/pull/10716)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Correct build dir `cudf-config` dependency issues for static builds ([#10704](https://github.com/rapidsai/cudf/pull/10704)) [@robertmaynard](https://github.com/robertmaynard) -- Fix list of testing requirements in setup.py. ([#10678](https://github.com/rapidsai/cudf/pull/10678)) [@bdice](https://github.com/bdice) -- Fix rounding to zero error in stod on very small float numbers ([#10672](https://github.com/rapidsai/cudf/pull/10672)) [@davidwendt](https://github.com/davidwendt) -- cuco isn't a cudf dependency when we are built shared ([#10662](https://github.com/rapidsai/cudf/pull/10662)) [@robertmaynard](https://github.com/robertmaynard) -- Fix to_timestamps to support Z for %z format specifier ([#10617](https://github.com/rapidsai/cudf/pull/10617)) [@davidwendt](https://github.com/davidwendt) -- Verify compression type in Parquet reader ([#10610](https://github.com/rapidsai/cudf/pull/10610)) [@vuule](https://github.com/vuule) -- Fix struct row comparator's exception on empty structs ([#10604](https://github.com/rapidsai/cudf/pull/10604)) [@sperlingxx](https://github.com/sperlingxx) -- Fix strings strip() to accept only str Scalar for to_strip parameter ([#10597](https://github.com/rapidsai/cudf/pull/10597)) [@davidwendt](https://github.com/davidwendt) -- Fix has_atomic_support check in can_use_hash_groupby() ([#10588](https://github.com/rapidsai/cudf/pull/10588)) [@jbrennan333](https://github.com/jbrennan333) -- Revert Thrust 1.16 to Thrust 1.15 ([#10586](https://github.com/rapidsai/cudf/pull/10586)) [@bdice](https://github.com/bdice) -- Fix missing RMM_STATIC_CUDART define when compiling JNI with static CUDA runtime ([#10585](https://github.com/rapidsai/cudf/pull/10585)) [@jlowe](https://github.com/jlowe) -- pin more cmake versions ([#10570](https://github.com/rapidsai/cudf/pull/10570)) [@robertmaynard](https://github.com/robertmaynard) -- Re-enable Build Metrics Report ([#10562](https://github.com/rapidsai/cudf/pull/10562)) [@davidwendt](https://github.com/davidwendt) -- Remove statically linked CUDA runtime check in Java build ([#10532](https://github.com/rapidsai/cudf/pull/10532)) [@jlowe](https://github.com/jlowe) -- Fix temp data cleanup in `test_text.py` ([#10524](https://github.com/rapidsai/cudf/pull/10524)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Update 
pre-commit to run black 22.3.0 ([#10523](https://github.com/rapidsai/cudf/pull/10523)) [@vyasr](https://github.com/vyasr) -- Remove deprecated `decimal_cols_as_float` in the ORC reader ([#10515](https://github.com/rapidsai/cudf/pull/10515)) [@vuule](https://github.com/vuule) -- Fix findall_record to return empty list for no matches ([#10491](https://github.com/rapidsai/cudf/pull/10491)) [@davidwendt](https://github.com/davidwendt) -- Allow users to specify data types for a subset of columns in `read_csv` ([#10484](https://github.com/rapidsai/cudf/pull/10484)) [@vuule](https://github.com/vuule) -- Fix default value of str.split expand parameter. ([#10457](https://github.com/rapidsai/cudf/pull/10457)) [@bdice](https://github.com/bdice) -- Improve coverage of dask-cudf's groupby aggregation, add tests for `dropna` support ([#10449](https://github.com/rapidsai/cudf/pull/10449)) [@charlesbluca](https://github.com/charlesbluca) -- Allow string aggs for `dask_cudf.CudfDataFrameGroupBy.aggregate` ([#10222](https://github.com/rapidsai/cudf/pull/10222)) [@charlesbluca](https://github.com/charlesbluca) -- In-place updates with loc or iloc don't work correctly when the LHS has more than one column ([#9918](https://github.com/rapidsai/cudf/pull/9918)) [@skirui-source](https://github.com/skirui-source) - -## 📖 Documentation - -- Clarify append deprecation notice. ([#10930](https://github.com/rapidsai/cudf/pull/10930)) [@bdice](https://github.com/bdice) -- Use full name of GPUDirect Storage SDK in docs ([#10904](https://github.com/rapidsai/cudf/pull/10904)) [@vuule](https://github.com/vuule) -- Update Dask + Pandas to Dask + cuDF path ([#10897](https://github.com/rapidsai/cudf/pull/10897)) [@miguelusque](https://github.com/miguelusque) -- Add missing documentation in cudf/types.hpp ([#10895](https://github.com/rapidsai/cudf/pull/10895)) [@karthikeyann](https://github.com/karthikeyann) -- Add strong index iterator docs. ([#10888](https://github.com/rapidsai/cudf/pull/10888)) [@bdice](https://github.com/bdice) -- spell check fixes ([#10865](https://github.com/rapidsai/cudf/pull/10865)) [@karthikeyann](https://github.com/karthikeyann) -- Add missing documentation in scalar/ headers ([#10861](https://github.com/rapidsai/cudf/pull/10861)) [@karthikeyann](https://github.com/karthikeyann) -- Remove typo in ngram documentation ([#10859](https://github.com/rapidsai/cudf/pull/10859)) [@miguelusque](https://github.com/miguelusque) -- fix doxygen warnings ([#10842](https://github.com/rapidsai/cudf/pull/10842)) [@karthikeyann](https://github.com/karthikeyann) -- Add a library_design.md file documenting the core Python data structures and their relationship ([#10817](https://github.com/rapidsai/cudf/pull/10817)) [@vyasr](https://github.com/vyasr) -- Add NumPy to intersphinx references. 
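A minimal sketch of the per-column dtype selection from [#10484](https://github.com/rapidsai/cudf/pull/10484) above (the sample CSV and dtype choice are illustrative assumptions):

```python
import cudf
from io import StringIO

csv = StringIO("a,b,c\n1,2,x\n3,4,y\n")
# Only column "a" is pinned to int8; "b" and "c" are assumed to fall
# back to normal type inference, per the PR title.
df = cudf.read_csv(csv, dtype={"a": "int8"})
print(df.dtypes)
```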
-
-## 📖 Documentation
-
-- Clarify append deprecation notice. ([#10930](https://github.com/rapidsai/cudf/pull/10930)) [@bdice](https://github.com/bdice)
-- Use full name of GPUDirect Storage SDK in docs ([#10904](https://github.com/rapidsai/cudf/pull/10904)) [@vuule](https://github.com/vuule)
-- Update Dask + Pandas to Dask + cuDF path ([#10897](https://github.com/rapidsai/cudf/pull/10897)) [@miguelusque](https://github.com/miguelusque)
-- Add missing documentation in cudf/types.hpp ([#10895](https://github.com/rapidsai/cudf/pull/10895)) [@karthikeyann](https://github.com/karthikeyann)
-- Add strong index iterator docs. ([#10888](https://github.com/rapidsai/cudf/pull/10888)) [@bdice](https://github.com/bdice)
-- spell check fixes ([#10865](https://github.com/rapidsai/cudf/pull/10865)) [@karthikeyann](https://github.com/karthikeyann)
-- Add missing documentation in scalar/ headers ([#10861](https://github.com/rapidsai/cudf/pull/10861)) [@karthikeyann](https://github.com/karthikeyann)
-- Remove typo in ngram documentation ([#10859](https://github.com/rapidsai/cudf/pull/10859)) [@miguelusque](https://github.com/miguelusque)
-- fix doxygen warnings ([#10842](https://github.com/rapidsai/cudf/pull/10842)) [@karthikeyann](https://github.com/karthikeyann)
-- Add a library_design.md file documenting the core Python data structures and their relationship ([#10817](https://github.com/rapidsai/cudf/pull/10817)) [@vyasr](https://github.com/vyasr)
-- Add NumPy to intersphinx references. ([#10809](https://github.com/rapidsai/cudf/pull/10809)) [@bdice](https://github.com/bdice)
-- Add a section to the docs that compares cuDF with Pandas ([#10796](https://github.com/rapidsai/cudf/pull/10796)) [@shwina](https://github.com/shwina)
-- Mention 2 cpp-reviewer requirement in pull request template ([#10768](https://github.com/rapidsai/cudf/pull/10768)) [@davidwendt](https://github.com/davidwendt)
-- Enable pydocstyle for all packages. ([#10759](https://github.com/rapidsai/cudf/pull/10759)) [@bdice](https://github.com/bdice)
-- Enable pydocstyle rules involving quotes ([#10748](https://github.com/rapidsai/cudf/pull/10748)) [@vyasr](https://github.com/vyasr)
-- Revise 10 minutes notebook. ([#10738](https://github.com/rapidsai/cudf/pull/10738)) [@bdice](https://github.com/bdice)
-- Reorganize cuDF Python docs ([#10691](https://github.com/rapidsai/cudf/pull/10691)) [@shwina](https://github.com/shwina)
-- Fix sphinx/jupyter heading issue in UDF notebook ([#10690](https://github.com/rapidsai/cudf/pull/10690)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Migrated user guide notebooks to MyST-NB and added sphinx extension ([#10685](https://github.com/rapidsai/cudf/pull/10685)) [@mmccarty](https://github.com/mmccarty)
-- add data generation to benchmark documentation ([#10677](https://github.com/rapidsai/cudf/pull/10677)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix some docs build warnings ([#10674](https://github.com/rapidsai/cudf/pull/10674)) [@galipremsagar](https://github.com/galipremsagar)
-- Update UDF notebook in User Guide. ([#10668](https://github.com/rapidsai/cudf/pull/10668)) [@bdice](https://github.com/bdice)
-- Improve User Guide docs ([#10663](https://github.com/rapidsai/cudf/pull/10663)) [@bdice](https://github.com/bdice)
-- Fix some docstrings formatting ([#10660](https://github.com/rapidsai/cudf/pull/10660)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove implementation details from `apply` docstrings ([#10651](https://github.com/rapidsai/cudf/pull/10651)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Revise CONTRIBUTING.md ([#10644](https://github.com/rapidsai/cudf/pull/10644)) [@bdice](https://github.com/bdice)
-- Add missing APIs to documentation. ([#10643](https://github.com/rapidsai/cudf/pull/10643)) [@bdice](https://github.com/bdice)
-- Use cudf.read_json as documented API name. ([#10640](https://github.com/rapidsai/cudf/pull/10640)) [@bdice](https://github.com/bdice)
-- Fix docstring section headings. ([#10639](https://github.com/rapidsai/cudf/pull/10639)) [@bdice](https://github.com/bdice)
-- Document cudf.read_text and cudf.read_avro. ([#10638](https://github.com/rapidsai/cudf/pull/10638)) [@bdice](https://github.com/bdice)
-- Fix type-o in docstring for json_reader_options ([#10627](https://github.com/rapidsai/cudf/pull/10627)) [@dagardner-nv](https://github.com/dagardner-nv)
-- Update guide to UDFs with notes about `Series.applymap` deprecation and related changes ([#10607](https://github.com/rapidsai/cudf/pull/10607)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Fix doxygen Modules page for cudf::lists::sequences ([#10561](https://github.com/rapidsai/cudf/pull/10561)) [@davidwendt](https://github.com/davidwendt)
-- Add Replace Backreferences section to Regex Features page ([#10560](https://github.com/rapidsai/cudf/pull/10560)) [@davidwendt](https://github.com/davidwendt)
-- Introduce deprecation policy to developer guide. ([#10252](https://github.com/rapidsai/cudf/pull/10252)) [@vyasr](https://github.com/vyasr)
-
-## 🚀 New Features
-
-- Enable Zstandard decompression only when all nvcomp integrations are enabled ([#10944](https://github.com/rapidsai/cudf/pull/10944)) [@vuule](https://github.com/vuule)
-- Handle nested types in cudf::concatenate_rows() ([#10890](https://github.com/rapidsai/cudf/pull/10890)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Strong index types for equality comparator ([#10883](https://github.com/rapidsai/cudf/pull/10883)) [@ttnghia](https://github.com/ttnghia)
-- Add parameters to control page size in Parquet writer ([#10882](https://github.com/rapidsai/cudf/pull/10882)) [@etseidl](https://github.com/etseidl)
-- Support for Zstandard decompression in ORC reader ([#10873](https://github.com/rapidsai/cudf/pull/10873)) [@vuule](https://github.com/vuule)
-- Use pre-built nvcomp 2.3 binaries by default ([#10851](https://github.com/rapidsai/cudf/pull/10851)) [@robertmaynard](https://github.com/robertmaynard)
-- Support for Zstandard decompression in Parquet reader ([#10847](https://github.com/rapidsai/cudf/pull/10847)) [@vuule](https://github.com/vuule)
-- Add JNI support for apply_boolean_mask ([#10812](https://github.com/rapidsai/cudf/pull/10812)) [@res-life](https://github.com/res-life)
-- Segmented Min/Max for Fixed Point Types ([#10794](https://github.com/rapidsai/cudf/pull/10794)) [@isVoid](https://github.com/isVoid)
-- Return per-file metadata from readers ([#10782](https://github.com/rapidsai/cudf/pull/10782)) [@vuule](https://github.com/vuule)
-- Segmented `apply_boolean_mask` for `LIST` columns ([#10773](https://github.com/rapidsai/cudf/pull/10773)) [@mythrocks](https://github.com/mythrocks)
-- Update `groupby::hash` to use new row operators for keys ([#10770](https://github.com/rapidsai/cudf/pull/10770)) [@PointKernel](https://github.com/PointKernel)
-- Support purging non-empty null elements from LIST/STRING columns ([#10701](https://github.com/rapidsai/cudf/pull/10701)) [@mythrocks](https://github.com/mythrocks)
-- Add `detail::hash_join` ([#10695](https://github.com/rapidsai/cudf/pull/10695)) [@PointKernel](https://github.com/PointKernel)
-- Persist string statistics data across multiple calls to orc chunked write ([#10694](https://github.com/rapidsai/cudf/pull/10694)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Add `.list.astype()` to cast list leaves to specified dtype ([#10693](https://github.com/rapidsai/cudf/pull/10693)) [@shwina](https://github.com/shwina)
-- JNI: Add generateListOffsets API ([#10683](https://github.com/rapidsai/cudf/pull/10683)) [@sperlingxx](https://github.com/sperlingxx)
-- Support `args` in groupby apply ([#10682](https://github.com/rapidsai/cudf/pull/10682)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Enable segmented_gather in Java package ([#10669](https://github.com/rapidsai/cudf/pull/10669)) [@sperlingxx](https://github.com/sperlingxx)
-- Add row hasher with nested column support ([#10641](https://github.com/rapidsai/cudf/pull/10641)) [@devavret](https://github.com/devavret)
-- Add support for numeric_only in DataFrame._reduce ([#10629](https://github.com/rapidsai/cudf/pull/10629)) [@martinfalisse](https://github.com/martinfalisse)
-- First step toward statistics in ORC files with chunked writes ([#10567](https://github.com/rapidsai/cudf/pull/10567)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Add support for struct columns to the random table generator ([#10566](https://github.com/rapidsai/cudf/pull/10566)) [@vuule](https://github.com/vuule)
-- Enable passing a sequence for the `index` argument to `.list.get()` ([#10564](https://github.com/rapidsai/cudf/pull/10564)) [@shwina](https://github.com/shwina)
-- Add python bindings for cudf::list::index_of ([#10549](https://github.com/rapidsai/cudf/pull/10549)) [@ChrisJar](https://github.com/ChrisJar)
-- Add default= kwarg to .list.get() accessor method ([#10547](https://github.com/rapidsai/cudf/pull/10547)) [@shwina](https://github.com/shwina)
-- Add `cudf.DataFrame.applymap` ([#10542](https://github.com/rapidsai/cudf/pull/10542)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Support nvComp 2.3 if local, otherwise use nvcomp 2.2 ([#10513](https://github.com/rapidsai/cudf/pull/10513)) [@robertmaynard](https://github.com/robertmaynard)
-- Add column field ID control in parquet writer ([#10504](https://github.com/rapidsai/cudf/pull/10504)) [@PointKernel](https://github.com/PointKernel)
-- Deprecate `Series.applymap` ([#10497](https://github.com/rapidsai/cudf/pull/10497)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add option to drop cache in cuIO benchmarks ([#10488](https://github.com/rapidsai/cudf/pull/10488)) [@vuule](https://github.com/vuule)
-- move benchmark input generation in device in reduction nvbench ([#10486](https://github.com/rapidsai/cudf/pull/10486)) [@karthikeyann](https://github.com/karthikeyann)
-- Support Segmented Min/Max Reduction on String Type ([#10447](https://github.com/rapidsai/cudf/pull/10447)) [@isVoid](https://github.com/isVoid)
-- List element Equality comparator ([#10289](https://github.com/rapidsai/cudf/pull/10289)) [@devavret](https://github.com/devavret)
-- Implement all methods of groupby rank aggregation in libcudf, python ([#9569](https://github.com/rapidsai/cudf/pull/9569)) [@karthikeyann](https://github.com/karthikeyann)
-- Implement DataFrame.eval using libcudf ASTs ([#8022](https://github.com/rapidsai/cudf/pull/8022)) [@vyasr](https://github.com/vyasr)
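A minimal sketch of `DataFrame.eval` from [#8022](https://github.com/rapidsai/cudf/pull/8022), the last entry above (the data is illustrative, and pandas-like expression semantics are assumed):

```python
import cudf

df = cudf.DataFrame({"a": [1, 2, 3], "b": [10, 20, 30]})
# The expression string is compiled to a libcudf AST and evaluated
# on the GPU rather than row by row in Python.
df["c"] = df.eval("a + b")
print(df)
```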
-
-## 🛠️ Improvements
-
-- Use `conda` compilers in env file ([#10915](https://github.com/rapidsai/cudf/pull/10915)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove C style artifacts in cuIO ([#10886](https://github.com/rapidsai/cudf/pull/10886)) [@vuule](https://github.com/vuule)
-- Rename `sliced_child` to `get_sliced_child`. ([#10885](https://github.com/rapidsai/cudf/pull/10885)) [@bdice](https://github.com/bdice)
-- Replace defaulted stream value for libcudf APIs that use NVCOMP ([#10877](https://github.com/rapidsai/cudf/pull/10877)) [@jbrennan333](https://github.com/jbrennan333)
-- Add more unit tests for `cudf::distinct` for nested types with sliced input ([#10860](https://github.com/rapidsai/cudf/pull/10860)) [@ttnghia](https://github.com/ttnghia)
-- Changing `list_view.cuh` to `list_view.hpp` ([#10854](https://github.com/rapidsai/cudf/pull/10854)) [@ttnghia](https://github.com/ttnghia)
-- More error checking in `from_dlpack` ([#10850](https://github.com/rapidsai/cudf/pull/10850)) [@wence-](https://github.com/wence-)
-- Cleanup regex compiler fixed quantifiers source ([#10843](https://github.com/rapidsai/cudf/pull/10843)) [@davidwendt](https://github.com/davidwendt)
-- Adds the JNI call for Cuda.deviceSynchronize ([#10839](https://github.com/rapidsai/cudf/pull/10839)) [@abellina](https://github.com/abellina)
-- Add missing cuda-python dependency to cudf ([#10833](https://github.com/rapidsai/cudf/pull/10833)) [@bdice](https://github.com/bdice)
-- Change std::string parameters in cudf::strings APIs to std::string_view ([#10832](https://github.com/rapidsai/cudf/pull/10832)) [@davidwendt](https://github.com/davidwendt)
-- Split up search.cu to improve compile time ([#10831](https://github.com/rapidsai/cudf/pull/10831)) [@davidwendt](https://github.com/davidwendt)
-- Add tests for null scalar binaryops ([#10828](https://github.com/rapidsai/cudf/pull/10828)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Cleanup regex compile optimize functions ([#10825](https://github.com/rapidsai/cudf/pull/10825)) [@davidwendt](https://github.com/davidwendt)
-- Use `ThreadedMotoServer` instead of `subprocess` in spinning up `s3` server ([#10822](https://github.com/rapidsai/cudf/pull/10822)) [@galipremsagar](https://github.com/galipremsagar)
-- Import `NA` from `missing` rather than using `cudf.NA` everywhere ([#10821](https://github.com/rapidsai/cudf/pull/10821)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Refactor regex builtin character-class identifiers ([#10814](https://github.com/rapidsai/cudf/pull/10814)) [@davidwendt](https://github.com/davidwendt)
-- Change pattern parameter for regex APIs from std::string to std::string_view ([#10810](https://github.com/rapidsai/cudf/pull/10810)) [@davidwendt](https://github.com/davidwendt)
-- Make the JNI API to get list offsets as a view public. ([#10807](https://github.com/rapidsai/cudf/pull/10807)) [@revans2](https://github.com/revans2)
-- Add cudf JNI docker build github action ([#10806](https://github.com/rapidsai/cudf/pull/10806)) [@pxLi](https://github.com/pxLi)
-- Removed `mr` parameter from inplace bitmask operations ([#10805](https://github.com/rapidsai/cudf/pull/10805)) [@AtlantaPepsi](https://github.com/AtlantaPepsi)
-- Refactor `cudf::contains`, renaming and switching parameters role ([#10802](https://github.com/rapidsai/cudf/pull/10802)) [@ttnghia](https://github.com/ttnghia)
-- Handle closed property in IntervalDtype.from_pandas ([#10798](https://github.com/rapidsai/cudf/pull/10798)) [@wence-](https://github.com/wence-)
-- Return weak orderings from `device_row_comparator`. ([#10793](https://github.com/rapidsai/cudf/pull/10793)) [@rwlee](https://github.com/rwlee)
-- Rework `Scalar` imports ([#10791](https://github.com/rapidsai/cudf/pull/10791)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Enable ccache for cudfjni build in Docker ([#10790](https://github.com/rapidsai/cudf/pull/10790)) [@gerashegalov](https://github.com/gerashegalov)
-- Generic serialization of all column types ([#10784](https://github.com/rapidsai/cudf/pull/10784)) [@wence-](https://github.com/wence-)
-- simplifying skiprows test in test_orc.py ([#10783](https://github.com/rapidsai/cudf/pull/10783)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Use column_views instead of column_device_views in binary operations. ([#10780](https://github.com/rapidsai/cudf/pull/10780)) [@bdice](https://github.com/bdice)
-- Add struct utility functions. ([#10776](https://github.com/rapidsai/cudf/pull/10776)) [@bdice](https://github.com/bdice)
-- Add multiple rows to subword tokenizer benchmark ([#10767](https://github.com/rapidsai/cudf/pull/10767)) [@davidwendt](https://github.com/davidwendt)
-- Refactor host decompression in ORC reader ([#10764](https://github.com/rapidsai/cudf/pull/10764)) [@vuule](https://github.com/vuule)
-- Flush output streams before creating a process to drop caches ([#10762](https://github.com/rapidsai/cudf/pull/10762)) [@vuule](https://github.com/vuule)
-- Refactor binaryop/compiled/util.cpp ([#10756](https://github.com/rapidsai/cudf/pull/10756)) [@bdice](https://github.com/bdice)
-- Use warp per string for long strings in cudf::strings::contains() ([#10739](https://github.com/rapidsai/cudf/pull/10739)) [@davidwendt](https://github.com/davidwendt)
-- Use generator expressions in any/all functions. ([#10736](https://github.com/rapidsai/cudf/pull/10736)) [@bdice](https://github.com/bdice)
-- Use canonical "magic methods" (replace `x.__repr__()` with `repr(x)`). ([#10735](https://github.com/rapidsai/cudf/pull/10735)) [@bdice](https://github.com/bdice)
-- Improve use of isinstance. ([#10734](https://github.com/rapidsai/cudf/pull/10734)) [@bdice](https://github.com/bdice)
-- Rename tests from multiIndex to multiindex. ([#10732](https://github.com/rapidsai/cudf/pull/10732)) [@bdice](https://github.com/bdice)
-- Two-table comparators with strong index types ([#10730](https://github.com/rapidsai/cudf/pull/10730)) [@bdice](https://github.com/bdice)
-- Replace std::make_pair with std::pair (C++17 CTAD) ([#10727](https://github.com/rapidsai/cudf/pull/10727)) [@karthikeyann](https://github.com/karthikeyann)
-- Use structured bindings instead of std::tie ([#10726](https://github.com/rapidsai/cudf/pull/10726)) [@karthikeyann](https://github.com/karthikeyann)
-- Missing `f` prefix on f-strings fix ([#10721](https://github.com/rapidsai/cudf/pull/10721)) [@code-review-doctor](https://github.com/code-review-doctor)
-- Add `max_file_size` parameter to chunked parquet dataset writer ([#10718](https://github.com/rapidsai/cudf/pull/10718)) [@galipremsagar](https://github.com/galipremsagar)
-- Deprecate `merge_sorted`, change dask cudf usage to internal method ([#10713](https://github.com/rapidsai/cudf/pull/10713)) [@isVoid](https://github.com/isVoid)
-- Prepare dask_cudf test_parquet.py for upcoming API changes ([#10709](https://github.com/rapidsai/cudf/pull/10709)) [@rjzamora](https://github.com/rjzamora)
-- Remove or simplify various utility functions ([#10705](https://github.com/rapidsai/cudf/pull/10705)) [@vyasr](https://github.com/vyasr)
-- Allow building arrow with parquet and not python ([#10702](https://github.com/rapidsai/cudf/pull/10702)) [@revans2](https://github.com/revans2)
-- Partial cuIO GPU decompression refactor ([#10699](https://github.com/rapidsai/cudf/pull/10699)) [@vuule](https://github.com/vuule)
-- Cython API refactor: `merge.pyx` ([#10698](https://github.com/rapidsai/cudf/pull/10698)) [@isVoid](https://github.com/isVoid)
-- Fix random string data length to become variable ([#10697](https://github.com/rapidsai/cudf/pull/10697)) [@galipremsagar](https://github.com/galipremsagar)
-- Add bindings for index_of with column search key ([#10696](https://github.com/rapidsai/cudf/pull/10696)) [@ChrisJar](https://github.com/ChrisJar)
-- Deprecate index merging ([#10689](https://github.com/rapidsai/cudf/pull/10689)) [@vyasr](https://github.com/vyasr)
-- Remove cudf::strings::string namespace ([#10684](https://github.com/rapidsai/cudf/pull/10684)) [@davidwendt](https://github.com/davidwendt)
-- Standardize imports. ([#10680](https://github.com/rapidsai/cudf/pull/10680)) [@bdice](https://github.com/bdice)
-- Standardize usage of collections.abc. ([#10679](https://github.com/rapidsai/cudf/pull/10679)) [@bdice](https://github.com/bdice)
-- Cython API Refactor: `transpose.pyx`, `sort.pyx` ([#10675](https://github.com/rapidsai/cudf/pull/10675)) [@isVoid](https://github.com/isVoid)
-- Add device_memory_resource parameter to create_string_vector_from_column ([#10673](https://github.com/rapidsai/cudf/pull/10673)) [@davidwendt](https://github.com/davidwendt)
-- Split up mixed-join kernels source files ([#10671](https://github.com/rapidsai/cudf/pull/10671)) [@davidwendt](https://github.com/davidwendt)
-- Use `std::filesystem` for temporary directory location and deletion ([#10664](https://github.com/rapidsai/cudf/pull/10664)) [@vuule](https://github.com/vuule)
-- cleanup benchmark includes ([#10661](https://github.com/rapidsai/cudf/pull/10661)) [@karthikeyann](https://github.com/karthikeyann)
-- Use upstream clang-format pre-commit hook. ([#10659](https://github.com/rapidsai/cudf/pull/10659)) [@bdice](https://github.com/bdice)
-- Clean up C++ includes to use <> instead of "". ([#10658](https://github.com/rapidsai/cudf/pull/10658)) [@bdice](https://github.com/bdice)
-- Handle RuntimeError thrown by CUDA Python in `validate_setup` ([#10653](https://github.com/rapidsai/cudf/pull/10653)) [@shwina](https://github.com/shwina)
-- Rework JNI CMake to leverage rapids_find_package ([#10649](https://github.com/rapidsai/cudf/pull/10649)) [@jlowe](https://github.com/jlowe)
-- Use conda to build python packages during GPU tests ([#10648](https://github.com/rapidsai/cudf/pull/10648)) [@Ethyling](https://github.com/Ethyling)
-- Deprecate various functions that don't need to be defined for Index. ([#10647](https://github.com/rapidsai/cudf/pull/10647)) [@vyasr](https://github.com/vyasr)
-- Update pinning to allow newer CMake versions. ([#10646](https://github.com/rapidsai/cudf/pull/10646)) [@vyasr](https://github.com/vyasr)
-- Bump hadoop-common from 3.1.4 to 3.2.3 in /java ([#10645](https://github.com/rapidsai/cudf/pull/10645)) [@dependabot[bot]](https://github.com/dependabot[bot])
-- Remove `concurrent_unordered_multimap`. ([#10642](https://github.com/rapidsai/cudf/pull/10642)) [@bdice](https://github.com/bdice)
-- Improve parquet dictionary encoding ([#10635](https://github.com/rapidsai/cudf/pull/10635)) [@PointKernel](https://github.com/PointKernel)
-- Improve cudf::cuda_error ([#10630](https://github.com/rapidsai/cudf/pull/10630)) [@sperlingxx](https://github.com/sperlingxx)
-- Add support for null and non-numeric types in Series.diff and DataFrame.diff ([#10625](https://github.com/rapidsai/cudf/pull/10625)) [@Matt711](https://github.com/Matt711)
-- Branch 22.06 merge 22.04 ([#10624](https://github.com/rapidsai/cudf/pull/10624)) [@vyasr](https://github.com/vyasr)
-- Unpin `dask` & `distributed` for development ([#10623](https://github.com/rapidsai/cudf/pull/10623)) [@galipremsagar](https://github.com/galipremsagar)
-- Slightly improve accuracy of stod in to_floats ([#10622](https://github.com/rapidsai/cudf/pull/10622)) [@davidwendt](https://github.com/davidwendt)
-- Allow libcudfjni to be built as a static library ([#10619](https://github.com/rapidsai/cudf/pull/10619)) [@jlowe](https://github.com/jlowe)
-- Change stack-based regex state data to use global memory ([#10600](https://github.com/rapidsai/cudf/pull/10600)) [@davidwendt](https://github.com/davidwendt)
-- Resolve Forward merging of `branch-22.04` into `branch-22.06` ([#10598](https://github.com/rapidsai/cudf/pull/10598)) [@galipremsagar](https://github.com/galipremsagar)
-- KvikIO as an alternative GDS backend ([#10593](https://github.com/rapidsai/cudf/pull/10593)) [@madsbk](https://github.com/madsbk)
-- Rename CUDA_TRY macro to CUDF_CUDA_TRY, rename CHECK_CUDA macro to CUDF_CHECK_CUDA. ([#10589](https://github.com/rapidsai/cudf/pull/10589)) [@bdice](https://github.com/bdice)
-- Upgrade `cudf` to support `pandas` 1.4.x versions ([#10584](https://github.com/rapidsai/cudf/pull/10584)) [@galipremsagar](https://github.com/galipremsagar)
-- Refactor binary ops for timedelta and datetime columns ([#10581](https://github.com/rapidsai/cudf/pull/10581)) [@vyasr](https://github.com/vyasr)
-- Refactor cudf::strings::count_re API to use count_matches utility ([#10580](https://github.com/rapidsai/cudf/pull/10580)) [@davidwendt](https://github.com/davidwendt)
-- Update `Programming Language :: Python` Versions to 3.8 & 3.9 ([#10579](https://github.com/rapidsai/cudf/pull/10579)) [@madsbk](https://github.com/madsbk)
-- Automate Java cudf jar build with statically linked dependencies ([#10578](https://github.com/rapidsai/cudf/pull/10578)) [@gerashegalov](https://github.com/gerashegalov)
-- Add patch for thrust-cub 1.16 to fix sort compile times ([#10577](https://github.com/rapidsai/cudf/pull/10577)) [@davidwendt](https://github.com/davidwendt)
-- Move binop methods from Frame to IndexedFrame and standardize the docstring ([#10576](https://github.com/rapidsai/cudf/pull/10576)) [@vyasr](https://github.com/vyasr)
-- Cleanup libcudf strings regex classes ([#10573](https://github.com/rapidsai/cudf/pull/10573)) [@davidwendt](https://github.com/davidwendt)
-- Simplify preprocessing of arguments for DataFrame binops ([#10563](https://github.com/rapidsai/cudf/pull/10563)) [@vyasr](https://github.com/vyasr)
-- Reduce kernel calls to build strings findall results ([#10559](https://github.com/rapidsai/cudf/pull/10559)) [@davidwendt](https://github.com/davidwendt)
-- Forward-merge branch-22.04 to branch-22.06 ([#10557](https://github.com/rapidsai/cudf/pull/10557)) [@bdice](https://github.com/bdice)
-- Update strings contains benchmark to measure varying match rates ([#10555](https://github.com/rapidsai/cudf/pull/10555)) [@davidwendt](https://github.com/davidwendt)
-- JNI: throw CUDA errors more specifically ([#10551](https://github.com/rapidsai/cudf/pull/10551)) [@sperlingxx](https://github.com/sperlingxx)
-- Enable building static libs ([#10545](https://github.com/rapidsai/cudf/pull/10545)) [@trxcllnt](https://github.com/trxcllnt)
-- Remove pip requirements files. ([#10543](https://github.com/rapidsai/cudf/pull/10543)) [@bdice](https://github.com/bdice)
-- Remove Click pinnings that are unnecessary after upgrading black. ([#10541](https://github.com/rapidsai/cudf/pull/10541)) [@vyasr](https://github.com/vyasr)
-- Refactor `memory_usage` to improve performance ([#10537](https://github.com/rapidsai/cudf/pull/10537)) [@galipremsagar](https://github.com/galipremsagar)
-- Adjust the valid range of group index for replace_with_backrefs ([#10530](https://github.com/rapidsai/cudf/pull/10530)) [@sperlingxx](https://github.com/sperlingxx)
-- add accidentally removed comment. ([#10526](https://github.com/rapidsai/cudf/pull/10526)) [@vyasr](https://github.com/vyasr)
-- Update conda environment. ([#10525](https://github.com/rapidsai/cudf/pull/10525)) [@vyasr](https://github.com/vyasr)
-- Remove ColumnBase.__getitem__ ([#10516](https://github.com/rapidsai/cudf/pull/10516)) [@vyasr](https://github.com/vyasr)
-- Optimize `left_semi_join` by materializing the gather mask ([#10511](https://github.com/rapidsai/cudf/pull/10511)) [@cheinger](https://github.com/cheinger)
-- Define proper binary operation APIs for columns ([#10509](https://github.com/rapidsai/cudf/pull/10509)) [@vyasr](https://github.com/vyasr)
-- Upgrade `arrow-cpp` & `pyarrow` to `7.0.0` ([#10503](https://github.com/rapidsai/cudf/pull/10503)) [@galipremsagar](https://github.com/galipremsagar)
-- Update to Thrust 1.16 ([#10489](https://github.com/rapidsai/cudf/pull/10489)) [@bdice](https://github.com/bdice)
-- Namespace/Docstring Fixes for Reduction ([#10471](https://github.com/rapidsai/cudf/pull/10471)) [@isVoid](https://github.com/isVoid)
-- Update cudfjni 22.06.0-SNAPSHOT ([#10467](https://github.com/rapidsai/cudf/pull/10467)) [@pxLi](https://github.com/pxLi)
-- Use Lists of Columns for Various Files ([#10463](https://github.com/rapidsai/cudf/pull/10463)) [@isVoid](https://github.com/isVoid)
-- Additional refactoring of hash functions ([#10462](https://github.com/rapidsai/cudf/pull/10462)) [@bdice](https://github.com/bdice)
-- Fix Series.str.findall behavior for expand=False. ([#10459](https://github.com/rapidsai/cudf/pull/10459)) [@bdice](https://github.com/bdice)
-- Remove deprecated code. ([#10450](https://github.com/rapidsai/cudf/pull/10450)) [@vyasr](https://github.com/vyasr)
-- Update cmake-format version. ([#10440](https://github.com/rapidsai/cudf/pull/10440)) [@vyasr](https://github.com/vyasr)
-- Consolidate C++ `conda` recipes and add `libcudf-tests` package ([#10326](https://github.com/rapidsai/cudf/pull/10326)) [@ajschmidt8](https://github.com/ajschmidt8)
-- Use conda compilers ([#10275](https://github.com/rapidsai/cudf/pull/10275)) [@Ethyling](https://github.com/Ethyling)
-- Add row bitmask as a `detail::hash_join` member ([#10248](https://github.com/rapidsai/cudf/pull/10248)) [@PointKernel](https://github.com/PointKernel)
-
-# cuDF 22.04.00 (6 Apr 2022)
-
-## 🚨 Breaking Changes
-
-- Drop unsupported method argument from nunique and distinct_count. ([#10411](https://github.com/rapidsai/cudf/pull/10411)) [@bdice](https://github.com/bdice) (see the example after this list)
-- Refactor stream compaction APIs ([#10370](https://github.com/rapidsai/cudf/pull/10370)) [@PointKernel](https://github.com/PointKernel)
-- Add scan_aggregation and reduce_aggregation derived types. ([#10357](https://github.com/rapidsai/cudf/pull/10357)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Avoid `decimal` type narrowing for decimal binops ([#10299](https://github.com/rapidsai/cudf/pull/10299)) [@galipremsagar](https://github.com/galipremsagar)
-- Rewrites `sample` API ([#10262](https://github.com/rapidsai/cudf/pull/10262)) [@isVoid](https://github.com/isVoid)
-- Remove probe-time null equality parameters in `cudf::hash_join` ([#10260](https://github.com/rapidsai/cudf/pull/10260)) [@PointKernel](https://github.com/PointKernel)
-- Enable proper `Index` round-tripping in `orc` reader and writer ([#10170](https://github.com/rapidsai/cudf/pull/10170)) [@galipremsagar](https://github.com/galipremsagar)
-- Add JNI for `strings::split_re` and `strings::split_record_re` ([#10139](https://github.com/rapidsai/cudf/pull/10139)) [@ttnghia](https://github.com/ttnghia)
-- Change cudf::strings::find_multiple to return a lists column ([#10134](https://github.com/rapidsai/cudf/pull/10134)) [@davidwendt](https://github.com/davidwendt)
-- Remove the option to completely disable decimal128 columns in the ORC reader ([#10127](https://github.com/rapidsai/cudf/pull/10127)) [@vuule](https://github.com/vuule)
-- Remove deprecated code ([#10124](https://github.com/rapidsai/cudf/pull/10124)) [@vyasr](https://github.com/vyasr)
-- Update gpu_utils.py to reflect current CUDA support. ([#10113](https://github.com/rapidsai/cudf/pull/10113)) [@bdice](https://github.com/bdice)
-- Optimize compaction operations ([#10030](https://github.com/rapidsai/cudf/pull/10030)) [@PointKernel](https://github.com/PointKernel)
-- Remove deprecated method Series.set_index. ([#9945](https://github.com/rapidsai/cudf/pull/9945)) [@bdice](https://github.com/bdice)
-- Add cudf::strings::findall_record API ([#9911](https://github.com/rapidsai/cudf/pull/9911)) [@davidwendt](https://github.com/davidwendt)
-- Upgrade `arrow` & `pyarrow` to `6.0.1` ([#9686](https://github.com/rapidsai/cudf/pull/9686)) [@galipremsagar](https://github.com/galipremsagar)
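To make the first breaking change above concrete, a minimal sketch of `nunique` after [#10411](https://github.com/rapidsai/cudf/pull/10411) (the data is illustrative; the `dropna=` behavior shown is an assumption based on the pandas-compatible signature):

```python
import cudf

s = cudf.Series([1, 1, 2, None])
# The pandas method= argument is no longer accepted; dropna= remains.
print(s.nunique())              # 2 distinct non-null values
print(s.nunique(dropna=False))  # 3, counting the null as a value
```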
-
-## 🐛 Bug Fixes
-
-- Fix an issue with tdigest merge aggregations. ([#10506](https://github.com/rapidsai/cudf/pull/10506)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Batch of fixes for index overflows in grid stride loops. ([#10448](https://github.com/rapidsai/cudf/pull/10448)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Update dask_cudf imports to be compatible with latest dask ([#10442](https://github.com/rapidsai/cudf/pull/10442)) [@rlratzel](https://github.com/rlratzel)
-- Fix for integer overflow in contiguous-split ([#10437](https://github.com/rapidsai/cudf/pull/10437)) [@jbrennan333](https://github.com/jbrennan333)
-- Fix has_null predicate for drop_list_duplicates on nested structs ([#10436](https://github.com/rapidsai/cudf/pull/10436)) [@sperlingxx](https://github.com/sperlingxx)
-- Fix empty reduce with List output and non-List input ([#10435](https://github.com/rapidsai/cudf/pull/10435)) [@sperlingxx](https://github.com/sperlingxx)
-- Fix `list` and `struct` meta generation issue in `dask-cudf` ([#10434](https://github.com/rapidsai/cudf/pull/10434)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix error in `cudf.to_numeric` when a `bool` input is passed ([#10431](https://github.com/rapidsai/cudf/pull/10431)) [@galipremsagar](https://github.com/galipremsagar)
-- Support cupy array in `quantile` input ([#10429](https://github.com/rapidsai/cudf/pull/10429)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix benchmarks to work with new aggregation types ([#10428](https://github.com/rapidsai/cudf/pull/10428)) [@davidwendt](https://github.com/davidwendt)
-- Fix cudf::shift to handle offset greater than column size ([#10414](https://github.com/rapidsai/cudf/pull/10414)) [@davidwendt](https://github.com/davidwendt)
-- Fix lifespan of the temporary directory that holds cuFile configuration file ([#10403](https://github.com/rapidsai/cudf/pull/10403)) [@vuule](https://github.com/vuule)
-- Fix error thrown in compiled-binaryop benchmark ([#10398](https://github.com/rapidsai/cudf/pull/10398)) [@davidwendt](https://github.com/davidwendt)
-- Limiting async allocator using alignment of 512 ([#10395](https://github.com/rapidsai/cudf/pull/10395)) [@rongou](https://github.com/rongou)
-- Include <optional> in multibyte split. ([#10385](https://github.com/rapidsai/cudf/pull/10385)) [@bdice](https://github.com/bdice)
-- Fix issue with column and scalar re-assignment ([#10377](https://github.com/rapidsai/cudf/pull/10377)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix floating point data generation in benchmarks ([#10372](https://github.com/rapidsai/cudf/pull/10372)) [@vuule](https://github.com/vuule)
-- Avoid overflow in fused_concatenate_kernel output_index ([#10344](https://github.com/rapidsai/cudf/pull/10344)) [@abellina](https://github.com/abellina)
-- Remove is_relationally_comparable for table device views ([#10342](https://github.com/rapidsai/cudf/pull/10342)) [@davidwendt](https://github.com/davidwendt)
-- Fix debug compile error in device_span to column_view conversion ([#10331](https://github.com/rapidsai/cudf/pull/10331)) [@davidwendt](https://github.com/davidwendt)
-- Add Pascal support to JCUDF transcode (row_conversion) ([#10329](https://github.com/rapidsai/cudf/pull/10329)) [@mythrocks](https://github.com/mythrocks)
-- Fix `std::bad_alloc` exception due to JIT reserving a huge buffer ([#10317](https://github.com/rapidsai/cudf/pull/10317)) [@ttnghia](https://github.com/ttnghia)
-- Fixes up the overflowed fixed-point round on nullable column ([#10316](https://github.com/rapidsai/cudf/pull/10316)) [@sperlingxx](https://github.com/sperlingxx)
-- Fix DataFrame slicing issues for empty cases ([#10310](https://github.com/rapidsai/cudf/pull/10310)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Fix documentation issues ([#10307](https://github.com/rapidsai/cudf/pull/10307)) [@ajschmidt8](https://github.com/ajschmidt8)
-- Allow Java bindings to use default decimal precisions when writing columns ([#10276](https://github.com/rapidsai/cudf/pull/10276)) [@sperlingxx](https://github.com/sperlingxx)
-- Fix incorrect slicing of GDS read/write calls ([#10274](https://github.com/rapidsai/cudf/pull/10274)) [@vuule](https://github.com/vuule)
-- Fix out-of-memory error in compiled-binaryop benchmark ([#10269](https://github.com/rapidsai/cudf/pull/10269)) [@davidwendt](https://github.com/davidwendt)
-- Add tests of reflected ufuncs and fix behavior of logical reflected ufuncs ([#10261](https://github.com/rapidsai/cudf/pull/10261)) [@vyasr](https://github.com/vyasr)
-- Remove probe-time null equality parameters in `cudf::hash_join` ([#10260](https://github.com/rapidsai/cudf/pull/10260)) [@PointKernel](https://github.com/PointKernel)
-- Fix out-of-memory error in UrlDecode benchmark ([#10258](https://github.com/rapidsai/cudf/pull/10258)) [@davidwendt](https://github.com/davidwendt)
-- Fix groupby reductions that perform operations on source type instead of target type ([#10250](https://github.com/rapidsai/cudf/pull/10250)) [@ttnghia](https://github.com/ttnghia)
-- Fix small leak in explode ([#10245](https://github.com/rapidsai/cudf/pull/10245)) [@revans2](https://github.com/revans2)
-- Yet another small JNI memory leak ([#10238](https://github.com/rapidsai/cudf/pull/10238)) [@revans2](https://github.com/revans2)
-- Fix regex octal parsing to limit to 3 characters ([#10233](https://github.com/rapidsai/cudf/pull/10233)) [@davidwendt](https://github.com/davidwendt)
-- Fix string to decimal128 conversion handling large exponents ([#10231](https://github.com/rapidsai/cudf/pull/10231)) [@davidwendt](https://github.com/davidwendt)
-- Fix JNI leak on copy to device ([#10229](https://github.com/rapidsai/cudf/pull/10229)) [@revans2](https://github.com/revans2)
-- Fix the data generator element size for decimal types ([#10225](https://github.com/rapidsai/cudf/pull/10225)) [@vuule](https://github.com/vuule)
-- Fix `decimal` metadata in parquet writer ([#10224](https://github.com/rapidsai/cudf/pull/10224)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix strings handling of hex in regex pattern ([#10220](https://github.com/rapidsai/cudf/pull/10220)) [@davidwendt](https://github.com/davidwendt)
-- Fix docs builds ([#10216](https://github.com/rapidsai/cudf/pull/10216)) [@ajschmidt8](https://github.com/ajschmidt8)
-- Fix a leftover _has_nulls change from Nullate ([#10211](https://github.com/rapidsai/cudf/pull/10211)) [@devavret](https://github.com/devavret)
-- Fix bitmask of the output for JNI of `lists::drop_list_duplicates` ([#10210](https://github.com/rapidsai/cudf/pull/10210)) [@ttnghia](https://github.com/ttnghia)
-- Fix compile error in `binaryop/compiled/util.cpp` ([#10209](https://github.com/rapidsai/cudf/pull/10209)) [@ttnghia](https://github.com/ttnghia)
-- Skip ORC and Parquet readers' benchmark cases that are not currently supported ([#10194](https://github.com/rapidsai/cudf/pull/10194)) [@vuule](https://github.com/vuule)
-- Fix JNI leak of a cudf::column_view native class. ([#10171](https://github.com/rapidsai/cudf/pull/10171)) [@revans2](https://github.com/revans2)
-- Enable proper `Index` round-tripping in `orc` reader and writer ([#10170](https://github.com/rapidsai/cudf/pull/10170)) [@galipremsagar](https://github.com/galipremsagar)
-- Convert Column Name to String Before Using Struct Column Factory ([#10156](https://github.com/rapidsai/cudf/pull/10156)) [@isVoid](https://github.com/isVoid)
-- Preserve the correct `ListDtype` while creating an identical empty column ([#10151](https://github.com/rapidsai/cudf/pull/10151)) [@galipremsagar](https://github.com/galipremsagar)
-- benchmark fixture - static object pointer fix ([#10145](https://github.com/rapidsai/cudf/pull/10145)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix UDF Caching ([#10133](https://github.com/rapidsai/cudf/pull/10133)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Raise duplicate column error in `DataFrame.rename` ([#10120](https://github.com/rapidsai/cudf/pull/10120)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix flaky memory usage test by guaranteeing array size. ([#10114](https://github.com/rapidsai/cudf/pull/10114)) [@vyasr](https://github.com/vyasr)
-- Encode values from python callback for C++ ([#10103](https://github.com/rapidsai/cudf/pull/10103)) [@jdye64](https://github.com/jdye64)
-- Add check for regex instructions causing an infinite-loop ([#10095](https://github.com/rapidsai/cudf/pull/10095)) [@davidwendt](https://github.com/davidwendt)
-- Remove metadata singleton from nvtext normalizer ([#10090](https://github.com/rapidsai/cudf/pull/10090)) [@davidwendt](https://github.com/davidwendt)
-- Column equality testing fixes ([#10011](https://github.com/rapidsai/cudf/pull/10011)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Pin libcudf runtime dependency for cudf / libcudf-kafka nightlies ([#9847](https://github.com/rapidsai/cudf/pull/9847)) [@charlesbluca](https://github.com/charlesbluca)
-
-## 📖 Documentation
-
-- Fix documentation for DataFrame.corr and Series.corr. ([#10493](https://github.com/rapidsai/cudf/pull/10493)) [@bdice](https://github.com/bdice)
-- Add `cut` to API docs ([#10479](https://github.com/rapidsai/cudf/pull/10479)) [@shwina](https://github.com/shwina) (see the example after this list)
-- Remove documentation for methods removed in #10124. ([#10366](https://github.com/rapidsai/cudf/pull/10366)) [@bdice](https://github.com/bdice)
-- Fix documentation issues ([#10306](https://github.com/rapidsai/cudf/pull/10306)) [@ajschmidt8](https://github.com/ajschmidt8)
-- Fix `fixed_point` binary operation documentation ([#10198](https://github.com/rapidsai/cudf/pull/10198)) [@codereport](https://github.com/codereport)
-- Remove cleaned up methods from docs ([#10189](https://github.com/rapidsai/cudf/pull/10189)) [@galipremsagar](https://github.com/galipremsagar)
-- Update developer guide to recommend no default stream parameter. ([#10136](https://github.com/rapidsai/cudf/pull/10136)) [@bdice](https://github.com/bdice)
-- Update benchmarking guide to use NVBench. ([#10093](https://github.com/rapidsai/cudf/pull/10093)) [@bdice](https://github.com/bdice)
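Since `cudf.cut` was added to the API docs in [#10479](https://github.com/rapidsai/cudf/pull/10479) above, a minimal sketch of it (pandas-like binning semantics are assumed; the data is illustrative):

```python
import cudf

ages = cudf.Series([1, 7, 5, 4, 6, 3])
# Bin the values into three equal-width intervals, as in pandas.cut.
print(cudf.cut(ages, bins=3))
```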
-
-## 🚀 New Features
-
-- Add StringIO support to read_text ([#10465](https://github.com/rapidsai/cudf/pull/10465)) [@cwharris](https://github.com/cwharris)
-- Add support for tdigest and merge_tdigest aggregations through cudf::reduce ([#10433](https://github.com/rapidsai/cudf/pull/10433)) [@nvdbaranec](https://github.com/nvdbaranec)
-- JNI support for Collect Ops in Reduction ([#10427](https://github.com/rapidsai/cudf/pull/10427)) [@sperlingxx](https://github.com/sperlingxx)
-- Enable read_text with dask_cudf using byte_range ([#10407](https://github.com/rapidsai/cudf/pull/10407)) [@ChrisJar](https://github.com/ChrisJar)
-- Add `cudf::stable_sort_by_key` ([#10387](https://github.com/rapidsai/cudf/pull/10387)) [@PointKernel](https://github.com/PointKernel)
-- Implement `maps_column_view` abstraction over `LIST<STRUCT<K,V>>` ([#10380](https://github.com/rapidsai/cudf/pull/10380)) [@mythrocks](https://github.com/mythrocks)
-- Support Java bindings for Avro reader ([#10373](https://github.com/rapidsai/cudf/pull/10373)) [@HaoYang670](https://github.com/HaoYang670)
-- Refactor stream compaction APIs ([#10370](https://github.com/rapidsai/cudf/pull/10370)) [@PointKernel](https://github.com/PointKernel)
-- Support collect aggregations in reduction ([#10353](https://github.com/rapidsai/cudf/pull/10353)) [@sperlingxx](https://github.com/sperlingxx)
-- Refactor array_ufunc for Index and unify across all classes ([#10346](https://github.com/rapidsai/cudf/pull/10346)) [@vyasr](https://github.com/vyasr)
-- Add JNI for extract_list_element with index column ([#10341](https://github.com/rapidsai/cudf/pull/10341)) [@firestarman](https://github.com/firestarman)
-- Support `min` and `max` operations for structs in rolling window ([#10332](https://github.com/rapidsai/cudf/pull/10332)) [@ttnghia](https://github.com/ttnghia)
-- Add device create_sequence_table for benchmarks ([#10300](https://github.com/rapidsai/cudf/pull/10300)) [@karthikeyann](https://github.com/karthikeyann)
-- Enable numpy ufuncs for DataFrame ([#10287](https://github.com/rapidsai/cudf/pull/10287)) [@vyasr](https://github.com/vyasr)
-- move input generation for json benchmark to device ([#10281](https://github.com/rapidsai/cudf/pull/10281)) [@karthikeyann](https://github.com/karthikeyann)
-- move input generation for type dispatcher benchmark to device ([#10280](https://github.com/rapidsai/cudf/pull/10280)) [@karthikeyann](https://github.com/karthikeyann)
-- move input generation for copy benchmark to device ([#10279](https://github.com/rapidsai/cudf/pull/10279)) [@karthikeyann](https://github.com/karthikeyann)
-- generate url decode benchmark input in device ([#10278](https://github.com/rapidsai/cudf/pull/10278)) [@karthikeyann](https://github.com/karthikeyann)
-- device input generation in join bench ([#10277](https://github.com/rapidsai/cudf/pull/10277)) [@karthikeyann](https://github.com/karthikeyann)
-- Add nvtext::byte_pair_encoding API ([#10270](https://github.com/rapidsai/cudf/pull/10270)) [@davidwendt](https://github.com/davidwendt)
-- Prevent internal usage of expensive APIs ([#10263](https://github.com/rapidsai/cudf/pull/10263)) [@vyasr](https://github.com/vyasr)
-- Column to JCUDF row for tables with strings ([#10235](https://github.com/rapidsai/cudf/pull/10235)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Support `percent_rank()` aggregation ([#10227](https://github.com/rapidsai/cudf/pull/10227)) [@mythrocks](https://github.com/mythrocks)
-- Refactor Series.__array_ufunc__ ([#10217](https://github.com/rapidsai/cudf/pull/10217)) [@vyasr](https://github.com/vyasr)
-- Reduce pytest runtime ([#10203](https://github.com/rapidsai/cudf/pull/10203)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add regex flags parameter to python cudf strings split ([#10185](https://github.com/rapidsai/cudf/pull/10185)) [@davidwendt](https://github.com/davidwendt)
-- Support for `MOD`, `PMOD` and `PYMOD` for `decimal32/64/128` ([#10179](https://github.com/rapidsai/cudf/pull/10179)) [@codereport](https://github.com/codereport)
-- Adding string row size iterator for row to column and column to row conversion ([#10157](https://github.com/rapidsai/cudf/pull/10157)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Add file size counter to cuIO benchmarks ([#10154](https://github.com/rapidsai/cudf/pull/10154)) [@vuule](https://github.com/vuule)
-- byte_range support for multibyte_split/read_text ([#10150](https://github.com/rapidsai/cudf/pull/10150)) [@cwharris](https://github.com/cwharris)
-- Add JNI for `strings::split_re` and `strings::split_record_re` ([#10139](https://github.com/rapidsai/cudf/pull/10139)) [@ttnghia](https://github.com/ttnghia)
-- Add `maxSplit` parameter to Java binding for `strings:split` ([#10137](https://github.com/rapidsai/cudf/pull/10137)) [@ttnghia](https://github.com/ttnghia)
-- Add libcudf strings split API that accepts regex pattern ([#10128](https://github.com/rapidsai/cudf/pull/10128)) [@davidwendt](https://github.com/davidwendt)
-- generate benchmark input in device ([#10109](https://github.com/rapidsai/cudf/pull/10109)) [@karthikeyann](https://github.com/karthikeyann)
-- Avoid `nan_as_null` op if `nan_count` is 0 ([#10082](https://github.com/rapidsai/cudf/pull/10082)) [@galipremsagar](https://github.com/galipremsagar)
-- Add Dataframe and Index nunique ([#10077](https://github.com/rapidsai/cudf/pull/10077)) [@martinfalisse](https://github.com/martinfalisse)
-- Support nanosecond timestamps in parquet ([#10063](https://github.com/rapidsai/cudf/pull/10063)) [@PointKernel](https://github.com/PointKernel)
-- Java bindings for mixed semi and anti joins ([#10040](https://github.com/rapidsai/cudf/pull/10040)) [@jlowe](https://github.com/jlowe)
-- Implement mixed equality/conditional semi/anti joins ([#10037](https://github.com/rapidsai/cudf/pull/10037)) [@vyasr](https://github.com/vyasr)
-- Optimize compaction operations ([#10030](https://github.com/rapidsai/cudf/pull/10030)) [@PointKernel](https://github.com/PointKernel)
-- Support `args=` in `Series.apply` ([#9982](https://github.com/rapidsai/cudf/pull/9982)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add cudf::strings::findall_record API ([#9911](https://github.com/rapidsai/cudf/pull/9911)) [@davidwendt](https://github.com/davidwendt)
-- Add covariance for sort groupby (python) ([#9889](https://github.com/rapidsai/cudf/pull/9889)) [@mayankanand007](https://github.com/mayankanand007)
-- Implement DataFrame diff() ([#9817](https://github.com/rapidsai/cudf/pull/9817)) [@skirui-source](https://github.com/skirui-source)
-- Implement DataFrame pct_change ([#9805](https://github.com/rapidsai/cudf/pull/9805)) [@skirui-source](https://github.com/skirui-source)
-- Support segmented reductions and null mask reductions ([#9621](https://github.com/rapidsai/cudf/pull/9621)) [@isVoid](https://github.com/isVoid)
-- Add 'spearman' correlation method for `dataframe.corr` and `series.corr` ([#7141](https://github.com/rapidsai/cudf/pull/7141)) [@dominicshanshan](https://github.com/dominicshanshan)
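A minimal sketch of the Spearman correlation option from [#7141](https://github.com/rapidsai/cudf/pull/7141), the last entry above (the data is illustrative):

```python
import cudf

df = cudf.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 1, 4, 3, 5]})
# method="spearman" ranks the data before computing correlations;
# the default remains "pearson".
print(df.corr(method="spearman"))
```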
([#10410](https://github.com/rapidsai/cudf/pull/10410)) [@bdice](https://github.com/bdice) -- Fix warnings in `test_rolling` ([#10405](https://github.com/rapidsai/cudf/pull/10405)) [@bdice](https://github.com/bdice) -- Enable `codecov` github-check in CI ([#10404](https://github.com/rapidsai/cudf/pull/10404)) [@galipremsagar](https://github.com/galipremsagar) -- Fix warnings in test_cuda_apply, test_numerical, test_pickling, test_unaops. ([#10402](https://github.com/rapidsai/cudf/pull/10402)) [@bdice](https://github.com/bdice) -- Set column names in `_from_columns_like_self` factory ([#10400](https://github.com/rapidsai/cudf/pull/10400)) [@isVoid](https://github.com/isVoid) -- Refactor `nvtx` annotations in `cudf` & `dask-cudf` ([#10396](https://github.com/rapidsai/cudf/pull/10396)) [@galipremsagar](https://github.com/galipremsagar) -- Consolidate .cov and .corr for sort groupby ([#10386](https://github.com/rapidsai/cudf/pull/10386)) [@skirui-source](https://github.com/skirui-source) -- Consolidate some Frame APIs ([#10381](https://github.com/rapidsai/cudf/pull/10381)) [@vyasr](https://github.com/vyasr) -- Refactor hash functions and `hash_combine` ([#10379](https://github.com/rapidsai/cudf/pull/10379)) [@bdice](https://github.com/bdice) -- Add `nvtx` annotations for `Series` and `Index` ([#10374](https://github.com/rapidsai/cudf/pull/10374)) [@galipremsagar](https://github.com/galipremsagar) -- Refactor `filling.repeat` API ([#10371](https://github.com/rapidsai/cudf/pull/10371)) [@isVoid](https://github.com/isVoid) -- Move standalone UTF8 functions from string_view.hpp to utf8.hpp ([#10369](https://github.com/rapidsai/cudf/pull/10369)) [@davidwendt](https://github.com/davidwendt) -- Remove doc for deprecated function `one_hot_encoding` ([#10367](https://github.com/rapidsai/cudf/pull/10367)) [@isVoid](https://github.com/isVoid) -- Refactor array function ([#10364](https://github.com/rapidsai/cudf/pull/10364)) [@vyasr](https://github.com/vyasr) -- Fix warnings in test_csv.py. ([#10362](https://github.com/rapidsai/cudf/pull/10362)) [@bdice](https://github.com/bdice) -- Implement a mixin for binops ([#10360](https://github.com/rapidsai/cudf/pull/10360)) [@vyasr](https://github.com/vyasr) -- Refactor cython interface: `copying.pyx` ([#10359](https://github.com/rapidsai/cudf/pull/10359)) [@isVoid](https://github.com/isVoid) -- Implement a mixin for scans ([#10358](https://github.com/rapidsai/cudf/pull/10358)) [@vyasr](https://github.com/vyasr) -- Add scan_aggregation and reduce_aggregation derived types. ([#10357](https://github.com/rapidsai/cudf/pull/10357)) [@nvdbaranec](https://github.com/nvdbaranec) -- Add cleanup of python artifacts ([#10355](https://github.com/rapidsai/cudf/pull/10355)) [@galipremsagar](https://github.com/galipremsagar) -- Fix warnings in test_categorical.py. 
([#10354](https://github.com/rapidsai/cudf/pull/10354)) [@bdice](https://github.com/bdice) -- Create a dispatcher for invoking regex kernel functions ([#10349](https://github.com/rapidsai/cudf/pull/10349)) [@davidwendt](https://github.com/davidwendt) -- Fix `codecov` in CI ([#10347](https://github.com/rapidsai/cudf/pull/10347)) [@galipremsagar](https://github.com/galipremsagar) -- Enable caching for `memory_usage` calculation in `Column` ([#10345](https://github.com/rapidsai/cudf/pull/10345)) [@galipremsagar](https://github.com/galipremsagar) -- C++17 cleanup: traits replace std::enable_if<>::type with std::enable_if_t ([#10343](https://github.com/rapidsai/cudf/pull/10343)) [@karthikeyann](https://github.com/karthikeyann) -- JNI: Support appending DECIMAL128 into ColumnBuilder in terms of byte array ([#10338](https://github.com/rapidsai/cudf/pull/10338)) [@sperlingxx](https://github.com/sperlingxx) -- multibyte_split test improvements ([#10328](https://github.com/rapidsai/cudf/pull/10328)) [@vuule](https://github.com/vuule) -- Fix warnings in test_binops.py. ([#10327](https://github.com/rapidsai/cudf/pull/10327)) [@bdice](https://github.com/bdice) -- Fix warnings from pandas in test_array_ufunc.py. ([#10324](https://github.com/rapidsai/cudf/pull/10324)) [@bdice](https://github.com/bdice) -- Update upload script ([#10321](https://github.com/rapidsai/cudf/pull/10321)) [@ajschmidt8](https://github.com/ajschmidt8) -- Move hash type declarations to hashing.hpp ([#10320](https://github.com/rapidsai/cudf/pull/10320)) [@davidwendt](https://github.com/davidwendt) -- C++17 cleanup: traits replace `::value` with `_v` ([#10319](https://github.com/rapidsai/cudf/pull/10319)) [@karthikeyann](https://github.com/karthikeyann) -- Remove internal columns usage ([#10315](https://github.com/rapidsai/cudf/pull/10315)) [@vyasr](https://github.com/vyasr) -- Remove extraneous `build.sh` parameter ([#10313](https://github.com/rapidsai/cudf/pull/10313)) [@ajschmidt8](https://github.com/ajschmidt8) -- Add const qualifier to MurmurHash3_32::hash_combine ([#10311](https://github.com/rapidsai/cudf/pull/10311)) [@davidwendt](https://github.com/davidwendt) -- Remove `TODO` in `libcudf_kafka` recipe ([#10309](https://github.com/rapidsai/cudf/pull/10309)) [@ajschmidt8](https://github.com/ajschmidt8) -- Add conversions between column_view and device_span<T const>. ([#10302](https://github.com/rapidsai/cudf/pull/10302)) [@bdice](https://github.com/bdice) -- Avoid `decimal` type narrowing for decimal binops ([#10299](https://github.com/rapidsai/cudf/pull/10299)) [@galipremsagar](https://github.com/galipremsagar) -- Deprecate `DataFrame.iteritems` and introduce `.items` ([#10298](https://github.com/rapidsai/cudf/pull/10298)) [@galipremsagar](https://github.com/galipremsagar) -- Explicitly request CMake use `gnu++17` over `c++17` ([#10297](https://github.com/rapidsai/cudf/pull/10297)) [@robertmaynard](https://github.com/robertmaynard) -- Add copyright check as pre-commit hook. 
([#10290](https://github.com/rapidsai/cudf/pull/10290)) [@vyasr](https://github.com/vyasr) -- DataFrame `insert` and creation optimizations ([#10285](https://github.com/rapidsai/cudf/pull/10285)) [@galipremsagar](https://github.com/galipremsagar) -- Improve hash join detail functions ([#10273](https://github.com/rapidsai/cudf/pull/10273)) [@PointKernel](https://github.com/PointKernel) -- Replace custom `cached_property` implementation with functools ([#10272](https://github.com/rapidsai/cudf/pull/10272)) [@shwina](https://github.com/shwina) -- Rewrites `sample` API ([#10262](https://github.com/rapidsai/cudf/pull/10262)) [@isVoid](https://github.com/isVoid) -- Bump hadoop-common from 3.1.0 to 3.1.4 in /java ([#10259](https://github.com/rapidsai/cudf/pull/10259)) [@dependabot[bot]](https://github.com/dependabot[bot]) -- Remove making redundant `copy` across code-base ([#10257](https://github.com/rapidsai/cudf/pull/10257)) [@galipremsagar](https://github.com/galipremsagar) -- Add more `nvtx` annotations ([#10256](https://github.com/rapidsai/cudf/pull/10256)) [@galipremsagar](https://github.com/galipremsagar) -- Add `copyright` check in `cudf` ([#10253](https://github.com/rapidsai/cudf/pull/10253)) [@galipremsagar](https://github.com/galipremsagar) -- Remove redundant copies in `fillna` to improve performance ([#10241](https://github.com/rapidsai/cudf/pull/10241)) [@galipremsagar](https://github.com/galipremsagar) -- Remove `std::numeric_limit` specializations for timestamp & durations ([#10239](https://github.com/rapidsai/cudf/pull/10239)) [@codereport](https://github.com/codereport) -- Optimize `DataFrame` creation across code-base ([#10236](https://github.com/rapidsai/cudf/pull/10236)) [@galipremsagar](https://github.com/galipremsagar) -- Change pytest distribution algorithm and increase parallelism in CI ([#10232](https://github.com/rapidsai/cudf/pull/10232)) [@galipremsagar](https://github.com/galipremsagar) -- Add environment variables for I/O thread pool and slice sizes ([#10218](https://github.com/rapidsai/cudf/pull/10218)) [@vuule](https://github.com/vuule) -- Add regex flags to strings findall functions ([#10208](https://github.com/rapidsai/cudf/pull/10208)) [@davidwendt](https://github.com/davidwendt) -- Update dask-cudf parquet tests to reflect upstream bugfixes to `_metadata` ([#10206](https://github.com/rapidsai/cudf/pull/10206)) [@charlesbluca](https://github.com/charlesbluca) -- Remove unnecessary nunique function in Series. ([#10205](https://github.com/rapidsai/cudf/pull/10205)) [@martinfalisse](https://github.com/martinfalisse) -- Refactor DataFrame tests. 
([#10204](https://github.com/rapidsai/cudf/pull/10204)) [@bdice](https://github.com/bdice) -- Rewrites `column.__setitem__`, Use `boolean_mask_scatter` ([#10202](https://github.com/rapidsai/cudf/pull/10202)) [@isVoid](https://github.com/isVoid) -- Java utilities to aid in accelerating aggregations on 128-bit types ([#10201](https://github.com/rapidsai/cudf/pull/10201)) [@jlowe](https://github.com/jlowe) -- Fix docstrings alignment in `Frame` methods ([#10199](https://github.com/rapidsai/cudf/pull/10199)) [@galipremsagar](https://github.com/galipremsagar) -- Fix cuco pair issue in hash join ([#10195](https://github.com/rapidsai/cudf/pull/10195)) [@PointKernel](https://github.com/PointKernel) -- Replace `dask` groupby `.index` usages with `.by` ([#10193](https://github.com/rapidsai/cudf/pull/10193)) [@galipremsagar](https://github.com/galipremsagar) -- Add regex flags to strings extract function ([#10192](https://github.com/rapidsai/cudf/pull/10192)) [@davidwendt](https://github.com/davidwendt) -- Forward-merge branch-22.02 to branch-22.04 ([#10191](https://github.com/rapidsai/cudf/pull/10191)) [@bdice](https://github.com/bdice) -- Add CMake `install` rule for tests ([#10190](https://github.com/rapidsai/cudf/pull/10190)) [@ajschmidt8](https://github.com/ajschmidt8) -- Unpin `dask` & `distributed` ([#10182](https://github.com/rapidsai/cudf/pull/10182)) [@galipremsagar](https://github.com/galipremsagar) -- Add comments to explain test validation ([#10176](https://github.com/rapidsai/cudf/pull/10176)) [@galipremsagar](https://github.com/galipremsagar) -- Reduce warnings in pytest output ([#10168](https://github.com/rapidsai/cudf/pull/10168)) [@bdice](https://github.com/bdice) -- Some consolidation of indexed frame methods ([#10167](https://github.com/rapidsai/cudf/pull/10167)) [@vyasr](https://github.com/vyasr) -- Refactor isin implementations ([#10165](https://github.com/rapidsai/cudf/pull/10165)) [@vyasr](https://github.com/vyasr) -- Faster struct row comparator ([#10164](https://github.com/rapidsai/cudf/pull/10164)) [@devavret](https://github.com/devavret) -- Refactor groupby::get_groups. ([#10161](https://github.com/rapidsai/cudf/pull/10161)) [@bdice](https://github.com/bdice) -- Deprecate `decimal_cols_as_float` in ORC reader (C++ layer) ([#10152](https://github.com/rapidsai/cudf/pull/10152)) [@vuule](https://github.com/vuule) -- Replace `ccache` with `sccache` ([#10146](https://github.com/rapidsai/cudf/pull/10146)) [@ajschmidt8](https://github.com/ajschmidt8) -- Murmur3 hash kernel cleanup ([#10143](https://github.com/rapidsai/cudf/pull/10143)) [@rwlee](https://github.com/rwlee) -- Deprecate `decimal_cols_as_float` in ORC reader ([#10142](https://github.com/rapidsai/cudf/pull/10142)) [@galipremsagar](https://github.com/galipremsagar) -- Run pyupgrade 2.31.0. ([#10141](https://github.com/rapidsai/cudf/pull/10141)) [@bdice](https://github.com/bdice) -- Remove `drop_nan` from internal `IndexedFrame._drop_na_rows`. 
([#10140](https://github.com/rapidsai/cudf/pull/10140)) [@bdice](https://github.com/bdice) -- Change cudf::strings::find_multiple to return a lists column ([#10134](https://github.com/rapidsai/cudf/pull/10134)) [@davidwendt](https://github.com/davidwendt) -- Update cmake-format script for branch 22.04. ([#10132](https://github.com/rapidsai/cudf/pull/10132)) [@bdice](https://github.com/bdice) -- Accept r-value references in convert_table_for_return(): ([#10131](https://github.com/rapidsai/cudf/pull/10131)) [@mythrocks](https://github.com/mythrocks) -- Remove the option to completely disable decimal128 columns in the ORC reader ([#10127](https://github.com/rapidsai/cudf/pull/10127)) [@vuule](https://github.com/vuule) -- Remove deprecated code ([#10124](https://github.com/rapidsai/cudf/pull/10124)) [@vyasr](https://github.com/vyasr) -- Update gpu_utils.py to reflect current CUDA support. ([#10113](https://github.com/rapidsai/cudf/pull/10113)) [@bdice](https://github.com/bdice) -- Remove benchmarks suffix ([#10112](https://github.com/rapidsai/cudf/pull/10112)) [@bdice](https://github.com/bdice) -- Update cudf java binding version to 22.04.0-SNAPSHOT ([#10084](https://github.com/rapidsai/cudf/pull/10084)) [@pxLi](https://github.com/pxLi) -- Remove unnecessary docker files. ([#10069](https://github.com/rapidsai/cudf/pull/10069)) [@vyasr](https://github.com/vyasr) -- Limit benchmark iterations using environment variable ([#10060](https://github.com/rapidsai/cudf/pull/10060)) [@karthikeyann](https://github.com/karthikeyann) -- Add timing chart for libcudf build metrics report page ([#10038](https://github.com/rapidsai/cudf/pull/10038)) [@davidwendt](https://github.com/davidwendt) -- JNI: Rewrite growBuffersAndRows to accelerate the HostColumnBuilder ([#10025](https://github.com/rapidsai/cudf/pull/10025)) [@sperlingxx](https://github.com/sperlingxx) -- Reduce redundant code in CUDF JNI ([#10019](https://github.com/rapidsai/cudf/pull/10019)) [@mythrocks](https://github.com/mythrocks) -- Make snappy decompress check more efficient ([#9995](https://github.com/rapidsai/cudf/pull/9995)) [@cheinger](https://github.com/cheinger) -- Remove deprecated method Series.set_index. ([#9945](https://github.com/rapidsai/cudf/pull/9945)) [@bdice](https://github.com/bdice) -- Implement a mixin for reductions ([#9925](https://github.com/rapidsai/cudf/pull/9925)) [@vyasr](https://github.com/vyasr) -- JNI: Push back decimal utils from spark-rapids ([#9907](https://github.com/rapidsai/cudf/pull/9907)) [@sperlingxx](https://github.com/sperlingxx) -- Add `assert_column_memory_*` ([#9882](https://github.com/rapidsai/cudf/pull/9882)) [@isVoid](https://github.com/isVoid) -- Add CUDF_UNREACHABLE macro. 
([#9727](https://github.com/rapidsai/cudf/pull/9727)) [@bdice](https://github.com/bdice) -- Upgrade `arrow` & `pyarrow` to `6.0.1` ([#9686](https://github.com/rapidsai/cudf/pull/9686)) [@galipremsagar](https://github.com/galipremsagar) - -# cuDF 22.02.00 (2 Feb 2022) - -## 🚨 Breaking Changes - -- ORC writer API changes for granular statistics ([#10058](https://github.com/rapidsai/cudf/pull/10058)) [@mythrocks](https://github.com/mythrocks) -- `decimal128` Support for `to/from_arrow` ([#9986](https://github.com/rapidsai/cudf/pull/9986)) [@codereport](https://github.com/codereport) -- Remove deprecated method `one_hot_encoding` ([#9977](https://github.com/rapidsai/cudf/pull/9977)) [@isVoid](https://github.com/isVoid) -- Remove str.subword_tokenize ([#9968](https://github.com/rapidsai/cudf/pull/9968)) [@VibhuJawa](https://github.com/VibhuJawa) -- Remove deprecated `method` parameter from `merge` and `join`. ([#9944](https://github.com/rapidsai/cudf/pull/9944)) [@bdice](https://github.com/bdice) -- Remove deprecated method DataFrame.hash_columns. ([#9943](https://github.com/rapidsai/cudf/pull/9943)) [@bdice](https://github.com/bdice) -- Remove deprecated method Series.hash_encode. ([#9942](https://github.com/rapidsai/cudf/pull/9942)) [@bdice](https://github.com/bdice) -- Refactoring ceil/round/floor code for datetime64 types ([#9926](https://github.com/rapidsai/cudf/pull/9926)) [@mayankanand007](https://github.com/mayankanand007) -- Introduce `nan_as_null` parameter for `cudf.Index` ([#9893](https://github.com/rapidsai/cudf/pull/9893)) [@galipremsagar](https://github.com/galipremsagar) -- Add regex_flags parameter to strings replace_re functions ([#9878](https://github.com/rapidsai/cudf/pull/9878)) [@davidwendt](https://github.com/davidwendt) -- Break tie for `top` categorical columns in `Series.describe` ([#9867](https://github.com/rapidsai/cudf/pull/9867)) [@isVoid](https://github.com/isVoid) -- Add partitioning support in parquet writer ([#9810](https://github.com/rapidsai/cudf/pull/9810)) [@devavret](https://github.com/devavret) -- Move `drop_duplicates`, `drop_na`, `_gather`, `take` to IndexFrame and create their `_base_index` counterparts ([#9807](https://github.com/rapidsai/cudf/pull/9807)) [@isVoid](https://github.com/isVoid) -- Raise temporary error for `decimal128` types in parquet reader ([#9804](https://github.com/rapidsai/cudf/pull/9804)) [@galipremsagar](https://github.com/galipremsagar) -- Change default `dtype` of all nulls column from `float` to `object` ([#9803](https://github.com/rapidsai/cudf/pull/9803)) [@galipremsagar](https://github.com/galipremsagar) -- Remove unused masked udf cython/c++ code ([#9792](https://github.com/rapidsai/cudf/pull/9792)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Pick smallest decimal type with required precision in ORC reader ([#9775](https://github.com/rapidsai/cudf/pull/9775)) [@vuule](https://github.com/vuule) -- Add decimal128 support to Parquet reader and writer ([#9765](https://github.com/rapidsai/cudf/pull/9765)) [@vuule](https://github.com/vuule) -- Refactor TableTest assertion methods to a separate utility class ([#9762](https://github.com/rapidsai/cudf/pull/9762)) 
[@jlowe](https://github.com/jlowe) -- Use cuFile direct device reads/writes by default in cuIO ([#9722](https://github.com/rapidsai/cudf/pull/9722)) [@vuule](https://github.com/vuule) -- Match pandas scalar result types in reductions ([#9717](https://github.com/rapidsai/cudf/pull/9717)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Add parameters to control row group size in Parquet writer ([#9677](https://github.com/rapidsai/cudf/pull/9677)) [@vuule](https://github.com/vuule) -- Refactor bit counting APIs, introduce valid/null count functions, and split host/device side code for segmented counts. ([#9588](https://github.com/rapidsai/cudf/pull/9588)) [@bdice](https://github.com/bdice) -- Add support for `decimal128` in cudf python ([#9533](https://github.com/rapidsai/cudf/pull/9533)) [@galipremsagar](https://github.com/galipremsagar) -- Implement `lists::index_of()` to find positions in list rows ([#9510](https://github.com/rapidsai/cudf/pull/9510)) [@mythrocks](https://github.com/mythrocks) -- Rewriting row/column conversions for Spark <-> cudf data conversions ([#8444](https://github.com/rapidsai/cudf/pull/8444)) [@hyperbolic2346](https://github.com/hyperbolic2346) - -## 🐛 Bug Fixes - -- Add check for negative stripe index in ORC reader ([#10074](https://github.com/rapidsai/cudf/pull/10074)) [@vuule](https://github.com/vuule) -- Update Java tests to expect DECIMAL128 from Arrow ([#10073](https://github.com/rapidsai/cudf/pull/10073)) [@jlowe](https://github.com/jlowe) -- Avoid index materialization when `DataFrame` is created with un-named `Series` objects ([#10071](https://github.com/rapidsai/cudf/pull/10071)) [@galipremsagar](https://github.com/galipremsagar) -- fix gcc 11 compilation errors ([#10067](https://github.com/rapidsai/cudf/pull/10067)) [@rongou](https://github.com/rongou) -- Fix `columns` ordering issue in parquet reader ([#10066](https://github.com/rapidsai/cudf/pull/10066)) [@galipremsagar](https://github.com/galipremsagar) -- Fix dataframe setitem with `ndarray` types ([#10056](https://github.com/rapidsai/cudf/pull/10056)) [@galipremsagar](https://github.com/galipremsagar) -- Remove implicit copy due to conversion from cudf::size_type and size_t ([#10045](https://github.com/rapidsai/cudf/pull/10045)) [@robertmaynard](https://github.com/robertmaynard) -- Include <optional> in headers that use std::optional ([#10044](https://github.com/rapidsai/cudf/pull/10044)) [@robertmaynard](https://github.com/robertmaynard) -- Fix repr and concat of `StructColumn` ([#10042](https://github.com/rapidsai/cudf/pull/10042)) [@galipremsagar](https://github.com/galipremsagar) -- Include row group level stats when writing ORC files ([#10041](https://github.com/rapidsai/cudf/pull/10041)) [@vuule](https://github.com/vuule) -- build.sh respects the `--build_metrics` and `--incl_cache_stats` flags ([#10035](https://github.com/rapidsai/cudf/pull/10035)) [@robertmaynard](https://github.com/robertmaynard) -- Fix memory leaks in JNI native code. 
([#10029](https://github.com/rapidsai/cudf/pull/10029)) [@mythrocks](https://github.com/mythrocks) -- Update JNI to use new arena mr constructor ([#10027](https://github.com/rapidsai/cudf/pull/10027)) [@rongou](https://github.com/rongou) -- Fix null check when comparing structs in `arg_min` operation of reduction/groupby ([#10026](https://github.com/rapidsai/cudf/pull/10026)) [@ttnghia](https://github.com/ttnghia) -- Wrap CI script shell variables in quotes to fix local testing. ([#10018](https://github.com/rapidsai/cudf/pull/10018)) [@bdice](https://github.com/bdice) -- cudftestutil no longer propagates compiler flags to external users ([#10017](https://github.com/rapidsai/cudf/pull/10017)) [@robertmaynard](https://github.com/robertmaynard) -- Remove `CUDA_DEVICE_CALLABLE` macro usage ([#10015](https://github.com/rapidsai/cudf/pull/10015)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Add missing list filling header in meta.yaml ([#10007](https://github.com/rapidsai/cudf/pull/10007)) [@devavret](https://github.com/devavret) -- Fix `conda` recipes for `custreamz` & `cudf_kafka` ([#10003](https://github.com/rapidsai/cudf/pull/10003)) [@ajschmidt8](https://github.com/ajschmidt8) -- Fix matching regex word-boundary () in strings replace ([#9997](https://github.com/rapidsai/cudf/pull/9997)) [@davidwendt](https://github.com/davidwendt) -- Fix null check when comparing structs in `min` and `max` reduction/groupby operations ([#9994](https://github.com/rapidsai/cudf/pull/9994)) [@ttnghia](https://github.com/ttnghia) -- Fix octal pattern matching in regex string ([#9993](https://github.com/rapidsai/cudf/pull/9993)) [@davidwendt](https://github.com/davidwendt) -- `decimal128` Support for `to/from_arrow` ([#9986](https://github.com/rapidsai/cudf/pull/9986)) [@codereport](https://github.com/codereport) -- Fix groupby shift/diff/fill after selecting from a `GroupBy` ([#9984](https://github.com/rapidsai/cudf/pull/9984)) [@shwina](https://github.com/shwina) -- Fix the overflow problem of decimal rescale ([#9966](https://github.com/rapidsai/cudf/pull/9966)) [@sperlingxx](https://github.com/sperlingxx) -- Use default value for decimal precision in parquet writer when not specified ([#9963](https://github.com/rapidsai/cudf/pull/9963)) [@devavret](https://github.com/devavret) -- Fix cudf java build error. ([#9958](https://github.com/rapidsai/cudf/pull/9958)) [@firestarman](https://github.com/firestarman) -- Use gpuci_mamba_retry to install local artifacts. ([#9951](https://github.com/rapidsai/cudf/pull/9951)) [@bdice](https://github.com/bdice) -- Fix regression HostColumnVectorCore requiring native libs ([#9948](https://github.com/rapidsai/cudf/pull/9948)) [@jlowe](https://github.com/jlowe) -- Rename aggregate_metadata in writer to fix name collision ([#9938](https://github.com/rapidsai/cudf/pull/9938)) [@devavret](https://github.com/devavret) -- Fixed issue with percentile_approx where output tdigests could have uninitialized data at the end. 
([#9931](https://github.com/rapidsai/cudf/pull/9931)) [@nvdbaranec](https://github.com/nvdbaranec) -- Resolve racecheck errors in ORC kernels ([#9916](https://github.com/rapidsai/cudf/pull/9916)) [@vuule](https://github.com/vuule) -- Fix the java build after parquet partitioning support ([#9908](https://github.com/rapidsai/cudf/pull/9908)) [@revans2](https://github.com/revans2) -- Fix compilation of benchmark for parquet writer. ([#9905](https://github.com/rapidsai/cudf/pull/9905)) [@bdice](https://github.com/bdice) -- Fix a memcheck error in ORC writer ([#9896](https://github.com/rapidsai/cudf/pull/9896)) [@vuule](https://github.com/vuule) -- Introduce `nan_as_null` parameter for `cudf.Index` ([#9893](https://github.com/rapidsai/cudf/pull/9893)) [@galipremsagar](https://github.com/galipremsagar) -- Fix fallback to sort aggregation for grouping only hash aggregate ([#9891](https://github.com/rapidsai/cudf/pull/9891)) [@abellina](https://github.com/abellina) -- Add zlib to cudfjni link when using static libcudf library dependency ([#9890](https://github.com/rapidsai/cudf/pull/9890)) [@jlowe](https://github.com/jlowe) -- TimedeltaIndex constructor raises an AttributeError. ([#9884](https://github.com/rapidsai/cudf/pull/9884)) [@skirui-source](https://github.com/skirui-source) -- Fix cudf.Scalar string datetime construction ([#9875](https://github.com/rapidsai/cudf/pull/9875)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Load libcufile.so with RTLD_NODELETE flag ([#9872](https://github.com/rapidsai/cudf/pull/9872)) [@vuule](https://github.com/vuule) -- Break tie for `top` categorical columns in `Series.describe` ([#9867](https://github.com/rapidsai/cudf/pull/9867)) [@isVoid](https://github.com/isVoid) -- Fix null handling for structs `min` and `arg_min` in groupby, groupby scan, reduction, and inclusive_scan ([#9864](https://github.com/rapidsai/cudf/pull/9864)) [@ttnghia](https://github.com/ttnghia) -- Add one-level list encoding support in parquet reader ([#9848](https://github.com/rapidsai/cudf/pull/9848)) [@PointKernel](https://github.com/PointKernel) -- Fix an out-of-bounds read in validity copying in contiguous_split. ([#9842](https://github.com/rapidsai/cudf/pull/9842)) [@nvdbaranec](https://github.com/nvdbaranec) -- Fix join of MultiIndex to Index with one column and overlapping name. 
([#9830](https://github.com/rapidsai/cudf/pull/9830)) [@vyasr](https://github.com/vyasr) -- Fix caching in `Series.applymap` ([#9821](https://github.com/rapidsai/cudf/pull/9821)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Enforce boolean `ascending` for dask-cudf `sort_values` ([#9814](https://github.com/rapidsai/cudf/pull/9814)) [@charlesbluca](https://github.com/charlesbluca) -- Fix ORC writer crash with empty input columns ([#9808](https://github.com/rapidsai/cudf/pull/9808)) [@vuule](https://github.com/vuule) -- Change default `dtype` of all nulls column from `float` to `object` ([#9803](https://github.com/rapidsai/cudf/pull/9803)) [@galipremsagar](https://github.com/galipremsagar) -- Load native dependencies when Java ColumnView is loaded ([#9800](https://github.com/rapidsai/cudf/pull/9800)) [@jlowe](https://github.com/jlowe) -- Fix dtype-argument bug in dask_cudf read_csv ([#9796](https://github.com/rapidsai/cudf/pull/9796)) [@rjzamora](https://github.com/rjzamora) -- Fix overflow for min calculation in strings::from_timestamps ([#9793](https://github.com/rapidsai/cudf/pull/9793)) [@revans2](https://github.com/revans2) -- Fix memory error due to lambda return type deduction limitation ([#9778](https://github.com/rapidsai/cudf/pull/9778)) [@karthikeyann](https://github.com/karthikeyann) -- Revert regex $/EOL end-of-string new-line special case handling ([#9774](https://github.com/rapidsai/cudf/pull/9774)) [@davidwendt](https://github.com/davidwendt) -- Fix missing streams ([#9767](https://github.com/rapidsai/cudf/pull/9767)) [@karthikeyann](https://github.com/karthikeyann) -- Fix make_empty_scalar_like on list_type ([#9759](https://github.com/rapidsai/cudf/pull/9759)) [@sperlingxx](https://github.com/sperlingxx) -- Update cmake and conda to 22.02 ([#9746](https://github.com/rapidsai/cudf/pull/9746)) [@devavret](https://github.com/devavret) -- Fix out-of-bounds memory write in decimal128-to-string conversion ([#9740](https://github.com/rapidsai/cudf/pull/9740)) [@davidwendt](https://github.com/davidwendt) -- Match pandas scalar result types in reductions ([#9717](https://github.com/rapidsai/cudf/pull/9717)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Fix regex non-multiline EOL/$ matching strings ending with a new-line ([#9715](https://github.com/rapidsai/cudf/pull/9715)) [@davidwendt](https://github.com/davidwendt) -- Fixed build by adding more checks for int8, int16 ([#9707](https://github.com/rapidsai/cudf/pull/9707)) [@razajafri](https://github.com/razajafri) -- Fix `null` handling when `boolean` dtype is passed ([#9691](https://github.com/rapidsai/cudf/pull/9691)) [@galipremsagar](https://github.com/galipremsagar) -- Fix stream usage in `segmented_gather()` ([#9679](https://github.com/rapidsai/cudf/pull/9679)) [@mythrocks](https://github.com/mythrocks) - -## 📖 Documentation - -- Update `decimal` dtypes related docs entries ([#10072](https://github.com/rapidsai/cudf/pull/10072)) [@galipremsagar](https://github.com/galipremsagar) -- Fix regex doc describing hexadecimal escape characters ([#10009](https://github.com/rapidsai/cudf/pull/10009)) [@davidwendt](https://github.com/davidwendt) -- Fix cudf compilation 
instructions. ([#9956](https://github.com/rapidsai/cudf/pull/9956)) [@esoha-nvidia](https://github.com/esoha-nvidia) -- Fix see also links for IO APIs ([#9895](https://github.com/rapidsai/cudf/pull/9895)) [@galipremsagar](https://github.com/galipremsagar) -- Fix build instructions for libcudf doxygen ([#9837](https://github.com/rapidsai/cudf/pull/9837)) [@davidwendt](https://github.com/davidwendt) -- Fix some doxygen warnings and add missing documentation ([#9770](https://github.com/rapidsai/cudf/pull/9770)) [@karthikeyann](https://github.com/karthikeyann) -- update cuda version in local build ([#9736](https://github.com/rapidsai/cudf/pull/9736)) [@karthikeyann](https://github.com/karthikeyann) -- Fix doxygen for enum types in libcudf ([#9724](https://github.com/rapidsai/cudf/pull/9724)) [@davidwendt](https://github.com/davidwendt) -- Spell check fixes ([#9682](https://github.com/rapidsai/cudf/pull/9682)) [@karthikeyann](https://github.com/karthikeyann) -- Fix links in C++ Developer Guide. ([#9675](https://github.com/rapidsai/cudf/pull/9675)) [@bdice](https://github.com/bdice) - -## 🚀 New Features - -- Remove libcudacxx patch needed for nvcc 11.4 ([#10057](https://github.com/rapidsai/cudf/pull/10057)) [@robertmaynard](https://github.com/robertmaynard) -- Allow CuPy 10 ([#10048](https://github.com/rapidsai/cudf/pull/10048)) [@jakirkham](https://github.com/jakirkham) -- Add in support for NULL_LOGICAL_AND and NULL_LOGICAL_OR binops ([#10016](https://github.com/rapidsai/cudf/pull/10016)) [@revans2](https://github.com/revans2) -- Add `groupby.transform` (only support for aggregations) ([#10005](https://github.com/rapidsai/cudf/pull/10005)) [@shwina](https://github.com/shwina) -- Add partitioning support to Parquet chunked writer ([#10000](https://github.com/rapidsai/cudf/pull/10000)) [@devavret](https://github.com/devavret) -- Add jni for sequences ([#9972](https://github.com/rapidsai/cudf/pull/9972)) [@wbo4958](https://github.com/wbo4958) -- Java bindings for mixed left, inner, and full joins ([#9941](https://github.com/rapidsai/cudf/pull/9941)) [@jlowe](https://github.com/jlowe) -- Java bindings for JSON reader support ([#9940](https://github.com/rapidsai/cudf/pull/9940)) [@wbo4958](https://github.com/wbo4958) -- Enable transpose for string columns in cudf python ([#9937](https://github.com/rapidsai/cudf/pull/9937)) [@galipremsagar](https://github.com/galipremsagar) -- Support structs for `cudf::contains` with column/scalar input ([#9929](https://github.com/rapidsai/cudf/pull/9929)) [@ttnghia](https://github.com/ttnghia) -- Implement mixed equality/conditional joins ([#9917](https://github.com/rapidsai/cudf/pull/9917)) [@vyasr](https://github.com/vyasr) -- Add cudf::strings::extract_all API ([#9909](https://github.com/rapidsai/cudf/pull/9909)) [@davidwendt](https://github.com/davidwendt) -- Implement JNI for `cudf::scatter` APIs ([#9903](https://github.com/rapidsai/cudf/pull/9903)) [@ttnghia](https://github.com/ttnghia) -- JNI: Function to copy and set validity from bool column. 
([#9901](https://github.com/rapidsai/cudf/pull/9901)) [@mythrocks](https://github.com/mythrocks) -- Add dictionary support to cudf::copy_if_else ([#9887](https://github.com/rapidsai/cudf/pull/9887)) [@davidwendt](https://github.com/davidwendt) -- add run_benchmarks target for running benchmarks with json output ([#9879](https://github.com/rapidsai/cudf/pull/9879)) [@karthikeyann](https://github.com/karthikeyann) -- Add regex_flags parameter to strings replace_re functions ([#9878](https://github.com/rapidsai/cudf/pull/9878)) [@davidwendt](https://github.com/davidwendt) -- Add_suffix and add_prefix for DataFrames and Series ([#9846](https://github.com/rapidsai/cudf/pull/9846)) [@mayankanand007](https://github.com/mayankanand007) -- Add JNI for `cudf::drop_duplicates` ([#9841](https://github.com/rapidsai/cudf/pull/9841)) [@ttnghia](https://github.com/ttnghia) -- Implement per-list sequence ([#9839](https://github.com/rapidsai/cudf/pull/9839)) [@ttnghia](https://github.com/ttnghia) -- adding `series.transpose` ([#9835](https://github.com/rapidsai/cudf/pull/9835)) [@mayankanand007](https://github.com/mayankanand007) -- Adding support for `Series.autocorr` ([#9833](https://github.com/rapidsai/cudf/pull/9833)) [@mayankanand007](https://github.com/mayankanand007) -- Support round operation on datetime64 datatypes ([#9820](https://github.com/rapidsai/cudf/pull/9820)) [@mayankanand007](https://github.com/mayankanand007) -- Add partitioning support in parquet writer ([#9810](https://github.com/rapidsai/cudf/pull/9810)) [@devavret](https://github.com/devavret) -- Raise temporary error for `decimal128` types in parquet reader ([#9804](https://github.com/rapidsai/cudf/pull/9804)) [@galipremsagar](https://github.com/galipremsagar) -- Add decimal128 support to Parquet reader and writer ([#9765](https://github.com/rapidsai/cudf/pull/9765)) [@vuule](https://github.com/vuule) -- Optimize `groupby::scan` ([#9754](https://github.com/rapidsai/cudf/pull/9754)) [@PointKernel](https://github.com/PointKernel) -- Add sample JNI API ([#9728](https://github.com/rapidsai/cudf/pull/9728)) [@res-life](https://github.com/res-life) -- Support `min` and `max` in inclusive scan for structs ([#9725](https://github.com/rapidsai/cudf/pull/9725)) [@ttnghia](https://github.com/ttnghia) -- Add `first` and `last` method to `IndexedFrame` ([#9710](https://github.com/rapidsai/cudf/pull/9710)) [@isVoid](https://github.com/isVoid) -- Support `min` and `max` reduction for structs ([#9697](https://github.com/rapidsai/cudf/pull/9697)) [@ttnghia](https://github.com/ttnghia) -- Add parameters to control row group size in Parquet writer ([#9677](https://github.com/rapidsai/cudf/pull/9677)) [@vuule](https://github.com/vuule) -- Run compute-sanitizer in nightly build ([#9641](https://github.com/rapidsai/cudf/pull/9641)) [@karthikeyann](https://github.com/karthikeyann) -- Implement Series.datetime.floor ([#9571](https://github.com/rapidsai/cudf/pull/9571)) [@skirui-source](https://github.com/skirui-source) -- ceil/floor for `DatetimeIndex` ([#9554](https://github.com/rapidsai/cudf/pull/9554)) [@mayankanand007](https://github.com/mayankanand007) -- Add support for `decimal128` in cudf python 
([#9533](https://github.com/rapidsai/cudf/pull/9533)) [@galipremsagar](https://github.com/galipremsagar) -- Implement `lists::index_of()` to find positions in list rows ([#9510](https://github.com/rapidsai/cudf/pull/9510)) [@mythrocks](https://github.com/mythrocks) -- custreamz oauth callback for kafka (librdkafka) ([#9486](https://github.com/rapidsai/cudf/pull/9486)) [@jdye64](https://github.com/jdye64) -- Add Pearson correlation for sort groupby (python) ([#9166](https://github.com/rapidsai/cudf/pull/9166)) [@skirui-source](https://github.com/skirui-source) -- Interchange dataframe protocol ([#9071](https://github.com/rapidsai/cudf/pull/9071)) [@iskode](https://github.com/iskode) -- Rewriting row/column conversions for Spark <-> cudf data conversions ([#8444](https://github.com/rapidsai/cudf/pull/8444)) [@hyperbolic2346](https://github.com/hyperbolic2346) - -## 🛠️ Improvements - -- Prepare upload scripts for Python 3.7 removal ([#10092](https://github.com/rapidsai/cudf/pull/10092)) [@Ethyling](https://github.com/Ethyling) -- Simplify custreamz and cudf_kafka recipes files ([#10065](https://github.com/rapidsai/cudf/pull/10065)) [@Ethyling](https://github.com/Ethyling) -- ORC writer API changes for granular statistics ([#10058](https://github.com/rapidsai/cudf/pull/10058)) [@mythrocks](https://github.com/mythrocks) -- Remove python constraints in cutreamz and cudf_kafka recipes ([#10052](https://github.com/rapidsai/cudf/pull/10052)) [@Ethyling](https://github.com/Ethyling) -- Unpin `dask` and `distributed` in CI ([#10028](https://github.com/rapidsai/cudf/pull/10028)) [@galipremsagar](https://github.com/galipremsagar) -- Add `_from_column_like_self` factory ([#10022](https://github.com/rapidsai/cudf/pull/10022)) [@isVoid](https://github.com/isVoid) -- Replace custom CUDA bindings previously provided by RMM with official CUDA Python bindings ([#10008](https://github.com/rapidsai/cudf/pull/10008)) [@shwina](https://github.com/shwina) -- Use `cuda::std::is_arithmetic` in `cudf::is_numeric` trait. ([#9996](https://github.com/rapidsai/cudf/pull/9996)) [@bdice](https://github.com/bdice) -- Clean up CUDA stream use in cuIO ([#9991](https://github.com/rapidsai/cudf/pull/9991)) [@vuule](https://github.com/vuule) -- Use addressed-ordered first fit for the pinned memory pool ([#9989](https://github.com/rapidsai/cudf/pull/9989)) [@rongou](https://github.com/rongou) -- Add strings tests to transpose_test.cpp ([#9985](https://github.com/rapidsai/cudf/pull/9985)) [@davidwendt](https://github.com/davidwendt) -- Use gpuci_mamba_retry on Java CI. 
([#9983](https://github.com/rapidsai/cudf/pull/9983)) [@bdice](https://github.com/bdice) -- Remove deprecated method `one_hot_encoding` ([#9977](https://github.com/rapidsai/cudf/pull/9977)) [@isVoid](https://github.com/isVoid) -- Minor cleanup of unused Python functions ([#9974](https://github.com/rapidsai/cudf/pull/9974)) [@vyasr](https://github.com/vyasr) -- Use new efficient partitioned parquet writing in cuDF ([#9971](https://github.com/rapidsai/cudf/pull/9971)) [@devavret](https://github.com/devavret) -- Remove str.subword_tokenize ([#9968](https://github.com/rapidsai/cudf/pull/9968)) [@VibhuJawa](https://github.com/VibhuJawa) -- Forward-merge branch-21.12 to branch-22.02 ([#9947](https://github.com/rapidsai/cudf/pull/9947)) [@bdice](https://github.com/bdice) -- Remove deprecated `method` parameter from `merge` and `join`. ([#9944](https://github.com/rapidsai/cudf/pull/9944)) [@bdice](https://github.com/bdice) -- Remove deprecated method DataFrame.hash_columns. ([#9943](https://github.com/rapidsai/cudf/pull/9943)) [@bdice](https://github.com/bdice) -- Remove deprecated method Series.hash_encode. ([#9942](https://github.com/rapidsai/cudf/pull/9942)) [@bdice](https://github.com/bdice) -- use ninja in java ci build ([#9933](https://github.com/rapidsai/cudf/pull/9933)) [@rongou](https://github.com/rongou) -- Add build-time publish step to cpu build script ([#9927](https://github.com/rapidsai/cudf/pull/9927)) [@davidwendt](https://github.com/davidwendt) -- Refactoring ceil/round/floor code for datetime64 types ([#9926](https://github.com/rapidsai/cudf/pull/9926)) [@mayankanand007](https://github.com/mayankanand007) -- Remove various unused functions ([#9922](https://github.com/rapidsai/cudf/pull/9922)) [@vyasr](https://github.com/vyasr) -- Raise in `query` if dtype is not supported ([#9921](https://github.com/rapidsai/cudf/pull/9921)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Add missing imports tests ([#9920](https://github.com/rapidsai/cudf/pull/9920)) [@Ethyling](https://github.com/Ethyling) -- Spark Decimal128 hashing ([#9919](https://github.com/rapidsai/cudf/pull/9919)) [@rwlee](https://github.com/rwlee) -- Replace `thrust/std::get` with structured bindings ([#9915](https://github.com/rapidsai/cudf/pull/9915)) [@codereport](https://github.com/codereport) -- Upgrade thrust version to 1.15 ([#9912](https://github.com/rapidsai/cudf/pull/9912)) [@robertmaynard](https://github.com/robertmaynard) -- Remove conda envs for CUDA 11.0 and 11.2. ([#9910](https://github.com/rapidsai/cudf/pull/9910)) [@bdice](https://github.com/bdice) -- Return count of set bits from inplace_bitmask_and. 
([#9904](https://github.com/rapidsai/cudf/pull/9904)) [@bdice](https://github.com/bdice) -- Use dynamic nullate for join hasher and equality comparator ([#9902](https://github.com/rapidsai/cudf/pull/9902)) [@davidwendt](https://github.com/davidwendt) -- Update ucx-py version on release using rvc ([#9897](https://github.com/rapidsai/cudf/pull/9897)) [@Ethyling](https://github.com/Ethyling) -- Remove `IncludeCategories` from `.clang-format` ([#9876](https://github.com/rapidsai/cudf/pull/9876)) [@codereport](https://github.com/codereport) -- Support statically linking CUDA runtime for Java bindings ([#9873](https://github.com/rapidsai/cudf/pull/9873)) [@jlowe](https://github.com/jlowe) -- Add `clang-tidy` to libcudf ([#9860](https://github.com/rapidsai/cudf/pull/9860)) [@codereport](https://github.com/codereport) -- Remove deprecated methods from Java Table class ([#9853](https://github.com/rapidsai/cudf/pull/9853)) [@jlowe](https://github.com/jlowe) -- Add test for map column metadata handling in ORC writer ([#9852](https://github.com/rapidsai/cudf/pull/9852)) [@vuule](https://github.com/vuule) -- Use pandas `to_offset` to parse frequency string in `date_range` ([#9843](https://github.com/rapidsai/cudf/pull/9843)) [@isVoid](https://github.com/isVoid) -- add templated benchmark with fixture ([#9838](https://github.com/rapidsai/cudf/pull/9838)) [@karthikeyann](https://github.com/karthikeyann) -- Use list of column inputs for `apply_boolean_mask` ([#9832](https://github.com/rapidsai/cudf/pull/9832)) [@isVoid](https://github.com/isVoid) -- Added a few more tests for Decimal to String cast ([#9818](https://github.com/rapidsai/cudf/pull/9818)) [@razajafri](https://github.com/razajafri) -- Run doctests. ([#9815](https://github.com/rapidsai/cudf/pull/9815)) [@bdice](https://github.com/bdice) -- Avoid overflow for fixed_point round ([#9809](https://github.com/rapidsai/cudf/pull/9809)) [@sperlingxx](https://github.com/sperlingxx) -- Move `drop_duplicates`, `drop_na`, `_gather`, `take` to IndexFrame and create their `_base_index` counterparts ([#9807](https://github.com/rapidsai/cudf/pull/9807)) [@isVoid](https://github.com/isVoid) -- Use vector factories for host-device copies. 
([#9806](https://github.com/rapidsai/cudf/pull/9806)) [@bdice](https://github.com/bdice) -- Refactor host device macros ([#9797](https://github.com/rapidsai/cudf/pull/9797)) [@vyasr](https://github.com/vyasr) -- Remove unused masked udf cython/c++ code ([#9792](https://github.com/rapidsai/cudf/pull/9792)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Allow custom sort functions for dask-cudf `sort_values` ([#9789](https://github.com/rapidsai/cudf/pull/9789)) [@charlesbluca](https://github.com/charlesbluca) -- Improve build time of libcudf iterator tests ([#9788](https://github.com/rapidsai/cudf/pull/9788)) [@davidwendt](https://github.com/davidwendt) -- Copy Java native dependencies directly into classpath ([#9787](https://github.com/rapidsai/cudf/pull/9787)) [@jlowe](https://github.com/jlowe) -- Add decimal types to cuIO benchmarks ([#9776](https://github.com/rapidsai/cudf/pull/9776)) [@vuule](https://github.com/vuule) -- Pick smallest decimal type with required precision in ORC reader ([#9775](https://github.com/rapidsai/cudf/pull/9775)) [@vuule](https://github.com/vuule) -- Avoid overflow for `fixed_point` `cudf::cast` and performance optimization ([#9772](https://github.com/rapidsai/cudf/pull/9772)) [@codereport](https://github.com/codereport) -- Use CTAD with Thrust function objects ([#9768](https://github.com/rapidsai/cudf/pull/9768)) [@codereport](https://github.com/codereport) -- Refactor TableTest assertion methods to a separate utility class ([#9762](https://github.com/rapidsai/cudf/pull/9762)) [@jlowe](https://github.com/jlowe) -- Use Java classloader to find test resources ([#9760](https://github.com/rapidsai/cudf/pull/9760)) [@jlowe](https://github.com/jlowe) -- Allow cast decimal128 to string and add tests ([#9756](https://github.com/rapidsai/cudf/pull/9756)) [@razajafri](https://github.com/razajafri) -- Load balance optimization for contiguous_split ([#9755](https://github.com/rapidsai/cudf/pull/9755)) [@nvdbaranec](https://github.com/nvdbaranec) -- Consolidate and improve `reset_index` ([#9750](https://github.com/rapidsai/cudf/pull/9750)) [@isVoid](https://github.com/isVoid) -- Update to UCX-Py 0.24 ([#9748](https://github.com/rapidsai/cudf/pull/9748)) [@pentschev](https://github.com/pentschev) -- Skip cufile tests in JNI build script ([#9744](https://github.com/rapidsai/cudf/pull/9744)) [@pxLi](https://github.com/pxLi) -- Enable string to decimal 128 cast ([#9742](https://github.com/rapidsai/cudf/pull/9742)) [@razajafri](https://github.com/razajafri) -- Use stop instead of stop_. 
([#9735](https://github.com/rapidsai/cudf/pull/9735)) [@bdice](https://github.com/bdice) -- Forward-merge branch-21.12 to branch-22.02 ([#9730](https://github.com/rapidsai/cudf/pull/9730)) [@bdice](https://github.com/bdice) -- Improve cmake format script ([#9723](https://github.com/rapidsai/cudf/pull/9723)) [@vyasr](https://github.com/vyasr) -- Use cuFile direct device reads/writes by default in cuIO ([#9722](https://github.com/rapidsai/cudf/pull/9722)) [@vuule](https://github.com/vuule) -- Add directory-partitioned data support to cudf.read_parquet ([#9720](https://github.com/rapidsai/cudf/pull/9720)) [@rjzamora](https://github.com/rjzamora) -- Use stream allocator adaptor for hash join table ([#9704](https://github.com/rapidsai/cudf/pull/9704)) [@PointKernel](https://github.com/PointKernel) -- Update check for inf/nan strings in libcudf float conversion to ignore case ([#9694](https://github.com/rapidsai/cudf/pull/9694)) [@davidwendt](https://github.com/davidwendt) -- Update cudf JNI to 22.02.0-SNAPSHOT ([#9681](https://github.com/rapidsai/cudf/pull/9681)) [@pxLi](https://github.com/pxLi) -- Replace cudf's concurrent_ordered_map with cuco::static_map in semi/anti joins ([#9666](https://github.com/rapidsai/cudf/pull/9666)) [@vyasr](https://github.com/vyasr) -- Some improvements to `parse_decimal` function and bindings for `is_fixed_point` ([#9658](https://github.com/rapidsai/cudf/pull/9658)) [@razajafri](https://github.com/razajafri) -- Add utility to format ninja-log build times ([#9631](https://github.com/rapidsai/cudf/pull/9631)) [@davidwendt](https://github.com/davidwendt) -- Allow runtime has_nulls parameter for row operators ([#9623](https://github.com/rapidsai/cudf/pull/9623)) [@davidwendt](https://github.com/davidwendt) -- Use fsspec.parquet for improved read_parquet performance from remote storage ([#9589](https://github.com/rapidsai/cudf/pull/9589)) [@rjzamora](https://github.com/rjzamora) -- Refactor bit counting APIs, introduce valid/null count functions, and split host/device side code for segmented counts. 
([#9588](https://github.com/rapidsai/cudf/pull/9588)) [@bdice](https://github.com/bdice)
-- Use List of Columns as Input for `drop_nulls`, `gather` and `drop_duplicates` ([#9558](https://github.com/rapidsai/cudf/pull/9558)) [@isVoid](https://github.com/isVoid)
-- Simplify merge internals and reduce overhead ([#9516](https://github.com/rapidsai/cudf/pull/9516)) [@vyasr](https://github.com/vyasr)
-- Add `struct` generation support in datagenerator & fuzz tests ([#9180](https://github.com/rapidsai/cudf/pull/9180)) [@galipremsagar](https://github.com/galipremsagar)
-- Simplify write_csv by removing unnecessary writer/impl classes ([#9089](https://github.com/rapidsai/cudf/pull/9089)) [@cwharris](https://github.com/cwharris)
-
-# cuDF 21.12.00 (9 Dec 2021)
-
-## 🚨 Breaking Changes
-
-- Update `bitmask_and` and `bitmask_or` to return a pair of resulting mask and count of unset bits ([#9616](https://github.com/rapidsai/cudf/pull/9616)) [@PointKernel](https://github.com/PointKernel)
-- Remove sizeof and standardize on memory_usage ([#9544](https://github.com/rapidsai/cudf/pull/9544)) [@vyasr](https://github.com/vyasr)
-- Add support for single-line regex anchors ^/$ in contains_re ([#9482](https://github.com/rapidsai/cudf/pull/9482)) [@davidwendt](https://github.com/davidwendt)
-- Refactor sorting APIs ([#9464](https://github.com/rapidsai/cudf/pull/9464)) [@vyasr](https://github.com/vyasr)
-- Update Java nvcomp JNI bindings to nvcomp 2.x API ([#9384](https://github.com/rapidsai/cudf/pull/9384)) [@jbrennan333](https://github.com/jbrennan333)
-- Support Python UDFs written in terms of rows ([#9343](https://github.com/rapidsai/cudf/pull/9343)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- JNI: Support nested types in ORC writer ([#9334](https://github.com/rapidsai/cudf/pull/9334)) [@firestarman](https://github.com/firestarman)
-- Optionally nullify out-of-bounds indices in segmented_gather(). ([#9318](https://github.com/rapidsai/cudf/pull/9318)) [@mythrocks](https://github.com/mythrocks)
-- Refactor cuIO timestamp processing with `cuda::std::chrono` ([#9278](https://github.com/rapidsai/cudf/pull/9278)) [@PointKernel](https://github.com/PointKernel)
-- Various internal MultiIndex improvements ([#9243](https://github.com/rapidsai/cudf/pull/9243)) [@vyasr](https://github.com/vyasr)
-
-## 🐛 Bug Fixes
-
-- Fix read_parquet bug for bytes input ([#9669](https://github.com/rapidsai/cudf/pull/9669)) [@rjzamora](https://github.com/rjzamora)
-- Use `_gather` internal for `sort_*` ([#9668](https://github.com/rapidsai/cudf/pull/9668)) [@isVoid](https://github.com/isVoid)
-- Fix behavior of equals for non-DataFrame Frames and add tests. ([#9653](https://github.com/rapidsai/cudf/pull/9653)) [@vyasr](https://github.com/vyasr)
-- Dont recompute output size if it is already available ([#9649](https://github.com/rapidsai/cudf/pull/9649)) [@abellina](https://github.com/abellina)
-- Fix read_parquet bug for extended dtypes from remote storage ([#9638](https://github.com/rapidsai/cudf/pull/9638)) [@rjzamora](https://github.com/rjzamora)
-- add const when getting data from a JNI data wrapper ([#9637](https://github.com/rapidsai/cudf/pull/9637)) [@wjxiz1992](https://github.com/wjxiz1992)
-- Fix debrotli issue on CUDA 11.5 ([#9632](https://github.com/rapidsai/cudf/pull/9632)) [@vuule](https://github.com/vuule)
-- Use std::size_t when computing join output size ([#9626](https://github.com/rapidsai/cudf/pull/9626)) [@jlowe](https://github.com/jlowe)
-- Fix `usecols` parameter handling in `dask_cudf.read_csv` ([#9618](https://github.com/rapidsai/cudf/pull/9618)) [@galipremsagar](https://github.com/galipremsagar)
-- Add support for string `'nan', 'inf' & '-inf'` values while type-casting to `float` ([#9613](https://github.com/rapidsai/cudf/pull/9613)) [@galipremsagar](https://github.com/galipremsagar)
-- Avoid passing NativeFileDatasource to pyarrow in read_parquet ([#9608](https://github.com/rapidsai/cudf/pull/9608)) [@rjzamora](https://github.com/rjzamora)
-- Fix test failure with cuda 11.5 in row_bit_count tests. ([#9581](https://github.com/rapidsai/cudf/pull/9581)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Correct _LIBCUDACXX_CUDACC_VER value computation ([#9579](https://github.com/rapidsai/cudf/pull/9579)) [@robertmaynard](https://github.com/robertmaynard)
-- Increase max RLE stream size estimate to avoid potential overflows ([#9568](https://github.com/rapidsai/cudf/pull/9568)) [@vuule](https://github.com/vuule)
-- Fix edge case in tdigest scalar generation for groups containing all nulls. ([#9551](https://github.com/rapidsai/cudf/pull/9551)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix pytests failing in `cuda-11.5` environment ([#9547](https://github.com/rapidsai/cudf/pull/9547)) [@galipremsagar](https://github.com/galipremsagar)
-- compile libnvcomp with PTDS if requested ([#9540](https://github.com/rapidsai/cudf/pull/9540)) [@jbrennan333](https://github.com/jbrennan333)
-- Fix `segmented_gather()` for null LIST rows ([#9537](https://github.com/rapidsai/cudf/pull/9537)) [@mythrocks](https://github.com/mythrocks)
-- Deprecate DataFrame.label_encoding, use private _label_encoding method internally. ([#9535](https://github.com/rapidsai/cudf/pull/9535)) [@bdice](https://github.com/bdice)
-- Fix several test and benchmark issues related to bitmask allocations. ([#9521](https://github.com/rapidsai/cudf/pull/9521)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix for inserting duplicates in groupby result cache ([#9508](https://github.com/rapidsai/cudf/pull/9508)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix mismatched types error in clip() when using non int64 numeric types ([#9498](https://github.com/rapidsai/cudf/pull/9498)) [@davidwendt](https://github.com/davidwendt)
-- Match conda pinnings for style checks (revert part of #9412, #9433). ([#9490](https://github.com/rapidsai/cudf/pull/9490)) [@bdice](https://github.com/bdice)
-- Make sure all dask-cudf supported aggs are handled in `_tree_node_agg` ([#9487](https://github.com/rapidsai/cudf/pull/9487)) [@charlesbluca](https://github.com/charlesbluca)
-- Resolve `hash_columns` `FutureWarning` in `dask_cudf` ([#9481](https://github.com/rapidsai/cudf/pull/9481)) [@pentschev](https://github.com/pentschev)
-- Add fixed point to AllTypes in libcudf unit tests ([#9472](https://github.com/rapidsai/cudf/pull/9472)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix regex handling of embedded null characters ([#9470](https://github.com/rapidsai/cudf/pull/9470)) [@davidwendt](https://github.com/davidwendt)
-- Fix memcheck error in copy-if-else ([#9467](https://github.com/rapidsai/cudf/pull/9467)) [@davidwendt](https://github.com/davidwendt)
-- Fix bug in dask_cudf.read_parquet for index=False ([#9453](https://github.com/rapidsai/cudf/pull/9453)) [@rjzamora](https://github.com/rjzamora)
-- Preserve the decimal scale when creating a default scalar ([#9449](https://github.com/rapidsai/cudf/pull/9449)) [@revans2](https://github.com/revans2)
-- Push down parent nulls when flattening nested columns. ([#9443](https://github.com/rapidsai/cudf/pull/9443)) [@mythrocks](https://github.com/mythrocks)
-- Fix memcheck error in gtest SegmentedGatherTest/GatherSliced ([#9442](https://github.com/rapidsai/cudf/pull/9442)) [@davidwendt](https://github.com/davidwendt)
-- Revert "Fix quantile division / partition handling for dask-cudf sort… ([#9438](https://github.com/rapidsai/cudf/pull/9438)) [@charlesbluca](https://github.com/charlesbluca)
-- Allow int-like objects for the `decimals` argument in `round` ([#9428](https://github.com/rapidsai/cudf/pull/9428)) [@shwina](https://github.com/shwina)
-- Fix stream compaction's `drop_duplicates` API to use stable sort ([#9417](https://github.com/rapidsai/cudf/pull/9417)) [@ttnghia](https://github.com/ttnghia)
-- Skip Comparing Uniform Window Results in Var/std Tests ([#9416](https://github.com/rapidsai/cudf/pull/9416)) [@isVoid](https://github.com/isVoid)
-- Fix `StructColumn.to_pandas` type handling issues ([#9388](https://github.com/rapidsai/cudf/pull/9388)) [@galipremsagar](https://github.com/galipremsagar)
-- Correct issues in the build dir cudf-config.cmake ([#9386](https://github.com/rapidsai/cudf/pull/9386)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix Java table partition test to account for non-deterministic ordering ([#9385](https://github.com/rapidsai/cudf/pull/9385)) [@jlowe](https://github.com/jlowe)
-- Fix timestamp truncation/overflow bugs in orc/parquet ([#9382](https://github.com/rapidsai/cudf/pull/9382)) [@PointKernel](https://github.com/PointKernel)
-- Fix the crash in stats code ([#9368](https://github.com/rapidsai/cudf/pull/9368)) [@devavret](https://github.com/devavret)
-- Make Series.hash_encode results reproducible. ([#9366](https://github.com/rapidsai/cudf/pull/9366)) [@bdice](https://github.com/bdice)
-- Fix libcudf compile warnings on debug 11.4 build ([#9360](https://github.com/rapidsai/cudf/pull/9360)) [@davidwendt](https://github.com/davidwendt)
-- Fail gracefully when compiling python UDFs that attempt to access columns with unsupported dtypes ([#9359](https://github.com/rapidsai/cudf/pull/9359)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Set pass_filenames: false in mypy pre-commit configuration. ([#9349](https://github.com/rapidsai/cudf/pull/9349)) [@bdice](https://github.com/bdice)
-- Fix cudf_assert in cudf::io::orc::gpu::gpuDecodeOrcColumnData ([#9348](https://github.com/rapidsai/cudf/pull/9348)) [@davidwendt](https://github.com/davidwendt)
-- Fix memcheck error in groupby-tdigest get_scalar_minmax ([#9339](https://github.com/rapidsai/cudf/pull/9339)) [@davidwendt](https://github.com/davidwendt)
-- Optimizations for `cudf.concat` when `axis=1` ([#9333](https://github.com/rapidsai/cudf/pull/9333)) [@galipremsagar](https://github.com/galipremsagar)
-- Use f-string in join helper warning message. ([#9325](https://github.com/rapidsai/cudf/pull/9325)) [@bdice](https://github.com/bdice)
-- Avoid casting to list or struct dtypes in dask_cudf.read_parquet ([#9314](https://github.com/rapidsai/cudf/pull/9314)) [@rjzamora](https://github.com/rjzamora)
-- Fix null count in statistics for parquet ([#9303](https://github.com/rapidsai/cudf/pull/9303)) [@devavret](https://github.com/devavret)
-- Potential overflow of `decimal32` when casting to `int64_t` ([#9287](https://github.com/rapidsai/cudf/pull/9287)) [@codereport](https://github.com/codereport)
-- Fix quantile division / partition handling for dask-cudf sort on null dataframes ([#9259](https://github.com/rapidsai/cudf/pull/9259)) [@charlesbluca](https://github.com/charlesbluca)
-- Updating cudf version also updates rapids cmake branch ([#9249](https://github.com/rapidsai/cudf/pull/9249)) [@robertmaynard](https://github.com/robertmaynard)
-- Implement `one_hot_encoding` in libcudf and bind to python ([#9229](https://github.com/rapidsai/cudf/pull/9229)) [@isVoid](https://github.com/isVoid)
-- BUG FIX: CSV Writer ignores the header parameter when no metadata is provided ([#8740](https://github.com/rapidsai/cudf/pull/8740)) [@skirui-source](https://github.com/skirui-source)
-
-## 📖 Documentation
-
-- Update Documentation to use `TYPED_TEST_SUITE` ([#9654](https://github.com/rapidsai/cudf/pull/9654)) [@codereport](https://github.com/codereport)
-- Add dedicated page for `StringHandling` in python docs ([#9624](https://github.com/rapidsai/cudf/pull/9624)) [@galipremsagar](https://github.com/galipremsagar)
-- Update docstring of `DataFrame.merge` ([#9572](https://github.com/rapidsai/cudf/pull/9572)) [@galipremsagar](https://github.com/galipremsagar)
-- Use raw strings to avoid SyntaxErrors in parsed docstrings. ([#9526](https://github.com/rapidsai/cudf/pull/9526)) [@bdice](https://github.com/bdice)
-- Add example to docstrings in `rolling.apply` ([#9522](https://github.com/rapidsai/cudf/pull/9522)) [@isVoid](https://github.com/isVoid)
-- Update help message to escape quotes in ./build.sh --cmake-args. ([#9494](https://github.com/rapidsai/cudf/pull/9494)) [@bdice](https://github.com/bdice)
-- Improve Python docstring formatting. ([#9493](https://github.com/rapidsai/cudf/pull/9493)) [@bdice](https://github.com/bdice)
-- Update table of I/O supported types ([#9476](https://github.com/rapidsai/cudf/pull/9476)) [@vuule](https://github.com/vuule)
-- Document invalid regex patterns as undefined behavior ([#9473](https://github.com/rapidsai/cudf/pull/9473)) [@davidwendt](https://github.com/davidwendt)
-- Miscellaneous documentation fixes to `cudf` ([#9471](https://github.com/rapidsai/cudf/pull/9471)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix many documentation errors in libcudf. ([#9355](https://github.com/rapidsai/cudf/pull/9355)) [@karthikeyann](https://github.com/karthikeyann)
-- Fixing SubwordTokenizer docs issue ([#9354](https://github.com/rapidsai/cudf/pull/9354)) [@mayankanand007](https://github.com/mayankanand007)
-- Improved deprecation warnings. ([#9347](https://github.com/rapidsai/cudf/pull/9347)) [@bdice](https://github.com/bdice)
-- doc reorder mr, stream to stream, mr ([#9308](https://github.com/rapidsai/cudf/pull/9308)) [@karthikeyann](https://github.com/karthikeyann)
-- Deprecate method parameters to DataFrame.join, DataFrame.merge. ([#9291](https://github.com/rapidsai/cudf/pull/9291)) [@bdice](https://github.com/bdice)
-- Added deprecation warning for `.label_encoding()` ([#9289](https://github.com/rapidsai/cudf/pull/9289)) [@mayankanand007](https://github.com/mayankanand007)
-
-## 🚀 New Features
-
-- Enable Series.divide and DataFrame.divide ([#9630](https://github.com/rapidsai/cudf/pull/9630)) [@vyasr](https://github.com/vyasr)
-- Update `bitmask_and` and `bitmask_or` to return a pair of resulting mask and count of unset bits ([#9616](https://github.com/rapidsai/cudf/pull/9616)) [@PointKernel](https://github.com/PointKernel)
-- Add handling of mixed numeric types in `to_dlpack` ([#9585](https://github.com/rapidsai/cudf/pull/9585)) [@galipremsagar](https://github.com/galipremsagar)
-- Support re.Pattern object for pat arg in str.replace ([#9573](https://github.com/rapidsai/cudf/pull/9573)) [@davidwendt](https://github.com/davidwendt)
-- Add JNI for `lists::drop_list_duplicates` with keys-values input column ([#9553](https://github.com/rapidsai/cudf/pull/9553)) [@ttnghia](https://github.com/ttnghia)
-- Support structs column in `min`, `max`, `argmin` and `argmax` groupby aggregate() and scan() ([#9545](https://github.com/rapidsai/cudf/pull/9545)) [@ttnghia](https://github.com/ttnghia)
-- Move libcudacxx to use `rapids_cpm` and use newer versions ([#9539](https://github.com/rapidsai/cudf/pull/9539)) [@robertmaynard](https://github.com/robertmaynard)
-- Add scan min/max support for chrono types to libcudf reduction-scan (not groupby scan) ([#9518](https://github.com/rapidsai/cudf/pull/9518)) [@davidwendt](https://github.com/davidwendt)
-- Support `args=` in `apply` ([#9514](https://github.com/rapidsai/cudf/pull/9514)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add groupby scan min/max support for strings values ([#9502](https://github.com/rapidsai/cudf/pull/9502)) [@davidwendt](https://github.com/davidwendt)
-- Add list output option to character_ngrams() function ([#9499](https://github.com/rapidsai/cudf/pull/9499)) [@davidwendt](https://github.com/davidwendt)
-- More granular column selection in ORC reader ([#9496](https://github.com/rapidsai/cudf/pull/9496)) [@vuule](https://github.com/vuule)
-- add min_periods, ddof to groupby covariance, & correlation aggregation ([#9492](https://github.com/rapidsai/cudf/pull/9492)) [@karthikeyann](https://github.com/karthikeyann)
-- Implement Series.datetime.floor ([#9488](https://github.com/rapidsai/cudf/pull/9488)) [@skirui-source](https://github.com/skirui-source)
-- Enable linting of CMake files using pre-commit ([#9484](https://github.com/rapidsai/cudf/pull/9484)) [@vyasr](https://github.com/vyasr)
-- Add support for single-line regex anchors ^/$ in contains_re ([#9482](https://github.com/rapidsai/cudf/pull/9482)) [@davidwendt](https://github.com/davidwendt)
-- Augment `order_by` to Accept a List of `null_precedence` ([#9455](https://github.com/rapidsai/cudf/pull/9455)) [@isVoid](https://github.com/isVoid)
-- Add format API for list column of strings ([#9454](https://github.com/rapidsai/cudf/pull/9454)) [@davidwendt](https://github.com/davidwendt)
-- Enable Datetime/Timedelta dtypes in Masked UDFs ([#9451](https://github.com/rapidsai/cudf/pull/9451)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add cudf python groupby.diff ([#9446](https://github.com/rapidsai/cudf/pull/9446)) [@karthikeyann](https://github.com/karthikeyann)
-- Implement `lists::stable_sort_lists` for stable sorting of elements within each row of lists column ([#9425](https://github.com/rapidsai/cudf/pull/9425)) [@ttnghia](https://github.com/ttnghia)
-- add ctest memcheck using cuda-sanitizer ([#9414](https://github.com/rapidsai/cudf/pull/9414)) [@karthikeyann](https://github.com/karthikeyann)
-- Support Unary Operations in Masked UDF ([#9409](https://github.com/rapidsai/cudf/pull/9409)) [@isVoid](https://github.com/isVoid)
-- Move Several Series Function to Frame ([#9394](https://github.com/rapidsai/cudf/pull/9394)) [@isVoid](https://github.com/isVoid)
-- MD5 Python hash API ([#9390](https://github.com/rapidsai/cudf/pull/9390)) [@bdice](https://github.com/bdice)
-- Add cudf strings is_title API ([#9380](https://github.com/rapidsai/cudf/pull/9380)) [@davidwendt](https://github.com/davidwendt)
-- Enable casting to int64, uint64, and double in AST code. ([#9379](https://github.com/rapidsai/cudf/pull/9379)) [@vyasr](https://github.com/vyasr)
-- Add support for writing ORC with map columns ([#9369](https://github.com/rapidsai/cudf/pull/9369)) [@vuule](https://github.com/vuule)
-- extract_list_elements() with column_view indices ([#9367](https://github.com/rapidsai/cudf/pull/9367)) [@mythrocks](https://github.com/mythrocks)
-- Reimplement `lists::drop_list_duplicates` for keys-values lists columns ([#9345](https://github.com/rapidsai/cudf/pull/9345)) [@ttnghia](https://github.com/ttnghia)
-- Support Python UDFs written in terms of rows ([#9343](https://github.com/rapidsai/cudf/pull/9343)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- JNI: Support nested types in ORC writer ([#9334](https://github.com/rapidsai/cudf/pull/9334)) [@firestarman](https://github.com/firestarman)
-- Optionally nullify out-of-bounds indices in segmented_gather(). ([#9318](https://github.com/rapidsai/cudf/pull/9318)) [@mythrocks](https://github.com/mythrocks)
-- Add shallow hash function and shallow equality comparison for column_view ([#9312](https://github.com/rapidsai/cudf/pull/9312)) [@karthikeyann](https://github.com/karthikeyann)
-- Add CudaMemoryBuffer for cudaMalloc memory using RMM cuda_memory_resource ([#9311](https://github.com/rapidsai/cudf/pull/9311)) [@rongou](https://github.com/rongou)
-- Add parameters to control row index stride and stripe size in ORC writer ([#9310](https://github.com/rapidsai/cudf/pull/9310)) [@vuule](https://github.com/vuule)
-- Add `na_position` param to dask-cudf `sort_values` ([#9264](https://github.com/rapidsai/cudf/pull/9264)) [@charlesbluca](https://github.com/charlesbluca)
-- Add `ascending` parameter for dask-cudf `sort_values` ([#9250](https://github.com/rapidsai/cudf/pull/9250)) [@charlesbluca](https://github.com/charlesbluca)
-- New array conversion methods ([#9236](https://github.com/rapidsai/cudf/pull/9236)) [@vyasr](https://github.com/vyasr)
-- Series `apply` method backed by masked UDFs ([#9217](https://github.com/rapidsai/cudf/pull/9217)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Grouping by frequency and resampling ([#9178](https://github.com/rapidsai/cudf/pull/9178)) [@shwina](https://github.com/shwina)
-- Pure-python masked UDFs ([#9174](https://github.com/rapidsai/cudf/pull/9174)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add Covariance, Pearson correlation for sort groupby (libcudf) ([#9154](https://github.com/rapidsai/cudf/pull/9154)) [@karthikeyann](https://github.com/karthikeyann)
-- Add `calendrical_month_sequence` in c++ and `date_range` in python ([#8886](https://github.com/rapidsai/cudf/pull/8886)) [@shwina](https://github.com/shwina)
-
-## 🛠️ Improvements
-
-- Followup to PR 9088 comments ([#9659](https://github.com/rapidsai/cudf/pull/9659)) [@cwharris](https://github.com/cwharris)
-- Update cuCollections to version that supports installed libcudacxx ([#9633](https://github.com/rapidsai/cudf/pull/9633)) [@robertmaynard](https://github.com/robertmaynard)
-- Add `11.5` dev.yml to `cudf` ([#9617](https://github.com/rapidsai/cudf/pull/9617)) [@galipremsagar](https://github.com/galipremsagar)
-- Add `xfail` for parquet reader `11.5` issue ([#9612](https://github.com/rapidsai/cudf/pull/9612)) [@galipremsagar](https://github.com/galipremsagar)
-- remove deprecated Rmm.initialize method ([#9607](https://github.com/rapidsai/cudf/pull/9607)) [@rongou](https://github.com/rongou)
-- Use HostColumnVectorCore for child columns in JCudfSerialization.unpackHostColumnVectors ([#9596](https://github.com/rapidsai/cudf/pull/9596)) [@sperlingxx](https://github.com/sperlingxx)
-- Set RMM pool to a fixed size in JNI ([#9583](https://github.com/rapidsai/cudf/pull/9583)) [@rongou](https://github.com/rongou)
-- Use nvCOMP for Snappy compression/decompression ([#9582](https://github.com/rapidsai/cudf/pull/9582)) [@vuule](https://github.com/vuule)
-- Build CUDA version agnostic packages for dask-cudf ([#9578](https://github.com/rapidsai/cudf/pull/9578)) [@Ethyling](https://github.com/Ethyling)
-- Fixed tests warning: "TYPED_TEST_CASE is deprecated, please use TYPED_TEST_SUITE" ([#9574](https://github.com/rapidsai/cudf/pull/9574)) [@ttnghia](https://github.com/ttnghia)
-- Enable CMake format in CI and fix style ([#9570](https://github.com/rapidsai/cudf/pull/9570)) [@vyasr](https://github.com/vyasr)
-- Add NVTX Start/End Ranges to JNI ([#9563](https://github.com/rapidsai/cudf/pull/9563)) [@abellina](https://github.com/abellina)
-- Add librdkafka and python-confluent-kafka to dev conda environments s… ([#9562](https://github.com/rapidsai/cudf/pull/9562)) [@jdye64](https://github.com/jdye64)
-- Add offsets_begin/end() to strings_column_view ([#9559](https://github.com/rapidsai/cudf/pull/9559)) [@davidwendt](https://github.com/davidwendt)
-- remove alignment options for RMM jni ([#9550](https://github.com/rapidsai/cudf/pull/9550)) [@rongou](https://github.com/rongou)
-- Add axis parameter passthrough to `DataFrame` and `Series` take for pandas API compatibility ([#9549](https://github.com/rapidsai/cudf/pull/9549)) [@dantegd](https://github.com/dantegd)
-- Remove sizeof and standardize on memory_usage ([#9544](https://github.com/rapidsai/cudf/pull/9544)) [@vyasr](https://github.com/vyasr)
-- Adds cudaProfilerStart/cudaProfilerStop in JNI api ([#9543](https://github.com/rapidsai/cudf/pull/9543)) [@abellina](https://github.com/abellina)
-- Generalize comparison binary operations ([#9542](https://github.com/rapidsai/cudf/pull/9542)) [@vyasr](https://github.com/vyasr)
-- Expose APIs to wrap CUDA or RMM allocations with a Java device buffer instance ([#9538](https://github.com/rapidsai/cudf/pull/9538)) [@jlowe](https://github.com/jlowe)
-- Add scan sum support for duration types to libcudf ([#9536](https://github.com/rapidsai/cudf/pull/9536)) [@davidwendt](https://github.com/davidwendt)
-- Force inlining to improve AST performance ([#9530](https://github.com/rapidsai/cudf/pull/9530)) [@vyasr](https://github.com/vyasr)
-- Generalize some more indexed frame methods ([#9529](https://github.com/rapidsai/cudf/pull/9529)) [@vyasr](https://github.com/vyasr)
-- Add Java bindings for rolling window stddev aggregation ([#9527](https://github.com/rapidsai/cudf/pull/9527)) [@razajafri](https://github.com/razajafri)
-- catch rmm::out_of_memory exceptions in jni ([#9525](https://github.com/rapidsai/cudf/pull/9525)) [@rongou](https://github.com/rongou)
-- Add an overload of `make_empty_column` with `type_id` parameter ([#9524](https://github.com/rapidsai/cudf/pull/9524)) [@ttnghia](https://github.com/ttnghia)
-- Accelerate conditional inner joins with larger right tables ([#9523](https://github.com/rapidsai/cudf/pull/9523)) [@vyasr](https://github.com/vyasr)
-- Initial pass of generalizing `decimal` support in `cudf` python layer ([#9517](https://github.com/rapidsai/cudf/pull/9517)) [@galipremsagar](https://github.com/galipremsagar)
-- Cleanup for flattening nested columns ([#9509](https://github.com/rapidsai/cudf/pull/9509)) [@rwlee](https://github.com/rwlee)
-- Enable running tests using RMM arena and async memory resources ([#9506](https://github.com/rapidsai/cudf/pull/9506)) [@rongou](https://github.com/rongou)
-- Remove dependency on six. ([#9495](https://github.com/rapidsai/cudf/pull/9495)) [@bdice](https://github.com/bdice)
-- Cleanup some libcudf strings gtests ([#9489](https://github.com/rapidsai/cudf/pull/9489)) [@davidwendt](https://github.com/davidwendt)
-- Rename strings/array_tests.cu to strings/array_tests.cpp ([#9480](https://github.com/rapidsai/cudf/pull/9480)) [@davidwendt](https://github.com/davidwendt)
-- Refactor sorting APIs ([#9464](https://github.com/rapidsai/cudf/pull/9464)) [@vyasr](https://github.com/vyasr)
-- Implement DataFrame.hash_values, deprecate DataFrame.hash_columns. ([#9458](https://github.com/rapidsai/cudf/pull/9458)) [@bdice](https://github.com/bdice)
-- Deprecate Series.hash_encode. ([#9457](https://github.com/rapidsai/cudf/pull/9457)) [@bdice](https://github.com/bdice)
-- Update `conda` recipes for Enhanced Compatibility effort ([#9456](https://github.com/rapidsai/cudf/pull/9456)) [@ajschmidt8](https://github.com/ajschmidt8)
-- Small clean up to simplify column selection code in ORC reader ([#9444](https://github.com/rapidsai/cudf/pull/9444)) [@vuule](https://github.com/vuule)
-- add missing stream to scalar.is_valid() wherever stream is available ([#9436](https://github.com/rapidsai/cudf/pull/9436)) [@karthikeyann](https://github.com/karthikeyann)
-- Adds Deprecation Warnings to `one_hot_encoding` and Implement `get_dummies` with Cython API ([#9435](https://github.com/rapidsai/cudf/pull/9435)) [@isVoid](https://github.com/isVoid)
-- Update pre-commit hook URLs. ([#9433](https://github.com/rapidsai/cudf/pull/9433)) [@bdice](https://github.com/bdice)
-- Remove pyarrow import in `dask_cudf.io.parquet` ([#9429](https://github.com/rapidsai/cudf/pull/9429)) [@charlesbluca](https://github.com/charlesbluca)
-- Miscellaneous improvements for UDFs ([#9422](https://github.com/rapidsai/cudf/pull/9422)) [@isVoid](https://github.com/isVoid)
-- Use pre-commit for CI ([#9412](https://github.com/rapidsai/cudf/pull/9412)) [@vyasr](https://github.com/vyasr)
-- Update to UCX-Py 0.23 ([#9407](https://github.com/rapidsai/cudf/pull/9407)) [@pentschev](https://github.com/pentschev)
-- Expose OutOfBoundsPolicy in JNI for Table.gather ([#9406](https://github.com/rapidsai/cudf/pull/9406)) [@abellina](https://github.com/abellina)
-- Improvements to tdigest aggregation code. ([#9403](https://github.com/rapidsai/cudf/pull/9403)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Add Java API to deserialize a table to host columns ([#9402](https://github.com/rapidsai/cudf/pull/9402)) [@jlowe](https://github.com/jlowe)
-- Frame copy to use __class__ instead of type() ([#9397](https://github.com/rapidsai/cudf/pull/9397)) [@madsbk](https://github.com/madsbk)
-- Change all DeprecationWarnings to FutureWarning. ([#9392](https://github.com/rapidsai/cudf/pull/9392)) [@bdice](https://github.com/bdice)
-- Update Java nvcomp JNI bindings to nvcomp 2.x API ([#9384](https://github.com/rapidsai/cudf/pull/9384)) [@jbrennan333](https://github.com/jbrennan333)
-- Add IndexedFrame class and move SingleColumnFrame to a separate module ([#9378](https://github.com/rapidsai/cudf/pull/9378)) [@vyasr](https://github.com/vyasr)
-- Support Arrow NativeFile and PythonFile for remote ORC storage ([#9377](https://github.com/rapidsai/cudf/pull/9377)) [@rjzamora](https://github.com/rjzamora)
-- Use Arrow PythonFile for remote CSV storage ([#9376](https://github.com/rapidsai/cudf/pull/9376)) [@rjzamora](https://github.com/rjzamora)
-- Add multi-threaded writing to GDS writes ([#9372](https://github.com/rapidsai/cudf/pull/9372)) [@devavret](https://github.com/devavret)
-- Miscellaneous column cleanup ([#9370](https://github.com/rapidsai/cudf/pull/9370)) [@vyasr](https://github.com/vyasr)
-- Use single kernel to extract all groups in cudf::strings::extract ([#9358](https://github.com/rapidsai/cudf/pull/9358)) [@davidwendt](https://github.com/davidwendt)
-- Consolidate binary ops into `Frame` ([#9357](https://github.com/rapidsai/cudf/pull/9357)) [@isVoid](https://github.com/isVoid)
-- Move rank scan implementations from scan_inclusive.cu to rank_scan.cu ([#9351](https://github.com/rapidsai/cudf/pull/9351)) [@davidwendt](https://github.com/davidwendt)
-- Remove usage of deprecated thrust::host_space_tag. ([#9350](https://github.com/rapidsai/cudf/pull/9350)) [@bdice](https://github.com/bdice)
-- Use Default Memory Resource for Temporaries in `reduction.cpp` ([#9344](https://github.com/rapidsai/cudf/pull/9344)) [@isVoid](https://github.com/isVoid)
-- Fix Cython compilation warnings. ([#9327](https://github.com/rapidsai/cudf/pull/9327)) [@bdice](https://github.com/bdice)
-- Fix some unused variable warnings in libcudf ([#9326](https://github.com/rapidsai/cudf/pull/9326)) [@davidwendt](https://github.com/davidwendt)
-- Use optional-iterator for copy-if-else kernel ([#9324](https://github.com/rapidsai/cudf/pull/9324)) [@davidwendt](https://github.com/davidwendt)
-- Remove Table class ([#9315](https://github.com/rapidsai/cudf/pull/9315)) [@vyasr](https://github.com/vyasr)
-- Unpin `dask` and `distributed` in CI ([#9307](https://github.com/rapidsai/cudf/pull/9307)) [@galipremsagar](https://github.com/galipremsagar)
-- Add optional-iterator support to indexalator ([#9306](https://github.com/rapidsai/cudf/pull/9306)) [@davidwendt](https://github.com/davidwendt)
-- Consolidate more methods in Frame ([#9305](https://github.com/rapidsai/cudf/pull/9305)) [@vyasr](https://github.com/vyasr)
-- Add Arrow-NativeFile and PythonFile support to read_parquet and read_csv in cudf ([#9304](https://github.com/rapidsai/cudf/pull/9304)) [@rjzamora](https://github.com/rjzamora)
-- Pin mypy in .pre-commit-config.yaml to match conda environment pinning. ([#9300](https://github.com/rapidsai/cudf/pull/9300)) [@bdice](https://github.com/bdice)
-- Use gather.hpp when gather-map exists in device memory ([#9299](https://github.com/rapidsai/cudf/pull/9299)) [@davidwendt](https://github.com/davidwendt)
-- Fix Automerger for `Branch-21.12` from `branch-21.10` ([#9285](https://github.com/rapidsai/cudf/pull/9285)) [@galipremsagar](https://github.com/galipremsagar)
-- Refactor cuIO timestamp processing with `cuda::std::chrono` ([#9278](https://github.com/rapidsai/cudf/pull/9278)) [@PointKernel](https://github.com/PointKernel)
-- Change strings copy_if_else to use optional-iterator instead of pair-iterator ([#9266](https://github.com/rapidsai/cudf/pull/9266)) [@davidwendt](https://github.com/davidwendt)
-- Update cudf java bindings to 21.12.0-SNAPSHOT ([#9248](https://github.com/rapidsai/cudf/pull/9248)) [@pxLi](https://github.com/pxLi)
-- Various internal MultiIndex improvements ([#9243](https://github.com/rapidsai/cudf/pull/9243)) [@vyasr](https://github.com/vyasr)
-- Add detail interface for `split` and `slice(table_view)`, refactors both function with `host_span` ([#9226](https://github.com/rapidsai/cudf/pull/9226)) [@isVoid](https://github.com/isVoid)
-- Refactor MD5 implementation. ([#9212](https://github.com/rapidsai/cudf/pull/9212)) [@bdice](https://github.com/bdice)
-- Update groupby result_cache to allow sharing intermediate results based on column_view instead of requests. ([#9195](https://github.com/rapidsai/cudf/pull/9195)) [@karthikeyann](https://github.com/karthikeyann)
-- Use nvcomp's snappy decompressor in avro reader ([#9181](https://github.com/rapidsai/cudf/pull/9181)) [@devavret](https://github.com/devavret)
-- Add `isocalendar` API support ([#9169](https://github.com/rapidsai/cudf/pull/9169)) [@marlenezw](https://github.com/marlenezw)
-- Simplify read_json by removing unnecessary reader/impl classes ([#9088](https://github.com/rapidsai/cudf/pull/9088)) [@cwharris](https://github.com/cwharris)
-- Simplify read_csv by removing unnecessary reader/impl classes ([#9041](https://github.com/rapidsai/cudf/pull/9041)) [@cwharris](https://github.com/cwharris)
-- Refactor hash join with cuCollections multimap ([#8934](https://github.com/rapidsai/cudf/pull/8934)) [@PointKernel](https://github.com/PointKernel)
-
-# cuDF 21.10.00 (7 Oct 2021)
-
-## 🚨 Breaking Changes
-
-- Remove Cython APIs for table view generation ([#9199](https://github.com/rapidsai/cudf/pull/9199)) [@vyasr](https://github.com/vyasr)
-- Upgrade `pandas` version in `cudf` ([#9147](https://github.com/rapidsai/cudf/pull/9147)) [@galipremsagar](https://github.com/galipremsagar)
-- Make AST operators nullable ([#9096](https://github.com/rapidsai/cudf/pull/9096)) [@vyasr](https://github.com/vyasr)
-- Remove the option to pass data types as strings to `read_csv` and `read_json` ([#9079](https://github.com/rapidsai/cudf/pull/9079)) [@vuule](https://github.com/vuule)
-- Update JNI java CSV APIs to not use deprecated API ([#9066](https://github.com/rapidsai/cudf/pull/9066)) [@revans2](https://github.com/revans2)
-- Support additional format specifiers in from_timestamps ([#9047](https://github.com/rapidsai/cudf/pull/9047)) [@davidwendt](https://github.com/davidwendt)
-- Expose expression base class publicly and simplify public AST API ([#9045](https://github.com/rapidsai/cudf/pull/9045)) [@vyasr](https://github.com/vyasr)
-- Add support for struct type in ORC writer ([#9025](https://github.com/rapidsai/cudf/pull/9025)) [@vuule](https://github.com/vuule)
-- Remove aliases of various api.types APIs from utils.dtypes. ([#9011](https://github.com/rapidsai/cudf/pull/9011)) [@vyasr](https://github.com/vyasr)
-- Java bindings for conditional join output sizes ([#9002](https://github.com/rapidsai/cudf/pull/9002)) [@jlowe](https://github.com/jlowe)
-- Move compute_column API out of ast namespace ([#8957](https://github.com/rapidsai/cudf/pull/8957)) [@vyasr](https://github.com/vyasr)
-- `cudf.dtype` function ([#8949](https://github.com/rapidsai/cudf/pull/8949)) [@shwina](https://github.com/shwina)
-- Refactor Frame reductions ([#8944](https://github.com/rapidsai/cudf/pull/8944)) [@vyasr](https://github.com/vyasr)
-- Add nested column selection to parquet reader ([#8933](https://github.com/rapidsai/cudf/pull/8933)) [@devavret](https://github.com/devavret)
-- JNI Aggregation Type Changes ([#8919](https://github.com/rapidsai/cudf/pull/8919)) [@revans2](https://github.com/revans2)
-- Add groupby_aggregation and groupby_scan_aggregation classes and force their usage. ([#8906](https://github.com/rapidsai/cudf/pull/8906)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Expand CSV and JSON reader APIs to accept `dtypes` as a vector or map of `data_type` objects ([#8856](https://github.com/rapidsai/cudf/pull/8856)) [@vuule](https://github.com/vuule)
-- Change cudf docs theme to pydata theme ([#8746](https://github.com/rapidsai/cudf/pull/8746)) [@galipremsagar](https://github.com/galipremsagar)
-- Enable compiled binary ops in libcudf, python and java ([#8741](https://github.com/rapidsai/cudf/pull/8741)) [@karthikeyann](https://github.com/karthikeyann)
-- Make groupby transform-like op order match original data order ([#8720](https://github.com/rapidsai/cudf/pull/8720)) [@isVoid](https://github.com/isVoid)
-
-## 🐛 Bug Fixes
-
-- `fixed_point` `cudf::groupby` for `mean` aggregation ([#9296](https://github.com/rapidsai/cudf/pull/9296)) [@codereport](https://github.com/codereport)
-- Fix `interleave_columns` when the input string lists column having empty child column ([#9292](https://github.com/rapidsai/cudf/pull/9292)) [@ttnghia](https://github.com/ttnghia)
-- Update nvcomp to include fixes for installation of headers ([#9276](https://github.com/rapidsai/cudf/pull/9276)) [@devavret](https://github.com/devavret)
-- Fix Java column leak in testParquetWriteMap ([#9271](https://github.com/rapidsai/cudf/pull/9271)) [@jlowe](https://github.com/jlowe)
-- Fix call to thrust::reduce_by_key in argmin/argmax libcudf groupby ([#9263](https://github.com/rapidsai/cudf/pull/9263)) [@davidwendt](https://github.com/davidwendt)
-- Fixing empty input to getMapValue crashing ([#9262](https://github.com/rapidsai/cudf/pull/9262)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Fix duplicate names issue in `MultiIndex.deserialize ` ([#9258](https://github.com/rapidsai/cudf/pull/9258)) [@galipremsagar](https://github.com/galipremsagar)
-- `Dataframe.sort_index` optimizations ([#9238](https://github.com/rapidsai/cudf/pull/9238)) [@galipremsagar](https://github.com/galipremsagar)
-- Temporarily disabling problematic test in parquet writer ([#9230](https://github.com/rapidsai/cudf/pull/9230)) [@devavret](https://github.com/devavret)
-- Explicitly disable groupby on unsupported key types. ([#9227](https://github.com/rapidsai/cudf/pull/9227)) [@mythrocks](https://github.com/mythrocks)
-- Fix `gather` for sliced input structs column ([#9218](https://github.com/rapidsai/cudf/pull/9218)) [@ttnghia](https://github.com/ttnghia)
-- Fix JNI code for left semi and anti joins ([#9207](https://github.com/rapidsai/cudf/pull/9207)) [@jlowe](https://github.com/jlowe)
-- Only install thrust when using a non 'system' version ([#9206](https://github.com/rapidsai/cudf/pull/9206)) [@robertmaynard](https://github.com/robertmaynard)
-- Remove zlib from libcudf public CMake dependencies ([#9204](https://github.com/rapidsai/cudf/pull/9204)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix out-of-bounds memory read in orc gpuEncodeOrcColumnData ([#9196](https://github.com/rapidsai/cudf/pull/9196)) [@davidwendt](https://github.com/davidwendt)
-- Fix `gather()` for `STRUCT` inputs with no nulls in members. ([#9194](https://github.com/rapidsai/cudf/pull/9194)) [@mythrocks](https://github.com/mythrocks)
-- get_cucollections properly uses rapids_cpm_find ([#9189](https://github.com/rapidsai/cudf/pull/9189)) [@robertmaynard](https://github.com/robertmaynard)
-- rapids-export correctly reference build code block and doc strings ([#9186](https://github.com/rapidsai/cudf/pull/9186)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix logic while parsing the sum statistic for numerical orc columns ([#9183](https://github.com/rapidsai/cudf/pull/9183)) [@ayushdg](https://github.com/ayushdg)
-- Add handling for nulls in `dask_cudf.sorting.quantile_divisions` ([#9171](https://github.com/rapidsai/cudf/pull/9171)) [@charlesbluca](https://github.com/charlesbluca)
-- Approximate overflow detection in ORC statistics ([#9163](https://github.com/rapidsai/cudf/pull/9163)) [@vuule](https://github.com/vuule)
-- Use decimal precision metadata when reading from parquet files ([#9162](https://github.com/rapidsai/cudf/pull/9162)) [@shwina](https://github.com/shwina)
-- Fix variable name in Java build script ([#9161](https://github.com/rapidsai/cudf/pull/9161)) [@jlowe](https://github.com/jlowe)
-- Import rapids-cmake modules using the correct cmake variable. ([#9149](https://github.com/rapidsai/cudf/pull/9149)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix conditional joins with empty left table ([#9146](https://github.com/rapidsai/cudf/pull/9146)) [@vyasr](https://github.com/vyasr)
-- Fix joining on indexes with duplicate level names ([#9137](https://github.com/rapidsai/cudf/pull/9137)) [@shwina](https://github.com/shwina)
-- Fixes missing child column name in dtype while reading ORC file. ([#9134](https://github.com/rapidsai/cudf/pull/9134)) [@rgsl888prabhu](https://github.com/rgsl888prabhu)
-- Apply type metadata after column is slice-copied ([#9131](https://github.com/rapidsai/cudf/pull/9131)) [@isVoid](https://github.com/isVoid)
-- Fix a bug: inner_join_size return zero if build table is empty ([#9128](https://github.com/rapidsai/cudf/pull/9128)) [@PointKernel](https://github.com/PointKernel)
-- Fix multi hive-partition parquet reading in dask-cudf ([#9122](https://github.com/rapidsai/cudf/pull/9122)) [@rjzamora](https://github.com/rjzamora)
-- Support null literals in expressions ([#9117](https://github.com/rapidsai/cudf/pull/9117)) [@vyasr](https://github.com/vyasr)
-- Fix cudf::hash_join output size for struct joins ([#9107](https://github.com/rapidsai/cudf/pull/9107)) [@jlowe](https://github.com/jlowe)
-- Import fix ([#9104](https://github.com/rapidsai/cudf/pull/9104)) [@shwina](https://github.com/shwina)
-- Fix cudf::strings::is_fixed_point checking of overflow for decimal32 ([#9093](https://github.com/rapidsai/cudf/pull/9093)) [@davidwendt](https://github.com/davidwendt)
-- Fix branch_stack calculation in `row_bit_count()` ([#9076](https://github.com/rapidsai/cudf/pull/9076)) [@mythrocks](https://github.com/mythrocks)
-- Fetch rapids-cmake to work around cuCollection cmake issue ([#9075](https://github.com/rapidsai/cudf/pull/9075)) [@jlowe](https://github.com/jlowe)
-- Fix compilation errors in groupby benchmarks. ([#9072](https://github.com/rapidsai/cudf/pull/9072)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Preserve float16 upscaling ([#9069](https://github.com/rapidsai/cudf/pull/9069)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix memcheck read error in libcudf contiguous_split ([#9067](https://github.com/rapidsai/cudf/pull/9067)) [@davidwendt](https://github.com/davidwendt)
-- Add support for reading ORC file with no row group index ([#9060](https://github.com/rapidsai/cudf/pull/9060)) [@rgsl888prabhu](https://github.com/rgsl888prabhu)
-- Various multiindex related fixes ([#9036](https://github.com/rapidsai/cudf/pull/9036)) [@shwina](https://github.com/shwina)
-- Avoid rebuilding cython in build.sh ([#9034](https://github.com/rapidsai/cudf/pull/9034)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add support for percentile dispatch in `dask_cudf` ([#9031](https://github.com/rapidsai/cudf/pull/9031)) [@galipremsagar](https://github.com/galipremsagar)
-- cudf resolve nvcc 11.0 compiler crashes during codegen ([#9028](https://github.com/rapidsai/cudf/pull/9028)) [@robertmaynard](https://github.com/robertmaynard)
-- Fetch correct grouping keys `agg` of dask groupby ([#9022](https://github.com/rapidsai/cudf/pull/9022)) [@galipremsagar](https://github.com/galipremsagar)
-- Allow `where()` to work with a Series and `other=cudf.NA` ([#9019](https://github.com/rapidsai/cudf/pull/9019)) [@sarahyurick](https://github.com/sarahyurick)
-- Use correct index when returning Series from `GroupBy.apply()` ([#9016](https://github.com/rapidsai/cudf/pull/9016)) [@charlesbluca](https://github.com/charlesbluca)
-- Fix `Dataframe` indexer setitem when array is passed ([#9006](https://github.com/rapidsai/cudf/pull/9006)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix ORC reading of files with struct columns that have null values ([#9005](https://github.com/rapidsai/cudf/pull/9005)) [@vuule](https://github.com/vuule)
-- Ensure JNI native libraries load when CompiledExpression loads ([#8997](https://github.com/rapidsai/cudf/pull/8997)) [@jlowe](https://github.com/jlowe)
-- Fix memory read error in get_dremel_data in page_enc.cu ([#8995](https://github.com/rapidsai/cudf/pull/8995)) [@davidwendt](https://github.com/davidwendt)
-- Fix memory write error in get_list_child_to_list_row_mapping utility ([#8994](https://github.com/rapidsai/cudf/pull/8994)) [@davidwendt](https://github.com/davidwendt)
-- Fix debug compile error for csv_test.cpp ([#8981](https://github.com/rapidsai/cudf/pull/8981)) [@davidwendt](https://github.com/davidwendt)
-- Fix memory read/write error in concatenate_lists_ignore_null ([#8978](https://github.com/rapidsai/cudf/pull/8978)) [@davidwendt](https://github.com/davidwendt)
-- Fix concatenation of `cudf.RangeIndex` ([#8970](https://github.com/rapidsai/cudf/pull/8970)) [@galipremsagar](https://github.com/galipremsagar)
-- Java conditional joins should not require matching column counts ([#8955](https://github.com/rapidsai/cudf/pull/8955)) [@jlowe](https://github.com/jlowe)
-- Fix concatenate empty structs ([#8947](https://github.com/rapidsai/cudf/pull/8947)) [@sperlingxx](https://github.com/sperlingxx)
-- Fix cuda-memcheck errors for some libcudf functions ([#8941](https://github.com/rapidsai/cudf/pull/8941)) [@davidwendt](https://github.com/davidwendt)
-- Apply series name to result of `SeriesGroupby.apply()` ([#8939](https://github.com/rapidsai/cudf/pull/8939)) [@charlesbluca](https://github.com/charlesbluca)
-- `cdef packed_columns` as `cppclass` instead of `struct` ([#8936](https://github.com/rapidsai/cudf/pull/8936)) [@charlesbluca](https://github.com/charlesbluca)
-- Inserting a `cudf.NA` into a DataFrame ([#8923](https://github.com/rapidsai/cudf/pull/8923)) [@sarahyurick](https://github.com/sarahyurick)
-- Support casting with Pandas dtype aliases ([#8920](https://github.com/rapidsai/cudf/pull/8920)) [@sarahyurick](https://github.com/sarahyurick)
-- Allow `sort_values` to accept same `kind` values as Pandas ([#8912](https://github.com/rapidsai/cudf/pull/8912)) [@sarahyurick](https://github.com/sarahyurick)
-- Enable casting to pandas nullable dtypes ([#8889](https://github.com/rapidsai/cudf/pull/8889)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Fix libcudf memory errors ([#8884](https://github.com/rapidsai/cudf/pull/8884)) [@karthikeyann](https://github.com/karthikeyann)
-- Throw KeyError when accessing field from struct with nonexistent key ([#8880](https://github.com/rapidsai/cudf/pull/8880)) [@NV-jpt](https://github.com/NV-jpt)
-- replace auto with auto& ref for cast<&> ([#8866](https://github.com/rapidsai/cudf/pull/8866)) [@karthikeyann](https://github.com/karthikeyann)
-- Add missing include<optional> in binops ([#8864](https://github.com/rapidsai/cudf/pull/8864)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix `select_dtypes` to work when non-class dtypes present in dataframe ([#8849](https://github.com/rapidsai/cudf/pull/8849)) [@sarahyurick](https://github.com/sarahyurick)
-- Re-enable JSON tests ([#8843](https://github.com/rapidsai/cudf/pull/8843)) [@vuule](https://github.com/vuule)
-- Support header with embedded delimiter in csv writer ([#8798](https://github.com/rapidsai/cudf/pull/8798)) [@davidwendt](https://github.com/davidwendt)
-
-## 📖 Documentation
-
-- Add IO docs page in `cudf` documentation ([#9145](https://github.com/rapidsai/cudf/pull/9145)) [@galipremsagar](https://github.com/galipremsagar)
-- use correct namespace in cuio code examples ([#9037](https://github.com/rapidsai/cudf/pull/9037)) [@cwharris](https://github.com/cwharris)
-- Restructuring `Contributing doc` ([#9026](https://github.com/rapidsai/cudf/pull/9026)) [@iskode](https://github.com/iskode)
-- Update stable version in readme ([#9008](https://github.com/rapidsai/cudf/pull/9008)) [@galipremsagar](https://github.com/galipremsagar)
-- Add spans and more include guidelines to libcudf developer guide ([#8931](https://github.com/rapidsai/cudf/pull/8931)) [@harrism](https://github.com/harrism)
-- Update Java build instructions to mention Arrow S3 and Docker ([#8867](https://github.com/rapidsai/cudf/pull/8867)) [@jlowe](https://github.com/jlowe)
-- List GDS-enabled formats in the docs ([#8805](https://github.com/rapidsai/cudf/pull/8805)) [@vuule](https://github.com/vuule)
-- Change cudf docs theme to pydata theme ([#8746](https://github.com/rapidsai/cudf/pull/8746)) [@galipremsagar](https://github.com/galipremsagar)
-
-## 🚀 New Features
-
-- Revert "Add shallow hash function and shallow equality comparison for column_view ([#9185](https://github.com/rapidsai/cudf/pull/9185))" ([#9283](https://github.com/rapidsai/cudf/pull/9283)) [@karthikeyann](https://github.com/karthikeyann)
-- Align `DataFrame.apply` signature with pandas ([#9275](https://github.com/rapidsai/cudf/pull/9275)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add struct type support for `drop_list_duplicates` ([#9202](https://github.com/rapidsai/cudf/pull/9202)) [@ttnghia](https://github.com/ttnghia)
-- support CUDA async memory resource in JNI ([#9201](https://github.com/rapidsai/cudf/pull/9201)) [@rongou](https://github.com/rongou)
-- Add shallow hash function and shallow equality comparison for column_view ([#9185](https://github.com/rapidsai/cudf/pull/9185)) [@karthikeyann](https://github.com/karthikeyann)
-- Superimpose null masks for STRUCT columns. ([#9144](https://github.com/rapidsai/cudf/pull/9144)) [@mythrocks](https://github.com/mythrocks)
-- Implemented bindings for `ceil` timestamp operation ([#9141](https://github.com/rapidsai/cudf/pull/9141)) [@shaneding](https://github.com/shaneding)
-- Adding MAP type support for ORC Reader ([#9132](https://github.com/rapidsai/cudf/pull/9132)) [@rgsl888prabhu](https://github.com/rgsl888prabhu)
-- Implement `interleave_columns` for lists with arbitrary nested type ([#9130](https://github.com/rapidsai/cudf/pull/9130)) [@ttnghia](https://github.com/ttnghia)
-- Add python bindings to fixed-size window and groupby `rolling.var`, `rolling.std` ([#9097](https://github.com/rapidsai/cudf/pull/9097)) [@isVoid](https://github.com/isVoid)
-- Make AST operators nullable ([#9096](https://github.com/rapidsai/cudf/pull/9096)) [@vyasr](https://github.com/vyasr)
-- Java bindings for approx_percentile ([#9094](https://github.com/rapidsai/cudf/pull/9094)) [@andygrove](https://github.com/andygrove)
-- Add `dseries.struct.explode` ([#9086](https://github.com/rapidsai/cudf/pull/9086)) [@isVoid](https://github.com/isVoid)
-- Add support for BaseIndexer in Rolling APIs ([#9085](https://github.com/rapidsai/cudf/pull/9085)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove the option to pass data types as strings to `read_csv` and `read_json` ([#9079](https://github.com/rapidsai/cudf/pull/9079)) [@vuule](https://github.com/vuule)
-- Add handling for nested dicts in dask-cudf groupby ([#9054](https://github.com/rapidsai/cudf/pull/9054)) [@charlesbluca](https://github.com/charlesbluca)
-- Added Series.dt.is_quarter_start and Series.dt.is_quarter_end ([#9046](https://github.com/rapidsai/cudf/pull/9046)) [@TravisHester](https://github.com/TravisHester)
-- Support nested types for nth_element reduction ([#9043](https://github.com/rapidsai/cudf/pull/9043)) [@sperlingxx](https://github.com/sperlingxx)
-- Update sort groupby to use non-atomic operation ([#9035](https://github.com/rapidsai/cudf/pull/9035)) [@karthikeyann](https://github.com/karthikeyann)
-- Add support for struct type in ORC writer ([#9025](https://github.com/rapidsai/cudf/pull/9025)) [@vuule](https://github.com/vuule)
-- Implement `interleave_columns` for structs columns ([#9012](https://github.com/rapidsai/cudf/pull/9012)) [@ttnghia](https://github.com/ttnghia)
-- Add groupby first and last aggregations ([#9004](https://github.com/rapidsai/cudf/pull/9004)) [@shwina](https://github.com/shwina)
-- Add `DecimalBaseColumn` and move `as_decimal_column` ([#9001](https://github.com/rapidsai/cudf/pull/9001)) [@isVoid](https://github.com/isVoid)
-- Python/Cython bindings for multibyte_split ([#8998](https://github.com/rapidsai/cudf/pull/8998)) [@jdye64](https://github.com/jdye64)
-- Support scalar `months` in `add_calendrical_months`, extends API to INT32 support ([#8991](https://github.com/rapidsai/cudf/pull/8991)) [@isVoid](https://github.com/isVoid)
-- Added Series.dt.is_month_end ([#8989](https://github.com/rapidsai/cudf/pull/8989)) [@TravisHester](https://github.com/TravisHester)
-- Support for using tdigests to compute approximate percentiles. ([#8983](https://github.com/rapidsai/cudf/pull/8983)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Support "unflatten" of columns flattened via `flatten_nested_columns()`: ([#8956](https://github.com/rapidsai/cudf/pull/8956)) [@mythrocks](https://github.com/mythrocks)
-- Implement timestamp ceil ([#8942](https://github.com/rapidsai/cudf/pull/8942)) [@shaneding](https://github.com/shaneding)
-- Add nested column selection to parquet reader ([#8933](https://github.com/rapidsai/cudf/pull/8933)) [@devavret](https://github.com/devavret)
-- Expose conditional join size calculation ([#8928](https://github.com/rapidsai/cudf/pull/8928)) [@vyasr](https://github.com/vyasr)
-- Support Nulls in Timeseries Generator ([#8925](https://github.com/rapidsai/cudf/pull/8925)) [@isVoid](https://github.com/isVoid)
-- Avoid index equality check in `_CPackedColumns.from_py_table()` ([#8917](https://github.com/rapidsai/cudf/pull/8917)) [@charlesbluca](https://github.com/charlesbluca)
-- Add dot product binary op ([#8909](https://github.com/rapidsai/cudf/pull/8909)) [@charlesbluca](https://github.com/charlesbluca)
-- Expose `days_in_month` function in libcudf and add python bindings ([#8892](https://github.com/rapidsai/cudf/pull/8892)) [@isVoid](https://github.com/isVoid)
-- Series string repeat ([#8882](https://github.com/rapidsai/cudf/pull/8882)) [@sarahyurick](https://github.com/sarahyurick)
-- Python binding for quarters ([#8862](https://github.com/rapidsai/cudf/pull/8862)) [@shaneding](https://github.com/shaneding)
-- Expand CSV and JSON reader APIs to accept `dtypes` as a vector or map of `data_type` objects ([#8856](https://github.com/rapidsai/cudf/pull/8856)) [@vuule](https://github.com/vuule)
-- Add Java bindings for AST transform ([#8846](https://github.com/rapidsai/cudf/pull/8846)) [@jlowe](https://github.com/jlowe)
-- Series datetime is_month_start ([#8844](https://github.com/rapidsai/cudf/pull/8844)) [@sarahyurick](https://github.com/sarahyurick)
-- Support bracket syntax for cudf::strings::replace_with_backrefs group index values ([#8841](https://github.com/rapidsai/cudf/pull/8841)) [@davidwendt](https://github.com/davidwendt)
-- Support `VARIANCE` and `STD` aggregation in rolling op ([#8809](https://github.com/rapidsai/cudf/pull/8809)) [@isVoid](https://github.com/isVoid)
-- Add quarters to libcudf datetime ([#8779](https://github.com/rapidsai/cudf/pull/8779)) [@shaneding](https://github.com/shaneding)
quarters to libcudf datetime ([#8779](https://github.com/rapidsai/cudf/pull/8779)) [@shaneding](https://github.com/shaneding) -- Linear Interpolation of `nan`s via `cupy` ([#8767](https://github.com/rapidsai/cudf/pull/8767)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Enable compiled binary ops in libcudf, python and java ([#8741](https://github.com/rapidsai/cudf/pull/8741)) [@karthikeyann](https://github.com/karthikeyann) -- Make groupby transform-like op order match original data order ([#8720](https://github.com/rapidsai/cudf/pull/8720)) [@isVoid](https://github.com/isVoid) -- multibyte_split ([#8702](https://github.com/rapidsai/cudf/pull/8702)) [@cwharris](https://github.com/cwharris) -- Implement JNI for `strings:repeat_strings` that repeats each string separately by different numbers of times ([#8572](https://github.com/rapidsai/cudf/pull/8572)) [@ttnghia](https://github.com/ttnghia) - -## 🛠️ Improvements - -- Pin max `dask` and `distributed` versions to `2021.09.1` ([#9286](https://github.com/rapidsai/cudf/pull/9286)) [@galipremsagar](https://github.com/galipremsagar) -- Optimized fsspec data transfer for remote file-systems ([#9265](https://github.com/rapidsai/cudf/pull/9265)) [@rjzamora](https://github.com/rjzamora) -- Skip dask-cudf tests on arm64 ([#9252](https://github.com/rapidsai/cudf/pull/9252)) [@Ethyling](https://github.com/Ethyling) -- Use nvcomp's snappy compressor in ORC writer ([#9242](https://github.com/rapidsai/cudf/pull/9242)) [@devavret](https://github.com/devavret) -- Only run imports tests on x86_64 ([#9241](https://github.com/rapidsai/cudf/pull/9241)) [@Ethyling](https://github.com/Ethyling) -- Remove unnecessary call to device_uvector::release() ([#9237](https://github.com/rapidsai/cudf/pull/9237)) [@harrism](https://github.com/harrism) -- Use nvcomp's snappy decompression in ORC reader ([#9235](https://github.com/rapidsai/cudf/pull/9235)) [@devavret](https://github.com/devavret) -- Add grouped_rolling test with STRUCT groupby keys. 
-
-## 🛠️ Improvements
-
-- Pin max `dask` and `distributed` versions to `2021.09.1` ([#9286](https://github.com/rapidsai/cudf/pull/9286)) [@galipremsagar](https://github.com/galipremsagar)
-- Optimized fsspec data transfer for remote file-systems ([#9265](https://github.com/rapidsai/cudf/pull/9265)) [@rjzamora](https://github.com/rjzamora)
-- Skip dask-cudf tests on arm64 ([#9252](https://github.com/rapidsai/cudf/pull/9252)) [@Ethyling](https://github.com/Ethyling)
-- Use nvcomp's snappy compressor in ORC writer ([#9242](https://github.com/rapidsai/cudf/pull/9242)) [@devavret](https://github.com/devavret)
-- Only run imports tests on x86_64 ([#9241](https://github.com/rapidsai/cudf/pull/9241)) [@Ethyling](https://github.com/Ethyling)
-- Remove unnecessary call to device_uvector::release() ([#9237](https://github.com/rapidsai/cudf/pull/9237)) [@harrism](https://github.com/harrism)
-- Use nvcomp's snappy decompression in ORC reader ([#9235](https://github.com/rapidsai/cudf/pull/9235)) [@devavret](https://github.com/devavret)
-- Add grouped_rolling test with STRUCT groupby keys. ([#9228](https://github.com/rapidsai/cudf/pull/9228)) [@mythrocks](https://github.com/mythrocks)
-- Optimize `cudf.concat` for `axis=0` ([#9222](https://github.com/rapidsai/cudf/pull/9222)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix some libcudf calls not passing the stream parameter ([#9220](https://github.com/rapidsai/cudf/pull/9220)) [@davidwendt](https://github.com/davidwendt)
-- Add min and max bounds for random dataframe generator numeric types ([#9211](https://github.com/rapidsai/cudf/pull/9211)) [@galipremsagar](https://github.com/galipremsagar)
-- Improve performance of expression evaluation ([#9210](https://github.com/rapidsai/cudf/pull/9210)) [@vyasr](https://github.com/vyasr)
-- Misc optimizations in `cudf` ([#9203](https://github.com/rapidsai/cudf/pull/9203)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove Cython APIs for table view generation ([#9199](https://github.com/rapidsai/cudf/pull/9199)) [@vyasr](https://github.com/vyasr)
-- Add JNI support for drop_list_duplicates ([#9198](https://github.com/rapidsai/cudf/pull/9198)) [@revans2](https://github.com/revans2)
-- Update pandas versions in conda recipes and requirements.txt files ([#9197](https://github.com/rapidsai/cudf/pull/9197)) [@galipremsagar](https://github.com/galipremsagar)
-- Minor C++17 cleanup of `groupby.cu`: structured bindings, more concise lambda, etc ([#9193](https://github.com/rapidsai/cudf/pull/9193)) [@codereport](https://github.com/codereport)
-- Explicit about bitwidth difference between cudf boolean and arrow boolean ([#9192](https://github.com/rapidsai/cudf/pull/9192)) [@isVoid](https://github.com/isVoid)
-- Remove _source_index from MultiIndex ([#9191](https://github.com/rapidsai/cudf/pull/9191)) [@vyasr](https://github.com/vyasr)
-- Fix typo in the name of `cudf-testing-targets.cmake` ([#9190](https://github.com/rapidsai/cudf/pull/9190)) [@trxcllnt](https://github.com/trxcllnt)
-- Add support for single-digits in cudf::to_timestamps ([#9173](https://github.com/rapidsai/cudf/pull/9173)) [@davidwendt](https://github.com/davidwendt)
-- Fix cufilejni build include path ([#9168](https://github.com/rapidsai/cudf/pull/9168)) [@pxLi](https://github.com/pxLi)
-- `dask_cudf` dispatch registering cleanup ([#9160](https://github.com/rapidsai/cudf/pull/9160)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove unneeded stream/mr from a cudf::make_strings_column ([#9148](https://github.com/rapidsai/cudf/pull/9148)) [@davidwendt](https://github.com/davidwendt)
-- Upgrade `pandas` version in `cudf` ([#9147](https://github.com/rapidsai/cudf/pull/9147)) [@galipremsagar](https://github.com/galipremsagar)
-- make data chunk reader return unique_ptr ([#9129](https://github.com/rapidsai/cudf/pull/9129)) [@cwharris](https://github.com/cwharris)
-- Add backend for `percentile_lookup` dispatch ([#9118](https://github.com/rapidsai/cudf/pull/9118)) [@galipremsagar](https://github.com/galipremsagar)
-- Refactor implementation of column setitem ([#9110](https://github.com/rapidsai/cudf/pull/9110)) [@vyasr](https://github.com/vyasr)
-- Fix compile warnings found using nvcc 11.4 ([#9101](https://github.com/rapidsai/cudf/pull/9101)) [@davidwendt](https://github.com/davidwendt)
-- Update to UCX-Py 0.22 ([#9099](https://github.com/rapidsai/cudf/pull/9099)) [@pentschev](https://github.com/pentschev)
-- Simplify read_avro by removing unnecessary writer/impl classes ([#9090](https://github.com/rapidsai/cudf/pull/9090)) [@cwharris](https://github.com/cwharris)
-- Allowing %f in format to return nanoseconds ([#9081](https://github.com/rapidsai/cudf/pull/9081)) [@marlenezw](https://github.com/marlenezw)
-- Java bindings for cudf::hash_join ([#9080](https://github.com/rapidsai/cudf/pull/9080)) [@jlowe](https://github.com/jlowe)
-- Remove stale code in `ColumnBase._fill` ([#9078](https://github.com/rapidsai/cudf/pull/9078)) [@isVoid](https://github.com/isVoid)
-- Add support for `get_group` in GroupBy ([#9070](https://github.com/rapidsai/cudf/pull/9070)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove remaining "support" methods from DataFrame ([#9068](https://github.com/rapidsai/cudf/pull/9068)) [@vyasr](https://github.com/vyasr)
-- Update JNI java CSV APIs to not use deprecated API ([#9066](https://github.com/rapidsai/cudf/pull/9066)) [@revans2](https://github.com/revans2)
-- Added method to remove null_masks if the column has no nulls ([#9061](https://github.com/rapidsai/cudf/pull/9061)) [@razajafri](https://github.com/razajafri)
-- Consolidate Several Series and Dataframe Methods ([#9059](https://github.com/rapidsai/cudf/pull/9059)) [@isVoid](https://github.com/isVoid)
-- Remove usage of string based `set_dtypes` for `csv` & `json` readers ([#9049](https://github.com/rapidsai/cudf/pull/9049)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove some debug print statements from gtests ([#9048](https://github.com/rapidsai/cudf/pull/9048)) [@davidwendt](https://github.com/davidwendt)
-- Support additional format specifiers in from_timestamps ([#9047](https://github.com/rapidsai/cudf/pull/9047)) [@davidwendt](https://github.com/davidwendt)
-- Expose expression base class publicly and simplify public AST API ([#9045](https://github.com/rapidsai/cudf/pull/9045)) [@vyasr](https://github.com/vyasr)
-- move filepath and mmap logic out of json/csv up to functions.cpp ([#9040](https://github.com/rapidsai/cudf/pull/9040)) [@cwharris](https://github.com/cwharris)
-- Refactor Index hierarchy ([#9039](https://github.com/rapidsai/cudf/pull/9039)) [@vyasr](https://github.com/vyasr)
-- cudf now leverages rapids-cmake to reduce CMake boilerplate ([#9030](https://github.com/rapidsai/cudf/pull/9030)) [@robertmaynard](https://github.com/robertmaynard)
-- Add support for `STRUCT` input to `groupby` ([#9024](https://github.com/rapidsai/cudf/pull/9024)) [@mythrocks](https://github.com/mythrocks)
-- Refactor Frame scans ([#9021](https://github.com/rapidsai/cudf/pull/9021)) [@vyasr](https://github.com/vyasr)
-- Remove duplicate `set_categories` code ([#9018](https://github.com/rapidsai/cudf/pull/9018)) [@isVoid](https://github.com/isVoid)
-- Map support for ParquetWriter ([#9013](https://github.com/rapidsai/cudf/pull/9013)) [@razajafri](https://github.com/razajafri)
-- Remove aliases of various api.types APIs from utils.dtypes. ([#9011](https://github.com/rapidsai/cudf/pull/9011)) [@vyasr](https://github.com/vyasr)
-- Java bindings for conditional join output sizes ([#9002](https://github.com/rapidsai/cudf/pull/9002)) [@jlowe](https://github.com/jlowe)
-- Remove _copy_construct factory ([#8999](https://github.com/rapidsai/cudf/pull/8999)) [@vyasr](https://github.com/vyasr)
-- ENH Allow arbitrary CMake config options in build.sh ([#8996](https://github.com/rapidsai/cudf/pull/8996)) [@dillon-cullinan](https://github.com/dillon-cullinan)
-- A small optimization for JNI copy column view to column vector ([#8985](https://github.com/rapidsai/cudf/pull/8985)) [@revans2](https://github.com/revans2)
-- Fix nvcc warnings in ORC writer ([#8975](https://github.com/rapidsai/cudf/pull/8975)) [@devavret](https://github.com/devavret)
-- Support nested structs in rank and dense rank ([#8962](https://github.com/rapidsai/cudf/pull/8962)) [@rwlee](https://github.com/rwlee)
-- Move compute_column API out of ast namespace ([#8957](https://github.com/rapidsai/cudf/pull/8957)) [@vyasr](https://github.com/vyasr)
-- Series datetime is_year_end and is_year_start ([#8954](https://github.com/rapidsai/cudf/pull/8954)) [@marlenezw](https://github.com/marlenezw)
-- Make Java AstNode public ([#8953](https://github.com/rapidsai/cudf/pull/8953)) [@jlowe](https://github.com/jlowe)
-- Replace allocate with device_uvector for subword_tokenize internal tables ([#8952](https://github.com/rapidsai/cudf/pull/8952)) [@davidwendt](https://github.com/davidwendt)
-- `cudf.dtype` function ([#8949](https://github.com/rapidsai/cudf/pull/8949)) [@shwina](https://github.com/shwina)
-- Refactor Frame reductions ([#8944](https://github.com/rapidsai/cudf/pull/8944)) [@vyasr](https://github.com/vyasr)
-- Add deprecation warning for `Series.set_mask` API ([#8943](https://github.com/rapidsai/cudf/pull/8943)) [@galipremsagar](https://github.com/galipremsagar)
-- Move AST evaluator into a separate header ([#8930](https://github.com/rapidsai/cudf/pull/8930)) [@vyasr](https://github.com/vyasr)
-- JNI Aggregation Type Changes ([#8919](https://github.com/rapidsai/cudf/pull/8919)) [@revans2](https://github.com/revans2)
-- Move template parameter to function parameter in cudf::detail::left_semi_anti_join ([#8914](https://github.com/rapidsai/cudf/pull/8914)) [@davidwendt](https://github.com/davidwendt)
-- Upgrade `arrow` & `pyarrow` to `5.0.0` ([#8908](https://github.com/rapidsai/cudf/pull/8908)) [@galipremsagar](https://github.com/galipremsagar)
-- Add groupby_aggregation and groupby_scan_aggregation classes and force their usage. ([#8906](https://github.com/rapidsai/cudf/pull/8906)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Move `structs_column_tests.cu` to `.cpp`. ([#8902](https://github.com/rapidsai/cudf/pull/8902)) [@mythrocks](https://github.com/mythrocks)
-- Add stream and memory-resource parameters to struct-scalar copy ctor ([#8901](https://github.com/rapidsai/cudf/pull/8901)) [@davidwendt](https://github.com/davidwendt)
-- Combine linearizer and ast_plan ([#8900](https://github.com/rapidsai/cudf/pull/8900)) [@vyasr](https://github.com/vyasr)
-- Add Java bindings for conditional join gather maps ([#8888](https://github.com/rapidsai/cudf/pull/8888)) [@jlowe](https://github.com/jlowe)
-- Remove max version pin for `dask` & `distributed` on development branch ([#8881](https://github.com/rapidsai/cudf/pull/8881)) [@galipremsagar](https://github.com/galipremsagar)
-- fix cufilejni build w/ c++17 ([#8877](https://github.com/rapidsai/cudf/pull/8877)) [@pxLi](https://github.com/pxLi)
-- Add struct accessor to dask-cudf ([#8874](https://github.com/rapidsai/cudf/pull/8874)) [@NV-jpt](https://github.com/NV-jpt)
-- Migrate dask-cudf CudfEngine to leverage ArrowDatasetEngine ([#8871](https://github.com/rapidsai/cudf/pull/8871)) [@rjzamora](https://github.com/rjzamora)
-- Add JNI for extract_quarter, add_calendrical_months, and is_leap_year ([#8863](https://github.com/rapidsai/cudf/pull/8863)) [@revans2](https://github.com/revans2)
-- Change cudf::scalar copy and move constructors to protected ([#8857](https://github.com/rapidsai/cudf/pull/8857)) [@davidwendt](https://github.com/davidwendt)
-- Replace `is_same<>::value` with `is_same_v<>` ([#8852](https://github.com/rapidsai/cudf/pull/8852)) [@codereport](https://github.com/codereport)
-- Add min `pytorch` version to `importorskip` in pytest ([#8851](https://github.com/rapidsai/cudf/pull/8851)) [@galipremsagar](https://github.com/galipremsagar)
-- Java bindings for regex replace ([#8847](https://github.com/rapidsai/cudf/pull/8847)) [@jlowe](https://github.com/jlowe)
-- Remove make strings children with null mask ([#8830](https://github.com/rapidsai/cudf/pull/8830)) [@davidwendt](https://github.com/davidwendt)
-- Refactor conditional joins ([#8815](https://github.com/rapidsai/cudf/pull/8815)) [@vyasr](https://github.com/vyasr)
-- Small cleanup (unused headers / commented code removals) ([#8799](https://github.com/rapidsai/cudf/pull/8799)) [@codereport](https://github.com/codereport)
-- ENH Replace gpuci_conda_retry with gpuci_mamba_retry ([#8770](https://github.com/rapidsai/cudf/pull/8770)) [@dillon-cullinan](https://github.com/dillon-cullinan)
-- Update cudf java bindings to 21.10.0-SNAPSHOT ([#8765](https://github.com/rapidsai/cudf/pull/8765)) [@pxLi](https://github.com/pxLi)
-- Refactor and improve join benchmarks with nvbench ([#8734](https://github.com/rapidsai/cudf/pull/8734)) [@PointKernel](https://github.com/PointKernel)
-- Refactor Python factories and remove usage of Table for libcudf output handling ([#8687](https://github.com/rapidsai/cudf/pull/8687)) [@vyasr](https://github.com/vyasr)
-- Optimize URL Decoding ([#8622](https://github.com/rapidsai/cudf/pull/8622)) [@gaohao95](https://github.com/gaohao95)
-- Parquet writer dictionary encoding refactor ([#8476](https://github.com/rapidsai/cudf/pull/8476)) [@devavret](https://github.com/devavret)
-- Use nvcomp's snappy decompression in parquet reader ([#8252](https://github.com/rapidsai/cudf/pull/8252)) [@devavret](https://github.com/devavret)
-- Use nvcomp's snappy compressor in parquet writer ([#8229](https://github.com/rapidsai/cudf/pull/8229)) [@devavret](https://github.com/devavret)
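For context on the `cudf.concat` optimization above ([#9222](https://github.com/rapidsai/cudf/pull/9222)), a minimal sketch of the `axis=0` path it speeds up; the frames and column names here are illustrative only.

```python
import cudf

left = cudf.DataFrame({"x": [1, 2], "y": ["a", "b"]})
right = cudf.DataFrame({"x": [3], "y": ["c"]})
# axis=0 stacks rows; this is the code path targeted by #9222
combined = cudf.concat([left, right], axis=0, ignore_index=True)
```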
-
-# cuDF 21.08.00 (4 Aug 2021)
-
-## 🚨 Breaking Changes
-
-- Fix a crash in pack() when being handed tables with no columns. ([#8697](https://github.com/rapidsai/cudf/pull/8697)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Remove unused cudf::strings::create_offsets ([#8663](https://github.com/rapidsai/cudf/pull/8663)) [@davidwendt](https://github.com/davidwendt)
-- Add delimiter parameter to cudf::strings::capitalize() ([#8620](https://github.com/rapidsai/cudf/pull/8620)) [@davidwendt](https://github.com/davidwendt)
-- Change default datetime index resolution to ns to match pandas ([#8611](https://github.com/rapidsai/cudf/pull/8611)) [@vyasr](https://github.com/vyasr)
-- Add sequence_type parameter to cudf::strings::title function ([#8602](https://github.com/rapidsai/cudf/pull/8602)) [@davidwendt](https://github.com/davidwendt)
-- Add `strings::repeat_strings` API that can repeat each string a different number of times ([#8561](https://github.com/rapidsai/cudf/pull/8561)) [@ttnghia](https://github.com/ttnghia)
-- String-to-boolean conversion is different from Pandas ([#8549](https://github.com/rapidsai/cudf/pull/8549)) [@skirui-source](https://github.com/skirui-source)
-- Add accurate hash join size functions ([#8453](https://github.com/rapidsai/cudf/pull/8453)) [@PointKernel](https://github.com/PointKernel)
-- Expose a Decimal32Dtype in cuDF Python ([#8438](https://github.com/rapidsai/cudf/pull/8438)) [@skirui-source](https://github.com/skirui-source)
-- Update dask make_meta changes to be compatible with dask upstream ([#8426](https://github.com/rapidsai/cudf/pull/8426)) [@galipremsagar](https://github.com/galipremsagar)
-- Adapt `cudf::scalar` classes to changes in `rmm::device_scalar` ([#8411](https://github.com/rapidsai/cudf/pull/8411)) [@harrism](https://github.com/harrism)
-- Remove special Index class from the general index class hierarchy ([#8309](https://github.com/rapidsai/cudf/pull/8309)) [@vyasr](https://github.com/vyasr)
-- Add first-class dtype utilities ([#8308](https://github.com/rapidsai/cudf/pull/8308)) [@vyasr](https://github.com/vyasr)
-- ORC - Support reading multiple orc files/buffers in a single operation ([#8142](https://github.com/rapidsai/cudf/pull/8142)) [@jdye64](https://github.com/jdye64)
-- Upgrade arrow to 4.0.1 ([#7495](https://github.com/rapidsai/cudf/pull/7495)) [@galipremsagar](https://github.com/galipremsagar)
-
-## 🐛 Bug Fixes
-
-- Fix `contains` check in string column ([#8834](https://github.com/rapidsai/cudf/pull/8834)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove unused variable from `row_bit_count_test`. ([#8829](https://github.com/rapidsai/cudf/pull/8829)) [@mythrocks](https://github.com/mythrocks)
-- Fixes issue with null struct columns in ORC reader ([#8819](https://github.com/rapidsai/cudf/pull/8819)) [@rgsl888prabhu](https://github.com/rgsl888prabhu)
-- Set CMake vars for python/parquet support in libarrow builds ([#8808](https://github.com/rapidsai/cudf/pull/8808)) [@vyasr](https://github.com/vyasr)
-- Handle empty child columns in row_bit_count() ([#8791](https://github.com/rapidsai/cudf/pull/8791)) [@mythrocks](https://github.com/mythrocks)
-- Revert "Remove cudf unneeded build time requirement of the cuda driver" ([#8784](https://github.com/rapidsai/cudf/pull/8784)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix isort error in utils.pyx ([#8771](https://github.com/rapidsai/cudf/pull/8771)) [@charlesbluca](https://github.com/charlesbluca)
-- Handle sliced struct/list columns properly in concatenate() bounds checking. ([#8760](https://github.com/rapidsai/cudf/pull/8760)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix issues with `_CPackedColumns.serialize()` handling of host and device data ([#8759](https://github.com/rapidsai/cudf/pull/8759)) [@charlesbluca](https://github.com/charlesbluca)
-- Fix issues with `MultiIndex` in `dropna`, `stack` & `reset_index` ([#8753](https://github.com/rapidsai/cudf/pull/8753)) [@galipremsagar](https://github.com/galipremsagar)
-- Write pandas extension types to parquet file metadata ([#8749](https://github.com/rapidsai/cudf/pull/8749)) [@devavret](https://github.com/devavret)
-- Fix `where` to handle `DataFrame` & `Series` input combination ([#8747](https://github.com/rapidsai/cudf/pull/8747)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix `replace` to handle null values correctly ([#8744](https://github.com/rapidsai/cudf/pull/8744)) [@galipremsagar](https://github.com/galipremsagar)
-- Handle sliced structs properly in pack/contiguous_split. ([#8739](https://github.com/rapidsai/cudf/pull/8739)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix issue in slice() where columns with a positive offset were computing null counts incorrectly. ([#8738](https://github.com/rapidsai/cudf/pull/8738)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix `cudf.Series` constructor to handle list of sequences ([#8735](https://github.com/rapidsai/cudf/pull/8735)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix min/max sorted groupby aggregation on string column with nulls (argmin, argmax sentinel value missing on nulls) ([#8731](https://github.com/rapidsai/cudf/pull/8731)) [@karthikeyann](https://github.com/karthikeyann)
-- Fix orc reader assert on create data_type in debug ([#8706](https://github.com/rapidsai/cudf/pull/8706)) [@davidwendt](https://github.com/davidwendt)
-- Fix min/max inclusive cudf::scan for strings column ([#8705](https://github.com/rapidsai/cudf/pull/8705)) [@davidwendt](https://github.com/davidwendt)
-- JNI: Fix driver version assertion logic in testGetCudaRuntimeInfo ([#8701](https://github.com/rapidsai/cudf/pull/8701)) [@sperlingxx](https://github.com/sperlingxx)
-- Adding fix for skip_rows and crash in orc reader ([#8700](https://github.com/rapidsai/cudf/pull/8700)) [@rgsl888prabhu](https://github.com/rgsl888prabhu)
-- Bug fix: `replace_nulls_policy` functor not returning correct indices for gathermap ([#8699](https://github.com/rapidsai/cudf/pull/8699)) [@isVoid](https://github.com/isVoid)
-- Fix a crash in pack() when being handed tables with no columns. ([#8697](https://github.com/rapidsai/cudf/pull/8697)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Add post-processing steps to `dask_cudf.groupby.CudfSeriesGroupby.aggregate` ([#8694](https://github.com/rapidsai/cudf/pull/8694)) [@charlesbluca](https://github.com/charlesbluca)
-- JNI build no longer looks for Arrow in conda environment ([#8686](https://github.com/rapidsai/cudf/pull/8686)) [@jlowe](https://github.com/jlowe)
-- Handle arbitrarily different data in null list column rows when checking for equivalency. ([#8666](https://github.com/rapidsai/cudf/pull/8666)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Add ConfigureNVBench to avoid concurrent main() entry points ([#8662](https://github.com/rapidsai/cudf/pull/8662)) [@PointKernel](https://github.com/PointKernel)
-- Pin `*arrow` to use `*cuda` in `run` ([#8651](https://github.com/rapidsai/cudf/pull/8651)) [@jakirkham](https://github.com/jakirkham)
-- Add proper support for tolerances in testing methods. ([#8649](https://github.com/rapidsai/cudf/pull/8649)) [@vyasr](https://github.com/vyasr)
-- Support multi-char case conversion in capitalize function ([#8647](https://github.com/rapidsai/cudf/pull/8647)) [@davidwendt](https://github.com/davidwendt)
-- Fix repeated mangled names in read_csv with duplicate column names ([#8645](https://github.com/rapidsai/cudf/pull/8645)) [@karthikeyann](https://github.com/karthikeyann)
-- Temporarily disable libcudf example build tests ([#8642](https://github.com/rapidsai/cudf/pull/8642)) [@isVoid](https://github.com/isVoid)
-- Use conda-sourced cudf artifacts for libcudf example in CI ([#8638](https://github.com/rapidsai/cudf/pull/8638)) [@isVoid](https://github.com/isVoid)
-- Ensure dev environment uses Arrow GPU packages ([#8637](https://github.com/rapidsai/cudf/pull/8637)) [@charlesbluca](https://github.com/charlesbluca)
-- Fix bug that columns only initialized once when specified `columns` and `index` in dataframe ctor ([#8628](https://github.com/rapidsai/cudf/pull/8628)) [@isVoid](https://github.com/isVoid)
-- Propagate **kwargs through to as_*_column methods ([#8618](https://github.com/rapidsai/cudf/pull/8618)) [@shwina](https://github.com/shwina)
-- Fix orc_reader_benchmark.cpp compile error ([#8609](https://github.com/rapidsai/cudf/pull/8609)) [@davidwendt](https://github.com/davidwendt)
-- Fix missed renumbering of Aggregation values ([#8600](https://github.com/rapidsai/cudf/pull/8600)) [@revans2](https://github.com/revans2)
-- Update cmake to 3.20.5 in the Java Docker image ([#8593](https://github.com/rapidsai/cudf/pull/8593)) [@NvTimLiu](https://github.com/NvTimLiu)
-- Fix bug in replace_with_backrefs when group has greedy quantifier ([#8575](https://github.com/rapidsai/cudf/pull/8575)) [@davidwendt](https://github.com/davidwendt)
-- Apply metadata to keys before returning in `Frame._encode` ([#8560](https://github.com/rapidsai/cudf/pull/8560)) [@charlesbluca](https://github.com/charlesbluca)
-- Fix for strings containing special JSON characters in get_json_object(). ([#8556](https://github.com/rapidsai/cudf/pull/8556)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix debug compile error in gather_struct_tests.cpp ([#8554](https://github.com/rapidsai/cudf/pull/8554)) [@davidwendt](https://github.com/davidwendt)
-- String-to-boolean conversion is different from Pandas ([#8549](https://github.com/rapidsai/cudf/pull/8549)) [@skirui-source](https://github.com/skirui-source)
-- Fix `__repr__` output with `display.max_rows` is `None` ([#8547](https://github.com/rapidsai/cudf/pull/8547)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix size passed to column constructors in _with_type_metadata ([#8539](https://github.com/rapidsai/cudf/pull/8539)) [@shwina](https://github.com/shwina)
-- Properly retrieve last column when `-1` is specified for column index ([#8529](https://github.com/rapidsai/cudf/pull/8529)) [@isVoid](https://github.com/isVoid)
-- Fix importing `apply` from `dask` ([#8517](https://github.com/rapidsai/cudf/pull/8517)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix offset of the string dictionary length stream ([#8515](https://github.com/rapidsai/cudf/pull/8515)) [@vuule](https://github.com/vuule)
-- Fix double counting of selected columns in CSV reader ([#8508](https://github.com/rapidsai/cudf/pull/8508)) [@ochan1](https://github.com/ochan1)
-- Incorrect map size in scatter_to_gather corrupts struct columns ([#8507](https://github.com/rapidsai/cudf/pull/8507)) [@gerashegalov](https://github.com/gerashegalov)
-- replace_nulls properly propagates memory resource to gather calls ([#8500](https://github.com/rapidsai/cudf/pull/8500)) [@robertmaynard](https://github.com/robertmaynard)
-- Disallow groupby aggs for `StructColumns` ([#8499](https://github.com/rapidsai/cudf/pull/8499)) [@charlesbluca](https://github.com/charlesbluca)
-- Fixes out-of-bounds access for small files in unzip ([#8498](https://github.com/rapidsai/cudf/pull/8498)) [@elstehle](https://github.com/elstehle)
-- Adding support for writing empty dataframe ([#8490](https://github.com/rapidsai/cudf/pull/8490)) [@shaneding](https://github.com/shaneding)
-- Fix exclusive scan when including nulls and improve testing ([#8478](https://github.com/rapidsai/cudf/pull/8478)) [@harrism](https://github.com/harrism)
-- Add workaround for crash in libcudf debug build using output_indexalator in thrust::lower_bound ([#8432](https://github.com/rapidsai/cudf/pull/8432)) [@davidwendt](https://github.com/davidwendt)
-- Install only the same Thrust files that Thrust itself installs ([#8420](https://github.com/rapidsai/cudf/pull/8420)) [@robertmaynard](https://github.com/robertmaynard)
-- Add nightly version for ucx-py in ci script ([#8419](https://github.com/rapidsai/cudf/pull/8419)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix null_equality config of rolling_collect_set ([#8415](https://github.com/rapidsai/cudf/pull/8415)) [@sperlingxx](https://github.com/sperlingxx)
-- CollectSetAggregation: implement RollingAggregation interface ([#8406](https://github.com/rapidsai/cudf/pull/8406)) [@sperlingxx](https://github.com/sperlingxx)
-- Handle pre-sliced nested columns in contiguous_split. ([#8391](https://github.com/rapidsai/cudf/pull/8391)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Fix bitmask_tests.cpp host accessing device memory ([#8370](https://github.com/rapidsai/cudf/pull/8370)) [@davidwendt](https://github.com/davidwendt)
-- Fix concurrent_unordered_map to prevent accessing padding bits in pair_type ([#8348](https://github.com/rapidsai/cudf/pull/8348)) [@davidwendt](https://github.com/davidwendt)
-- BUG FIX: Raise appropriate strings error when concatenating strings column ([#8290](https://github.com/rapidsai/cudf/pull/8290)) [@skirui-source](https://github.com/skirui-source)
-- Make gpuCI and pre-commit style configurations consistent ([#8215](https://github.com/rapidsai/cudf/pull/8215)) [@charlesbluca](https://github.com/charlesbluca)
-- Add collect list to dask-cudf groupby aggregations ([#8045](https://github.com/rapidsai/cudf/pull/8045)) [@charlesbluca](https://github.com/charlesbluca)
-
-## 📖 Documentation
-
-- Update Python UDFs notebook ([#8810](https://github.com/rapidsai/cudf/pull/8810)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Fix dask.dataframe API docs links after reorg ([#8772](https://github.com/rapidsai/cudf/pull/8772)) [@jsignell](https://github.com/jsignell)
-- Fix instructions for running cuDF/dask-cuDF tests in CONTRIBUTING.md ([#8724](https://github.com/rapidsai/cudf/pull/8724)) [@shwina](https://github.com/shwina)
-- Translate Markdown documentation to rST and remove recommonmark ([#8698](https://github.com/rapidsai/cudf/pull/8698)) [@vyasr](https://github.com/vyasr)
-- Fixed spelling mistakes in libcudf documentation ([#8664](https://github.com/rapidsai/cudf/pull/8664)) [@karthikeyann](https://github.com/karthikeyann)
-- Custom Sphinx Extension: `PandasCompat` ([#8643](https://github.com/rapidsai/cudf/pull/8643)) [@isVoid](https://github.com/isVoid)
-- Fix README.md ([#8535](https://github.com/rapidsai/cudf/pull/8535)) [@ajschmidt8](https://github.com/ajschmidt8)
-- Change namespace contains_nulls to struct ([#8523](https://github.com/rapidsai/cudf/pull/8523)) [@davidwendt](https://github.com/davidwendt)
-- Add info about NVTX ranges to dev guide ([#8461](https://github.com/rapidsai/cudf/pull/8461)) [@jrhemstad](https://github.com/jrhemstad)
-- Fixed documentation bug in groupby agg method ([#8325](https://github.com/rapidsai/cudf/pull/8325)) [@ahmet-uyar](https://github.com/ahmet-uyar)
-
-## 🚀 New Features
-
-- Fix concatenating structs ([#8811](https://github.com/rapidsai/cudf/pull/8811)) [@shaneding](https://github.com/shaneding)
-- Implement JNI for groupby aggregations `M2` and `MERGE_M2` ([#8763](https://github.com/rapidsai/cudf/pull/8763)) [@ttnghia](https://github.com/ttnghia)
-- Bump `isort` to `5.6.4` and remove `isort` overrides made for 5.0.7 ([#8755](https://github.com/rapidsai/cudf/pull/8755)) [@charlesbluca](https://github.com/charlesbluca)
-- Implement `__setitem__` for `StructColumn` ([#8737](https://github.com/rapidsai/cudf/pull/8737)) [@shaneding](https://github.com/shaneding)
-- Add `is_leap_year` to `DateTimeProperties` and `DatetimeIndex` ([#8736](https://github.com/rapidsai/cudf/pull/8736)) [@isVoid](https://github.com/isVoid)
-- Add `struct.explode()` method ([#8729](https://github.com/rapidsai/cudf/pull/8729)) [@shwina](https://github.com/shwina)
-- Add `DataFrame.to_struct()` method to convert a DataFrame to a struct Series ([#8728](https://github.com/rapidsai/cudf/pull/8728)) [@shwina](https://github.com/shwina)
-- Add support for list type in ORC writer ([#8723](https://github.com/rapidsai/cudf/pull/8723)) [@vuule](https://github.com/vuule)
-- Fix slicing from struct columns and accessing struct columns ([#8719](https://github.com/rapidsai/cudf/pull/8719)) [@shaneding](https://github.com/shaneding)
-- Add `datetime::is_leap_year` ([#8711](https://github.com/rapidsai/cudf/pull/8711)) [@isVoid](https://github.com/isVoid)
-- Accessing struct columns from `dask_cudf` ([#8675](https://github.com/rapidsai/cudf/pull/8675)) [@shaneding](https://github.com/shaneding)
-- Added pct_change to Series ([#8650](https://github.com/rapidsai/cudf/pull/8650)) [@TravisHester](https://github.com/TravisHester)
-- Add strings support to cudf::shift function ([#8648](https://github.com/rapidsai/cudf/pull/8648)) [@davidwendt](https://github.com/davidwendt)
-- Support Scatter `struct_scalar` ([#8630](https://github.com/rapidsai/cudf/pull/8630)) [@isVoid](https://github.com/isVoid)
-- Struct scalar from host dictionary ([#8629](https://github.com/rapidsai/cudf/pull/8629)) [@shaneding](https://github.com/shaneding)
-- Add dayofyear and day_of_year to Series, DatetimeColumn, and DatetimeIndex ([#8626](https://github.com/rapidsai/cudf/pull/8626)) [@beckernick](https://github.com/beckernick)
-- JNI support for capitalize ([#8624](https://github.com/rapidsai/cudf/pull/8624)) [@firestarman](https://github.com/firestarman)
-- Add delimiter parameter to cudf::strings::capitalize() ([#8620](https://github.com/rapidsai/cudf/pull/8620)) [@davidwendt](https://github.com/davidwendt)
-- Add NVBench in CMake ([#8619](https://github.com/rapidsai/cudf/pull/8619)) [@PointKernel](https://github.com/PointKernel)
-- Change default datetime index resolution to ns to match pandas ([#8611](https://github.com/rapidsai/cudf/pull/8611)) [@vyasr](https://github.com/vyasr)
-- ListColumn `__setitem__` ([#8606](https://github.com/rapidsai/cudf/pull/8606)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Implement groupby aggregations `M2` and `MERGE_M2` ([#8605](https://github.com/rapidsai/cudf/pull/8605)) [@ttnghia](https://github.com/ttnghia)
-- Add sequence_type parameter to cudf::strings::title function ([#8602](https://github.com/rapidsai/cudf/pull/8602)) [@davidwendt](https://github.com/davidwendt)
-- Adding support for list and struct type in ORC Reader ([#8599](https://github.com/rapidsai/cudf/pull/8599)) [@rgsl888prabhu](https://github.com/rgsl888prabhu)
-- Benchmark for `strings::repeat_strings` APIs ([#8589](https://github.com/rapidsai/cudf/pull/8589)) [@ttnghia](https://github.com/ttnghia)
-- Nested scalar support for copy if else ([#8588](https://github.com/rapidsai/cudf/pull/8588)) [@gerashegalov](https://github.com/gerashegalov)
-- User specified decimal columns to float64 ([#8587](https://github.com/rapidsai/cudf/pull/8587)) [@jdye64](https://github.com/jdye64)
-- Add `get_element` for struct column ([#8578](https://github.com/rapidsai/cudf/pull/8578)) [@isVoid](https://github.com/isVoid)
-- Python changes for adding `__getitem__` for `struct` ([#8577](https://github.com/rapidsai/cudf/pull/8577)) [@shaneding](https://github.com/shaneding)
-- Add `strings::repeat_strings` API that can repeat each string a different number of times ([#8561](https://github.com/rapidsai/cudf/pull/8561)) [@ttnghia](https://github.com/ttnghia)
-- Refactor `tests/iterator_utilities.hpp` functions ([#8540](https://github.com/rapidsai/cudf/pull/8540)) [@ttnghia](https://github.com/ttnghia)
-- Support MERGE_LISTS and MERGE_SETS in Java package ([#8516](https://github.com/rapidsai/cudf/pull/8516)) [@sperlingxx](https://github.com/sperlingxx)
-- Decimal support csv reader ([#8511](https://github.com/rapidsai/cudf/pull/8511)) [@elstehle](https://github.com/elstehle)
-- Add column type tests ([#8505](https://github.com/rapidsai/cudf/pull/8505)) [@isVoid](https://github.com/isVoid)
-- Warn when downscaling decimal columns ([#8492](https://github.com/rapidsai/cudf/pull/8492)) [@ChrisJar](https://github.com/ChrisJar)
-- Add JNI for `strings::repeat_strings` ([#8491](https://github.com/rapidsai/cudf/pull/8491)) [@ttnghia](https://github.com/ttnghia)
-- Add `Index.get_loc` for Numerical, String Index support ([#8489](https://github.com/rapidsai/cudf/pull/8489)) [@isVoid](https://github.com/isVoid)
-- Expose half_up rounding in cuDF ([#8477](https://github.com/rapidsai/cudf/pull/8477)) [@shwina](https://github.com/shwina)
-- Java APIs to fetch CUDA runtime info ([#8465](https://github.com/rapidsai/cudf/pull/8465)) [@sperlingxx](https://github.com/sperlingxx)
-- Add `str.edit_distance_matrix` ([#8463](https://github.com/rapidsai/cudf/pull/8463)) [@isVoid](https://github.com/isVoid)
-- Support constructing `cudf.Scalar` objects from host side lists ([#8459](https://github.com/rapidsai/cudf/pull/8459)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add accurate hash join size functions ([#8453](https://github.com/rapidsai/cudf/pull/8453)) [@PointKernel](https://github.com/PointKernel)
-- Add cudf::strings::integer_to_hex convert API ([#8450](https://github.com/rapidsai/cudf/pull/8450)) [@davidwendt](https://github.com/davidwendt)
-- Create objects from iterables that contain cudf.NA ([#8442](https://github.com/rapidsai/cudf/pull/8442)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- JNI bindings for sort_lists ([#8439](https://github.com/rapidsai/cudf/pull/8439)) [@sperlingxx](https://github.com/sperlingxx)
-- Expose a Decimal32Dtype in cuDF Python ([#8438](https://github.com/rapidsai/cudf/pull/8438)) [@skirui-source](https://github.com/skirui-source)
-- Replace `all_null()` and `all_valid()` by `iterator_all_nulls()` and `iterator_no_null()` in tests ([#8437](https://github.com/rapidsai/cudf/pull/8437)) [@ttnghia](https://github.com/ttnghia)
-- Implement groupby `MERGE_LISTS` and `MERGE_SETS` aggregates ([#8436](https://github.com/rapidsai/cudf/pull/8436)) [@ttnghia](https://github.com/ttnghia)
-- Add public libcudf match_dictionaries API ([#8429](https://github.com/rapidsai/cudf/pull/8429)) [@davidwendt](https://github.com/davidwendt)
-- Add move constructors for `string_scalar` and `struct_scalar` ([#8428](https://github.com/rapidsai/cudf/pull/8428)) [@ttnghia](https://github.com/ttnghia)
-- Implement `strings::repeat_strings` ([#8423](https://github.com/rapidsai/cudf/pull/8423)) [@ttnghia](https://github.com/ttnghia)
-- STRUCT column support for cudf::merge. ([#8422](https://github.com/rapidsai/cudf/pull/8422)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Implement reverse in libcudf ([#8410](https://github.com/rapidsai/cudf/pull/8410)) [@shaneding](https://github.com/shaneding)
-- Support multiple input files/buffers for read_json ([#8403](https://github.com/rapidsai/cudf/pull/8403)) [@jdye64](https://github.com/jdye64)
-- Improve test coverage for struct search ([#8396](https://github.com/rapidsai/cudf/pull/8396)) [@ttnghia](https://github.com/ttnghia)
-- Add `groupby.fillna` ([#8362](https://github.com/rapidsai/cudf/pull/8362)) [@isVoid](https://github.com/isVoid)
-- Enable AST-based joining ([#8214](https://github.com/rapidsai/cudf/pull/8214)) [@vyasr](https://github.com/vyasr)
-- Generalized null support in user defined functions ([#8213](https://github.com/rapidsai/cudf/pull/8213)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Add compiled binary operation ([#8192](https://github.com/rapidsai/cudf/pull/8192)) [@karthikeyann](https://github.com/karthikeyann)
-- Implement `.describe() ` for `DataFrameGroupBy` ([#8179](https://github.com/rapidsai/cudf/pull/8179)) [@skirui-source](https://github.com/skirui-source)
-- ORC - Support reading multiple orc files/buffers in a single operation ([#8142](https://github.com/rapidsai/cudf/pull/8142)) [@jdye64](https://github.com/jdye64)
-- Add Python bindings for `lists::concatenate_list_elements` and expose them as `.list.concat()` ([#8006](https://github.com/rapidsai/cudf/pull/8006)) [@shwina](https://github.com/shwina)
-- Use Arrow URI FileSystem backed instance to retrieve remote files ([#7709](https://github.com/rapidsai/cudf/pull/7709)) [@jdye64](https://github.com/jdye64)
-- Example to build custom application and link to libcudf ([#7671](https://github.com/rapidsai/cudf/pull/7671)) [@isVoid](https://github.com/isVoid)
-- Upgrade arrow to 4.0.1 ([#7495](https://github.com/rapidsai/cudf/pull/7495)) [@galipremsagar](https://github.com/galipremsagar)
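Several of the new features above surface as pandas-style accessors. A minimal sketch, assuming the `dt.is_leap_year` ([#8736](https://github.com/rapidsai/cudf/pull/8736)), `dt.day_of_year` ([#8626](https://github.com/rapidsai/cudf/pull/8626)), and `Series.pct_change` ([#8650](https://github.com/rapidsai/cudf/pull/8650)) entry points; the values are illustrative.

```python
import cudf

dates = cudf.to_datetime(cudf.Series(["2020-02-29", "2021-06-01"]))
print(dates.dt.is_leap_year)  # True, False
print(dates.dt.day_of_year)   # 60, 152

prices = cudf.Series([10.0, 11.0, 9.9])
print(prices.pct_change())    # first element null, then 0.1, -0.1
```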
-
-## 🛠️ Improvements
-
-- Provide a better error message when `CUDA::cuda_driver` not found ([#8794](https://github.com/rapidsai/cudf/pull/8794)) [@robertmaynard](https://github.com/robertmaynard)
-- Remove anonymous namespace from null_mask.cuh ([#8786](https://github.com/rapidsai/cudf/pull/8786)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Allow cudf to be built without libcuda.so existing ([#8751](https://github.com/rapidsai/cudf/pull/8751)) [@robertmaynard](https://github.com/robertmaynard)
-- Pin `mimesis` to `<4.1` ([#8745](https://github.com/rapidsai/cudf/pull/8745)) [@galipremsagar](https://github.com/galipremsagar)
-- Update `conda` environment name for CI ([#8692](https://github.com/rapidsai/cudf/pull/8692)) [@ajschmidt8](https://github.com/ajschmidt8)
-- Remove flatbuffers dependency ([#8671](https://github.com/rapidsai/cudf/pull/8671)) [@Ethyling](https://github.com/Ethyling)
-- Add options to build Arrow with Python and Parquet support ([#8670](https://github.com/rapidsai/cudf/pull/8670)) [@trxcllnt](https://github.com/trxcllnt)
-- Remove unused cudf::strings::create_offsets ([#8663](https://github.com/rapidsai/cudf/pull/8663)) [@davidwendt](https://github.com/davidwendt)
-- Update GDS lib version to 1.0.0 ([#8654](https://github.com/rapidsai/cudf/pull/8654)) [@pxLi](https://github.com/pxLi)
-- Support for groupby/scan rank and dense_rank aggregations ([#8652](https://github.com/rapidsai/cudf/pull/8652)) [@rwlee](https://github.com/rwlee)
-- Fix usage of deprecated arrow ipc API ([#8632](https://github.com/rapidsai/cudf/pull/8632)) [@revans2](https://github.com/revans2)
-- Use absolute imports in `cudf` ([#8631](https://github.com/rapidsai/cudf/pull/8631)) [@galipremsagar](https://github.com/galipremsagar)
-- ENH Add Java CI build script ([#8627](https://github.com/rapidsai/cudf/pull/8627)) [@dillon-cullinan](https://github.com/dillon-cullinan)
-- Add DeprecationWarning to `ser.str.subword_tokenize` ([#8603](https://github.com/rapidsai/cudf/pull/8603)) [@VibhuJawa](https://github.com/VibhuJawa)
-- Rewrite binary operations for improved performance and additional type support ([#8598](https://github.com/rapidsai/cudf/pull/8598)) [@vyasr](https://github.com/vyasr)
-- Fix `mypy` errors surfacing because of `numpy-1.21.0` ([#8595](https://github.com/rapidsai/cudf/pull/8595)) [@galipremsagar](https://github.com/galipremsagar)
-- Remove unneeded includes from cudf::string_view headers ([#8594](https://github.com/rapidsai/cudf/pull/8594)) [@davidwendt](https://github.com/davidwendt)
-- Use cmake 3.20.1 as it is now required by rmm ([#8586](https://github.com/rapidsai/cudf/pull/8586)) [@robertmaynard](https://github.com/robertmaynard)
-- Remove device debug symbols from cmake CUDF_CUDA_FLAGS ([#8584](https://github.com/rapidsai/cudf/pull/8584)) [@davidwendt](https://github.com/davidwendt)
-- Dask-CuDF: use default Dask Dataframe optimizer ([#8581](https://github.com/rapidsai/cudf/pull/8581)) [@madsbk](https://github.com/madsbk)
-- Remove checking if an unsigned value is less than zero ([#8579](https://github.com/rapidsai/cudf/pull/8579)) [@robertmaynard](https://github.com/robertmaynard)
-- Remove strings_count parameter from cudf::strings::detail::create_chars_child_column ([#8576](https://github.com/rapidsai/cudf/pull/8576)) [@davidwendt](https://github.com/davidwendt)
-- Make `cudf.api.types` imports consistent ([#8571](https://github.com/rapidsai/cudf/pull/8571)) [@galipremsagar](https://github.com/galipremsagar)
-- Modernize libcudf basic example CMakeFile; updates CI build tests ([#8568](https://github.com/rapidsai/cudf/pull/8568)) [@isVoid](https://github.com/isVoid)
-- Rename concatenate_tests.cu to .cpp ([#8555](https://github.com/rapidsai/cudf/pull/8555)) [@davidwendt](https://github.com/davidwendt)
-- enable window lead/lag test on struct ([#8548](https://github.com/rapidsai/cudf/pull/8548)) [@wbo4958](https://github.com/wbo4958)
-- Add Java methods to split and write column views ([#8546](https://github.com/rapidsai/cudf/pull/8546)) [@razajafri](https://github.com/razajafri)
-- Small cleanup ([#8534](https://github.com/rapidsai/cudf/pull/8534)) [@codereport](https://github.com/codereport)
-- Unpin `dask` version in CI ([#8533](https://github.com/rapidsai/cudf/pull/8533)) [@galipremsagar](https://github.com/galipremsagar)
-- Added optional flag for building Arrow with S3 filesystem support ([#8531](https://github.com/rapidsai/cudf/pull/8531)) [@jdye64](https://github.com/jdye64)
-- Minor clean up of various internal column and frame utilities ([#8528](https://github.com/rapidsai/cudf/pull/8528)) [@vyasr](https://github.com/vyasr)
-- Rename some copying_test source files .cu to .cpp ([#8527](https://github.com/rapidsai/cudf/pull/8527)) [@davidwendt](https://github.com/davidwendt)
-- Correct the last warnings and issues when using newer cuda versions ([#8525](https://github.com/rapidsai/cudf/pull/8525)) [@robertmaynard](https://github.com/robertmaynard)
-- Correct unused parameter warnings in transform and unary ops ([#8521](https://github.com/rapidsai/cudf/pull/8521)) [@robertmaynard](https://github.com/robertmaynard)
-- Correct unused parameter warnings in string algorithms ([#8509](https://github.com/rapidsai/cudf/pull/8509)) [@robertmaynard](https://github.com/robertmaynard)
-- Add in JNI APIs for scan, replace_nulls, group_by.scan, and group_by.replace_nulls ([#8503](https://github.com/rapidsai/cudf/pull/8503)) [@revans2](https://github.com/revans2)
-- Fix `21.08` forward-merge conflicts ([#8502](https://github.com/rapidsai/cudf/pull/8502)) [@ajschmidt8](https://github.com/ajschmidt8)
-- Fix Cython formatting command in Contributing.md. ([#8496](https://github.com/rapidsai/cudf/pull/8496)) [@marlenezw](https://github.com/marlenezw)
-- Bug/correct unused parameters in reshape and text ([#8495](https://github.com/rapidsai/cudf/pull/8495)) [@robertmaynard](https://github.com/robertmaynard)
-- Correct unused parameter warnings in partitioning and stream compact ([#8494](https://github.com/rapidsai/cudf/pull/8494)) [@robertmaynard](https://github.com/robertmaynard)
-- Correct unused parameter warnings in labelling and list algorithms ([#8493](https://github.com/rapidsai/cudf/pull/8493)) [@robertmaynard](https://github.com/robertmaynard)
-- Refactor index construction ([#8485](https://github.com/rapidsai/cudf/pull/8485)) [@vyasr](https://github.com/vyasr)
-- Correct unused parameter warnings in replace algorithms ([#8483](https://github.com/rapidsai/cudf/pull/8483)) [@robertmaynard](https://github.com/robertmaynard)
-- Correct unused parameter warnings in reduction algorithms ([#8481](https://github.com/rapidsai/cudf/pull/8481)) [@robertmaynard](https://github.com/robertmaynard)
-- Correct unused parameter warnings in io algorithms ([#8480](https://github.com/rapidsai/cudf/pull/8480)) [@robertmaynard](https://github.com/robertmaynard)
-- Correct unused parameter warnings in interop algorithms ([#8479](https://github.com/rapidsai/cudf/pull/8479)) [@robertmaynard](https://github.com/robertmaynard)
-- Correct unused parameter warnings in filling algorithms ([#8468](https://github.com/rapidsai/cudf/pull/8468)) [@robertmaynard](https://github.com/robertmaynard)
-- Correct unused parameter warnings in groupby ([#8467](https://github.com/rapidsai/cudf/pull/8467)) [@robertmaynard](https://github.com/robertmaynard)
-- use libcu++ time_point as timestamp ([#8466](https://github.com/rapidsai/cudf/pull/8466)) [@karthikeyann](https://github.com/karthikeyann)
-- Modify reprog_device::extract to return groups in a single pass ([#8460](https://github.com/rapidsai/cudf/pull/8460)) [@davidwendt](https://github.com/davidwendt)
-- Update minimum Dask requirement to 2021.6.0 ([#8458](https://github.com/rapidsai/cudf/pull/8458)) [@pentschev](https://github.com/pentschev)
-- Fix failures when performing binary operations on DataFrames with empty columns ([#8452](https://github.com/rapidsai/cudf/pull/8452)) [@ChrisJar](https://github.com/ChrisJar)
-- Fix conflicts in `8447` ([#8448](https://github.com/rapidsai/cudf/pull/8448)) [@ajschmidt8](https://github.com/ajschmidt8)
-- Add serialization methods for `List` and `StructDtype` ([#8441](https://github.com/rapidsai/cudf/pull/8441)) [@charlesbluca](https://github.com/charlesbluca)
-- Replace make_empty_strings_column with make_empty_column ([#8435](https://github.com/rapidsai/cudf/pull/8435)) [@davidwendt](https://github.com/davidwendt)
-- JNI bindings for get_element ([#8433](https://github.com/rapidsai/cudf/pull/8433)) [@revans2](https://github.com/revans2)
-- Update dask make_meta changes to be compatible with dask upstream ([#8426](https://github.com/rapidsai/cudf/pull/8426)) [@galipremsagar](https://github.com/galipremsagar)
-- Unpin dask version on CI ([#8425](https://github.com/rapidsai/cudf/pull/8425)) [@galipremsagar](https://github.com/galipremsagar)
-- Add benchmark for strings/fixed_point convert APIs ([#8417](https://github.com/rapidsai/cudf/pull/8417)) [@davidwendt](https://github.com/davidwendt)
-- Adapt `cudf::scalar` classes to changes in `rmm::device_scalar` ([#8411](https://github.com/rapidsai/cudf/pull/8411)) [@harrism](https://github.com/harrism)
-- Add benchmark for strings/integers convert APIs ([#8402](https://github.com/rapidsai/cudf/pull/8402)) [@davidwendt](https://github.com/davidwendt)
-- Enable multi-file partitioning in dask_cudf.read_parquet ([#8393](https://github.com/rapidsai/cudf/pull/8393)) [@rjzamora](https://github.com/rjzamora)
-- Correct unused parameter warnings in rolling algorithms ([#8390](https://github.com/rapidsai/cudf/pull/8390)) [@robertmaynard](https://github.com/robertmaynard)
-- Correct unused parameters in column round and search ([#8389](https://github.com/rapidsai/cudf/pull/8389)) [@robertmaynard](https://github.com/robertmaynard)
-- Add functionality to apply `Dtype` metadata to `ColumnBase` ([#8373](https://github.com/rapidsai/cudf/pull/8373)) [@charlesbluca](https://github.com/charlesbluca)
-- Refactor setting stack size in regex code ([#8358](https://github.com/rapidsai/cudf/pull/8358)) [@davidwendt](https://github.com/davidwendt)
-- Update Java bindings to 21.08-SNAPSHOT ([#8344](https://github.com/rapidsai/cudf/pull/8344)) [@pxLi](https://github.com/pxLi)
-- Replace remaining uses of device_vector ([#8343](https://github.com/rapidsai/cudf/pull/8343)) [@harrism](https://github.com/harrism)
-- Statically link libnvcomp into libcudfjni ([#8334](https://github.com/rapidsai/cudf/pull/8334)) [@jlowe](https://github.com/jlowe)
-- Resolve auto merge conflicts for Branch 21.08 from branch 21.06 ([#8329](https://github.com/rapidsai/cudf/pull/8329)) [@galipremsagar](https://github.com/galipremsagar)
-- Minor code refactor for sorted_order ([#8326](https://github.com/rapidsai/cudf/pull/8326)) [@wbo4958](https://github.com/wbo4958)
-- Remove special Index class from the general index class hierarchy ([#8309](https://github.com/rapidsai/cudf/pull/8309)) [@vyasr](https://github.com/vyasr)
-- Add first-class dtype utilities ([#8308](https://github.com/rapidsai/cudf/pull/8308)) [@vyasr](https://github.com/vyasr)
-- Add option to link Java bindings with Arrow dynamically ([#8307](https://github.com/rapidsai/cudf/pull/8307)) [@jlowe](https://github.com/jlowe)
-- Refactor ColumnMethods and its subclasses to remove `column` argument and require `parent` argument ([#8306](https://github.com/rapidsai/cudf/pull/8306)) [@shwina](https://github.com/shwina)
-- Refactor `scatter` for list columns ([#8255](https://github.com/rapidsai/cudf/pull/8255)) [@isVoid](https://github.com/isVoid)
-- Expose pack/unpack API to Python ([#8153](https://github.com/rapidsai/cudf/pull/8153)) [@charlesbluca](https://github.com/charlesbluca)
-- Adding cudf.cut method ([#8002](https://github.com/rapidsai/cudf/pull/8002)) [@marlenezw](https://github.com/marlenezw)
-- Optimize string gather performance for large strings ([#7980](https://github.com/rapidsai/cudf/pull/7980)) [@gaohao95](https://github.com/gaohao95)
-- Add peak memory usage tracking to cuIO benchmarks ([#7770](https://github.com/rapidsai/cudf/pull/7770)) [@devavret](https://github.com/devavret)
-- Updating Clang Version to 11.0.0 ([#6695](https://github.com/rapidsai/cudf/pull/6695)) [@codereport](https://github.com/codereport)
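One user-facing item in the list above is `cudf.cut` ([#8002](https://github.com/rapidsai/cudf/pull/8002)); a minimal sketch of its pandas-compatible form, with illustrative data and bins.

```python
import cudf

ages = cudf.Series([1, 7, 5, 4, 6, 3])
# Bin continuous values into three equal-width intervals, as pandas.cut does
binned = cudf.cut(ages, bins=3)
```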
-
-# cuDF 21.06.00 (9 Jun 2021)
-
-## 🚨 Breaking Changes
-
-- Add support for `make_meta_obj` dispatch in `dask-cudf` ([#8342](https://github.com/rapidsai/cudf/pull/8342)) [@galipremsagar](https://github.com/galipremsagar)
-- Add separator-on-null parameter to strings concatenate APIs ([#8282](https://github.com/rapidsai/cudf/pull/8282)) [@davidwendt](https://github.com/davidwendt)
-- Introduce a common parent class for NumericalColumn and DecimalColumn ([#8278](https://github.com/rapidsai/cudf/pull/8278)) [@vyasr](https://github.com/vyasr)
-- Update ORC statistics API to use C++17 standard library ([#8241](https://github.com/rapidsai/cudf/pull/8241)) [@vuule](https://github.com/vuule)
-- Preserve column hierarchy when getting NULL row from `LIST` column ([#8206](https://github.com/rapidsai/cudf/pull/8206)) [@isVoid](https://github.com/isVoid)
-- `Groupby.shift` c++ API refactor and python binding ([#8131](https://github.com/rapidsai/cudf/pull/8131)) [@isVoid](https://github.com/isVoid)
-
-## 🐛 Bug Fixes
-
-- Fix struct flattening to add a validity column only when the input column has null element ([#8374](https://github.com/rapidsai/cudf/pull/8374)) [@ttnghia](https://github.com/ttnghia)
-- Compilation fix: Remove redefinition for `std::is_same_v()` ([#8369](https://github.com/rapidsai/cudf/pull/8369)) [@mythrocks](https://github.com/mythrocks)
-- Add backward compatibility for `dask-cudf` to work with other versions of `dask` ([#8368](https://github.com/rapidsai/cudf/pull/8368)) [@galipremsagar](https://github.com/galipremsagar)
-- Handle empty results with nested types in copy_if_else ([#8359](https://github.com/rapidsai/cudf/pull/8359)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Handle nested column types properly for empty parquet files. ([#8350](https://github.com/rapidsai/cudf/pull/8350)) [@nvdbaranec](https://github.com/nvdbaranec)
-- Raise error when unsupported arguments are passed to `dask_cudf.DataFrame.sort_values` ([#8349](https://github.com/rapidsai/cudf/pull/8349)) [@galipremsagar](https://github.com/galipremsagar)
-- Raise `NotImplementedError` for axis=1 in `rank` ([#8347](https://github.com/rapidsai/cudf/pull/8347)) [@galipremsagar](https://github.com/galipremsagar)
-- Add support for `make_meta_obj` dispatch in `dask-cudf` ([#8342](https://github.com/rapidsai/cudf/pull/8342)) [@galipremsagar](https://github.com/galipremsagar)
-- Update Java string concatenate test for single column ([#8330](https://github.com/rapidsai/cudf/pull/8330)) [@tgravescs](https://github.com/tgravescs)
-- Use empty_like in scatter ([#8314](https://github.com/rapidsai/cudf/pull/8314)) [@revans2](https://github.com/revans2)
-- Fix concatenate_lists_ignore_null on rows of all_nulls ([#8312](https://github.com/rapidsai/cudf/pull/8312)) [@sperlingxx](https://github.com/sperlingxx)
-- Add separator-on-null parameter to strings concatenate APIs ([#8282](https://github.com/rapidsai/cudf/pull/8282)) [@davidwendt](https://github.com/davidwendt)
-- COLLECT_LIST support returning empty output columns. ([#8279](https://github.com/rapidsai/cudf/pull/8279)) [@mythrocks](https://github.com/mythrocks)
-- Update io util to convert path like object to string ([#8275](https://github.com/rapidsai/cudf/pull/8275)) [@ayushdg](https://github.com/ayushdg)
-- Fix result column types for empty inputs to rolling window ([#8274](https://github.com/rapidsai/cudf/pull/8274)) [@mythrocks](https://github.com/mythrocks)
-- Actually test equality in assert_groupby_results_equal ([#8272](https://github.com/rapidsai/cudf/pull/8272)) [@shwina](https://github.com/shwina)
-- CMake always explicitly specify a source files extension ([#8270](https://github.com/rapidsai/cudf/pull/8270)) [@robertmaynard](https://github.com/robertmaynard)
-- Fix struct binary search and struct flattening ([#8268](https://github.com/rapidsai/cudf/pull/8268)) [@ttnghia](https://github.com/ttnghia)
-- Revert "patch thrust to fix intmax num elements limitation in scan_by_key" ([#8263](https://github.com/rapidsai/cudf/pull/8263)) [@cwharris](https://github.com/cwharris)
-- upgrade dlpack to 0.5 ([#8262](https://github.com/rapidsai/cudf/pull/8262)) [@cwharris](https://github.com/cwharris)
-- Fixes CSV-reader type inference for thousands separator and decimal point ([#8261](https://github.com/rapidsai/cudf/pull/8261)) [@elstehle](https://github.com/elstehle)
-- Fix incorrect assertion in Java concat ([#8258](https://github.com/rapidsai/cudf/pull/8258)) [@sperlingxx](https://github.com/sperlingxx)
-- Copy nested types upon construction ([#8244](https://github.com/rapidsai/cudf/pull/8244)) [@isVoid](https://github.com/isVoid)
-- Preserve column hierarchy when getting NULL row from `LIST` column ([#8206](https://github.com/rapidsai/cudf/pull/8206)) [@isVoid](https://github.com/isVoid)
-- Clip decimal binary op precision at max precision ([#8194](https://github.com/rapidsai/cudf/pull/8194)) [@ChrisJar](https://github.com/ChrisJar)
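The CSV type-inference fix above ([#8261](https://github.com/rapidsai/cudf/pull/8261)) concerns locale-style numeric markers; a minimal sketch of the affected reader options, with illustrative data.

```python
import io

import cudf

data = io.BytesIO(b"amount;qty\n1.234,5;10\n2.000,0;20\n")
# European-style markers: '.' as the thousands separator, ',' as the decimal point
df = cudf.read_csv(data, sep=";", thousands=".", decimal=",")
```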
[@shaneding](https://github.com/shaneding) -- Added decimal writing for CSV writer ([#8296](https://github.com/rapidsai/cudf/pull/8296)) [@kaatish](https://github.com/kaatish) -- Java: Support creating a scalar from utf8 string ([#8294](https://github.com/rapidsai/cudf/pull/8294)) [@firestarman](https://github.com/firestarman) -- Add Java API for Concatenate strings with separator ([#8289](https://github.com/rapidsai/cudf/pull/8289)) [@tgravescs](https://github.com/tgravescs) -- `strings::join_list_elements` options for empty list inputs ([#8285](https://github.com/rapidsai/cudf/pull/8285)) [@ttnghia](https://github.com/ttnghia) -- Return python lists for __getitem__ calls to list type series ([#8265](https://github.com/rapidsai/cudf/pull/8265)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- add unit tests for lead/lag on list for row window ([#8259](https://github.com/rapidsai/cudf/pull/8259)) [@wbo4958](https://github.com/wbo4958) -- Create a String column from UTF8 String byte arrays ([#8257](https://github.com/rapidsai/cudf/pull/8257)) [@firestarman](https://github.com/firestarman) -- Support scattering `list_scalar` ([#8256](https://github.com/rapidsai/cudf/pull/8256)) [@isVoid](https://github.com/isVoid) -- Implement `lists::concatenate_list_elements` ([#8231](https://github.com/rapidsai/cudf/pull/8231)) [@ttnghia](https://github.com/ttnghia) -- Support for struct scalars. ([#8220](https://github.com/rapidsai/cudf/pull/8220)) [@nvdbaranec](https://github.com/nvdbaranec) -- Add support for decimal types in ORC writer ([#8198](https://github.com/rapidsai/cudf/pull/8198)) [@vuule](https://github.com/vuule) -- Support create lists column from a `list_scalar` ([#8185](https://github.com/rapidsai/cudf/pull/8185)) [@isVoid](https://github.com/isVoid) -- `Groupby.shift` c++ API refactor and python binding ([#8131](https://github.com/rapidsai/cudf/pull/8131)) [@isVoid](https://github.com/isVoid) -- Add `groupby::replace_nulls(replace_policy)` api ([#7118](https://github.com/rapidsai/cudf/pull/7118)) [@isVoid](https://github.com/isVoid) - -## 🛠️ Improvements - -- Support Dask + Distributed 2021.05.1 ([#8392](https://github.com/rapidsai/cudf/pull/8392)) [@jakirkham](https://github.com/jakirkham) -- Add aliases for string methods ([#8353](https://github.com/rapidsai/cudf/pull/8353)) [@shwina](https://github.com/shwina) -- Update environment variable used to determine `cuda_version` ([#8321](https://github.com/rapidsai/cudf/pull/8321)) [@ajschmidt8](https://github.com/ajschmidt8) -- JNI: Refactor the code of making column from scalar ([#8310](https://github.com/rapidsai/cudf/pull/8310)) [@firestarman](https://github.com/firestarman) -- Update `CHANGELOG.md` links for calver ([#8303](https://github.com/rapidsai/cudf/pull/8303)) [@ajschmidt8](https://github.com/ajschmidt8) -- Merge `branch-0.19` into `branch-21.06` ([#8302](https://github.com/rapidsai/cudf/pull/8302)) [@ajschmidt8](https://github.com/ajschmidt8) -- use address and length for GDS reads/writes ([#8301](https://github.com/rapidsai/cudf/pull/8301)) [@rongou](https://github.com/rongou) -- Update cudfjni version to 21.06.0 ([#8292](https://github.com/rapidsai/cudf/pull/8292)) 
[@pxLi](https://github.com/pxLi) -- Update docs build script ([#8284](https://github.com/rapidsai/cudf/pull/8284)) [@ajschmidt8](https://github.com/ajschmidt8) -- Make device_buffer streams explicit and enforce move construction ([#8280](https://github.com/rapidsai/cudf/pull/8280)) [@harrism](https://github.com/harrism) -- Introduce a common parent class for NumericalColumn and DecimalColumn ([#8278](https://github.com/rapidsai/cudf/pull/8278)) [@vyasr](https://github.com/vyasr) -- Do not add nulls to the hash table when null_equality::NOT_EQUAL is passed to left_semi_join and left_anti_join ([#8277](https://github.com/rapidsai/cudf/pull/8277)) [@nvdbaranec](https://github.com/nvdbaranec) -- Enable implicit casting when concatenating mixed types ([#8276](https://github.com/rapidsai/cudf/pull/8276)) [@ChrisJar](https://github.com/ChrisJar) -- Fix CMake FindPackage rmm, pin dev envs' dlpack to v0.3 ([#8271](https://github.com/rapidsai/cudf/pull/8271)) [@trxcllnt](https://github.com/trxcllnt) -- Update cudfjni version to 21.06 ([#8267](https://github.com/rapidsai/cudf/pull/8267)) [@pxLi](https://github.com/pxLi) -- support RMM aligned resource adapter in JNI ([#8266](https://github.com/rapidsai/cudf/pull/8266)) [@rongou](https://github.com/rongou) -- Pass compiler environment variables to conda python build ([#8260](https://github.com/rapidsai/cudf/pull/8260)) [@Ethyling](https://github.com/Ethyling) -- Remove abc inheritance from Serializable ([#8254](https://github.com/rapidsai/cudf/pull/8254)) [@vyasr](https://github.com/vyasr) -- Move more methods into SingleColumnFrame ([#8253](https://github.com/rapidsai/cudf/pull/8253)) [@vyasr](https://github.com/vyasr) -- Update ORC statistics API to use C++17 standard library ([#8241](https://github.com/rapidsai/cudf/pull/8241)) [@vuule](https://github.com/vuule) -- Correct unused parameter warnings in dictionary algorithms ([#8239](https://github.com/rapidsai/cudf/pull/8239)) [@robertmaynard](https://github.com/robertmaynard) -- Correct unused parameters in the copying algorithms ([#8232](https://github.com/rapidsai/cudf/pull/8232)) [@robertmaynard](https://github.com/robertmaynard) -- IO statistics cleanup ([#8191](https://github.com/rapidsai/cudf/pull/8191)) [@kaatish](https://github.com/kaatish) -- Refactor of rolling_window implementation. ([#8158](https://github.com/rapidsai/cudf/pull/8158)) [@nvdbaranec](https://github.com/nvdbaranec) -- Add a flag for allowing single quotes in JSON strings. 
([#8144](https://github.com/rapidsai/cudf/pull/8144)) [@nvdbaranec](https://github.com/nvdbaranec) -- Column refactoring 2 ([#8130](https://github.com/rapidsai/cudf/pull/8130)) [@vyasr](https://github.com/vyasr) -- support space in workspace ([#7956](https://github.com/rapidsai/cudf/pull/7956)) [@jolorunyomi](https://github.com/jolorunyomi) -- Support collect_set on rolling window ([#7881](https://github.com/rapidsai/cudf/pull/7881)) [@sperlingxx](https://github.com/sperlingxx) - -# cuDF 0.19.0 (21 Apr 2021) - -## 🚨 Breaking Changes - -- Allow hash_partition to take a seed value ([#7771](https://github.com/rapidsai/cudf/pull/7771)) [@magnatelee](https://github.com/magnatelee) -- Allow merging index column with data column using keyword "on" ([#7736](https://github.com/rapidsai/cudf/pull/7736)) [@skirui-source](https://github.com/skirui-source) -- Change JNI API to avoid loading native dependencies when creating sort order classes. ([#7729](https://github.com/rapidsai/cudf/pull/7729)) [@revans2](https://github.com/revans2) -- Replace device_vector with device_uvector in null_mask ([#7715](https://github.com/rapidsai/cudf/pull/7715)) [@harrism](https://github.com/harrism) -- Don't identify decimals as strings. ([#7710](https://github.com/rapidsai/cudf/pull/7710)) [@vyasr](https://github.com/vyasr) -- Fix Java Parquet write after writer API changes ([#7655](https://github.com/rapidsai/cudf/pull/7655)) [@revans2](https://github.com/revans2) -- Convert cudf::concatenate APIs to use spans and device_uvector ([#7621](https://github.com/rapidsai/cudf/pull/7621)) [@harrism](https://github.com/harrism) -- Update missing docstring examples in python public APIs ([#7546](https://github.com/rapidsai/cudf/pull/7546)) [@galipremsagar](https://github.com/galipremsagar) -- Remove unneeded step parameter from strings::detail::copy_slice ([#7525](https://github.com/rapidsai/cudf/pull/7525)) [@davidwendt](https://github.com/davidwendt) -- Rename ARROW_STATIC_LIB because it conflicts with one in FindArrow.cmake ([#7518](https://github.com/rapidsai/cudf/pull/7518)) [@trxcllnt](https://github.com/trxcllnt) -- Match Pandas logic for comparing two objects with nulls ([#7490](https://github.com/rapidsai/cudf/pull/7490)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Add struct support to parquet writer ([#7461](https://github.com/rapidsai/cudf/pull/7461)) [@devavret](https://github.com/devavret) -- Join APIs that return gathermaps ([#7454](https://github.com/rapidsai/cudf/pull/7454)) [@shwina](https://github.com/shwina) -- `fixed_point` + `cudf::binary_operation` API Changes ([#7435](https://github.com/rapidsai/cudf/pull/7435)) [@codereport](https://github.com/codereport) -- Fix BUG: Exception when PYTHONOPTIMIZE=2 ([#7434](https://github.com/rapidsai/cudf/pull/7434)) [@skirui-source](https://github.com/skirui-source) -- Change nvtext::load_vocabulary_file to return a unique ptr ([#7424](https://github.com/rapidsai/cudf/pull/7424)) [@davidwendt](https://github.com/davidwendt) -- Refactor strings column factories ([#7397](https://github.com/rapidsai/cudf/pull/7397)) [@harrism](https://github.com/harrism) -- Use CMAKE_CUDA_ARCHITECTURES 
-- Upgrade pandas to 1.2 ([#7375](https://github.com/rapidsai/cudf/pull/7375)) [@galipremsagar](https://github.com/galipremsagar)
-- Rename `logical_cast` to `bit_cast` and allow additional conversions ([#7373](https://github.com/rapidsai/cudf/pull/7373)) [@ttnghia](https://github.com/ttnghia)
-- Rework libcudf CMakeLists.txt to export targets for CPM ([#7107](https://github.com/rapidsai/cudf/pull/7107)) [@trxcllnt](https://github.com/trxcllnt)
-
-## 🐛 Bug Fixes
-
-- Fix a `NameError` in meta dispatch API ([#7996](https://github.com/rapidsai/cudf/pull/7996)) [@galipremsagar](https://github.com/galipremsagar)
-- Reindex in `DataFrame.__setitem__` ([#7957](https://github.com/rapidsai/cudf/pull/7957)) [@galipremsagar](https://github.com/galipremsagar)
-- jitify direct-to-cubin compilation and caching. ([#7919](https://github.com/rapidsai/cudf/pull/7919)) [@cwharris](https://github.com/cwharris)
-- Use dynamic cudart for nvcomp in java build ([#7896](https://github.com/rapidsai/cudf/pull/7896)) [@abellina](https://github.com/abellina)
-- fix "incompatible redefinition" warnings ([#7894](https://github.com/rapidsai/cudf/pull/7894)) [@cwharris](https://github.com/cwharris)
-- cudf consistently specifies the cuda runtime ([#7887](https://github.com/rapidsai/cudf/pull/7887)) [@robertmaynard](https://github.com/robertmaynard)
-- disable verbose output for jitify_preprocess ([#7886](https://github.com/rapidsai/cudf/pull/7886)) [@cwharris](https://github.com/cwharris)
-- CMake jit_preprocess_files function only runs when needed ([#7872](https://github.com/rapidsai/cudf/pull/7872)) [@robertmaynard](https://github.com/robertmaynard)
-- Push DeviceScalar construction into cython for list.contains ([#7864](https://github.com/rapidsai/cudf/pull/7864)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- cudf now sets an install rpath of $ORIGIN ([#7863](https://github.com/rapidsai/cudf/pull/7863)) [@robertmaynard](https://github.com/robertmaynard)
-- Don't install Thrust examples, tests, docs, and python files ([#7811](https://github.com/rapidsai/cudf/pull/7811)) [@robertmaynard](https://github.com/robertmaynard)
-- Sort by index in groupby tests more consistently ([#7802](https://github.com/rapidsai/cudf/pull/7802)) [@shwina](https://github.com/shwina)
-- Revert "Update conda recipes pinning of repo dependencies ([#7743](https://github.com/rapidsai/cudf/pull/7743))" ([#7793](https://github.com/rapidsai/cudf/pull/7793)) [@raydouglass](https://github.com/raydouglass)
-- Add decimal column handling in copy_type_metadata ([#7788](https://github.com/rapidsai/cudf/pull/7788)) [@shwina](https://github.com/shwina)
-- Add column names validation in parquet writer ([#7786](https://github.com/rapidsai/cudf/pull/7786)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix Java explode outer unit tests ([#7782](https://github.com/rapidsai/cudf/pull/7782)) [@jlowe](https://github.com/jlowe)
-- Fix compiler warning about non-POD types passed through ellipsis ([#7781](https://github.com/rapidsai/cudf/pull/7781)) [@jrhemstad](https://github.com/jrhemstad)
-- User resource fix for replace_nulls ([#7769](https://github.com/rapidsai/cudf/pull/7769)) [@magnatelee](https://github.com/magnatelee)
-- Fix type dispatch for columnar replace_nulls ([#7768](https://github.com/rapidsai/cudf/pull/7768)) [@jlowe](https://github.com/jlowe)
-- Add `ignore_order` parameter to dask-cudf concat dispatch ([#7765](https://github.com/rapidsai/cudf/pull/7765)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix slicing and arrow representations of decimal columns ([#7755](https://github.com/rapidsai/cudf/pull/7755)) [@vyasr](https://github.com/vyasr)
-- Fixing issue with explode_outer position not nulling position entries of null rows ([#7754](https://github.com/rapidsai/cudf/pull/7754)) [@hyperbolic2346](https://github.com/hyperbolic2346)
-- Implement scatter for struct columns ([#7752](https://github.com/rapidsai/cudf/pull/7752)) [@ttnghia](https://github.com/ttnghia)
-- Fix data corruption in string columns ([#7746](https://github.com/rapidsai/cudf/pull/7746)) [@galipremsagar](https://github.com/galipremsagar)
-- Fix string length in stripe dictionary building ([#7744](https://github.com/rapidsai/cudf/pull/7744)) [@kaatish](https://github.com/kaatish)
-- Update conda recipes pinning of repo dependencies ([#7743](https://github.com/rapidsai/cudf/pull/7743)) [@mike-wendt](https://github.com/mike-wendt)
-- Enable dask dispatch to cuDF's `is_categorical_dtype` for cuDF objects ([#7740](https://github.com/rapidsai/cudf/pull/7740)) [@brandon-b-miller](https://github.com/brandon-b-miller)
-- Fix dictionary size computation in ORC writer ([#7737](https://github.com/rapidsai/cudf/pull/7737)) [@vuule](https://github.com/vuule)
-- Fix `cudf::cast` overflow for `decimal64` to `int32_t` or smaller in certain cases ([#7733](https://github.com/rapidsai/cudf/pull/7733)) [@codereport](https://github.com/codereport)
-- Change JNI API to avoid loading native dependencies when creating sort order classes. ([#7729](https://github.com/rapidsai/cudf/pull/7729)) [@revans2](https://github.com/revans2)
-- Disable column_view data accessors for unsupported types ([#7725](https://github.com/rapidsai/cudf/pull/7725)) [@jrhemstad](https://github.com/jrhemstad)
-- Materialize `RangeIndex` when `index=True` in parquet writer ([#7711](https://github.com/rapidsai/cudf/pull/7711)) [@galipremsagar](https://github.com/galipremsagar)
-- Don't identify decimals as strings. ([#7710](https://github.com/rapidsai/cudf/pull/7710)) [@vyasr](https://github.com/vyasr)
([#7710](https://github.com/rapidsai/cudf/pull/7710)) [@vyasr](https://github.com/vyasr) -- Fix return type of `DataFrame.argsort` ([#7706](https://github.com/rapidsai/cudf/pull/7706)) [@galipremsagar](https://github.com/galipremsagar) -- Fix/correct cudf installed package requirements ([#7688](https://github.com/rapidsai/cudf/pull/7688)) [@robertmaynard](https://github.com/robertmaynard) -- Fix SparkMurmurHash3_32 hash inconsistencies with Apache Spark ([#7672](https://github.com/rapidsai/cudf/pull/7672)) [@jlowe](https://github.com/jlowe) -- Fix ORC reader issue with reading empty string columns ([#7656](https://github.com/rapidsai/cudf/pull/7656)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) -- Fix Java Parquet write after writer API changes ([#7655](https://github.com/rapidsai/cudf/pull/7655)) [@revans2](https://github.com/revans2) -- Fixing empty null lists throwing explode_outer for a loop. ([#7649](https://github.com/rapidsai/cudf/pull/7649)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Fix internal compiler error during JNI Docker build ([#7645](https://github.com/rapidsai/cudf/pull/7645)) [@jlowe](https://github.com/jlowe) -- Fix Debug build break with device_uvectors in grouped_rolling.cu ([#7633](https://github.com/rapidsai/cudf/pull/7633)) [@mythrocks](https://github.com/mythrocks) -- Parquet reader: Fix issue when using skip_rows on non-nested columns containing nulls ([#7627](https://github.com/rapidsai/cudf/pull/7627)) [@nvdbaranec](https://github.com/nvdbaranec) -- Fix ORC reader for empty DataFrame/Table ([#7624](https://github.com/rapidsai/cudf/pull/7624)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) -- Fix specifying GPU architecture in JNI build ([#7612](https://github.com/rapidsai/cudf/pull/7612)) [@jlowe](https://github.com/jlowe) -- Fix ORC writer OOM issue ([#7605](https://github.com/rapidsai/cudf/pull/7605)) [@vuule](https://github.com/vuule) -- Fix 0.18 --> 0.19 automerge ([#7589](https://github.com/rapidsai/cudf/pull/7589)) [@kkraus14](https://github.com/kkraus14) -- Fix ORC issue with incorrect timestamp nanosecond values ([#7581](https://github.com/rapidsai/cudf/pull/7581)) [@vuule](https://github.com/vuule) -- Fix missing Dask imports ([#7580](https://github.com/rapidsai/cudf/pull/7580)) [@kkraus14](https://github.com/kkraus14) -- CMAKE_CUDA_ARCHITECTURES doesn't change when build-system invokes cmake ([#7579](https://github.com/rapidsai/cudf/pull/7579)) [@robertmaynard](https://github.com/robertmaynard) -- Another fix for offsets_end() iterator in lists_column_view ([#7575](https://github.com/rapidsai/cudf/pull/7575)) [@ttnghia](https://github.com/ttnghia) -- Fix ORC writer output corruption with string columns ([#7565](https://github.com/rapidsai/cudf/pull/7565)) [@vuule](https://github.com/vuule) -- Fix cudf::lists::sort_lists failing for sliced column ([#7564](https://github.com/rapidsai/cudf/pull/7564)) [@ttnghia](https://github.com/ttnghia) -- FIX Fix Anaconda upload args ([#7558](https://github.com/rapidsai/cudf/pull/7558)) [@dillon-cullinan](https://github.com/dillon-cullinan) -- Fix index mismatch issue in equality related APIs ([#7555](https://github.com/rapidsai/cudf/pull/7555)) 
[@galipremsagar](https://github.com/galipremsagar) -- FIX Revert gpuci_conda_retry on conda file output locations ([#7552](https://github.com/rapidsai/cudf/pull/7552)) [@dillon-cullinan](https://github.com/dillon-cullinan) -- Fix offset_end iterator for lists_column_view, which was not correctl… ([#7551](https://github.com/rapidsai/cudf/pull/7551)) [@ttnghia](https://github.com/ttnghia) -- Fix no such file dlpack.h error when build libcudf ([#7549](https://github.com/rapidsai/cudf/pull/7549)) [@chenrui17](https://github.com/chenrui17) -- Update missing docstring examples in python public APIs ([#7546](https://github.com/rapidsai/cudf/pull/7546)) [@galipremsagar](https://github.com/galipremsagar) -- Decimal32 Build Fix ([#7544](https://github.com/rapidsai/cudf/pull/7544)) [@razajafri](https://github.com/razajafri) -- FIX Retry conda output location ([#7540](https://github.com/rapidsai/cudf/pull/7540)) [@dillon-cullinan](https://github.com/dillon-cullinan) -- fix missing renames of dask git branches from master to main ([#7535](https://github.com/rapidsai/cudf/pull/7535)) [@kkraus14](https://github.com/kkraus14) -- Remove detail from device_span ([#7533](https://github.com/rapidsai/cudf/pull/7533)) [@rwlee](https://github.com/rwlee) -- Change dask and distributed branch to main ([#7532](https://github.com/rapidsai/cudf/pull/7532)) [@dantegd](https://github.com/dantegd) -- Update JNI build to use CUDF_USE_ARROW_STATIC ([#7526](https://github.com/rapidsai/cudf/pull/7526)) [@jlowe](https://github.com/jlowe) -- Make sure rmm::rmm CMake target is visible to cudf users ([#7524](https://github.com/rapidsai/cudf/pull/7524)) [@robertmaynard](https://github.com/robertmaynard) -- Fix contiguous_split not properly handling output partitions > 2 GB. 
([#7515](https://github.com/rapidsai/cudf/pull/7515)) [@nvdbaranec](https://github.com/nvdbaranec) -- Change jit launch to safe_launch ([#7510](https://github.com/rapidsai/cudf/pull/7510)) [@devavret](https://github.com/devavret) -- Fix comparison between Datetime/Timedelta columns and NULL scalars ([#7504](https://github.com/rapidsai/cudf/pull/7504)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Fix off-by-one error in char-parallel string scalar replace ([#7502](https://github.com/rapidsai/cudf/pull/7502)) [@jlowe](https://github.com/jlowe) -- Fix JNI deprecation of all, put it on the wrong version before ([#7501](https://github.com/rapidsai/cudf/pull/7501)) [@revans2](https://github.com/revans2) -- Fix Series/Dataframe Mixed Arithmetic ([#7491](https://github.com/rapidsai/cudf/pull/7491)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Fix JNI build after removal of libcudf sub-libraries ([#7486](https://github.com/rapidsai/cudf/pull/7486)) [@jlowe](https://github.com/jlowe) -- Correctly compile benchmarks ([#7485](https://github.com/rapidsai/cudf/pull/7485)) [@robertmaynard](https://github.com/robertmaynard) -- Fix bool column corruption with ORC Reader ([#7483](https://github.com/rapidsai/cudf/pull/7483)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) -- Fix `__repr__` for categorical dtype ([#7476](https://github.com/rapidsai/cudf/pull/7476)) [@galipremsagar](https://github.com/galipremsagar) -- Java cleaner synchronization ([#7474](https://github.com/rapidsai/cudf/pull/7474)) [@abellina](https://github.com/abellina) -- Fix java float/double parsing tests ([#7473](https://github.com/rapidsai/cudf/pull/7473)) [@revans2](https://github.com/revans2) -- Pass stream and user resource to make_default_constructed_scalar ([#7469](https://github.com/rapidsai/cudf/pull/7469)) [@magnatelee](https://github.com/magnatelee) -- Improve stability of dask_cudf.DataFrame.var and dask_cudf.DataFrame.std ([#7453](https://github.com/rapidsai/cudf/pull/7453)) [@rjzamora](https://github.com/rjzamora) -- Missing `device_storage_dispatch` change affecting `cudf::gather` ([#7449](https://github.com/rapidsai/cudf/pull/7449)) [@codereport](https://github.com/codereport) -- fix cuFile JNI compile errors ([#7445](https://github.com/rapidsai/cudf/pull/7445)) [@rongou](https://github.com/rongou) -- Support `Series.__setitem__` with key to a new row ([#7443](https://github.com/rapidsai/cudf/pull/7443)) [@isVoid](https://github.com/isVoid) -- Fix BUG: Exception when PYTHONOPTIMIZE=2 ([#7434](https://github.com/rapidsai/cudf/pull/7434)) [@skirui-source](https://github.com/skirui-source) -- Make inclusive scan safe for cases with leading nulls ([#7432](https://github.com/rapidsai/cudf/pull/7432)) [@magnatelee](https://github.com/magnatelee) -- Fix typo in list_device_view::pair_rep_end() ([#7423](https://github.com/rapidsai/cudf/pull/7423)) [@mythrocks](https://github.com/mythrocks) -- Fix string to double conversion and row equivalent comparison ([#7410](https://github.com/rapidsai/cudf/pull/7410)) [@ttnghia](https://github.com/ttnghia) -- Fix thrust failure when transferring data from device_vector to host_vector with vectors of size 1 
([#7382](https://github.com/rapidsai/cudf/pull/7382)) [@ttnghia](https://github.com/ttnghia) -- Fix std::exception catch-by-reference gcc9 compile error ([#7380](https://github.com/rapidsai/cudf/pull/7380)) [@davidwendt](https://github.com/davidwendt) -- Fix skiprows issue with ORC Reader ([#7359](https://github.com/rapidsai/cudf/pull/7359)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) -- fix Arrow CMake file ([#7358](https://github.com/rapidsai/cudf/pull/7358)) [@rongou](https://github.com/rongou) -- Fix lists::contains() for NaN and Decimals ([#7349](https://github.com/rapidsai/cudf/pull/7349)) [@mythrocks](https://github.com/mythrocks) -- Handle cupy array in `Dataframe.__setitem__` ([#7340](https://github.com/rapidsai/cudf/pull/7340)) [@galipremsagar](https://github.com/galipremsagar) -- Fix invalid-device-fn error in cudf::strings::replace_re with multiple regex's ([#7336](https://github.com/rapidsai/cudf/pull/7336)) [@davidwendt](https://github.com/davidwendt) -- FIX Add codecov upload block to gpu script ([#6860](https://github.com/rapidsai/cudf/pull/6860)) [@dillon-cullinan](https://github.com/dillon-cullinan) - -## 📖 Documentation - -- Fix join API doxygen ([#7890](https://github.com/rapidsai/cudf/pull/7890)) [@shwina](https://github.com/shwina) -- Add Resources to README. ([#7697](https://github.com/rapidsai/cudf/pull/7697)) [@bdice](https://github.com/bdice) -- Add `isin` examples in Docstring ([#7479](https://github.com/rapidsai/cudf/pull/7479)) [@galipremsagar](https://github.com/galipremsagar) -- Resolving unlinked type shorthands in cudf doc ([#7416](https://github.com/rapidsai/cudf/pull/7416)) [@isVoid](https://github.com/isVoid) -- Fix typo in regex.md doc page ([#7363](https://github.com/rapidsai/cudf/pull/7363)) [@davidwendt](https://github.com/davidwendt) -- Fix incorrect strings_column_view::chars_size documentation ([#7360](https://github.com/rapidsai/cudf/pull/7360)) [@jlowe](https://github.com/jlowe) - -## 🚀 New Features - -- Enable basic reductions for decimal columns ([#7776](https://github.com/rapidsai/cudf/pull/7776)) [@ChrisJar](https://github.com/ChrisJar) -- Enable join on decimal columns ([#7764](https://github.com/rapidsai/cudf/pull/7764)) [@ChrisJar](https://github.com/ChrisJar) -- Allow merging index column with data column using keyword "on" ([#7736](https://github.com/rapidsai/cudf/pull/7736)) [@skirui-source](https://github.com/skirui-source) -- Implement DecimalColumn + Scalar and add cudf.Scalars of Decimal64Dtype ([#7732](https://github.com/rapidsai/cudf/pull/7732)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Add support for `unique` groupby aggregation ([#7726](https://github.com/rapidsai/cudf/pull/7726)) [@shwina](https://github.com/shwina) -- Expose libcudf's label_bins function to cudf ([#7724](https://github.com/rapidsai/cudf/pull/7724)) [@vyasr](https://github.com/vyasr) -- Adding support for equi-join on struct ([#7720](https://github.com/rapidsai/cudf/pull/7720)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Add decimal column comparison operations ([#7716](https://github.com/rapidsai/cudf/pull/7716)) [@isVoid](https://github.com/isVoid) -- Implement scan 
operations for decimal columns ([#7707](https://github.com/rapidsai/cudf/pull/7707)) [@ChrisJar](https://github.com/ChrisJar) -- Enable typecasting between decimal and int ([#7691](https://github.com/rapidsai/cudf/pull/7691)) [@ChrisJar](https://github.com/ChrisJar) -- Enable decimal support in parquet writer ([#7673](https://github.com/rapidsai/cudf/pull/7673)) [@devavret](https://github.com/devavret) -- Adds `list.unique` API ([#7664](https://github.com/rapidsai/cudf/pull/7664)) [@isVoid](https://github.com/isVoid) -- Fix NaN handling in drop_list_duplicates ([#7662](https://github.com/rapidsai/cudf/pull/7662)) [@ttnghia](https://github.com/ttnghia) -- Add `lists.sort_values` API ([#7657](https://github.com/rapidsai/cudf/pull/7657)) [@isVoid](https://github.com/isVoid) -- Add is_integer API that can check for the validity of a string-to-integer conversion ([#7642](https://github.com/rapidsai/cudf/pull/7642)) [@ttnghia](https://github.com/ttnghia) -- Adds `explode` API ([#7607](https://github.com/rapidsai/cudf/pull/7607)) [@isVoid](https://github.com/isVoid) -- Adds `list.take`, python binding for `cudf::lists::segmented_gather` ([#7591](https://github.com/rapidsai/cudf/pull/7591)) [@isVoid](https://github.com/isVoid) -- Implement cudf::label_bins() ([#7554](https://github.com/rapidsai/cudf/pull/7554)) [@vyasr](https://github.com/vyasr) -- Add Python bindings for `lists::contains` ([#7547](https://github.com/rapidsai/cudf/pull/7547)) [@skirui-source](https://github.com/skirui-source) -- cudf::row_bit_count() support. ([#7534](https://github.com/rapidsai/cudf/pull/7534)) [@nvdbaranec](https://github.com/nvdbaranec) -- Implement drop_list_duplicates ([#7528](https://github.com/rapidsai/cudf/pull/7528)) [@ttnghia](https://github.com/ttnghia) -- Add Python bindings for `lists::extract_lists_element` ([#7505](https://github.com/rapidsai/cudf/pull/7505)) [@skirui-source](https://github.com/skirui-source) -- Add explode_outer and explode_outer_position ([#7499](https://github.com/rapidsai/cudf/pull/7499)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Match Pandas logic for comparing two objects with nulls ([#7490](https://github.com/rapidsai/cudf/pull/7490)) [@brandon-b-miller](https://github.com/brandon-b-miller) -- Add struct support to parquet writer ([#7461](https://github.com/rapidsai/cudf/pull/7461)) [@devavret](https://github.com/devavret) -- Enable type conversion from float to decimal type ([#7450](https://github.com/rapidsai/cudf/pull/7450)) [@ChrisJar](https://github.com/ChrisJar) -- Add cython for converting strings/fixed-point functions ([#7429](https://github.com/rapidsai/cudf/pull/7429)) [@davidwendt](https://github.com/davidwendt) -- Add struct column support to cudf::sort and cudf::sorted_order ([#7422](https://github.com/rapidsai/cudf/pull/7422)) [@karthikeyann](https://github.com/karthikeyann) -- Implement groupby collect_set ([#7420](https://github.com/rapidsai/cudf/pull/7420)) [@ttnghia](https://github.com/ttnghia) -- Merge branch-0.18 into branch-0.19 ([#7411](https://github.com/rapidsai/cudf/pull/7411)) [@raydouglass](https://github.com/raydouglass) -- Refactor strings column factories 
([#7397](https://github.com/rapidsai/cudf/pull/7397)) [@harrism](https://github.com/harrism) -- Add groupby scan operations (sort groupby) ([#7387](https://github.com/rapidsai/cudf/pull/7387)) [@karthikeyann](https://github.com/karthikeyann) -- Add cudf::explode_position ([#7376](https://github.com/rapidsai/cudf/pull/7376)) [@hyperbolic2346](https://github.com/hyperbolic2346) -- Add string conversion to/from decimal values libcudf APIs ([#7364](https://github.com/rapidsai/cudf/pull/7364)) [@davidwendt](https://github.com/davidwendt) -- Add groupby SUM_OF_SQUARES support ([#7362](https://github.com/rapidsai/cudf/pull/7362)) [@karthikeyann](https://github.com/karthikeyann) -- Add `Series.drop` api ([#7304](https://github.com/rapidsai/cudf/pull/7304)) [@isVoid](https://github.com/isVoid) -- get_json_object() implementation ([#7286](https://github.com/rapidsai/cudf/pull/7286)) [@nvdbaranec](https://github.com/nvdbaranec) -- Python API for `LIstMethods.len()` ([#7283](https://github.com/rapidsai/cudf/pull/7283)) [@isVoid](https://github.com/isVoid) -- Support null_policy::EXCLUDE for COLLECT rolling aggregation ([#7264](https://github.com/rapidsai/cudf/pull/7264)) [@mythrocks](https://github.com/mythrocks) -- Add support for special tokens in nvtext::subword_tokenizer ([#7254](https://github.com/rapidsai/cudf/pull/7254)) [@davidwendt](https://github.com/davidwendt) -- Fix inplace update of data and add Series.update ([#7201](https://github.com/rapidsai/cudf/pull/7201)) [@galipremsagar](https://github.com/galipremsagar) -- Implement `cudf::group_by` (hash) for `decimal32` and `decimal64` ([#7190](https://github.com/rapidsai/cudf/pull/7190)) [@codereport](https://github.com/codereport) -- Adding support to specify "level" parameter for `Dataframe.rename` ([#7135](https://github.com/rapidsai/cudf/pull/7135)) [@skirui-source](https://github.com/skirui-source) - -## 🛠️ Improvements - -- fix GDS include path for version 0.95 ([#7877](https://github.com/rapidsai/cudf/pull/7877)) [@rongou](https://github.com/rongou) -- Update `dask` + `distributed` to `2021.4.0` ([#7858](https://github.com/rapidsai/cudf/pull/7858)) [@jakirkham](https://github.com/jakirkham) -- Add ability to extract include dirs from `CUDF_HOME` ([#7848](https://github.com/rapidsai/cudf/pull/7848)) [@galipremsagar](https://github.com/galipremsagar) -- Add USE_GDS as an option in build script ([#7833](https://github.com/rapidsai/cudf/pull/7833)) [@pxLi](https://github.com/pxLi) -- add an allocate method with stream in java DeviceMemoryBuffer ([#7826](https://github.com/rapidsai/cudf/pull/7826)) [@rongou](https://github.com/rongou) -- Constrain dask and distributed versions to 2021.3.1 ([#7825](https://github.com/rapidsai/cudf/pull/7825)) [@shwina](https://github.com/shwina) -- Revert dask versioning of concat dispatch ([#7823](https://github.com/rapidsai/cudf/pull/7823)) [@galipremsagar](https://github.com/galipremsagar) -- add copy methods in Java memory buffer ([#7791](https://github.com/rapidsai/cudf/pull/7791)) [@rongou](https://github.com/rongou) -- Update README and CONTRIBUTING for 0.19 ([#7778](https://github.com/rapidsai/cudf/pull/7778)) 
[@robertmaynard](https://github.com/robertmaynard) -- Allow hash_partition to take a seed value ([#7771](https://github.com/rapidsai/cudf/pull/7771)) [@magnatelee](https://github.com/magnatelee) -- Turn on NVTX by default in java build ([#7761](https://github.com/rapidsai/cudf/pull/7761)) [@tgravescs](https://github.com/tgravescs) -- Add Java bindings to join gather map APIs ([#7751](https://github.com/rapidsai/cudf/pull/7751)) [@jlowe](https://github.com/jlowe) -- Add replacements column support for Java replaceNulls ([#7750](https://github.com/rapidsai/cudf/pull/7750)) [@jlowe](https://github.com/jlowe) -- Add Java bindings for row_bit_count ([#7749](https://github.com/rapidsai/cudf/pull/7749)) [@jlowe](https://github.com/jlowe) -- Remove unused JVM array creation ([#7748](https://github.com/rapidsai/cudf/pull/7748)) [@jlowe](https://github.com/jlowe) -- Added JNI support for new is_integer ([#7739](https://github.com/rapidsai/cudf/pull/7739)) [@revans2](https://github.com/revans2) -- Create and promote library aliases in libcudf installations ([#7734](https://github.com/rapidsai/cudf/pull/7734)) [@trxcllnt](https://github.com/trxcllnt) -- Support groupby operations for decimal dtypes ([#7731](https://github.com/rapidsai/cudf/pull/7731)) [@vyasr](https://github.com/vyasr) -- Memory map the input file only when GDS compatibility mode is not used ([#7717](https://github.com/rapidsai/cudf/pull/7717)) [@vuule](https://github.com/vuule) -- Replace device_vector with device_uvector in null_mask ([#7715](https://github.com/rapidsai/cudf/pull/7715)) [@harrism](https://github.com/harrism) -- Struct hashing support for SerialMurmur3 and SparkMurmur3 ([#7714](https://github.com/rapidsai/cudf/pull/7714)) [@jlowe](https://github.com/jlowe) -- Add gbenchmark for nvtext replace-tokens function ([#7708](https://github.com/rapidsai/cudf/pull/7708)) [@davidwendt](https://github.com/davidwendt) -- Use stream in groupby calls ([#7705](https://github.com/rapidsai/cudf/pull/7705)) [@karthikeyann](https://github.com/karthikeyann) -- Update codeowners file ([#7701](https://github.com/rapidsai/cudf/pull/7701)) [@ajschmidt8](https://github.com/ajschmidt8) -- Cleanup groupby to use host_span, device_span, device_uvector ([#7698](https://github.com/rapidsai/cudf/pull/7698)) [@karthikeyann](https://github.com/karthikeyann) -- Add gbenchmark for nvtext ngrams functions ([#7693](https://github.com/rapidsai/cudf/pull/7693)) [@davidwendt](https://github.com/davidwendt) -- Misc Python/Cython optimizations ([#7686](https://github.com/rapidsai/cudf/pull/7686)) [@shwina](https://github.com/shwina) -- Add gbenchmark for nvtext tokenize functions ([#7684](https://github.com/rapidsai/cudf/pull/7684)) [@davidwendt](https://github.com/davidwendt) -- Add column_device_view to orc writer ([#7676](https://github.com/rapidsai/cudf/pull/7676)) [@kaatish](https://github.com/kaatish) -- cudf_kafka now uses cuDF CMake export targets (CPM) ([#7674](https://github.com/rapidsai/cudf/pull/7674)) [@robertmaynard](https://github.com/robertmaynard) -- Add gbenchmark for nvtext normalize functions ([#7668](https://github.com/rapidsai/cudf/pull/7668)) 
[@davidwendt](https://github.com/davidwendt) -- Resolve unnecessary import of thrust/optional.hpp in types.hpp ([#7667](https://github.com/rapidsai/cudf/pull/7667)) [@vyasr](https://github.com/vyasr) -- Feature/optimize accessor copy ([#7660](https://github.com/rapidsai/cudf/pull/7660)) [@vyasr](https://github.com/vyasr) -- Fix `find_package(cudf)` ([#7658](https://github.com/rapidsai/cudf/pull/7658)) [@trxcllnt](https://github.com/trxcllnt) -- Work-around for gcc7 compile error on Centos7 ([#7652](https://github.com/rapidsai/cudf/pull/7652)) [@davidwendt](https://github.com/davidwendt) -- Add in JNI support for count_elements ([#7651](https://github.com/rapidsai/cudf/pull/7651)) [@revans2](https://github.com/revans2) -- Fix issues with building cudf in a non-conda environment ([#7647](https://github.com/rapidsai/cudf/pull/7647)) [@galipremsagar](https://github.com/galipremsagar) -- Refactor ConfigureCUDA to not conditionally insert compiler flags ([#7643](https://github.com/rapidsai/cudf/pull/7643)) [@robertmaynard](https://github.com/robertmaynard) -- Add gbenchmark for converting strings to/from timestamps ([#7641](https://github.com/rapidsai/cudf/pull/7641)) [@davidwendt](https://github.com/davidwendt) -- Handle constructing a `cudf.Scalar` from a `cudf.Scalar` ([#7639](https://github.com/rapidsai/cudf/pull/7639)) [@shwina](https://github.com/shwina) -- Add in JNI support for table partition ([#7637](https://github.com/rapidsai/cudf/pull/7637)) [@revans2](https://github.com/revans2) -- Add explicit fixed_point merge test ([#7635](https://github.com/rapidsai/cudf/pull/7635)) [@codereport](https://github.com/codereport) -- Add JNI support for IDENTITY hash partitioning ([#7626](https://github.com/rapidsai/cudf/pull/7626)) [@revans2](https://github.com/revans2) -- Java support on explode_outer ([#7625](https://github.com/rapidsai/cudf/pull/7625)) [@sperlingxx](https://github.com/sperlingxx) -- Java support of casting string from/to decimal ([#7623](https://github.com/rapidsai/cudf/pull/7623)) [@sperlingxx](https://github.com/sperlingxx) -- Convert cudf::concatenate APIs to use spans and device_uvector ([#7621](https://github.com/rapidsai/cudf/pull/7621)) [@harrism](https://github.com/harrism) -- Add gbenchmark for cudf::strings::translate function ([#7617](https://github.com/rapidsai/cudf/pull/7617)) [@davidwendt](https://github.com/davidwendt) -- Use file(COPY ) over file(INSTALL ) so cmake output is reduced ([#7616](https://github.com/rapidsai/cudf/pull/7616)) [@robertmaynard](https://github.com/robertmaynard) -- Use rmm::device_uvector in place of rmm::device_vector for ORC reader/writer and cudf::io::column_buffer ([#7614](https://github.com/rapidsai/cudf/pull/7614)) [@vuule](https://github.com/vuule) -- Refactor Java host-side buffer concatenation to expose separate steps ([#7610](https://github.com/rapidsai/cudf/pull/7610)) [@jlowe](https://github.com/jlowe) -- Add gbenchmarks for string substrings functions ([#7603](https://github.com/rapidsai/cudf/pull/7603)) [@davidwendt](https://github.com/davidwendt) -- Refactor string conversion check ([#7599](https://github.com/rapidsai/cudf/pull/7599)) [@ttnghia](https://github.com/ttnghia) -- JNI: Pass 
names of children struct columns to native Arrow IPC writer ([#7598](https://github.com/rapidsai/cudf/pull/7598)) [@firestarman](https://github.com/firestarman) -- Revert "ENH Fix stale GHA and prevent duplicates " ([#7595](https://github.com/rapidsai/cudf/pull/7595)) [@mike-wendt](https://github.com/mike-wendt) -- ENH Fix stale GHA and prevent duplicates ([#7594](https://github.com/rapidsai/cudf/pull/7594)) [@mike-wendt](https://github.com/mike-wendt) -- Fix auto-detecting GPU architectures ([#7593](https://github.com/rapidsai/cudf/pull/7593)) [@trxcllnt](https://github.com/trxcllnt) -- Reduce cudf library size ([#7583](https://github.com/rapidsai/cudf/pull/7583)) [@robertmaynard](https://github.com/robertmaynard) -- Optimize cudf::make_strings_column for long strings ([#7576](https://github.com/rapidsai/cudf/pull/7576)) [@davidwendt](https://github.com/davidwendt) -- Always build and export the cudf::cudftestutil target ([#7574](https://github.com/rapidsai/cudf/pull/7574)) [@trxcllnt](https://github.com/trxcllnt) -- Eliminate literal parameters to uvector::set_element_async and device_scalar::set_value ([#7563](https://github.com/rapidsai/cudf/pull/7563)) [@harrism](https://github.com/harrism) -- Add gbenchmark for strings::concatenate ([#7560](https://github.com/rapidsai/cudf/pull/7560)) [@davidwendt](https://github.com/davidwendt) -- Update Changelog Link ([#7550](https://github.com/rapidsai/cudf/pull/7550)) [@ajschmidt8](https://github.com/ajschmidt8) -- Add gbenchmarks for strings replace regex functions ([#7541](https://github.com/rapidsai/cudf/pull/7541)) [@davidwendt](https://github.com/davidwendt) -- Add `__repr__` for Column and ColumnAccessor ([#7531](https://github.com/rapidsai/cudf/pull/7531)) [@shwina](https://github.com/shwina) -- Support Decimal DIV changes in cudf ([#7527](https://github.com/rapidsai/cudf/pull/7527)) [@razajafri](https://github.com/razajafri) -- Remove unneeded step parameter from strings::detail::copy_slice ([#7525](https://github.com/rapidsai/cudf/pull/7525)) [@davidwendt](https://github.com/davidwendt) -- Use device_uvector, device_span in sort groupby ([#7523](https://github.com/rapidsai/cudf/pull/7523)) [@karthikeyann](https://github.com/karthikeyann) -- Add gbenchmarks for strings extract function ([#7522](https://github.com/rapidsai/cudf/pull/7522)) [@davidwendt](https://github.com/davidwendt) -- Rename ARROW_STATIC_LIB because it conflicts with one in FindArrow.cmake ([#7518](https://github.com/rapidsai/cudf/pull/7518)) [@trxcllnt](https://github.com/trxcllnt) -- Reduce compile time/size for scan.cu ([#7516](https://github.com/rapidsai/cudf/pull/7516)) [@davidwendt](https://github.com/davidwendt) -- Change device_vector to device_uvector in nvtext source files ([#7512](https://github.com/rapidsai/cudf/pull/7512)) [@davidwendt](https://github.com/davidwendt) -- Removed unneeded includes from traits.hpp ([#7509](https://github.com/rapidsai/cudf/pull/7509)) [@davidwendt](https://github.com/davidwendt) -- FIX Remove random build directory generation for ccache ([#7508](https://github.com/rapidsai/cudf/pull/7508)) [@dillon-cullinan](https://github.com/dillon-cullinan) -- xfail failing pytest in pandas 1.2.3 
([#7507](https://github.com/rapidsai/cudf/pull/7507)) [@galipremsagar](https://github.com/galipremsagar) -- JNI bit cast ([#7493](https://github.com/rapidsai/cudf/pull/7493)) [@revans2](https://github.com/revans2) -- Combine rolling window function tests ([#7480](https://github.com/rapidsai/cudf/pull/7480)) [@mythrocks](https://github.com/mythrocks) -- Prepare Changelog for Automation ([#7477](https://github.com/rapidsai/cudf/pull/7477)) [@ajschmidt8](https://github.com/ajschmidt8) -- Java support for explode position ([#7471](https://github.com/rapidsai/cudf/pull/7471)) [@sperlingxx](https://github.com/sperlingxx) -- Update 0.18 changelog entry ([#7463](https://github.com/rapidsai/cudf/pull/7463)) [@ajschmidt8](https://github.com/ajschmidt8) -- JNI: Support skipping nulls for collect aggregation ([#7457](https://github.com/rapidsai/cudf/pull/7457)) [@firestarman](https://github.com/firestarman) -- Join APIs that return gathermaps ([#7454](https://github.com/rapidsai/cudf/pull/7454)) [@shwina](https://github.com/shwina) -- Remove dependence on managed memory for multimap test ([#7451](https://github.com/rapidsai/cudf/pull/7451)) [@jrhemstad](https://github.com/jrhemstad) -- Use cuFile for Parquet IO when available ([#7444](https://github.com/rapidsai/cudf/pull/7444)) [@vuule](https://github.com/vuule) -- Statistics cleanup ([#7439](https://github.com/rapidsai/cudf/pull/7439)) [@kaatish](https://github.com/kaatish) -- Add gbenchmarks for strings filter functions ([#7438](https://github.com/rapidsai/cudf/pull/7438)) [@davidwendt](https://github.com/davidwendt) -- `fixed_point` + `cudf::binary_operation` API Changes ([#7435](https://github.com/rapidsai/cudf/pull/7435)) [@codereport](https://github.com/codereport) -- Improve string gather performance ([#7433](https://github.com/rapidsai/cudf/pull/7433)) [@jlowe](https://github.com/jlowe) -- Don't use user resource for a temporary allocation in sort_by_key ([#7431](https://github.com/rapidsai/cudf/pull/7431)) [@magnatelee](https://github.com/magnatelee) -- Detail APIs for datetime functions ([#7430](https://github.com/rapidsai/cudf/pull/7430)) [@magnatelee](https://github.com/magnatelee) -- Replace thrust::max_element with thrust::reduce in strings findall_re ([#7428](https://github.com/rapidsai/cudf/pull/7428)) [@davidwendt](https://github.com/davidwendt) -- Add gbenchmark for strings split/split_record functions ([#7427](https://github.com/rapidsai/cudf/pull/7427)) [@davidwendt](https://github.com/davidwendt) -- Update JNI build to use CMAKE_CUDA_ARCHITECTURES ([#7425](https://github.com/rapidsai/cudf/pull/7425)) [@jlowe](https://github.com/jlowe) -- Change nvtext::load_vocabulary_file to return a unique ptr ([#7424](https://github.com/rapidsai/cudf/pull/7424)) [@davidwendt](https://github.com/davidwendt) -- Simplify type dispatch with `device_storage_dispatch` ([#7419](https://github.com/rapidsai/cudf/pull/7419)) [@codereport](https://github.com/codereport) -- Java support for casting of nested child columns ([#7417](https://github.com/rapidsai/cudf/pull/7417)) [@razajafri](https://github.com/razajafri) -- Improve scalar string replace performance for long strings 
([#7415](https://github.com/rapidsai/cudf/pull/7415)) [@jlowe](https://github.com/jlowe) -- Remove unneeded temporary device vector for strings scatter specialization ([#7409](https://github.com/rapidsai/cudf/pull/7409)) [@davidwendt](https://github.com/davidwendt) -- bitmask_or implementation with bitmask refactor ([#7406](https://github.com/rapidsai/cudf/pull/7406)) [@rwlee](https://github.com/rwlee) -- Add other cudf::strings::replace functions to current strings replace gbenchmark ([#7403](https://github.com/rapidsai/cudf/pull/7403)) [@davidwendt](https://github.com/davidwendt) -- Clean up included headers in `device_operators.cuh` ([#7401](https://github.com/rapidsai/cudf/pull/7401)) [@codereport](https://github.com/codereport) -- Move nullable index iterator to indexalator factory ([#7399](https://github.com/rapidsai/cudf/pull/7399)) [@davidwendt](https://github.com/davidwendt) -- ENH Pass ccache variables to conda recipe & use Ninja in CI ([#7398](https://github.com/rapidsai/cudf/pull/7398)) [@Ethyling](https://github.com/Ethyling) -- upgrade maven-antrun-plugin to support maven parallel builds ([#7393](https://github.com/rapidsai/cudf/pull/7393)) [@rongou](https://github.com/rongou) -- Add gbenchmark for strings find/contains functions ([#7392](https://github.com/rapidsai/cudf/pull/7392)) [@davidwendt](https://github.com/davidwendt) -- Use CMAKE_CUDA_ARCHITECTURES ([#7391](https://github.com/rapidsai/cudf/pull/7391)) [@robertmaynard](https://github.com/robertmaynard) -- Refactor libcudf strings::replace to use make_strings_children utility ([#7384](https://github.com/rapidsai/cudf/pull/7384)) [@davidwendt](https://github.com/davidwendt) -- Added in JNI support for out of core sort algorithm ([#7381](https://github.com/rapidsai/cudf/pull/7381)) [@revans2](https://github.com/revans2) -- Upgrade pandas to 1.2 ([#7375](https://github.com/rapidsai/cudf/pull/7375)) [@galipremsagar](https://github.com/galipremsagar) -- Rename `logical_cast` to `bit_cast` and allow additional conversions ([#7373](https://github.com/rapidsai/cudf/pull/7373)) [@ttnghia](https://github.com/ttnghia) -- jitify 2 support ([#7372](https://github.com/rapidsai/cudf/pull/7372)) [@cwharris](https://github.com/cwharris) -- compile_udf: Cache PTX for similar functions ([#7371](https://github.com/rapidsai/cudf/pull/7371)) [@gmarkall](https://github.com/gmarkall) -- Add string scalar replace benchmark ([#7369](https://github.com/rapidsai/cudf/pull/7369)) [@jlowe](https://github.com/jlowe) -- Add gbenchmark for strings contains_re/count_re functions ([#7366](https://github.com/rapidsai/cudf/pull/7366)) [@davidwendt](https://github.com/davidwendt) -- Update orc reader and writer fuzz tests ([#7357](https://github.com/rapidsai/cudf/pull/7357)) [@galipremsagar](https://github.com/galipremsagar) -- Improve url_decode performance for long strings ([#7353](https://github.com/rapidsai/cudf/pull/7353)) [@jlowe](https://github.com/jlowe) -- `cudf::ast` Small Refactorings ([#7352](https://github.com/rapidsai/cudf/pull/7352)) [@codereport](https://github.com/codereport) -- Remove std::cout and print in the scatter test function EmptyListsOfNullableStrings. 
([#7342](https://github.com/rapidsai/cudf/pull/7342)) [@ttnghia](https://github.com/ttnghia) -- Use `cudf::detail::make_counting_transform_iterator` ([#7338](https://github.com/rapidsai/cudf/pull/7338)) [@codereport](https://github.com/codereport) -- Change block size parameter from a global to a template param. ([#7333](https://github.com/rapidsai/cudf/pull/7333)) [@nvdbaranec](https://github.com/nvdbaranec) -- Partial clean up of ORC writer ([#7324](https://github.com/rapidsai/cudf/pull/7324)) [@vuule](https://github.com/vuule) -- Add gbenchmark for cudf::strings::to_lower ([#7316](https://github.com/rapidsai/cudf/pull/7316)) [@davidwendt](https://github.com/davidwendt) -- Update Java bindings version to 0.19-SNAPSHOT ([#7307](https://github.com/rapidsai/cudf/pull/7307)) [@pxLi](https://github.com/pxLi) -- Move `cudf::test::make_counting_transform_iterator` to `cudf/detail/iterator.cuh` ([#7306](https://github.com/rapidsai/cudf/pull/7306)) [@codereport](https://github.com/codereport) -- Use string literals in `fixed_point` `release_assert`s ([#7303](https://github.com/rapidsai/cudf/pull/7303)) [@codereport](https://github.com/codereport) -- Fix merge conflicts for #7295 ([#7297](https://github.com/rapidsai/cudf/pull/7297)) [@ajschmidt8](https://github.com/ajschmidt8) -- Add UTF-8 chars to create_random_column<string_view> benchmark utility ([#7292](https://github.com/rapidsai/cudf/pull/7292)) [@davidwendt](https://github.com/davidwendt) -- Abstracting block reduce and block scan from cuIO kernels with `cub` apis ([#7278](https://github.com/rapidsai/cudf/pull/7278)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) -- Build.sh use cmake --build to drive build system invocation ([#7270](https://github.com/rapidsai/cudf/pull/7270)) [@robertmaynard](https://github.com/robertmaynard) -- Refactor dictionary support for reductions any/all ([#7242](https://github.com/rapidsai/cudf/pull/7242)) [@davidwendt](https://github.com/davidwendt) -- Replace stream.value() with stream for stream_view args ([#7236](https://github.com/rapidsai/cudf/pull/7236)) [@karthikeyann](https://github.com/karthikeyann) -- Interval index and interval_range ([#7182](https://github.com/rapidsai/cudf/pull/7182)) [@marlenezw](https://github.com/marlenezw) -- avro reader integration tests ([#7156](https://github.com/rapidsai/cudf/pull/7156)) [@cwharris](https://github.com/cwharris) -- Rework libcudf CMakeLists.txt to export targets for CPM ([#7107](https://github.com/rapidsai/cudf/pull/7107)) [@trxcllnt](https://github.com/trxcllnt) -- Adding Interval Dtype ([#6984](https://github.com/rapidsai/cudf/pull/6984)) [@marlenezw](https://github.com/marlenezw) -- Cleaning up `for` loops with `make_(counting_)transform_iterator` ([#6546](https://github.com/rapidsai/cudf/pull/6546)) [@codereport](https://github.com/codereport) - -# cuDF 0.18.0 (24 Feb 2021) - -## Breaking Changes 🚨 - -- Default `groupby` to `sort=False` (#7180) @isVoid -- Add libcudf API for parsing of ORC statistics (#7136) @vuule -- Replace ORC writer api with class (#7099) @rgsl888prabhu -- Pack/unpack functionality to convert tables to and from a serialized format. 

## Bug Fixes 🐛

- Remove incorrect std::move call on return variable (#7319) @davidwendt
- Fix failing CI ORC test (#7313) @vuule
- Disallow constructing frames from a ColumnAccessor (#7298) @shwina
- fix java cuFile tests (#7296) @rongou
- Fix style issues related to NumPy (#7279) @shwina
- Fix bug when `iloc` slice terminates at before-the-zero position (#7277) @isVoid
- Fix copying dtype metadata after calling libcudf functions (#7271) @shwina
- Move lists utility function definition out of header (#7266) @mythrocks
- Throw if bool column would cause incorrect result when writing to ORC (#7261) @vuule
- Use `uvector` in `replace_nulls`; Fix `sort_helper::grouped_value` doc (#7256) @isVoid
- Remove floating point types from cudf::sort fast-path (#7250) @davidwendt
- Disallow picking output columns from nested columns. (#7248) @devavret
- Fix `loc` for Series with a MultiIndex (#7243) @shwina
- Fix Arrow column test leaks (#7241) @tgravescs
- Fix test column vector leak (#7238) @kuhushukla
- Fix some bugs in java scalar support for decimal (#7237) @revans2
- Improve `assert_eq` handling of scalar (#7220) @isVoid
- Fix missing null_count() comparison in test framework and related failures (#7219) @nvdbaranec
- Remove floating point types from radix sort fast-path (#7215) @davidwendt
- Fixing parquet benchmarks (#7214) @rgsl888prabhu
- Handle various parameter combinations in `replace` API (#7207) @galipremsagar
- Export mock aws credentials for s3 tests (#7176) @ayushdg
- Add `MultiIndex.rename` API (#7172) @isVoid
- Fix importing list & struct types in `from_arrow` (#7162) @galipremsagar
- Fixing parquet precision writing failing if scale is equal to precision (#7146) @hyperbolic2346
- Update s3 tests to use moto_server (#7144) @ayushdg
- Fix JIT cache multi-process test flakiness in slow drives (#7142) @devavret
- Fix compilation errors in libcudf (#7138) @galipremsagar
- Fix compilation failure caused by `-Wall` addition. (#7134) @codereport
- Add informative error message for `sep` in CSV writer (#7095) @galipremsagar
- Add JIT cache per compute capability (#7090) @devavret
- Implement `__hash__` method for ListDtype (#7081) @galipremsagar
- Only upload packages that were built (#7077) @raydouglass
- Fix comparisons between Series and cudf.NA (#7072) @brandon-b-miller
- Handle `nan` values correctly in `Series.one_hot_encoding` (#7059) @galipremsagar
- Add `unstack()` support for non-multiindexed dataframes (#7054) @isVoid
- Fix `read_orc` for decimal type (#7034) @rgsl888prabhu
- Fix backward compatibility of loading a 0.16 pkl file (#7033) @galipremsagar
- Decimal casts in JNI became a NOOP (#7032) @revans2
- Restore usual instance/subclass checking to cudf.DateOffset (#7029) @shwina
- Add days check to cudf::is_timestamp using cuda::std::chrono classes (#7028) @davidwendt
- Fix to_csv delimiter handling of timestamp format (#7023) @davidwendt
- Pin librdkafka to gcc 7 compatible version (#7021) @raydouglass
- Fix `fillna` & `dropna` to also consider `np.nan` as a missing value (#7019) @galipremsagar (example after this list)
- Fix round operator's HALF_EVEN computation for negative integers (#7014) @nartal1
- Skip Thrust sort patch if already applied (#7009) @harrism
- Fix `cudf::hash_partition` for `decimal32` and `decimal64` (#7006) @codereport
- Fix Thrust unroll patch command (#7002) @harrism
- Fix loc behaviour when key of incorrect type is used (#6993) @shwina
- Fix int to datetime conversion in csv_read (#6991) @kaatish
- fix excluding cufile tests by default (#6988) @rongou
- Fix java cufile tests when cufile is not installed (#6987) @revans2
- Make `cudf::round` for `fixed_point` when `scale = -decimal_places` a no-op (#6975) @codereport
- Fix type comparison for java (#6970) @revans2
- Fix default parameter values of `write_csv` and `write_parquet` (#6967) @vuule
- Align `Series.groupby` API to match Pandas (#6964) @kkraus14
- Fix timestamp parsing in ORC reader for timezones without transitions (#6959) @vuule
- Fix typo in numerical.py (#6957) @rgsl888prabhu
- `fixed_point_value` double-shifts in `fixed_point` construction (#6950) @codereport
- fix libcu++ include path for jni (#6948) @rongou
- Fix groupby agg/apply behaviour when no key columns are provided (#6945) @shwina
- Avoid inserting null elements into join hash table when nulls are treated as unequal (#6943) @hyperbolic2346
- Fix cudf::merge gtest for dictionary columns (#6942) @davidwendt
- Pass numeric scalars of the same dtype through numeric binops (#6938) @brandon-b-miller
- Fix N/A detection for empty fields in CSV reader (#6922) @vuule
- Fix rmm_mode=managed parameter for gtests (#6912) @davidwendt
- Fix nullmask offset handling in parquet and orc writer (#6889) @kaatish
- Correct the sampling range when sampling with replacement (#6884) @ChrisJar
- Handle nested string columns with no children in contiguous_split. (#6864) @nvdbaranec
- Fix `columns` & `index` handling in dataframe constructor (#6838) @galipremsagar
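Among these, the `fillna`/`dropna` fix (#7019) changes user-visible behavior. A minimal sketch of the fixed semantics, assuming a 0.18+ build where NaN counts as missing even when kept as a floating-point NaN rather than converted to a null:

```python
import numpy as np
import cudf

# Keep NaN as NaN (do not convert to null) to exercise the fix.
s = cudf.Series([1.0, np.nan, 3.0], nan_as_null=False)

print(s.dropna())      # the NaN row is now dropped
print(s.fillna(0.0))   # the NaN is now replaced with 0.0
```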

## Documentation 📖

- Update readme (#7318) @shwina
- Fix typo in cudf.core.column.string.extract docs (#7253) @adelevie
- Update doxyfile project number (#7161) @davidwendt
- Update 10 minutes to cuDF and CuPy with new APIs (#7158) @ChrisJar
- Cross link RMM & libcudf Doxygen docs (#7149) @ajschmidt8
- Add documentation for support dtypes in all IO formats (#7139) @galipremsagar
- Add groupby docs (#7100) @shwina
- Update cudf python docstrings with new null representation (`<NA>`) (#7050) @galipremsagar
- Make Doxygen comments formatting consistent (#7041) @vuule
- Add docs for working with missing data (#7010) @galipremsagar
- Remove warning in from_dlpack and to_dlpack methods (#7001) @miguelusque
- libcudf Developer Guide (#6977) @harrism
- Add JNI wrapper for the cuFile API (GDS) (#6940) @rongou

## New Features 🚀

- Support `numeric_only` field for `rank()` (#7213) @isVoid
- Add support for `cudf::binary_operation` `TRUE_DIV` for `decimal32` and `decimal64` (#7198) @codereport
- Implement COLLECT rolling window aggregation (#7189) @mythrocks
- Add support for array-like inputs in `cudf.get_dummies` (#7181) @galipremsagar
- Default `groupby` to `sort=False` (#7180) @isVoid
- Add libcudf lists column count_elements API (#7173) @davidwendt
- Implement `cudf::group_by` (sort) for `decimal32` and `decimal64` (#7169) @codereport
- Add encoding and compression argument to CSV writer (#7168) @VibhuJawa
- `cudf::rolling_window` `SUM` support for `decimal32` and `decimal64` (#7147) @codereport
- Adding support for explode to cuDF (#7140) @hyperbolic2346 (example after this list)
- Add libcudf API for parsing of ORC statistics (#7136) @vuule
- update GDS/cuFile location for 0.9 release (#7131) @rongou
- Add Segmented sort (#7122) @karthikeyann
- Add `cudf::binary_operation` `NULL_MIN`, `NULL_MAX` & `NULL_EQUALS` for `decimal32` and `decimal64` (#7119) @codereport
- Add `scale` and `value` methods to `fixed_point` (#7109) @codereport
- Replace ORC writer api with class (#7099) @rgsl888prabhu
- Pack/unpack functionality to convert tables to and from a serialized format. (#7096) @nvdbaranec
- Improve `digitize` API (#7071) @isVoid
- Add List types support in data generator (#7064) @galipremsagar
- `cudf::scan` support for `decimal32` and `decimal64` (#7063) @codereport
- `cudf::rolling` `ROW_NUMBER` support for `decimal32` and `decimal64` (#7061) @codereport
- Replace parquet writer api with class (#7058) @rgsl888prabhu
- Support contains() on lists of primitives (#7039) @mythrocks
- Implement `cudf::rolling` for `decimal32` and `decimal64` (#7037) @codereport
- Add `ffill` and `bfill` to string columns (#7036) @isVoid
- Enable round in cudf for DataFrame and Series (#7022) @ChrisJar
- Extend `replace_nulls_policy` to `string` and `dictionary` type (#7004) @isVoid
- Add segmented_gather(list_column, gather_list) (#7003) @karthikeyann
- Add `method` field to `fillna` for fixed width columns (#6998) @isVoid
- Manual merge of branch 0.17 into branch 0.18 (#6995) @shwina
- Implement `cudf::reduce` for `decimal32` and `decimal64` (part 2) (#6980) @codereport
- Add Ufunc alias look up for appropriate numpy ufunc dispatching (#6973) @VibhuJawa
- Add pytest-xdist to dev environment.yml (#6958) @galipremsagar
- Add `Index.set_names` api (#6929) @galipremsagar
- Add `replace_null` API with `replace_policy` parameter, `fixed_width` column support (#6907) @isVoid
- Share `factorize` implementation with Index and cudf module (#6885) @brandon-b-miller
- Implement update() function (#6883) @skirui-source
- Add groupby idxmin, idxmax aggregation (#6856) @karthikeyann
- Implement `cudf::reduce` for `decimal32` and `decimal64` (part 1) (#6814) @codereport
- Implement cudf.DateOffset for months (#6775) @brandon-b-miller
- Add Python DecimalColumn (#6715) @shwina
- Add dictionary support to libcudf groupby functions (#6585) @davidwendt
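Of the Python-facing features above, `explode` (#7140) is the easiest to show. A minimal sketch, assuming a 0.18+ build; it mirrors the pandas API of the same name:

```python
import cudf

df = cudf.DataFrame({"id": [1, 2], "vals": [[10, 20], [30]]})

# Each list element becomes its own row; other columns are repeated.
print(df.explode("vals"))
```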

## Improvements 🛠️

- Update stale GHA with exemptions & new labels (#7395) @mike-wendt
- Add GHA to mark issues/prs as stale/rotten (#7388) @Ethyling
- Unpin from numpy < 1.20 (#7335) @shwina
- Prepare Changelog for Automation (#7309) @galipremsagar
- Prepare Changelog for Automation (#7272) @ajschmidt8
- Add JNI support for converting Arrow buffers to CUDF ColumnVectors (#7222) @tgravescs
- Add coverage for `skiprows` and `num_rows` in parquet reader fuzz testing (#7216) @galipremsagar
- Define and implement more behavior for merging on categorical variables (#7209) @brandon-b-miller
- Add CudfSeriesGroupBy to optimize dask_cudf groupby-mean (#7194) @rjzamora
- Add dictionary column support to rolling_window (#7186) @davidwendt
- Modify the semantics of `end` pointers in cuIO to match standard library (#7179) @vuule
- Adding unit tests for `fixed_point` with extremely large `scale`s (#7178) @codereport
- Fast path single column sort (#7167) @davidwendt
- Fix -Werror=sign-compare errors in device code (#7164) @trxcllnt
- Refactor cudf::string_view host and device code (#7159) @davidwendt
- Enable logic for GPU auto-detection in cudfjni (#7155) @gerashegalov
- Java bindings for Fixed-point type support for Parquet (#7153) @razajafri
- Add Java interface for the new API 'explode' (#7151) @firestarman
- Replace offsets with iterators in cuIO utilities and CSV parser (#7150) @vuule
- Add gbenchmarks for reduction aggregations any() and all() (#7129) @davidwendt
- Update JNI for contiguous_split packed results (#7127) @jlowe
- Add JNI and Java bindings for list_contains (#7125) @kuhushukla
- Add Java unit tests for window aggregate 'collect' (#7121) @firestarman
- verify window operations on decimal with java tests (#7120) @sperlingxx
- Adds in JNI support for creating a list column from existing columns (#7112) @revans2
- Build libcudf with -Wall (#7105) @trxcllnt
- Add column_device_view pointers to EncColumnDesc (#7097) @kaatish
- Add `pyorc` to dev environment (#7085) @galipremsagar
- JNI support for creating struct column from existing columns and fixed bug in struct with no children (#7084) @revans2
- Fastpath single strings column in cudf::sort (#7075) @davidwendt
- Upgrade nvcomp to 1.2.1 (#7069) @rongou
- Refactor ORC `ProtobufReader` to make it more extendable (#7055) @vuule
- Add Java tests for decimal casts (#7051) @sperlingxx
- Auto-label PRs based on their content (#7044) @jolorunyomi
- Create sort gbenchmark for strings column (#7040) @davidwendt
- Refactor io memory fetches to use hostdevice_vector methods (#7035) @ChrisJar
- Spark Murmur3 hash functionality (#7024) @rwlee
- Fix libcudf strings logic where size_type is used to access INT32 column data (#7020) @davidwendt
- Adding decimal writing support to parquet (#7017) @hyperbolic2346
- Add compression="infer" as default for dask_cudf.read_csv (#7013) @rjzamora (example after this list)
- Correct ORC docstring; other minor cuIO improvements (#7012) @vuule
- Reduce number of hostdevice_vector allocations in parquet reader (#7005) @devavret
- Check output size overflow on strings gather (#6997) @davidwendt
- Improve representation of `MultiIndex` (#6992) @galipremsagar
- Disable some pragma unroll statements in thrust sort.h (#6982) @davidwendt
- Minor `cudf::round` internal refactoring (#6976) @codereport
- Add Java bindings for URL conversion (#6972) @jlowe
- Enable strict_decimal_types in parquet reading (#6969) @sperlingxx
- Add in basic support to JNI for logical_cast (#6954) @revans2
- Remove duplicate file array_tests.cpp (#6953) @karthikeyann
- Add null mask `fixed_point_column_wrapper` constructors (#6951) @codereport
- Update Java bindings version to 0.18-SNAPSHOT (#6949) @jlowe
- Use simplified `rmm::exec_policy` (#6939) @harrism
- Add null count test for apply_boolean_mask (#6903) @harrism
- Implement DataFrame.quantile for datetime and timedelta data types (#6902) @ChrisJar
- Remove **kwargs from string/categorical methods (#6750) @shwina
- Refactor rolling.cu to reduce compile time (#6512) @mythrocks
- Add static type checking via Mypy (#6381) @shwina
- Update to official libcu++ on Github (#6275) @trxcllnt
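One user-visible improvement here is `compression="infer"` becoming the `dask_cudf.read_csv` default (#7013). A minimal sketch, with a hypothetical `data/*.csv.gz` glob; the compression is now inferred from the file extension, as in pandas:

```python
import dask_cudf

# No explicit compression argument needed for gzipped CSVs anymore.
ddf = dask_cudf.read_csv("data/*.csv.gz")
print(ddf.head())
```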

# cuDF 0.17.0 (10 Dec 2020)

## New Features

- PR #6116 Add `filters` parameter to Python `read_orc` function for filtering
- PR #6848 Added Java bindings for writing parquet files with INT96 timestamps
- PR #6460 Add is_timestamp format check API
- PR #6647 Implement `cudf::round` floating point and integer types (`HALF_EVEN`)
- PR #6562 Implement `cudf::round` floating point and integer types (`HALF_UP`)
- PR #6685 Implement `cudf::round` `decimal32` & `decimal64` (`HALF_UP` and `HALF_EVEN`)
- PR #6711 Implement `cudf::cast` for `decimal32/64` to/from integer and floating point
- PR #6777 Implement `cudf::unary_operation` for `decimal32` & `decimal64`
- PR #6729 Implement `cudf::cast` for `decimal32/64` to/from different `type_id`
- PR #6792 Implement `cudf::clamp` for `decimal32` and `decimal64`
- PR #6845 Implement `cudf::copy_if_else` for `decimal32` and `decimal64`
- PR #6805 Implement `cudf::detail::copy_if` for `decimal32` and `decimal64`
- PR #6843 Implement `cudf::copy_range` for `decimal32` and `decimal64`
- PR #6528 Enable `fixed_point` binary operations
- PR #6568 Add function to create hashed vocabulary file from raw vocabulary
- PR #6142 Add Python `read_orc_statistics` function for reading file- and stripe-level statistics
- PR #6581 Add JNI API to check if PTDS is enabled
- PR #6615 Add support for list and struct types to contiguous_split
- PR #6625 Add INT96 timestamp writing option to parquet writer
- PR #6592 Add `cudf.to_numeric` function (example after this list)
- PR #6598 Add strings::contains API with target column parameter
- PR #6638 Add support for `pipe` API
- PR #6737 New build process (Project Flash)
- PR #6652 Add support for struct columns in concatenate
- PR #6675 Add DecimalDtype to cuDF
- PR #6739 Add Java bindings for is_timestamp
- PR #6808 Add support for reading decimal32 and decimal64 from parquet
- PR #6781 Add serial murmur3 hashing
- PR #6811 First class support for unbounded window function bounds
- PR #6768 Add support for scatter() on list columns
- PR #6796 Add create_metadata_file in dask_cudf
- PR #6765 Cupy fallback for __array_function__ and __array_ufunc__ for cudf.Series
- PR #6817 Add support for scatter() on lists-of-struct columns
- PR #6483 Add `agg` function to aggregate dataframe using one or more operations
- PR #6726 Support selecting different hash functions in hash_partition
- PR #6619 Improve Dockerfile
- PR #6831 Added parquet chunked writing ability for list columns
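`cudf.to_numeric` (PR #6592) follows the pandas API of the same name. A minimal sketch, assuming a 0.17+ build:

```python
import cudf

s = cudf.Series(["1", "2.5", "apple"])

# errors="coerce" turns unparseable strings into nulls instead of raising.
print(cudf.to_numeric(s, errors="coerce"))
```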

## Improvements

- PR #6430 Add struct type support to `to_arrow` and `from_arrow`
- PR #6384 Add CSV fuzz tests with varying function parameters
- PR #6385 Add JSON fuzz tests with varying function parameters
- PR #6398 Remove function constructor macros in parquet reader
- PR #6432 Add dictionary support to `cudf::upper_bound` and `cudf::lower_bound`
- PR #6461 Replace index type-dispatch call with indexalator in cudf::scatter
- PR #6415 Support `datetime64` in row-wise op
- PR #6457 Replace index type-dispatch call with indexalator in `cudf::gather`
- PR #6413 Replace Python NVTX package with conda-forge source
- PR #6442 Remove deprecated `DataFrame.from_gpu_matrix`, `DataFrame.to_gpu_matrix`, `DataFrame.add_column` APIs and method parameters
- PR #6502 Add dictionary support to `cudf::merge`
- PR #6471 Replace index type-dispatch call with indexalator in cudf::strings::substring
- PR #6485 Add File IO to cuIO benchmarks
- PR #6504 Update Java bindings version to 0.17-SNAPSHOT
- PR #6875 Remove bounds check for `cudf::gather`
- PR #6489 Add `AVRO` fuzz tests with varying function parameters
- PR #6540 Add dictionary support to `cudf::unary_operation`
- PR #6537 Refactor ORC timezone
- PR #6527 Refactor DeviceColumnViewAccess to avoid JNI returning an array
- PR #6690 Explicitly set legacy or per-thread default stream in JNI
- PR #6545 Pin cmake policies to cmake 3.17 version
- PR #6556 Add dictionary support to `cudf::inner_join`, `cudf::left_join` and `cudf::full_join`
- PR #6557 Support nullable timestamp columns in time range window functions
- PR #6566 Remove `reinterpret_cast` conversions between pointer types in ORC
- PR #6544 Remove `fixed_point` precise round
- PR #6552 Use `assert_exceptions_equal` to assert exceptions in pytests
- PR #6555 Adapt JNI build to libcudf composition of multiple libraries
- PR #6559 Refactoring cooperative loading with single thread loading.
- PR #6564 Load JNI library dependencies with a thread pool
- PR #6571 Add ORC fuzz tests with varying function parameters
- PR #6578 Add in java column to row conversion
- PR #6573 Create `cudf::detail::byte_cast` for `cudf::byte_cast`
- PR #6597 Use thread-local to track CUDA device in JNI
- PR #6599 Replace `size()==0` with `empty()`, `is_empty()`
- PR #6514 Initial work for decimal type in Java/JNI
- PR #6605 Reduce HtoD copies in `cudf::concatenate` of string columns
- PR #6608 Improve subword tokenizer docs
- PR #6610 Add ability to set scalar values in `cudf.DataFrame`
- PR #6612 Update JNI to new RMM cuda_stream_view API
- PR #6646 Replace `cudaStream_t` with `rmm::cuda_stream_view` (part 1)
- PR #6648 Replace `cudaStream_t` with `rmm::cuda_stream_view` (part 2)
- PR #6744 Replace `cudaStream_t` with `rmm::cuda_stream_view` (part 3)
- PR #6579 Update scatter APIs to use reference wrapper / const scalar
- PR #6614 Add support for conversion to Pandas nullable dtypes and fix related issue in `cudf.to_json`
- PR #6622 Update `to_pandas` api docs
- PR #6623 Add operator overloading to column and clean up error messages
- PR #6644 Cover different CSV reader/writer options in benchmarks
- PR #6741 Cover different ORC and Parquet reader/writer options in benchmarks
- PR #6651 Add cudf::dictionary::make_dictionary_pair_iterator
- PR #6666 Add dictionary support to `cudf::reduce`
- PR #6635 Add cudf::test::dictionary_column_wrapper class
- PR #6702 Fix orc read corruption on boolean column
- PR #6676 Add dictionary support to `cudf::quantile`
- PR #6673 Parameterize avro and json benchmark
- PR #6609 Support fixed-point decimal for HostColumnVector
- PR #6703 Add list column statistics writing to Parquet writer
- PR #6662 `RangeIndex` supports `step` parameter (example after this list)
- PR #6712 Remove `reinterpret_cast` conversions between pointer types in Avro
- PR #6705 Add nested type support to Java table serialization
- PR #6709 Raise informative error while converting a pandas dataframe with duplicate columns
- PR #6727 Remove 2nd type-dispatcher call from cudf::reduce
- PR #6749 Update nested JNI builder so we can do it incrementally
- PR #6748 Add Java API to concatenate serialized tables to ContiguousTable
- PR #6764 Add dictionary support to `cudf::minmax`
- PR #6734 Binary operations support for decimal type in cudf Java
- PR #6761 Add Java/JNI bindings for round
- PR #6776 Use `void` return type for kernel wrapper functions instead of returning `cudaError_t`
- PR #6786 Add nested type support to ColumnVector#getDeviceMemorySize
- PR #6780 Move `cudf::cast` tests to separate test file
- PR #6809 size_type overflow checking when concatenating columns
- PR #6789 Rename `unary_op` to `unary_operator`
- PR #6770 Support building decimal columns with Table.TestBuilder
- PR #6815 Add wildcard path support to `read_parquet`
- PR #6800 Push DeviceScalar to cython-only
- PR #6822 Split out `cudf::distinct_count` from `drop_duplicates.cu`
- PR #6813 Enable `expand=False` in `.str.split` and `.str.rsplit`
- PR #6829 Enable workaround to write categorical columns in csv
- PR #6819 Use CMake 3.19 for RMM when building cuDF jar
- PR #6833 Use settings.xml if existing for internal build
- PR #6839 Handle index when dispatching __array_function__ and __array_ufunc__ to cupy for cudf.Series
- PR #6835 Move template param to member var to improve compile of hash/groupby.cu
- PR #6837 Avoid gather when copying strings view from start of strings column
- PR #6859 Move align_ptr_for_type() from cuda.cuh to alignment.hpp
- PR #6807 Refactor `std::array` usage in row group index writing in ORC
- PR #6914 Enable groupby `list` aggregation for strings
- PR #6908 Parquet option for strictly decimal reading
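The `RangeIndex` `step` support (PR #6662) rounds out the pandas-style constructor. A minimal sketch, assuming a 0.17+ build:

```python
import cudf

# step now controls the spacing between index values, as in pandas.
idx = cudf.RangeIndex(start=0, stop=10, step=2)
print(list(idx))  # [0, 2, 4, 6, 8]
```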

## Bug Fixes

- PR #6446 Fix integer parsing in CSV and JSON for values outside of int64 range
- PR #6506 Fix DateTime type value truncation while writing to csv
- PR #6509 Disable JITIFY log printing
- PR #6517 Handle index equality in `Series` and `DataFrame` equality checks
- PR #6519 Fix end-of-string marking boundary condition in subword-tokenizer
- PR #6543 Handle `np.nan` values in `isna`/`isnull`/`notna`/`notnull`
- PR #6549 Fix memory_usage calls for list columns
- PR #6575 Fix JNI RMM initialize with no pool allocator limit
- PR #6636 Fix orc boolean column corruption issue
- PR #6582 Add missing `device_scalar` stream parameters
- PR #6596 Fix memory usage calculation
- PR #6595 Fix JNI build, broken by to_arrow() signature change
- PR #6601 Fix timezone offset when reading ORC files
- PR #6603 Use correct stream in hash_join.
- PR #6616 Block `fixed_point` `cudf::concatenate` with different scales
- PR #6607 Fix integer overflow in ORC encoder
- PR #6617 Fix JNI native dependency load order
- PR #6621 Fix subword tokenizer metadata for token count equal to max_sequence_length
- PR #6629 Fix JNI CMake
- PR #6633 Fix Java HostColumnVector unnecessarily loading native dependencies
- PR #6643 Fix csv writer handling embedded comma delimiter
- PR #6640 Add error message for unsupported `axis` parameter in DataFrame APIs
- PR #6686 Fix output size for orc read for skip_rows option
- PR #6710 Fix an out-of-bounds indexing error in gather() for lists
- PR #6670 Fix a bug where PTX parser fails to correctly parse a python lambda generated UDF
- PR #6687 Fix issue where index name of caller object is being modified in csv writer
- PR #6735 Fix hash join where row hash values would end up equal to the reserved empty key value
- PR #6696 Fix release_assert.
- PR #6692 Fix handling of empty column name in csv writer
- PR #6693 Fix issue related to `na_values` input in `read_csv`
- PR #6701 Fix issue when `numpy.str_` is given as input to string parameters in io APIs
- PR #6704 Fix leak warnings in JNI unit tests
- PR #6713 Fix missing call to cudaStreamSynchronize in get_value
- PR #6708 Apply `na_rep` to column names in csv writer
- PR #6720 Fix implementation of `dtype` parameter in `cudf.read_csv`
- PR #6721 Add missing serialization methods for ListColumn
- PR #6722 Fix index=False bug in dask_cudf.read_parquet
- PR #6766 Fix race conditions in parquet
- PR #6728 Fix cudf python docs and associated build warnings
- PR #6732 Fix cuDF benchmarks build with static Arrow lib and fix rapids-compose cuDF JNI build
- PR #6742 Fix concat bug in dask_cudf Series/Index creation
- PR #6632 Fix DataFrame initialization from list of dicts
- PR #6767 Fix sort order of parameters in `test_scalar_invalid_implicit_conversion` pytest
- PR #6771 Fix index handling in parquet reader and writer
- PR #6787 Update java reduction APIs to reflect C++ changes
- PR #6790 Fix result representation in groupby.apply
- PR #6794 Fix AVRO reader issues with empty input
- PR #6798 Fix `read_avro` docs
- PR #6824 Fix JNI build
- PR #6826 Fix resource management in Java ColumnBuilder
- PR #6830 Fix categorical scalar insertion
- PR #6844 Fix uint32_t undefined errors
- PR #6854 Fix the parameter order of writeParquetBufferBegin
- PR #6855 Fix `.str.replace_with_backrefs` docs examples
- PR #6853 Fix contiguous split of null string columns
- PR #6860 Move codecov upload to build script
- PR #6861 Fix compile error in type_dispatch_benchmark.cu
- PR #6864 Handle contiguous_split corner case for nested string columns with no children
- PR #6869 Avoid dependency resolution failure in latest version of pip by explicitly specifying versions for dask and distributed
- PR #6806 Force install of local conda artifacts
- PR #6887 Fix typo and `0-d` numpy array handling in binary operation
- PR #6898 Fix missing clone overrides on derived aggregations
- PR #6899 Update JNI to new gather boundary check API

# cuDF 0.16.0 (21 Oct 2020)

## New Features

- PR #5779 Add DataFrame.pivot() and DataFrame.unstack() (example after this list)
- PR #5975 Add strings `filter_characters` API
- PR #5843 Add `filters` parameter to Python `read_parquet` function for filtering row groups
- PR #5974 Use libcudf instead of cupy for `arange` or column creation from a scalar.
- PR #5494 Add Abstract Syntax Tree (AST) evaluator.
- PR #6076 Add durations type support for csv writer, reader
- PR #5874 Add `COLLECT` groupby aggregation
- PR #6330 Add ability to query if PTDS is enabled
- PR #6119 Add support for `dayofweek` property in `DateTimeIndex` and `DatetimeProperties`
- PR #6171 Java and Jni support for Struct columns
- PR #6125 Add support for `Series.mode` and `DataFrame.mode`
- PR #6271 Add support to deep-copy struct columns from struct column-view
- PR #6262 Add nth_element series aggregation with null handling
- PR #6316 Add StructColumn to Python API
- PR #6247 Add `minmax` reduction function
- PR #6232 `Json` and `Avro` benchmarking in python
- PR #6139 Add column conversion to big endian byte list.
- PR #6220 Add `list_topics()` to supply list of underlying Kafka connection topics
- PR #6254 Add `cudf::make_dictionary_from_scalar` factory function
- PR #6277 Add support for LEAD/LAG window functions for fixed-width types
- PR #6318 Add support for reading Struct and map types from Parquet files
- PR #6315 Native code for string-map lookups, for cudf-java
- PR #6302 Add custom dataframe accessors
- PR #6301 Add JNI bindings to nvcomp
- PR #6328 Java and JNI bindings for getMapValue/map_lookup
- PR #6371 Use ColumnViewAccess on Host side
- PR #6392 add hash based groupby mean aggregation
- PR #6511 Add LogicalType to Parquet reader
- PR #6297 cuDF Python Scalars
- PR #6723 Support creating decimal vectors from scalar
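`DataFrame.pivot` and `DataFrame.unstack` (PR #5779) bring the pandas reshape APIs to cuDF. A minimal sketch, assuming a 0.16+ build:

```python
import cudf

df = cudf.DataFrame({
    "row": ["a", "a", "b", "b"],
    "col": ["x", "y", "x", "y"],
    "val": [1, 2, 3, 4],
})

wide = df.pivot(index="row", columns="col", values="val")
print(wide)            # one column per value of "col"
print(wide.unstack())  # back to a long, MultiIndexed result
```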

## Improvements

- PR #6393 Fix some misspelled words
- PR #6292 Remove individual size tracking from JNI tracking resource adaptor
- PR #5946 Add cython and python support for libcudf `to_arrow` and `from_arrow`
- PR #5919 Remove max_strings and max_chars from nvtext::subword_tokenize
- PR #5956 Add/Update tests for cuStreamz
- PR #5953 Use stable sort when doing a sort groupby
- PR #5973 Link to the Code of Conduct in CONTRIBUTING.md
- PR #6354 Perform shallow clone of external projects
- PR #6388 Add documentation for building `libboost_filesystem.a` from source
- PR #5917 Just use `None` for `strides` in `Buffer`
- PR #6015 Upgrade CUB/Thrust to the latest commit
- PR #5971 Add cuStreamz README for basic installation and use
- PR #6024 Expose selecting multiple ORC stripes to read from Python
- PR #6155 Use the CUB submodule in Thrust instead of fetching CUB separately
- PR #6321 Add option in JNI code to use `arena_memory_resource`
- PR #6002 Add Java bindings for md5
- PR #6311 Switch Thrust to use the NVIDIA/thrust repo
- PR #6060 Add support for all types in `Series.describe` and `DataFrame.describe`
- PR #6051 Add builder API for cuIO `parquet_writer_options` and `parquet_reader_options`
- PR #6067 Added compute codes for aarch64 devices
- PR #5861 `fixed_point` Column Optimization (store `scale` in `data_type`)
- PR #6083 Small cleanup
- PR #6355 Make sure PTDS mode is compatible between libcudf and JNI
- PR #6120 Consolidate functionality in NestedHostColumnVector and HostColumnVector
- PR #6092 Add `name` and `dtype` field to `Index.copy`
- PR #5984 Support gather() on CUDF struct columns
- PR #6103 Small refactor of `print_differences`
- PR #6124 Fix gcc-9 compilation errors on tests
- PR #6122 Add builder API for cuIO `csv_writer_options` and `csv_reader_options`
- PR #6141 Fix typo in custreamz README that was a result of recent changes
- PR #6162 Reduce output parameters in cuio csv and json reader internals
- PR #6146 Added element/validity pair constructors for fixed_width and string wrappers
- PR #6143 General improvements for java arrow IPC.
- PR #6138 Add builder API for cuIO `orc_writer_options` and `orc_reader_options`
- PR #6152 Change dictionary indices to uint32
- PR #6099 Add fluent builder apis to `json_reader_options` and `avro_reader_options`
- PR #6163 Use `Column.full` instead of `scalar_broadcast_to` or `cupy.zeros`
- PR #6176 Fix cmake warnings for GoogleTest, GoogleBenchmark, and Arrow external projects
- PR #6149 Update to Arrow v1.0.1
- PR #6421 Use `pandas.testing` in `cudf`
- PR #6357 Use `pandas.testing` in `dask-cudf`
- PR #6201 Expose libcudf test utilities headers for external project use.
- PR #6174 Data profile support in random data generator; Expand cuIO benchmarks
- PR #6189 Avoid deprecated pyarrow.compat for parquet
- PR #6184 Add cuda 11 dev environment.yml
- PR #6186 Update JNI to look for cub in new location
- PR #6194 Remove unnecessary memory-resource parameter in `cudf::contains` API
- PR #6195 Update JNI to use parquet options builder
- PR #6190 Avoid reading full csv files for metadata in dask_cudf
- PR #6197 Remove librmm dependency for libcudf
- PR #6205 Add dictionary support to cudf::contains
- PR #6213 Reduce subscript usage in cuio in favor of pointer dereferencing
- PR #6230 Support any unsigned int type for dictionary indices
- PR #6202 Add additional parameter support to `DataFrame.drop`
- PR #6214 Small clean up to use more algorithms
- PR #6209 Remove CXX11 ABI handling from CMake
- PR #6223 Remove CXX11 ABI flag from JNI build
- PR #6114 Implement Fuzz tests for cuIO
- PR #6231 Adds `inplace`, `append`, `verify_integrity` fields to `DataFrame.set_index`
- PR #6215 Add cmake command-line setting for spdlog logging level
- PR #6242 Added cudf::detail::host_span and device_span
- PR #6240 Don't shallow copy index in as_index() unless necessary
- PR #6204 Add dockerfile and script to build cuDF jar
- PR #6248 Optimize groupby-agg in dask_cudf
- PR #6243 Move `equals()` logic to `Frame`
- PR #6245 Split up replace.cu into multiple source files
- PR #6218 increase visibility/consistency for cuio reader writer private member variable names.
- PR #6268 Add file tags to libcudf doxygen
- PR #6265 Update JNI to use ORC options builder
- PR #6273 Update JNI to use ORC options builder
- PR #6293 Replace shuffle warp reduce with cub calls
- PR #6287 Make java aggregate API follow C++ API
- PR #6303 Use cudf test dtypes so timedelta tests are deterministic
- PR #6329 Update and clean-up gpuCI scripts
- PR #6299 Add lead and lag to java
- PR #6327 Add dictionary specialization to `cudf::replace_nulls`
- PR #6306 Remove cpw macros from page encode kernels
- PR #6375 Parallelize Cython compilation in addition to Cythonization
- PR #6326 Simplify internal csv/json kernel parameters
- PR #6308 Add dictionary support to cudf::scatter with scalar
- PR #6367 Add JNI bindings for byte casting
- PR #6312 Conda recipe dependency cleanup
- PR #6346 Remove macros from CompactProtocolWriter
- PR #6347 Add dictionary support to cudf::copy_range
- PR #6352 Add specific Topic support for Kafka "list_topics()" metadata requests
- PR #6332 Add support to return csv as string when `path=None` in `to_csv` (example after this list)
- PR #6358 Add Parquet fuzz tests with varying function parameters
- PR #6369 Add dictionary support to `cudf::find_and_replace`
- PR #6373 Add dictionary support to `cudf::clamp`
- PR #6377 Update ci/local/README.md
- PR #6383 Removed `move.pxd`, use standard library `move`
- PR #6400 Removed unused variables
- PR #6409 Allow CuPy 8.x
- PR #6407 Add RMM_LOGGING_LEVEL flag to Java docker build
- PR #6425 Factor out csv parse_options creation to pure function
- PR #6438 Fetch nvcomp v1.1.0 for JNI build
- PR #6459 Add `map` method to series
- PR #6379 Add list hashing functionality to MD5
- PR #6498 Add helper method to ColumnBuilder with some nits
- PR #6336 Add `join` functionality in cudf concat
- PR #6653 Replaced SHFL_XOR calls with cub::WarpReduce
- PR #6751 Rework ColumnViewAccess and its usage
- PR #6698 Remove macros from ORC reader and writer
- PR #6782 Replace cuio macros with constexpr and inline functions
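PR #6332 makes `to_csv` return the CSV text when no path is given, matching pandas. A minimal sketch, assuming a 0.16+ build:

```python
import cudf

df = cudf.DataFrame({"a": [1, 2], "b": ["x", "y"]})

# With no path argument, the CSV is returned as a Python string.
csv_text = df.to_csv(index=False)
print(csv_text)
```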

## Bug Fixes

- PR #6073 Fix issue related to `.loc` in case of `DatetimeIndex`
- PR #6081 Fix issue where fsspec thinks it has a protocol string
- PR #6100 Fix issue in `Series.factorize` to correctly pick `na_sentinel` value
- PR #6106 Fix datetime limit in csv due to 32-bit arithmetic
- PR #6113 Fix to_timestamp to initialize default year to 1970
- PR #6110 Handle `format` for other input types in `to_datetime`
- PR #6118 Fix Java build for ORC read args change and update package version
- PR #6121 Replace calls to get_default_resource with get_current_device_resource
- PR #6128 Add support for numpy RandomState handling in `sample`
- PR #6134 Fix CUDA C/C++ debug builds
- PR #6137 Fix issue where `np.nan` is being returned instead of `NAT` for datetime/duration types
- PR #6298 Fix gcc-9 compilation error in dictionary/remove_keys.cu
- PR #6172 Fix slice issue with empty column
- PR #6342 Fix array out-of-bound errors in Orc writer
- PR #6154 Warnings on row-wise op only when non-numeric columns are found.
- PR #6150 Fix issue related to inferring `datetime64` format with UTC timezone in string data
- PR #6179 `make_elements` copies to `iterator` without adjusting `size`
- PR #6387 Remove extra `std::move` call in java/src/main/native/src/map_lookup.cu
- PR #6182 Fix cmake build of arrow
- PR #6288 Fix gcc-9 compilation error with `ColumnVectorJni.cpp`
- PR #6173 Fix normalize_characters offset logic on sliced strings column
- PR #6159 Fix issue related to empty `Dataframe` with columns input to `DataFrame.append`
- PR #6199 Fix index preservation for dask_cudf parquet
- PR #6207 Remove shared libs from Java sources jar
- PR #6217 Fixed missing bounds checking when storing validity in parquet reader
- PR #6212 Update codeowners file
- PR #6389 Fix RMM logging level so that it can be turned off from the command line
- PR #6157 Fix issue related to `Series.concat` to concat a non-empty and empty series.
- PR #6226 Add in some JNI checks for null handles
- PR #6183 Fix issues related to `Series.acos` for consistent output regardless of dtype
- PR #6234 Add float infinity parsing in csv reader
- PR #6251 Replace remaining calls to RMM `get_default_resource`
- PR #6257 Support truncated fractions in `cudf::strings::to_timestamp`
- PR #6259 Fix compilation error with GCC 8
- PR #6258 Pin libcudf conda recipe to boost 1.72.0
- PR #6264 Remove include statement for missing rmm/mr/device/default_memory_resource.hpp file
- PR #6296 Handle double quote and escape character in json
- PR #6294 Fix read parquet key error when reading empty pandas DataFrame with cudf
- PR #6285 Removed unsafe `reinterpret_cast` and implicit pointer-to-bool casts
- PR #6281 Fix unreachable code warning in datetime.cuh
- PR #6286 Fix `read_csv` `int32` overflow
- PR #6466 Fix ORC reader issue with decimal type
- PR #6310 Replace a misspelled reference to `master` branch with `main` branch in a comment in changelog.sh
- PR #6289 Revert #6206
- PR #6291 Fix issue related to row-wise operations in `cudf.DataFrame`
- PR #6304 Fix span_tests.cu includes
- PR #6331 Avoids materializing `RangeIndex` during frame concatenation (when not needed)
- PR #6278 Add filter tests for struct columns
- PR #6344 Fix rolling-window count for null input
- PR #6353 Rename `skip_rows` parameter to `skiprows` in `read_parquet`, `read_avro` and `read_orc`
- PR #6361 Detect overflow in hash join
- PR #6386 Removed c-style pointer casts and redundant `reinterpret_cast`s in cudf::io
- PR #6397 Fix `build.sh` when `PARALLEL_LEVEL` environment variable isn't set
- PR #6366 Fix Warp Reduce calls in cuio statistics calculation to account for NaNs
- PR #6345 Fix ambiguous constructor compile error with devtoolset
- PR #6335 Fix conda commands for outdated python version
- PR #6372 Fix issue related to reading a nullable boolean column in `read_parquet` when `engine=pyarrow`
- PR #6378 Fix index handling in `fillna` and incorrect pytests
- PR #6380 Avoid problematic column-index check in dask_cudf.read_parquet test
- PR #6403 Fix error handling in notebook tests
- PR #6408 Avoid empty offset list in hash_partition output
- PR #6402 Update JNI build to pull fixed nvcomp commit
- PR #6410 Fix uses of dangerous default values in Python code
- PR #6424 Check for null data in close for ColumnBuilder
- PR #6426 Fix `RuntimeError` when `np.bool_` is passed as `header` in `to_csv`
- PR #6443 Make java apis getList and getStruct public
- PR #6445 Add `dlpack` to run section of libcudf conda recipe to fix downstream build issues
- PR #6450 Make java Column Builder row agnostic
- PR #6309 Make all CI `.sh` scripts have a consistent set of permissions
- PR #6491 Remove repo URL from Java build-info
- PR #6462 Bug fixes for ColumnBuilder
- PR #6497 Fixes a data corruption issue reading list columns from Parquet files with multiple row groups.

# cuDF 0.15.0 (26 Aug 2020)

## New Features

- PR #5292 Add unsigned int type columns to libcudf
- PR #5287 Add `index.join` support
- PR #5222 Adding clip feature support to DataFrame and Series
- PR #5318 Support/leverage DataFrame.shuffle in dask_cudf
- PR #4546 Support pandas 1.0+
- PR #5331 Add `cudf::drop_nans`
- PR #5327 Add `cudf::cross_join` feature
- PR #5204 Concatenate strings columns using row separator as strings column
- PR #5342 Add support for `StringMethods.__getitem__`
- PR #5358 Add zero-copy `column_view` cast for compatible types
- PR #3504 Add External Kafka Datasource
- PR #5356 Use `size_type` instead of `scalar` in `cudf::repeat`.
- PR #5397 Add internal implementation of nested loop equijoins.
- PR #5303 Add slice_strings functionality using delimiter string
- PR #5394 Enable cast and binops with duration types (builds on PR 5359)
- PR #5301 Add Java bindings for `zfill`
- PR #5411 Enable metadata collection for chunked parquet writer
- PR #5359 Add duration types
- PR #5364 Validate array interface during buffer construction
- PR #5418 Add support for `DataFrame.info`
- PR #5425 Add Python `Groupby.rolling()`
- PR #5434 Add nvtext function generate_character_grams
- PR #5442 Add support for `cudf.isclose` (example after this list)
- PR #5444 Remove usage of deprecated RMM APIs and headers.
- PR #5463 Add `.str.byte_count` python api and cython(bindings)
- PR #5488 Add plumbings for `.str.replace_tokens`
- PR #5502 Add Unsigned int types support in dlpack
- PR #5497 Add `.str.isinteger` & `.str.isfloat`
- PR #5511 Port of clx subword tokenizer to cudf
- PR #5528 Add unsigned int reading and writing support to parquet
- PR #5510 Add support for `cudf.Index` to create Indexes
- PR #5618 Add Kafka as a cudf datasource
- PR #5668 Adding support for `cudf.testing`
- PR #5460 Add support to write to remote filesystems
- PR #5454 Add support for `DataFrame.append`, `Index.append`, `Index.difference` and `Index.empty`
- PR #5536 Parquet reader - add support for multiple sources
- PR #5654 Adding support for `cudf.DataFrame.sample` and `cudf.Series.sample`
- PR #5607 Add Java bindings for duration types
- PR #5612 Add `is_hex` strings API
- PR #5625 String conversion to and from duration types
- PR #5659 Added support for rapids-compose for Java bindings and other enhancements
- PR #5637 Parameterize Null comparator behaviour in Joins
- PR #5623 Add `is_ipv4` strings API
- PR #5723 Parquet reader - add support for nested LIST columns
- PR #5669 Add support for reading JSON files with missing or out-of-order fields
- PR #5674 Support JIT backend on PowerPC64
- PR #5629 Add `ListColumn` and `ListDtype`
- PR #5658 Add `filter_tokens` nvtext API
- PR #5666 Add `filter_characters_of_type` strings API
- PR #5778 Add support for `cudf::table` to `arrow::Table` and `arrow::Table` to `cudf::table`
- PR #5673 Always build and test with per-thread default stream enabled in the GPU CI build
- PR #5438 Add MD5 hash support
- PR #5704 Initial `fixed_point` Column Support
- PR #5716 Add `double_type_dispatcher` to libcudf
- PR #5739 Add `nvtext::detokenize` API
- PR #5645 Enforce pd.NA and Pandas nullable dtype parity
- PR #5729 Create nvtext normalize_characters API from the subword_tokenize internal function
- PR #5572 Add `cudf::encode` API.
- PR #5767 Add `nvtext::porter_stemmer_measure` and `nvtext::is_letter` APIs
- PR #5753 Add `cudf::lists::extract_list_element` API
- PR #5568 Add support for `Series.keys()` and `DataFrame.keys()`
- PR #5782 Add Kafka support to custreamz
- PR #5642 Add `GroupBy.groups()`
- PR #5811 Add `nvtext::edit_distance` API
- PR #5789 Add groupby support for duration types
- PR #5810 Make Cython subdirs packages and simplify package_data
- PR #6005 Add support for Ampere
- PR #5807 Initial support for struct columns
- PR #5817 Enable more `fixed_point` unit tests by introducing "scale-less" constructor
- PR #5822 Add `cudf_kafka` to `custreamz` run time conda dependency and fix bash syntax issue
- PR #5903 Add duration support for Parquet reader, writer
- PR #5845 Add support for `mask_to_bools`
- PR #5851 Add support for `Index.sort_values`
- PR #5904 Add slice/split support for LIST columns
- PR #5857 Add dtypes information page in python docs
- PR #5859 Add conversion from `fixed_point` to `bool`
- PR #5781 Add duration types support in cudf(python/cython)
- PR #5815 LIST Support for ColumnVector
- PR #5931 Support for `add_calendrical_months` API
- PR #5992 Add support for `.dt.strftime`
- PR #6075 Parquet writer - add support for nested LIST columns
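`cudf.isclose` (PR #5442) compares element-wise within tolerances, in the spirit of `numpy.isclose`. A minimal sketch, assuming a 0.15+ build:

```python
import cudf

a = cudf.Series([1.0, 2.0, 3.0001])
b = cudf.Series([1.0, 2.1, 3.0])

# True where |a - b| <= atol + rtol * |b|, element-wise.
print(cudf.isclose(a, b, rtol=1e-3))  # [True, False, True]
```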

## Improvements

- PR #5492 compile_udf: compile straight to PTX instead of using @jit
- PR #5605 Automatically flush RMM allocate/free logs in JNI
- PR #5632 Switch JNI code to use `pool_memory_resource` instead of CNMeM
- PR #5486 Link Boost libraries statically in the Java build
- PR #5479 Link Arrow libraries statically
- PR #5414 Use new release of Thrust/CUB in the JNI build
- PR #5403 Update required CMake version to 3.14 in contribution guide
- PR #5245 Add column reduction benchmark
- PR #5315 Use CMake `FetchContent` to obtain `cub` and `thrust`
- PR #5398 Use CMake `FetchContent` to obtain `jitify` and `libcudacxx`
- PR #5268 Rely on NumPy arrays for out-of-band pickling
- PR #5288 Drop `auto_pickle` decorator
- PR #5231 Type `Buffer` as `uint8`
- PR #5305 Add support for `numpy`/`cupy` array in `DataFrame` construction
- PR #5308 Coerce frames to `Buffer`s in deserialization
- PR #5309 Handle host frames in serialization
- PR #5312 Test serializing `Series` after `slice`
- PR #5248 Support interleave_columns for string types
- PR #5332 Remove outdated dask-xgboost docs
- PR #5349 Improve libcudf documentation CSS style
- PR #5317 Optimize fixed_point rounding shift for integers
- PR #5386 Remove `cub` from `include_dirs` in `setup.py`
- PR #5373 Remove legacy nvstrings/nvcategory/nvtext
- PR #5362 Remove dependency on `rmm._DevicePointer`
- PR #5302 Add missing comparison operators to `fixed_point` type
- PR #5824 Mark host frames as not needing to be writeable
- PR #5354 Split Dask deserialization methods by dask/cuda
- PR #5363 Handle `0-dim` inputs while broadcasting to a column
- PR #5396 Remove legacy tests env variable from build.sh
- PR #5374 Port nvtext character_tokenize API to libcudf
- PR #5389 Expose typed accessors for Java HostMemoryBuffer
- PR #5379 Avoid chaining `Buffer`s
- PR #5387 Port nvtext replace_tokens API to libcudf
- PR #5381 Change numpy usages to cupy in `10min.ipynb`
- PR #5408 Update pyarrow and arrow-cpp to 0.17.1
- PR #5366 Add benchmarks for cuIO writers
- PR #5913 Call cudaMemcpyAsync/cudaMemsetAsync in JNI
- PR #5405 Add Error message to `StringColumn.unary_operator`
- PR #5424 Add python plumbing for `.str.character_tokenize`
- PR #5420 Aligning signature of `Series.value_counts` to Pandas
- PR #5535 Update document for XGBoost usage with dask-cuda
- PR #5431 Adding support for unsigned int
- PR #5426 Refactor strings code to minimize calls to regex
- PR #5433 Add support for column inputs in `strings::starts_with` and `strings::ends_with`
- PR #5427 Add Java bindings for unsigned data types
- PR #5429 Improve text wrapping in libcudf documentation
- PR #5443 Remove unused `is_simple` trait
- PR #5441 Update Java HostMemoryBuffer to only load native libs when necessary
- PR #5452 Add support for strings conversion using negative timestamps
- PR #5437 Improve libcudf join documentation
- PR #5458 Install meta packages for dependencies
- PR #5467 Move doc customization scripts to Jenkins
- PR #5468 Add cudf::unique_count(table_view)
- PR #5482 Use rmm::device_uvector in place of rmm::device_vector in copy_if
- PR #5483 Add NVTX range calls to dictionary APIs
- PR #5477 Add `is_index_type` trait
- PR #5487 Use sorted lists instead of sets for pytest parameterization
- PR #5491 allow build libcudf in custom dir
- PR #5501 Adding only unsigned types support for categorical column codes
- PR #5570 Add Index APIs such as `Int64Index`, `UInt64Index` and others
- PR #5503 Change `unique_count` to `distinct_count`
- PR #5514 `convert_datetime.cu` Small Cleanup
- PR #5496 Rename .cu tests (zero cuda kernels) to .cpp files
- PR #5518 split iterator and gather tests to speedup build tests
- PR #5526 Change `type_id` to enum class
- PR #5559 Java APIs for missing date/time operators
- PR #5582 Add support for axis and other parameters to `DataFrame.sort_index` and fix a bunch of other issues.
- PR #5562 Add missing join type for java
- PR #5584 Refactor `CompactProtocolReader::InitSchema`
- PR #5591 Add `__arrow_array__` protocol and raise a descriptive error message
- PR #5635 Add cuIO reader benchmarks for CSV, ORC and Parquet
- PR #5601 Instantiate Table instances in `Frame._concat` to avoid `DF.insert()` overhead
- PR #5602 Add support for concatenation of `Series` & `DataFrame` in `cudf.concat` when `axis=0`
- PR #5603 Refactor JIT `parser.cpp`
- PR #5643 Update `isort` to 5.0.4
- PR #5648 OO interface for hash join with explicit `build/probe` semantic
- PR #5662 Make Java ColumnVector(long nativePointer) constructor public
- PR #5681 Pin black, flake8 and isort
- PR #5679 Use `pickle5` to test older Python versions
- PR #5684 Use `pickle5` in `Serializable` (when available)
- PR #5419 Support rolling, groupby_rolling for durations
- PR #5687 Change strings::split_record to return a lists column
- PR #5708 Add support for `dummy_na` in `get_dummies` (example after this list)
- PR #5709 Update java build to help cuSpatial with java bindings
- PR #5713 Remove old NVTX utilities
- PR #5726 Replace use of `assert_frame_equal` in tests with `assert_eq`
- PR #5720 Replace owning raw pointers with std::unique_ptr
- PR #5702 Add inherited methods to python docs and other docs fixes
- PR #5733 Add support for `size` property in `DataFrame`/ `Series` / `Index`/ `MultiIndex`
- PR #5735 Force timestamp creation only with duration
- PR #5743 Reduce number of test cases in concatenate benchmark
- PR #5748 Disable `tolist` API in `Series` & `Index` and add `tolist` dispatch in `dask-cudf`
- PR #5744 Reduce number of test cases in reduction benchmark
- PR #5756 Switch JNI code to use the RMM owning wrapper
- PR #5725 Integrate Gbenchmarks into CI
- PR #5752 Add cuDF internals documentation (ColumnAccessor)
- PR #5759 Fix documentation describing JIT cache default location
- PR #5780 Add Java bindings for pad
- PR #5775 Update dask_cudf.read_parquet to align with upstream improvements
- PR #5785 Enable computing views of ListColumns
- PR #5791 Get nullable_pd_dtype from kwargs if provided in assert_eq
- PR #5786 JNI Header Cleanup for cuSpatial
- PR #5800 Expose arrow datasource instead of directly taking a RandomAccessFile
- PR #5795 Clarify documentation on Boost dependency
- PR #5803 Add in Java support for the repeat command
- PR #5806 Expose the error message from native exception when throwing an OOM exception
- PR #5825 Enable ORC statistics generation by default
- PR #5771 Enable gather/slicing/joins with ListColumns in Python
- PR #5834 Add support for dictionary column in concatenate
- PR #5832 Make dictionary_wrapper constructor from a value explicit
- PR #5833 Pin `dask` and `distributed` version to `2.22.0`
- PR #5856 Bump Pandas support to >=1.0,<1.2
- PR #5855 Java interface to limit RMM maximum pool size
- PR #5853 Disable `fixed_point` for use in `copy_if`
- PR #5854 Raise informative error in `DataFrame.iterrows` and `DataFrame.itertuples`
- PR #5864 Replace cnmem with pool_memory_resource in test/benchmark fixtures
- PR #5863 Explicitly require `ucx-py` on CI
- PR #5879 Added support of sub-types and object wrappers in concat()
- PR #5884 Use S3 bucket directly for benchmark plugin
- PR #5881 Add in JVM extractListElement and stringSplitRecord
- PR #5885 Add in java support for merge sort
- PR #5894 Small code improvement / cleanup
- PR #5899 Add in gather support for Java
- PR #5906 Add macros for showing line of failures in unit tests
- PR #5933 Add in APIs to read/write arrow IPC formatted data from java
- PR #3918 Update cuDF internals doc
- PR #5970 Map data to pandas through arrow, always
- PR #6012 Remove `cudf._cuda` and replace usages with `rmm._cuda`
- PR #6045 Parametrize parquet_reader_list tests
- PR #6053 Import traits.hpp for cudftestutils consumers
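The `dummy_na` option in `get_dummies` (PR #5708) gives nulls their own indicator column, as in pandas. A minimal sketch, assuming a 0.15+ build:

```python
import cudf

df = cudf.DataFrame({"cat": ["a", "b", None]})

# dummy_na=True adds an extra column flagging the null entries.
print(cudf.get_dummies(df, columns=["cat"], dummy_na=True))
```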

## Bug Fixes

- PR #6034 Specify `--basetemp` for `py.test` run
- PR #5793 Fix leak in mutable_table_device_view by deleting _descendant_storage in table_device_view_base::destroy
- PR #5525 Make sure to allocate bitmasks of string columns only once
- PR #5336 Initialize conversion tables on a per-context basis
- PR #5283 Fix strings::ipv4_to_integers overflow to negative
- PR #5269 Explicitly require NumPy
- PR #5271 Fix issue when different dtype values are passed to `.cat.add_categories`
- PR #5333 Fix `DataFrame.loc` issue with list like argument
- PR #5299 Update package version for Java bindings
- PR #5300 Add support to ignore `None` in `cudf.concat` input
- PR #5334 Fix pickling sizeof test
- PR #5337 Fix broken alias from DataFrame.{at,iat} to {loc, iloc}
- PR #5347 Fix APPLY_BOOLEAN_MASK_BENCH segfault
- PR #5368 Fix loc indexing issue with `datetime` type index
- PR #5367 Fix API for `cudf::repeat` in `cudf::cross_join`
- PR #5377 Handle array of cupy scalars in to_column
- PR #5326 Fix `DataFrame.__init__` for list of scalar inputs and related dask issue
- PR #5383 Fix cython `type_id` enum mismatch
- PR #5982 Fix gcc-9 compile errors under CUDA 11
- PR #5382 Fix CategoricalDtype equality comparisons
- PR #5989 Fix gcc-9 warnings on narrowing conversion
- PR #5385 Fix index issues in `DataFrame.from_gpu_matrix`
- PR #5390 Fix Java data type IDs and string interleave test
- PR #5392 Fix documentation links
- PR #5978 Fix option to turn off NVTX
- PR #5410 Fix compile warning by disallowing bool column type for slice_strings
- PR #5404 Fix issue with column creation when chunked arrays are passed
- PR #5409 Use the correct memory resource when creating empty null masks
- PR #5399 Fix cpp compiler warnings of unreachable code
- PR #5439 Fix nvtext ngrams_tokenize performance for multi-byte UTF8
- PR #5446 Fix compile error caused by out-of-date PR merge (4990)
- PR #5983 Fix JNI gcc-9 compile error under CUDA 11
- PR #5423 Fix any() reduction ignore nulls
- PR #5459 Fix str.translate to convert table characters to UTF-8
- PR #5480 Fix merge sort docs
- PR #5465 Fix benchmark out of memory errors due to multiple initialization
- PR #5473 Fix RLEv2 patched base in ORC reader
- PR #5472 Fix str concat issue with indexed series
- PR #5478 Fix `loc` and `iloc` doc
- PR #5484 Ensure flat index after groupby if nlevels == 1
- PR #5489 Fix drop_nulls/boolean_mask corruption for large columns
- PR #5504 Remove some java assertions that are not needed
- PR #5516 Update gpuCI image in local build script
- PR #5529 Fix issue with negative timestamp in orc writer
- PR #5523 Handle `dtype` of `Buffer` objects when not passed explicitly
- PR #5534 Fix the java build around type_id
- PR #5564 Fix CudfEngine.read_metadata API in dask_cudf
- PR #5537 Fix issue related to using `set_index` on a string series
- PR #5561 Fix `copy_bitmask` issue with offset
- PR #5609 Fix loc and iloc issue with column like input
- PR #5578 Fix getattr logic in GroupBy
- PR #5490 Fix python column view
- PR #5613 Fix assigning an equal length object into a masked out Series
- PR #5608 Fix issue related to string types being represented as binary types
- PR #5619 Fix issue related to typecasting when using a `CategoricalDtype`
- PR #5649 Fix issue when empty Dataframe with index are passed to `cudf.concat`
- PR #5644 Fix issue related to Dataframe init when passing in `columns`
- PR #5340 Disable iteration in cudf objects and add support for `DataFrame` initialization with list of `Series`
- PR #5663 Move Duration types under Timestamps in doxygen Modules page
- PR #5664 Update conda upload versions for new supported CUDA/Python
- PR #5656 Fix issue with incorrect docker image being used in local build script
- PR #5671 Fix chunksize issue with `DataFrame.to_csv`
- PR #5672 Fix crash in parquet writer while writing large string data
- PR #5675 Allow lists_column_wrappers to be constructed from incomplete hierarchies.
- PR #5691 Raise error on incompatible mixed-type input for a column
- PR #5692 Fix compilation issue with gcc 7.4.0 and CUDA 10.1
- PR #5693 Add fix missing from PR 5656 to update local docker image to py3.7
- PR #5703 Small fix for dataframe constructor with cuda array interface objects that don't have `descr` field
- PR #5727 Fix `Index.__repr__` to allow representation of null values
- PR #5719 Fix Frame._concat() with categorical columns
- PR #5736 Disable unsigned type in ORC writer benchmarks
- PR #5745 Update JNI cast for inability to cast timestamp and integer types
- PR #5750 Add RMM_ROOT/include to the spdlog search path in JNI build
- PR #5763 Update Java slf4j version to match Spark 3.0
- PR #5816 Always preserve list column hierarchies across operations.
- PR #5766 Fix issue related to `iloc` and slicing a `DataFrame`
- PR #5827 Revert fallback for `tolist` being absent
- PR #5774 Add fallback for when `tolist` is absent
- PR #5319 Disallow SUM and specialize MEAN of timestamp types
- PR #5797 Fix a missing data issue in some Parquet files
- PR #5787 Fix column create from dictionary column view
- PR #5764 Remove repetition of install instructions
- PR #5926 Fix SeriesGroupBy.nunique() to return a Series
- PR #5813 Fix normalizer exception with all-null strings column
- PR #5820 Fix ListColumn.to_arrow for all null case
- PR #5837 Bash syntax error in prebuild.sh preventing `cudf_kafka` and `libcudf_kafka` from being uploaded to Anaconda
- PR #5841 Added custreamz functions that were missing in interface layer
- PR #5844 Fix `.str.cat` when objects with different index are passed
- PR #5849 Modify custreamz api to integrate seamlessly with python streamz
- PR #5866 cudf_kafka python version inconsistencies in Anaconda packages
- PR #5872 libcudf_kafka r_path is causing docker build failures on centos7
- PR #5869 Fix bug in parquet writer in writing string column with offset
- PR #5910 Propagate `CUDA` insufficient driver error to the user
- PR #5914 Link CUDA against libcudf_kafka
- PR #5895 Do not break kafka client consumption loop on local client timeout
- PR #5915 Fix reference count on Java DeviceMemoryBuffer after contiguousSplit
- PR #5941 Fix issue related to `string` to `datetime64` column typecast
- PR #5927 Fix return type of `MultiIndex.argsort`
- PR #5942 Fix JIT cache multiprocess test failure
- PR #5929 Revised assertEquals for List Columns in java tests
- PR #5947 Fix null count for child device column vector
- PR #5951 Fix mkdir error in benchmark build
- PR #5949 Find Arrow include directory for JNI builds
- PR #5964 Fix API doc page title tag
- PR #5981 Handle `nat` in `fillna` for datetime and timedelta types
- PR #6016 Fix benchmark fixture segfault
- PR #6003 Fix concurrent JSON reads crash
- PR #6032 Change black version to 19.10b0 in .pre-commit-config.yaml
- PR #6041 Fix Java memory resource handler to rethrow original exception object
- PR #6057 Fix issue in parquet reader with reading columns out of file-order
- PR #6098 Patch Thrust to workaround CUDA_CUB_RET_IF_FAIL macro clearing CUDA errors
-- PR #5766 Fix issue related to `iloc` and slicing a `DataFrame` -- PR #5827 Revert fallback for `tolist` being absent -- PR #5774 Add fallback for when `tolist` is absent -- PR #5319 Disallow SUM and specialize MEAN of timestamp types -- PR #5797 Fix a missing data issue in some Parquet files -- PR #5787 Fix column create from dictionary column view -- PR #5764 Remove repetition of install instructions -- PR #5926 Fix SeriesGroupBy.nunique() to return a Series -- PR #5813 Fix normalizer exception with all-null strings column -- PR #5820 Fix ListColumn.to_arrow for all null case -- PR #5837 Bash syntax error in prebuild.sh preventing `cudf_kafka` and `libcudf_kafka` from being uploaded to Anaconda -- PR #5841 Added custreamz functions that were missing in interface layer -- PR #5844 Fix `.str.cat` when objects with different index are passed -- PR #5849 Modify custreamz api to integrate seamlessly with python streamz -- PR #5866 cudf_kafka python version inconsistencies in Anaconda packages -- PR #5872 libcudf_kafka r_path is causing docker build failures on centos7 -- PR #5869 Fix bug in parquet writer in writing string column with offset -- PR #5910 Propagate `CUDA` insufficient driver error to the user -- PR #5914 Link CUDA against libcudf_kafka -- PR #5895 Do not break kafka client consumption loop on local client timeout -- PR #5915 Fix reference count on Java DeviceMemoryBuffer after contiguousSplit -- PR #5941 Fix issue related to `string` to `datetime64` column typecast -- PR #5927 Fix return type of `MultiIndex.argsort` -- PR #5942 Fix JIT cache multiprocess test failure -- PR #5929 Revised assertEquals for List Columns in java tests -- PR #5947 Fix null count for child device column vector -- PR #5951 Fix mkdir error in benchmark build -- PR #5949 Find Arrow include directory for JNI builds -- PR #5964 Fix API doc page title tag -- PR #5981 Handle `nat` in `fillna` for datetime and timedelta types -- PR #6016 Fix benchmark fixture segfault -- PR #6003 Fix concurrent JSON reads crash -- PR #6032 Change black version to 19.10b0 in .pre-commit-config.yaml -- PR #6041 Fix Java memory resource handler to rethrow original exception object -- PR #6057 Fix issue in parquet reader with reading columns out of file-order -- PR #6098 Patch Thrust to workaround CUDA_CUB_RET_IF_FAIL macro clearing CUDA errors - - -# cuDF 0.14.0 (03 Jun 2020) - -## New Features - -- PR #5042 Use RMM for Numba -- PR #4472 Add new `partition` API to replace `scatter_to_tables`. 
-- PR #4626 LogBase binops -- PR #4750 Normalize NANs and Zeroes (JNI Bindings) -- PR #4689 Compute last day of the month for a given date -- PR #4771 Added in an option to statically link against cudart -- PR #4788 Add cudf::day_of_year API -- PR #4789 Disallow timestamp sum and diffs via binary ops -- PR #4815 Add JNI total memory allocated API -- PR #4906 Add Java bindings for interleave_columns -- PR #4900 Add `get_element` to obtain scalar from a column given an index -- PR #4938 Add Java bindings for strip -- PR #4923 Add Java and JNI bindings for string split -- PR #4972 Add list_view (cudf::LIST) type -- PR #4990 Add lists_column_view, list_column_wrapper, lists support for concatenate -- PR #5073 gather support for cudf::LIST columns -- PR #5004 Added a null considering min/max binary op -- PR #4992 Add Java bindings for converting nans to nulls -- PR #4975 Add Java bindings for first and last aggregate expressions based on nth -- PR #5036 Add positive remainder binary op functionality -- PR #5055 Add atan2 binary op -- PR #5099 Add git commit hook for clang-format -- PR #5072 Adding cython binding to `get_element` -- PR #5092 Add `cudf::replace_nans` -- PR #4881 Support row_number in rolling_window -- PR #5068 Add Java bindings for arctan2 -- PR #5132 Support out-of-band buffers in Python pickling -- PR #5139 Add ``Serializable`` ABC for Python -- PR #5149 Add Java bindings for PMOD -- PR #5153 Add Java bindings for extract -- PR #5196 Add Java bindings for NULL_EQUALS, NULL_MAX and NULL_MIN -- PR #5192 Add support for `cudf.to_datetime` -- PR #5203 Add Java bindings for is_integer and is_float -- PR #5205 Add ci test for libcudf, libnvstrings headers existence check in meta.yml -- PR #5239 Support for custom cuIO datasource classes -- PR #5293 Add Java bindings for replace_with_backrefs - -## Improvements - -- PR #5235 Make DataFrame.clean_renderable_dataframe() and DataFrame.get_renderable_dataframe non-public methods -- PR #4995 Add CMake option for per-thread default stream -- PR #5033 Fix Numba deprecations warnings with Numba 0.49+ -- PR #4950 Fix import errors with Numba 0.49+ -- PR #4825 Update the iloc exp in dataframe.py -- PR #4450 Parquet writer: add parameter to retrieve the raw file metadata -- PR #4531 Add doc note on conda `channel_priority` -- PR #4479 Adding cuda 10.2 support via conda environment file addition -- PR #4486 Remove explicit template parameter from detail::scatter. -- PR #4471 Consolidate partitioning functionality into a single header. -- PR #4483 Add support fill() on dictionary columns -- PR #4498 Adds in support for chunked writers to java -- PR #4073 Enable contiguous split java test -- PR #4527 Add JNI and java bindings for matches_re -- PR #4606 Fix `scan` unit test and upgrade to more appropriate algorithms -- PR #4527 Add JNI and java bindings for `matches_re` -- PR #4532 Parquet reader: add support for multiple pandas index columns -- PR #4599 Add Java and JNI bindings for string replace -- PR #4655 Raise error for list like dtypes in cudf -- PR #4548 Remove string_view is_null method -- PR #4645 Add Alias for `kurtosis` as `kurt` -- PR #4703 Optimize strings concatenate for many columns -- PR #4769 Remove legacy code from libcudf -- PR #4668 Add Java bindings for log2/log10 unary ops and log_base binary op -- PR #4616 Enable different RMM allocation modes in unit tests -- PR #4520 Fix several single char -> single char case mapping values. Add support for single -> multi char mappings. 
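`cudf.to_datetime` (PR #5192) follows the pandas parsing API. A minimal sketch, assuming a 0.14+ build:

```python
import cudf

s = cudf.Series(["2020-01-01", "2020-06-03"])

# Parse strings into datetime64 values with an explicit format.
print(cudf.to_datetime(s, format="%Y-%m-%d"))
```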
-- PR #4700 Expose events and more stream functionality in java -- PR #4699 Make Java's MemoryBuffer public and add MemoryBuffer.slice -- PR #4691 Fix compiler argument syntax for ccache -- PR #4792 Port `gather`, `scatter`, and `type_dispatcher` benchmarks to libcudf++ -- PR #3581 Remove `bool8` -- PR #4692 Add GPU and CUDA validations -- PR #4705 quantile cython bindings -- PR #4627 Remove legacy Cython -- PR #4688 Add Java count aggregation to include null values -- PR #4331 Improved test for double that considers an epsilon -- PR #4731 Avoid redundant host->device copies when reading the entire CSV/JSON file -- PR #4739 Add missing aggregations for cudf::experimental::reduce -- PR #4738 Remove stop-gaps in StringMethods and enable related tests -- PR #4745 Fix `fsspec` related issue and upgrade `fsspec` version -- PR #4779 Allow reading arbitrary stripes/rowgroup lists in CPP columnar readers -- PR #4766 Update to use header-only NVTX v3 and remove need to link against nvtx. -- PR #4716 Remove direct calls to RMM_ALLOC/RMM_FREE -- PR #4765 Add in java support for sequence -- PR #4772 Cleanup `dask_cudf` `to_parquet` and enable `"_metadata"` creation -- PR #4733 Fix `isin` docs for `DataFrame`, `Series`, `Index`, and add `DataFrame.isin` support -- PR #4767 Remove linking against `gtest_main` and `gmock_main` in unit tests -- PR #4660 Port `cudf::partition` api to python/cython -- PR #4799 Remove null_count() and has_nulls() from column_device_view -- PR #4778 Remove `scatter_to_tables` from libcudf, cython and python -- PR #4783 Add support for child columns to mutable_column_device_view -- PR #4802 Refactor `cudf::transpose` to increase performance. -- PR #4776 Improve doxygen comments for libcudf string/timestamp conversion formats -- PR #4793 Add `cudf._cuda` to setup.py -- PR #4790 Replace the use of deprecated rmm APIs in the test environment -- PR #4809 Improve libcudf doc rendering and add a new main page -- PR #4811 Add precision to subsecond specifier in timestamp/string conversion format -- PR #4543 Add `inplace` parameter support for `Series.replace` & `DataFrame.replace` -- PR #4816 Remove java API use of deprecated RMM APIs -- PR #4817 Fix `fixed_point` documentation -- PR #4844 Change Doxygen color to RAPIDS purple and documentation improvement -- PR #4840 Add docs for `T`, `empty` & `values` -- PR #4841 Remove unused `single_lane_block_popc_reduce` function -- PR #4842 Added Java bindings for titlizing a String column -- PR #4847 Replace legacy NVTX calls with "standalone" NVTX bindings calls -- PR #4851 Performance improvements relating to `concat` -- PR #4852 Add NVTX range calls to strings and nvtext APIs -- PR #4849 Update Java bindings to use new NVTX API -- PR #4845 Add CUDF_FUNC_RANGE to top-level cuIO function APIs -- PR #4848 Side step `unique_count` calculation in `scatter_by_map` -- PR #4863 Create is_integer/is_float functions for checking characters before calling to_integers/to_floats -- PR #4864 Add support for `__array__` method in cuDF -- PR #4853 Added CUDA_TRY to multiple places in libcudf code -- PR #4870 Add chunked parquet file writing from python -- PR #4865 Add docs and clarify limitations of `applymap` -- PR #4867 Parquet reader: coalesce adjacent column chunk reads -- PR #4871 Add in the build information when building the java jar file -- PR #4869 Expose contiguous table when deserializing from Java -- PR #4878 Remove obsolete string_from_host utility -- PR #4873 Prevent mutable_view() from invoking null count -- PR #4806 Modify doc and correct 
cupy array conversions in `10min-cudf-cupy.ipynb` -- PR #4877 Fix `DataFrame.mask` and align `mask` & `where` behavior with pandas -- PR #4884 Add more NVTX annotations in cuDF Python -- PR #4902 Use ContextDecorator instead of contextmanager for nvtx.annotate -- PR #4894 Add annotations for the `.columns` property and setter -- PR #4901 Improve unit tests for casting Java numeric types to string -- PR #4888 Handle dropping of NaNs & nulls using `skipna` parameter in Statistical reduction ops -- PR #4903 Improve internal documentation of cudf-io compression/decompression kernels -- PR #4905 Get decorated function name as message when annotating -- PR #4907 Reuse EventAttributes across NVTX annotations -- PR #4912 Drop old `valid` check in `element_indexing` -- PR #4924 Properly handle npartition argument in rearrange_by_hash -- PR #4918 Adding support for `cupy.ndarray` in `series.loc` -- PR #4909 Added ability to transform a column using cuda method in Java bindings -- PR #3259 Add .clang-format file & format all files -- PR #4943 Fix-up error handling in GPU detection -- PR #4917 Add support for casting unsupported `dtypes` of same kind -- PR #4928 Misc performance improvements for `scatter_by_map` -- PR #4927 Use stack for memory in `deviceGetName` -- PR #4933 Enable nop annotate -- PR #4929 Java methods ensure calling thread's CUDA device matches RMM device -- PR #4956 Dropping `find_first_value` and `find_last_value` -- PR #4962 Add missing parameters to `DataFrame.replace` & `Series.replace` -- PR #4960 Return the result of `to_json` -- PR #4963 Use `cudaDeviceAttr` in `getDeviceAttribute` -- PR #4953 add documentation for supported NVIDIA GPUs and CUDA versions for cuDF -- PR #4967 Add more comments to top-level gpuinflate and debrotli kernels -- PR #4968 Add CODE_OF_CONDUCT.md -- PR #4980 Change Java HostMemoryBuffer default to prefer pinned memory -- PR #4994 clang-format "cpp/tests" directory -- PR #4993 Remove Java memory prediction code -- PR #4985 Add null_count to Python Column ctors and use already computed null_count when possible -- PR #4998 Clean up dispatch of aggregation methods in result_cache -- PR #5000 Performance improvements in `isin` and dask_cudf backend -- PR #5002 Fix Column.__reduce__ to accept `null_count` -- PR #5006 Add Java bindings for strip, lstrip and rstrip -- PR #5047 Add Cython binding for libcudf++ CSV reader -- PR #5027 Move nvstrings standalone docs pages to libcudf doxygen pages -- PR #4947 Add support for `CategoricalColumn` to be type-casted with different categories -- PR #4822 Add constructor to `pq_chunked_state` to enable using RAII idiom -- PR #5024 CSV reader input stage optimizations -- PR #5061 Add support for writing parquet to python file-like objects -- PR #5034 Use loc to apply boolmask to frame efficiently when constructing query result -- PR #5039 Make `annotate` picklable -- PR #5045 Remove call to `unique()` in concat when `axis=1` -- PR #5023 Object oriented join and column agnostic typecasting -- PR #5049 Add grouping of libcudf apis into doxygen modules -- PR #5069 Remove duplicate documentation from detail headers -- PR #5075 Add simple row-group aggregation mechanism in dask_cudf read_parquet -- PR #5084 Improve downcasting in `Series.label_encoding()` to reduce memory usage -- PR #5085 Print more precise numerical strings in unit tests -- PR #5028 Add Docker 19 support to local gpuci build -- PR #5093 Add `.cat.as_known` related test in `dask_cudf` -- PR #5100 Add documentation on libcudf doxygen guidelines -- PR #5106
Add detail API for `cudf::concatenate` with tables -- PR #5104 Add missing `.inl` files to clang-format and git commit hook -- PR #5112 Adding `htoi` and `ip2int` support to `StringMethods` -- PR #5101 Add POSITION_INDEPENDENT_CODE flag to static cudftestutil library -- PR #5109 Update CONTRIBUTING.md for `clang-format` pre-commit hook -- PR #5054 Change String typecasting to be in line with Pandas -- PR #5123 Display more useful info on `clang-format` CI Failure -- PR #5058 Adding cython binding for CSV writer -- PR #5156 Raise error when applying boolean mask containing null values. -- PR #5137 Add java bindings for getSizeInBytes in DType -- PR #5194 Update Series.fillna to reflect dtype behavior -- PR #5159 Add `make_meta_object` in `dask_cudf` backend and add `str.split` test -- PR #5147 Use logging_resource_adaptor from RMM in the JNI code -- PR #5184 Fix style checks -- PR #5198 Add detail headers for strings converter functions -- PR #5199 Add index support in `DataFrame.query` -- PR #5227 Refactor `detail::gather` API to make use of scoped enumerators -- PR #5218 Reduce memory usage when categorifying column with null values. -- PR #5209 Add `nan_as_null` support to `cudf.from_pandas` -- PR #5207 Break up backref_re.cu into multiple source files to improve compile time -- PR #5155 Fix cudf documentation misspellings -- PR #5208 Port search and join benchmark to libcudf++ -- PR #5214 Move docs build script into repository -- PR #5219 Add per context cache for JIT kernels -- PR #5250 Improve `to_csv()` support for writing to buffers -- PR #5233 Remove experimental namespace used during libcudf++ refactor -- PR #5213 Documentation enhancements to `cudf` python APIs -- PR #5251 Fix more misspellings in cpp comments and strings -- PR #5261 Add short git commit to conda package name -- PR #5254 Deprecate nvstrings, nvcategory and nvtext -- PR #5270 Add support to check for "NaT" and "None" strings while typecasting to `datetime64` -- PR #5298 Remove unused native deps from java library -- PR #5216 Make documentation uniform for params - -## Bug Fixes - -- PR #5221 Fix the use of user-provided resource on temporary values -- PR #5181 Allocate null count using the default resource in `copy_if` -- PR #5141 Use user-provided resource correctly in `unary_operation()` and `shift()` -- PR #5064 Fix `hash()` and `construct_join_output_df()` to use user-provided memory resource correctly -- PR #4386 Update Java package to 0.14 -- PR #4466 Fix merge key column sorting -- PR #4402 Fix `cudf::strings::join_strings` logic with all-null strings and null narep -- PR #4610 Fix validity bug in string scalar factory -- PR #4570 Fixing loc ordering issue in dataframe -- PR #4612 Fix invalid index handling in cudf::dictionary::add-keys call to gather -- PR #4614 Fix cuda-memcheck errors found in `column_tests.cu` and `copying/utility_tests.cu` -- PR #4639 Fix java column of empty strings issue -- PR #4613 Fix issue related to downcasting in `.loc` -- PR #4615 Fix potential OOB write in ORC writer compression stage -- PR #4587 Fix non-regex libcudf contains methods to return true when target is an empty string -- PR #4617 Fix memory leak in aggregation object destructor -- PR #4633 String concatenation fix in `DataFrame.rename` -- PR #4609 Fix to handle `Series.factorize` when index is set -- PR #4659 Fix strings::replace_re handling empty regex pattern -- PR #4652 Fix misaligned error when computing regex device structs
-- PR #4651 Fix hashing benchmark missing includes -- PR #4672 Fix docs for `value_counts` and update test cases -- PR #4672 Fix `__setitem__` handling list of column names -- PR #4673 Fix regex infinite loop while parsing invalid quantifier pattern -- PR #4679 Fix comments for make_dictionary_column factory functions -- PR #4711 Fix column leaks in Java unit test -- PR #4721 Fix string binop to update nulls appropriately -- PR #4722 Fix strings::pad when using pad::both with odd width -- PR #4743 Fix loc issue with Multiindex on DataFrame and Series -- PR #4725 Fix Java issue with not setting GPU on background thread -- PR #4701 Fix issue related to mixed input types in `as_column` -- PR #4748 Fix strings::all_characters_of_type to allow verify-types mask -- PR #4747 Fix random failures of decompression gtests -- PR #4749 Setting `nan_as_null=True` while creating a column in DataFrame creation -- PR #4761 Fix issues with `nan_as_null` in certain cases -- PR #4650 Fix type mismatch & result format issue in `searchsorted` -- PR #4755 Fix Java build to deal with new quantiles API -- PR #4720 Fix issue related to `dtype` param not being adhered in case of cuda arrays -- PR #4756 Fix regex error checking for valid quantifier condition -- PR #4777 Fix data pointer for column slices of zero length -- PR #4770 Fix readonly flag in `Column.__cuda_array_interface__` -- PR #4800 Fix dataframe slicing with strides -- PR #4796 Fix groupby apply for operations that fail on empty groups -- PR #4801 gitignore `_cuda/*.cpp` files -- PR #4805 Fix hash_object_dispatch definitions in dask_cudf -- PR #4813 Fix `GenericIndex` printing -- PR #4804 Fix issue related to `repartition` during hash based repartition -- PR #4814 Raise error if `to_csv` does not get `filename/path` -- PR #4821 Port apply_boolean_mask_benchmark to new cudf::column types -- PR #4826 Move memory resource from RmmTestEnvironment to the custom gtest main() scope -- PR #4839 Update Java bindings for timestamp cast formatting changes -- PR #4797 Fix string timestamp to datetime conversion with `ms` and `ns` -- PR #4854 Fix several cases of incorrect downcasting of operands in binops -- PR #4834 Fix bug in transform in handling single line UDFs -- PR #4857 Change JIT cache default directory to $HOME/.cudf -- PR #4807 Fix `categories` duplication in `dask_cudf` -- PR #4846 Fix CSV parsing with byte_range parameter and string columns -- PR #4883 Fix series get/set to match pandas -- PR #4861 Fix to_integers illegal-memory-access with all-empty strings column -- PR #4860 Fix issues in HostMemoryBufferTest and testNormalizeNANsAndZeros -- PR #4879 Fix output for `cudf.concat` with `axis=1` for pandas parity -- PR #4838 Fix to support empty inputs to `replace` method -- PR #4859 JSON reader: fix data type inference for string columns -- PR #4868 Temporary fix to skip validation on Dask related runs -- PR #4872 Fix broken column wrapper constructors in merge benchmark -- PR #4875 Fix cudf::strings::from_integer logic converting min integer to string -- PR #4876 Mark Java cleaner objects as being cleaned even if exception is thrown -- PR #4780 Handle nulls in Statistical column operations -- PR #4886 Minimize regex-find calls in multi-replace cudf::strings::replace_re function -- PR #4887 Remove `developer.rst` and any links -- PR #4915 Fix to `reset_index` inplace in MultiIndex and other places -- PR #4899 Fix series inplace handling -- PR #4940 Fix boolean mask issue with large sized DataFrame -- PR #4889 Fix multi-index merging -- PR #4922 Fix
cudf::strings::split logic for many columns -- PR #4949 Fix scatter, gather benchmark constructor call -- PR #4958 Fix strings::replace perf for long strings -- PR #4965 Raise Error when there are duplicate columns sent to `cudf.concat` -- PR #4983 Fix from_cudf in dask_cudf -- PR #4996 Parquet writer: fix potentially zero-sized string dictionary -- PR #5009 Fix pickling for string and categorical columns -- PR #4984 Fix groupby nth aggregation negative n and exclude nulls -- PR #5011 Fix DataFrame loc issue with boolean masking -- PR #4977 Fix compilation of cuDF benchmarks with build.sh -- PR #5018 Fix crash when JIT cache dir is inaccessible. Fix inter version cache clash for custom cache path. -- PR #5005 Fix CSV reader error when only one of the row selection parameters is set -- PR #5022 Add timestamp header to transform -- PR #5021 Fix bug with unsigned right shift and scalar lhs -- PR #5020 Fix `conda install pre_commit` not found when setting up dev environment -- PR #5030 Fix Groupby sort=True -- PR #5029 Change temporary dir to working dir for cudf io tests -- PR #5040 Fix `make_scalar_iterator()` and `make_pair_iterator(scalar)` to not copy values to host -- PR #5041 Fix invalid java test for shift right unsigned -- PR #5043 Remove invalid examples page from libcudf doxygen -- PR #5060 Fix unsigned char limits issue in JIT by updating Jitify -- PR #5070 Fix libcudf++ csv reader support for hex dtypes, doublequotes and empty columns -- PR #5057 Fix metadata_out parameter not reaching parquet `write_all` -- PR #5076 Fix JNI code for null_policy enum change -- PR #5031 grouped_time_range_rolling_window assumes ASC sort order -- PR #5032 grouped_time_range_rolling_window should permit invocation without specifying grouping_keys -- PR #5103 Fix `read_csv` issue with names and header -- PR #5090 Fix losing nulls while creating DataFrame from dictionary -- PR #5089 Return false for sign-only string in libcudf is_float and is_integer -- PR #5124 `DataFrame.rename` support for renaming indexes w/ default for `index` -- PR #5108 Fix float-to-string convert for -0.0 -- PR #5111 Fix header not being included in legacy jit transform.
-- PR #5115 Fix hex-to-integer logic when string has prefix '0x' -- PR #5118 Fix naming for java string length operators -- PR #5129 Fix missed reference in tests from #5118 -- PR #5122 Fix `clang-format` `custrings` bug -- PR #5138 Install `contextvars` backport on Python 3.6 -- PR #5145 Fix an issue with calling an aggregation operation on `SeriesGroupBy` -- PR #5148 Fix JNI build for GCC 8 -- PR #5162 Fix issues related to empty `DataFrame` in `as_gpu_matrix` & `astype` -- PR #5167 Fix regex extract match to return empty string -- PR #5163 Fix parquet INT96 timestamps before the epoch -- PR #5165 Fix potentially missing last row in libcudf++ csv reader -- PR #5185 Fix flake8 configuration and issues from new flake8 version -- PR #5193 Fix OOB read in csv reader -- PR #5191 Fix the use of the device memory resource -- PR #5212 Fix memory leak in `dlpack.pyx:from_dlpack()` -- PR #5224 Add new headers from #5198 to libcudf/meta.yaml -- PR #5228 Fix datetime64 scalar dtype handling for unsupported time units -- PR #5256 ORC reader: fix loading individual timestamp columns -- PR #5285 Fix DEBUG compilation failure due to `fixed_point.hpp` - - -# cuDF 0.13.0 (31 Mar 2020) - -## New Features - -- PR #4360 Added Java bindings for bitwise shift operators -- PR #3577 Add initial dictionary support to column classes -- PR #3777 Add support for dictionary column in gather -- PR #3693 add string support, skipna to scan operation -- PR #3662 Define and implement `shift`. -- PR #3861 Added Series.sum feature for String -- PR #4069 Added cast of numeric columns from/to String -- PR #3681 Add cudf::experimental::boolean_mask_scatter -- PR #4040 Add support for n-way merge of sorted tables -- PR #4053 Multi-column quantiles. -- PR #4100 Add set_keys function for dictionary columns -- PR #3894 Add remove_keys functions for dictionary columns -- PR #4107 Add groupby nunique aggregation -- PR #4235 Port nvtx.pyx to use non-legacy libcudf APIs -- PR #4153 Support Dask serialization protocol on cuDF objects -- PR #4127 Add python API for n-way sorted merge (merge_sorted) -- PR #4164 Add Buffer "constructor-kwargs" header -- PR #4172 Add groupby nth aggregation -- PR #4159 Add COUNT aggregation that includes null values -- PR #4190 Add libcudf++ transpose Cython implementation -- PR #4063 Define and implement string capitalize and title API -- PR #4217 Add libcudf++ quantiles Cython implementation -- PR #4216 Add cudf.Scalar Python type -- PR #3782 Add `fixed_point` class to support DecimalType -- PR #4272 Add stable sorted order -- PR #4129 Add libcudf++ interleave_columns and tile Cython implementation -- PR #4262 Port unaryops.pyx to use libcudf++ APIs -- PR #4276 Port avro.pyx to libcudf++ -- PR #4259 Ability to create Java host buffers from memory-mapped files -- PR #4240 Add groupby::groups() -- PR #4294 Add Series rank and DataFrame rank -- PR #4304 Add new NVTX infrastructure and add ranges to all top-level compute APIs.
-- PR #4319 Add repartition_by_hash API to dask_cudf -- PR #4315 ShiftLeft, ShiftRight, ShiftRightUnsigned binops -- PR #4321 Expose Python Semi and Anti Joins -- PR #4291 Add Java callback support for RMM events -- PR #4298 Port orc.pyx to libcudf++ -- PR #4344 Port concat.pyx to libcudf++ -- PR #4329 Add support for dictionary columns in scatter -- PR #4352 Add factory function make_column_from_scalar -- PR #4381 Add Java support for copying buffers with asynchronous streams -- PR #4288 Add libcudf++ shift Cython implementation -- PR #4338 Add cudf::sequence() for generating an incrementing list of numeric values -- PR #4456 Add argmin/max and string min/max to sort groupby -- PR #4564 Added Java bindings for clamp operator. -- PR #4602 Add Cython bindings for functions in `datetime.hpp` -- PR #4670 Add java and JNI bindings for contains_re -- PR #4363 Grouped Rolling Window support -- PR #4798 Add UDF support to grouped rolling window -- PR #3917 Add dictionary add_keys function -- PR #3842 ORC writer: add support for column statistics -- PR #4088 Added asString() on ColumnVector in Java that takes a format string -- PR #4484 Port CSV writer to libcudf++ - -## Improvements - -- PR #4641 Add replace example in dataframe.py and update 10min.ipynb -- PR #4140 Add cudf series examples and corr() method for dataframe in dataframe.py -- PR #4187 exposed getNativeView method in Java bindings -- PR #3525 build.sh option to disable nvtx -- PR #3748 Optimize hash_partition using shared memory -- PR #3808 Optimize hash_partition using shared memory and cub block scan -- PR #3698 Add count_(un)set_bits functions taking multiple ranges and updated slice to compute null counts at once. -- PR #3909 Move java backend to libcudf++ -- PR #3971 Adding `as_table` to convert Column to Table in python -- PR #3910 Adding sinh, cosh, tanh, asinh, acosh, atanh, cube root and rint unary support. -- PR #3972 Add Java bindings for left_semi_join and left_anti_join -- PR #3975 Simplify and generalize data handling in `Buffer` -- PR #3985 Update RMM include files and remove extraneously included header files. -- PR #3601 Port UDF functionality for rolling windows to libcudf++ -- PR #3911 Adding null boolean handling for copy_if_else -- PR #4003 Drop old `to_device` utility wrapper function -- PR #4002 Adding to_frame and fix for categorical column issue -- PR #4009 build script update to enable cudf build without installing -- PR #3897 Port cuIO JSON reader to cudf::column types -- PR #4008 Eliminate extra copy in column constructor -- PR #4013 Add cython definition for io readers cudf/io/io_types.hpp -- PR #4028 Port json.pyx to use new libcudf APIs -- PR #4014 ORC/Parquet: add count parameter to stripe/rowgroup-based reader API -- PR #3880 Add aggregation infrastructure support for cudf::reduce -- PR #4059 Add aggregation infrastructure support for cudf::scan -- PR #4021 Change quantiles signature for clarity. -- PR #4057 Handle offsets in cython Column class -- PR #4045 Reorganize `libxx` directory -- PR #4029 Port stream_compaction.pyx to use libcudf++ APIs -- PR #4031 Docs build scripts and instructions update -- PR #4062 Improve how java classifiers are produced -- PR #4038 JNI and Java support for is_nan and is_not_nan -- PR #3786 Adding string support to rolling_windows -- PR #4067 Removed unused `CATEGORY` type ID.
-- PR #3891 Port NVStrings (r)split_record to contiguous_(r)split_record -- PR #4070 Port NVText normalize_spaces to use libcudf strings column -- PR #4072 Allow round_robin_partition to single partition -- PR #4064 Add cudaGetDeviceCount to JNI layer -- PR #4075 Port nvtext ngrams-tokenize to libcudf++ -- PR #4087 Add support for writing large Parquet files in a chunked manner. -- PR #3716 Update cudf.to_parquet to use new GPU accelerated Parquet writer -- PR #4083 Use two partitions in test_groupby_multiindex_reset_index -- PR #4071 Add Java bindings for round robin partition -- PR #4079 Simply use `mask.size` to create the array view -- PR #4092 Keep mask on GPU for bit unpacking -- PR #4081 Copy from `Buffer`'s pointer directly to host -- PR #4105 Change threshold of using optimized hash partition code -- PR #4101 Redux serialize `Buffer` directly with `__cuda_array_interface__` -- PR #4098 Remove legacy calls from libcudf strings column code -- PR #4044 Port join.pyx to use libcudf++ APIs -- PR #4111 Use `Buffer`'s to serialize `StringColumn` -- PR #4567 Optimize `__reduce__` in `StringColumn` -- PR #4590 Register a few more types for Dask serialization -- PR #4113 Get `len` of `StringColumn`s without `nvstrings` -- PR #4147 Remove workaround for UNKNOWN_NULL_COUNT in contiguous_split. -- PR #4130 Renames in-place `cudf::experimental::fill` to `cudf::experimental::fill_in_place` -- PR #4136 Add `Index.names` property -- PR #4139 Port rolling.pyx to new libcudf APIs -- PR #4143 Renames in-place `cudf::experimental::copy_range` to `cudf::experimental::copy_range_in_place` -- PR #4144 Release GIL when calling libcudf++ functions -- PR #4082 Rework MultiColumns in cuDF -- PR #4149 Use "type-serialized" for pickled types like Dask -- PR #4174 Port hash groupby to libcudf++ -- PR #4171 Split java host and device vectors to make a vector truly immutable -- PR #4167 Port `search` to libcudf++ (support multi-column searchsorted) -- PR #4163 Assert Dask CUDA serializers have `Buffer` frames -- PR #4165 List serializable classes once -- PR #4168 IO readers: do not create null mask for non-nullable columns -- PR #4177 Use `uint8` type for host array copy of `Buffer` -- PR #4183 Update Google Test Execution -- PR #4182 Rename cuDF serialize functions to be more generic -- PR #4176 Add option to parallelize setup.py's cythonize -- PR #4191 Porting sort.pyx to use new libcudf APIs -- PR #4196 reduce CHANGELOG.md merge conflicts -- PR #4197 Added notebook testing to gpuCI gpu build -- PR #4220 Port strings wrap functionality. -- PR #4204 Port nvtext create-ngrams function -- PR #4219 Port dlpack.pyx to use new libcudf APIs -- PR #4225 Remove stale notebooks -- PR #4233 Porting replace.pyx to use new libcudf APIs -- PR #4223 Fix a few of the Cython warnings -- PR #4224 Optimize concatenate for many columns -- PR #4234 Add BUILD_LEGACY_TESTS cmake option -- PR #4231 Support for custom cuIO data_sink classes. 
-- PR #4251 Add class to docs in `dask-cudf` `derived_from` -- PR #4261 libxx Cython reorganization -- PR #4274 Support negative position values in slice_strings -- PR #4282 Porting nvstrings conversion functions from new libcudf++ to Python/Cython -- PR #4290 Port Parquet to use new libcudf APIs -- PR #4299 Convert cudf::shift to column-based api -- PR #4301 Add support for writing large ORC files in a chunked manner -- PR #4306 Use libcudf++ `unary.pyx` cast instead of legacy cast -- PR #4295 Port reduce.pyx to libcudf++ API -- PR #4305 Move gpuarrow.pyx and related libarrow_cuda files into `_libxx` -- PR #4244 Port nvstrings Substring Gather/Scatter functions to cuDF Python/Cython -- PR #4280 Port nvstrings Numeric Handling functions to cuDF Python/Cython -- PR #4278 Port filling.pyx to libcudf++ API -- PR #4328 Add memory threshold callbacks for Java RMM event handler -- PR #4336 Move a bunch of internal nvstrings code to use native StringColumns -- PR #4166 Port `is_sorted.pyx` to use libcudf++ APIs -- PR #4351 Remove a bunch of internal usage of Numba; set rmm as cupy allocator -- PR #4333 nvstrings case/capitalization cython bindings -- PR #4345 Removed an undesirable backwards include from /include to /src in cuIO writers.hpp -- PR #4367 Port copying.pyx to use new libcudf -- PR #4362 Move pq_chunked_state struct into its own header to match how orc writer is doing it. -- PR #4339 Port libcudf strings `wrap` api to cython/python -- PR #4236 Update dask_cudf.io.to_parquet to use cudf to_parquet -- PR #4311 Port nvstrings String Manipulations functions to cuDF Python/Cython -- PR #4373 Port nvstrings Regular Expressions functions to cuDF Python/Cython -- PR #4308 Replace dask_cudf sort_values and improve set_index -- PR #4407 Enable `.str.slice` & `.str.get` and `.str.zfill` unit-tests -- PR #4412 Require Dask + Distributed 2.12.0+ -- PR #4377 Support loading avro files that contain nested arrays -- PR #4436 Enable `.str.cat` and fix `.str.split` on python side -- PR #4405 Port nvstrings (Sub)string Comparisons functions to cuDF Python/Cython -- PR #4316 Add Java and JNI bindings for substring expression -- PR #4314 Add Java and JNI bindings for string contains -- PR #4461 Port nvstrings Miscellaneous functions to cuDF Python/Cython -- PR #4495 Port nvtext to cuDF Python/Cython -- PR #4503 Port binaryop.pyx to libcudf++ API -- PR #4499 Adding changes to handle include `keep_index` and `RangeIndex` -- PR #4533 Import `tlz` for optional `cytoolz` support -- PR #4493 Skip legacy testing in CI -- PR #4346 Port groupby Cython/Python to use libcudf++ API -- PR #4524 Updating `__setitem__` for DataFrame to use scalar scatter -- PR #4611 Fix to use direct slicing in iloc for multiindex rather than using gather under `_get_row_major` -- PR #4534 Disable deprecation warnings as errors. -- PR #4542 Remove RMM init/finalize in cudf test fixture. -- PR #4506 Check for multi-dimensional data in column/Series creation -- PR #4549 Add option to disable deprecation warnings.
-- PR #4516 Add negative value support for `.str.get` -- PR #4563 Remove copying to host for metadata generation in `generate_pandas_metadata` -- PR #4554 Removed raw RMM allocation from `column_device_view` -- PR #4619 Remove usage of `nvstrings` in `data_array_view` -- PR #4654 Upgrade version of `numba` required to `>=0.48.0` -- PR #4035 Port NVText tokenize function to libcudf++ -- PR #4042 Port cudf/io/functions.hpp to Cython for use in IO bindings -- PR #4058 Port hash.pyx to use libcudf++ APIs -- PR #4133 Mask cleanup and fixes: use `int32` dtype, ensure 64 byte padding, handle offsets - -## Bug Fixes - -- PR #3888 Drop `ptr=None` from `DeviceBuffer` call -- PR #3976 Fix string serialization and memory_usage method to be consistent -- PR #3902 Fix conversion of large size GPU array to dataframe -- PR #3953 Fix overflow in column_buffer when computing the device buffer size -- PR #3959 Add missing hash-dispatch function for cudf.Series -- PR #3970 Fix for Series Pickle -- PR #3964 Restore legacy NVStrings and NVCategory dependencies in Java jar -- PR #3982 Fix java unary op enum and add missing ops -- PR #3999 Fix issue serializing empty string columns (java) -- PR #3979 Add `name` to Series serialize and deserialize -- PR #4005 Fix null mask allocation bug in gather_bitmask -- PR #4000 Fix dask_cudf sort_values performance for single partitions -- PR #4007 Fix for copy_bitmask issue with uninitialized device_buffer -- PR #4037 Fix JNI quantile compile issue -- PR #4054 Fixed JNI to deal with reduction API changes -- PR #4052 Fix for round-robin when num_partitions divides nrows. -- PR #4061 Add NDEBUG guard on `constexpr_assert`. -- PR #4049 Fix `cudf::split` issue returning one less than expected column vectors -- PR #4065 Parquet writer: fix for out-of-range dictionary indices -- PR #4066 Fixed mismatch with dtype enums -- PR #4078 Fix joins for when column_in_common input parameter is empty -- PR #4080 Fix multi-index dask test with sort issue -- PR #4084 Update Java for removal of CATEGORY type -- PR #4086 ORC reader: fix potentially incorrect timestamp decoding in the last rowgroup -- PR #4089 Fix dask groupby multiindex test case issues in join -- PR #4097 Fix strings concatenate logic with column offsets -- PR #4076 All null string entries should have null data buffer -- PR #4109 Use rmm::device_vector instead of thrust::device_vector -- PR #4113 Use `.nvstrings` in `StringColumn.sum(...)` -- PR #4116 Fix a bug in contiguous_split() where tables with mixed column types could corrupt string output -- PR #4125 Fix type enum to account for added Dictionary type in `types.hpp` -- PR #4132 Fix `hash_partition` null mask allocation -- PR #4137 Update Java for mutating fill and rolling window changes -- PR #4184 Add missing except+ to Cython bindings -- PR #4141 Fix NVStrings test_convert failure in 10.2 build -- PR #4156 Make fill/copy_range no-op on empty columns -- PR #4158 Fix merge issue with empty table return if one of the two tables is empty -- PR #4162 Properly handle no index metadata generation for to_parquet -- PR #4175 Fix `__sizeof__` calculation in `StringColumn` -- PR #4155 Update groupby group_offsets size and fix unnecessary device dispatch.
-- PR #4186 Fix from_timestamps 12-hour specifiers support -- PR #4198 Fix constructing `RangeIndex` from `range` -- PR #4192 Parquet writer: fix OOB read when computing string hash -- PR #4201 Fix java window tests -- PR #4199 Fix potential race condition in memcpy_block -- PR #4221 Fix series dict alignment to not drop index name -- PR #4218 Fix `get_aggregation` definition with `except *` -- PR #4215 Fix performance regression in strings::detail::concatenate -- PR #4214 Alter ValueError exception for GPU accelerated Parquet writer to properly report `categorical` columns are not supported. -- PR #4232 Fix handling empty tuples of children in string columns -- PR #4222 Fix no-return compile error in binop-null-test -- PR #4242 Fix for rolling tests CI failure -- PR #4245 Fix race condition in parquet reader -- PR #4253 Fix dictionary decode and set_keys with column offset -- PR #4258 Fix dask-cudf losing index name in `reset_index` -- PR #4268 Fix java build for hash aggregate -- PR #4275 Fix bug in searching nullable values in non-nullable search space in `upper_bound` -- PR #4273 Fix losing `StringIndex` name in dask `_meta_nonempty` -- PR #4279 Fix converting `np.float64` to Scalar -- PR #4285 Add init files for cython pkgs and fix `setup.py` -- PR #4287 Parquet reader: fix empty string potentially read as null -- PR #4310 Fix empty values case in groupby -- PR #4297 Fix specification of package_data in setup.py -- PR #4302 Fix `_is_local_filesystem` check -- PR #4303 Parquet reader: fix empty columns missing from table -- PR #4317 Fix fill() when using string_scalar with an empty string -- PR #4324 Fix slice_strings for out-of-range start position value -- PR #4115 Serialize an empty column table with non zero rows -- PR #4327 Preemptive dispatch fix for changes in dask#5973 -- PR #4379 Correct regex reclass count variable to number of pairs instead of the number of literals -- PR #4364 Fix libcudf zfill strings to ignore '+/-' chars -- PR #4358 Fix strings::concat where narep is an empty string -- PR #4369 Fix race condition in gpuinflate -- PR #4390 Disable ScatterValid and ScatterNull legacy tests -- PR #4399 Make scalar destructor virtual. 
-- PR #4398 Fixes the failure in groupby in MIN/MAX on strings when some groups are empty -- PR #4406 Fix sorted merge issue with null values and ascending=False -- PR #4445 Fix string issue for parquet reader and support `keep_index` for `scatter_to_tables` -- PR #4423 Tighten up Dask serialization checks -- PR #4537 Use `elif` in Dask deserialize check -- PR #4682 Include frame lengths in Dask serialized header -- PR #4438 Fix repl-template error for replace_with_backrefs -- PR #4434 Fix join_strings logic with all-null strings and non-null narep -- PR #4465 Fix use_pandas_index having no effect in libcudf++ parquet reader -- PR #4464 Update Cmake to always link in libnvToolsExt -- PR #4467 Fix dropna issue for a DataFrame having np.nan -- PR #4480 Fix string_scalar.value to return an empty string_view for empty string-scalar -- PR #4474 Fix to not materialize RangeIndex in copy_categories -- PR #4496 Skip tests which require 2+ GPUs -- PR #4494 Update Java memory event handler for new RMM resource API -- PR #4505 Fix 0 length buffers during serialization -- PR #4482 Fix `.str.rsplit`, `.str.split`, `.str.find`, `.str.rfind`, `.str.index`, `.str.rindex` and enable related tests -- PR #4513 Backport scalar virtual destructor fix -- PR #4519 Remove `n` validation for `nlargest` & `nsmallest` and add negative support for `n` -- PR #4596 Fix `_popn` issue with performance -- PR #4526 Fix index slicing issue for index in case of an empty dataframe -- PR #4538 Fix cudf::strings::slice_strings(step=-1) for empty strings -- PR #4557 Disable compile-errors on deprecation warnings, for JNI -- PR #4669 Fix `dask_cudf` categorical nonempty meta handling -- PR #4576 Fix typo in `serialize.py` -- PR #4571 Load JNI native dependencies for Scalar class -- PR #4598 Fix to handle `pd.DataFrame` in `DataFrame.__init__` -- PR #4594 Fix exec dangling pointer issue in legacy groupby -- PR #4591 Fix issue when reading consecutive rowgroups -- PR #4600 Fix missing include in benchmark_fixture.hpp -- PR #4588 Fix ordering issue in `MultiIndex` -- PR #4632 Fix handling of empty inputs to concatenate -- PR #4630 Remove dangling reference to RMM exec policy in drop duplicates tests. -- PR #4625 Fix hash-based repartition bug in dask_cudf -- PR #4662 Fix to handle `keep_index` in `partition_by_hash` -- PR #4683 Fix Slicing issue with categorical column in DataFrame -- PR #4676 Fix bug in `_shuffle_group` for repartition -- PR #4681 Fix `test_repr` tests that were generating a `RangeIndex` for column names -- PR #4729 Fix `fsspec` versioning to prevent dask test failures -- PR #4145 Support empty index case in DataFrame._from_table -- PR #4108 Fix dtype bugs in dask_cudf metadata (metadata_nonempty overhaul) -- PR #4138 Really fix strings concatenate logic with column offsets -- PR #4119 Fix binary ops slowdown using jitify -remove-unused-globals - - -# cuDF 0.12.0 (04 Feb 2020) - -## New Features - -- PR #3759 Updated 10 Minutes with clarification on how `dask_cudf` uses `cudf` API -- PR #3224 Define and implement new join APIs. -- PR #3284 Add gpu-accelerated parquet writer -- PR #3254 Python redesign for libcudf++ -- PR #3336 Add `from_dlpack` and `to_dlpack` -- PR #3555 Add column names support to libcudf++ io readers and writers -- PR #3527 Add string functionality for merge API -- PR #3610 Add memory_usage to DataFrame and Series APIs -- PR #3557 Add contiguous_split() function. 
-- PR #3619 Support CuPy 7 -- PR #3604 Add nvtext ngrams-tokenize function -- PR #3403 Define and implement new stack + tile APIs -- PR #3627 Adding cudf::sort and cudf::sort_by_key -- PR #3597 Implement new sort based groupby -- PR #3776 Add column equivalence comparator (using epsilon for float equality) -- PR #3667 Define and implement round-robin partition API. -- PR #3690 Add bools_to_mask -- PR #3761 Introduce a Frame class and make Index, DataFrame and Series subclasses -- PR #3538 Define and implement left semi join and left anti join -- PR #3683 Added support for multiple delimiters in `nvtext.token_count()` -- PR #3792 Adding is_nan and is_notnan -- PR #3594 Adding clamp support to libcudf++ - -## Improvements - -- PR #3124 Add support for grand-children in cudf column classes -- PR #3292 Port NVStrings regex contains function -- PR #3409 Port NVStrings regex replace function -- PR #3417 Port NVStrings regex findall function -- PR #3351 Add warning when filepath resolves to multiple files in cudf readers -- PR #3370 Port NVStrings strip functions -- PR #3453 Port NVStrings IPv4 convert functions to cudf strings column -- PR #3441 Port NVStrings url encode/decode to cudf strings column -- PR #3364 Port NVStrings split functions -- PR #3463 Port NVStrings partition/rpartition to cudf strings column -- PR #3502 ORC reader: add option to read DECIMALs as INT64 -- PR #3461 Add a new overload to allocate_like() that takes explicit type and size params. -- PR #3590 Specialize hash functions for floating point -- PR #3569 Use `np.asarray` in `StringColumn.deserialize` -- PR #3553 Support Python NoneType in numeric binops -- PR #3511 Support DataFrame / Series mixed arithmetic -- PR #3567 Include `strides` in `__cuda_array_interface__` -- PR #3608 Update OPS codeowner group name -- PR #3431 Port NVStrings translate to cudf strings column -- PR #3507 Define and implement new binary operation APIs -- PR #3620 Add stream parameter to unary ops detail API -- PR #3593 Adding begin/end for mutable_column_device_view -- PR #3587 Merge CHECK_STREAM & CUDA_CHECK_LAST to CHECK_CUDA -- PR #3733 Rework `hash_partition` API -- PR #3655 Use move with make_pair to avoid copy construction -- PR #3402 Define and implement new quantiles APIs -- PR #3612 Add ability to customize the JIT kernel cache path -- PR #3647 Remove PatchedNumbaDeviceArray with CuPy 6.6.0 -- PR #3641 Remove duplicate definitions of CUDA_DEVICE_CALLABLE -- PR #3640 Enable memory_usage in dask_cudf (also adds pd.Index from_pandas) -- PR #3654 Update Jitify submodule ref to include gcc-8 fix -- PR #3639 Define and implement `nans_to_nulls` -- PR #3561 Rework contains implementation in search -- PR #3616 Add aggregation infrastructure for argmax/argmin. -- PR #3673 Parquet reader: improve rounding of timestamp conversion to seconds -- PR #3699 Stringify libcudacxx headers for binary op JIT -- PR #3697 Improve column insert performance for wide frames -- PR #3653 Make `gather_bitmask_kernel` more reusable. -- PR #3710 Remove multiple CMake configuration steps from root build script -- PR #3657 Define and implement compiled binops for string column comparisons -- PR #3520 Change read_parquet defaults and add warnings -- PR #3780 Java APIs for selecting a GPU -- PR #3796 Improve on round-robin for the case when the number of partitions is greater than the number of rows.
-- PR #3805 Avoid CuPy 7.1.0 for now -- PR #3758 detail::scatter variant with map iterator support -- PR #3882 Fail loudly when creating a StringColumn from nvstrings with > MAX_VAL(int32) bytes -- PR #3823 Add header file for detail search functions -- PR #2438 Build GBench Benchmarks in CI -- PR #3713 Adding aggregation support to rolling_window -- PR #3875 Add abstract sink for IO writers, used by ORC and Parquet writers for now -- PR #3916 Refactor gather bindings - -## Bug Fixes - -- PR #3618 Update 10 minutes to cudf and cupy to hide warnings that were being shown in the docs -- PR #3550 Update Java package to 0.12 -- PR #3549 Fix index name issue with iloc with RangeIndex -- PR #3562 Fix 4GB limit for gzipped-compressed csv files -- PR #2981 enable build.sh to build all targets without installation -- PR #3563 Use `__cuda_array_interface__` for serialization -- PR #3564 Fix cuda memory access error in gather_bitmask_kernel -- PR #3548 Replaced CUDA_RT_CALL with CUDA_TRY -- PR #3486 Pandas > 0.25 compatibility -- PR #3622 Fix new warnings and errors when building with gcc-8 -- PR #3588 Remove avro reader column order reversal -- PR #3629 Fix hash map test failure -- PR #3637 Fix sorted set_index operations in dask_cudf -- PR #3663 Fix libcudf++ ORC reader microseconds and milliseconds conversion -- PR #3668 Fixing CHECK_CUDA debug build issue -- PR #3684 Fix ends_with logic for matching string case -- PR #3691 Fix create_offsets to handle offset correctly -- PR #3687 Fixed bug while passing input GPU memory pointer in `nvtext.scatter_count()` -- PR #3701 Fix hash_partition hashing all columns instead of columns_to_hash -- PR #3694 Allow for null columns parameter in `csv_writer` -- PR #3706 Removed extra type-dispatcher call from merge -- PR #3704 Changed the default delimiter to `whitespace` for nvtext methods.
-- PR #3741 Construct DataFrame from dict-of-Series with alignment -- PR #3724 Update rmm version to match release -- PR #3743 Fix for `None` data in `__array_interface__` -- PR #3731 Fix performance of zero sized dataframe slice -- PR #3709 Fix inner_join incorrect result issue -- PR #3734 Update numba to 0.46 in conda files -- PR #3738 Update libxx cython types.hpp path -- PR #3672 Fix to_host issue with column_view having offset -- PR #3730 CSV reader: Set invalid float values to NaN/null -- PR #3670 Floor when casting between timestamps of different precisions -- PR #3728 Fix apply_boolean_mask issue with non-null string column -- PR #3769 Don't look for a `name` attribute in column -- PR #3783 Bind cuDF operators to Dask Dataframe -- PR #3775 Fix segfault when reading compressed CSV files larger than 4GB -- PR #3799 Align indices of Series inputs when adding as columns to DataFrame -- PR #3803 Keep name when unpickling Index objects -- PR #3804 Fix cuda crash in AVRO reader -- PR #3766 Remove references to cudf::type_id::CATEGORY from IO code -- PR #3817 Don't always deepcopy an index -- PR #3821 Fix OOB read in gpuinflate prefetcher -- PR #3829 Parquet writer: fix empty dataframe causing cuda launch errors -- PR #3835 Fix memory leak in Cython when dealing with nulls in string columns -- PR #3866 Remove unnecessary if check in NVStrings.create_offsets -- PR #3858 Fixes the broken debug build after #3728 -- PR #3850 Fix merge typecast scope issue and resulting memory leak -- PR #3855 Fix MultiColumn recreation with reset_index -- PR #3869 Fixed size calculation in NVStrings::byte_count() -- PR #3868 Fix apply_grouped moving average example -- PR #3900 Properly link `NVStrings` and `NVCategory` into tests -- PR #3871 Fix `split_out` error -- PR #3886 Fix string column materialization from column view -- PR #3893 Parquet reader: fix segfault reading empty parquet file -- PR #3931 Dask-cudf groupby `.agg` multicolumn handling fix -- PR #4017 Fix memory leaks in `GDF_STRING` cython handling and `nans_to_nulls` cython - - -# cuDF 0.11.0 (11 Dec 2019) - -## New Features - -- PR #2905 Added `Series.median()` and null support for `Series.quantile()` -- PR #2930 JSON Reader: Support ARROW_RANDOM_FILE input -- PR #2956 Add `cudf::stack` and `cudf::tile` -- PR #2980 Added nvtext is_vowel/is_consonant functions -- PR #2987 Add `inplace` arg to `DataFrame.reset_index` and `Series` -- PR #3011 Added libcudf++ transition guide -- PR #3129 Add strings column factory from `std::vector`s -- PR #3054 Add parquet reader support for decimal data types -- PR #3022 adds DataFrame.astype for cuDF dataframes -- PR #2962 Add isnull(), notnull() and related functions -- PR #3025 Move search files to legacy -- PR #3068 Add `scalar` class -- PR #3094 Adding `any` and `all` support from libcudf -- PR #3130 Define and implement new `column_wrapper` -- PR #3143 Define and implement new copying APIs `slice` and `split` -- PR #3161 Move merge files to legacy -- PR #3079 Added support to write ORC files given a local path -- PR #3192 Add dtype param to cast `DataFrame` on init -- PR #3213 Port cuIO to libcudf++ -- PR #3222 Add nvtext character tokenizer -- PR #3223 Java expose underlying buffers -- PR #3300 Add `DataFrame.insert` -- PR #3263 Define and implement new `valid_if` -- PR #3278 Add `to_host` utility to copy `column_view` to host -- PR #3087 Add new cudf::experimental bool8 wrapper -- PR #3219 Construct column from column_view -- PR #3250 Define and
implement new merge APIs -- PR #3144 Define and implement new hashing APIs `hash` and `hash_partition` -- PR #3229 Define and implement new search APIs -- PR #3308 java add API for memory usage callbacks -- PR #2691 Row-wise reduction and scan operations via CuPy -- PR #3291 Add normalize_nans_and_zeros -- PR #3187 Define and implement new replace APIs -- PR #3356 Add vertical concatenation for table/columns -- PR #3344 java split API -- PR #2791 Add `groupby.std()` -- PR #3368 Enable dropna argument in dask_cudf groupby -- PR #3298 add null replacement iterator for column_device_view -- PR #3297 Define and implement new groupby API. -- PR #3396 Update device_atomics with new bool8 and timestamp specializations -- PR #3411 Java host memory management API -- PR #3393 Implement df.cov and enable covariance/correlation in dask_cudf -- PR #3401 Add dask_cudf ORC writer (to_orc) -- PR #3331 Add copy_if_else -- PR #3427 Define and Implement new multi-search API -- PR #3442 Add Bool-index + Multi column + DataFrame support for set-item -- PR #3172 Define and implement new fill/repeat/copy_range APIs -- PR #3490 Add pair iterators for columns -- PR #3497 Add DataFrame.drop(..., inplace=False) argument -- PR #3469 Add string functionality for replace API -- PR #3273 Define and implement new reduction APIs - -## Improvements - -- PR #2904 Move gpu decompressors to cudf::io namespace -- PR #2977 Moved old C++ test utilities to legacy directory. -- PR #2965 Fix slow orc reader perf with large uncompressed blocks -- PR #2995 Move JIT type utilities to legacy directory -- PR #2927 Add ``Table`` and ``TableView`` extension classes that wrap legacy cudf::table -- PR #3005 Renames `cudf::exp` namespace to `cudf::experimental` -- PR #3008 Make safe versions of `is_null` and `is_valid` in `column_device_view` -- PR #3026 Move fill and repeat files to legacy -- PR #3027 Move copying.hpp and related source to legacy folder -- PR #3014 Snappy decompression optimizations -- PR #3032 Use `asarray` to coerce indices to a NumPy array -- PR #2996 IO Readers: Replace `cuio::device_buffer` with `rmm::device_buffer` -- PR #3051 Specialized hash function for strings column -- PR #3065 Select and Concat for cudf::experimental::table -- PR #3080 Move `valid_if.cuh` to `legacy/` -- PR #3052 Moved replace.hpp functionality to legacy -- PR #3091 Move join files to legacy -- PR #3092 Implicitly init RMM if Java allocates before init -- PR #3029 Update gdf_ numeric types with stdint and move to cudf namespace -- PR #2955 Add cmake option to only build for present GPU architecture -- PR #3070 Move functions.h and related source to legacy -- PR #2951 Allow set_index to handle a list of column names -- PR #3093 Move groupby files to legacy -- PR #2988 Removing GIS functionality (now part of cuSpatial library) -- PR #3067 Java method to return size of device memory buffer -- PR #3083 Improved some binary operation tests to include null testing.
-- PR #3084 Update to arrow-cpp and pyarrow 0.15.0 -- PR #3071 Move cuIO to legacy -- PR #3126 Round 2 of snappy decompression optimizations -- PR #3046 Define and implement new copying APIs `empty_like` and `allocate_like` -- PR #3128 Support MultiIndex in DataFrame.join -- PR #2971 Added initial gather and scatter methods for strings_column_view -- PR #3133 Port NVStrings to cudf column: count_characters and count_bytes -- PR #2991 Added strings column functions concatenate and join_strings -- PR #3028 Define and implement new `gather` APIs. -- PR #3135 Add nvtx utilities to cudf::nvtx namespace -- PR #3021 Java host side concat of serialized buffers -- PR #3138 Move unary files to legacy -- PR #3170 Port NVStrings substring functions to cudf strings column -- PR #3159 Port NVStrings is-chars-types function to cudf strings column -- PR #3154 Make `table_view_base.column()` const and add `mutable_table_view.column()` -- PR #3175 Set cmake cuda version variables -- PR #3171 Move deprecated error macros to legacy -- PR #3191 Port NVStrings integer convert ops to cudf column -- PR #3189 Port NVStrings find ops to cudf column -- PR #3352 Port NVStrings convert float functions to cudf strings column -- PR #3193 Add cuPy as a formal dependency -- PR #3195 Support for zero columned `table_view` -- PR #3165 Java device memory size for string category -- PR #3205 Move transform files to legacy -- PR #3202 Rename and move error.hpp to public headers -- PR #2878 Use upstream merge code in dask_cudf -- PR #3217 Port NVStrings upper and lower case conversion functions -- PR #3350 Port NVStrings booleans convert functions -- PR #3231 Add `column::release()` to give up ownership of contents. -- PR #3157 Use enum class rather than enum for mask_allocation_policy -- PR #3232 Port NVStrings datetime conversion to cudf strings column -- PR #3136 Define and implement new transpose API -- PR #3237 Define and implement new transform APIs -- PR #3245 Move binaryop files to legacy -- PR #3241 Move stream_compaction files to legacy -- PR #3166 Move reductions to legacy -- PR #3261 Small cleanup: remove `== true` -- PR #3271 Update rmm API based on `rmm.reinitialize(...)` change -- PR #3266 Remove optional checks for CuPy -- PR #3268 Adding null ordering per column feature when sorting -- PR #3239 Adding floating point specialization to comparators for NaNs -- PR #3270 Move predicates files to legacy -- PR #3281 Add to_host specialization for strings in column test utilities -- PR #3282 Add `num_bitmask_words` -- PR #3252 Add new factory methods to include passing an existing null mask -- PR #3288 Make `bit.cuh` utilities usable from host code. -- PR #3287 Move rolling windows files to legacy -- PR #3182 Define and implement new unary APIs `is_null` and `is_not_null` -- PR #3314 Drop `cython` from run requirements -- PR #3301 Add tests for empty column wrapper. 
-- PR #3294 Update to arrow-cpp and pyarrow 0.15.1 -- PR #3310 Add `row_hasher` and `element_hasher` utilities -- PR #3272 Support non-default streams when creating/destroying hash maps -- PR #3286 Clean up the starter code on README -- PR #3332 Port NVStrings replace to cudf strings column -- PR #3354 Define and implement new `scatter` APIs -- PR #3322 Port NVStrings pad operations to cudf strings column -- PR #3345 Add cache member for number of characters in string_view class -- PR #3299 Define and implement new `is_sorted` APIs -- PR #3328 Partition by stripes in dask_cudf ORC reader -- PR #3243 Use upstream join code in dask_cudf -- PR #3371 Add `select` method to `table_view` -- PR #3309 Add java and JNI bindings for search bounds -- PR #3305 Define and implement new rolling window APIs -- PR #3380 Concatenate columns of strings -- PR #3382 Add fill function for strings column -- PR #3391 Move device_atomics_tests.cu files to legacy -- PR #3303 Define and implement new stream compaction APIs `copy_if`, `drop_nulls`, `apply_boolean_mask`, `drop_duplicate` and `unique_count`. -- PR #3387 Strings column gather function -- PR #3440 Strings column scatter function -- PR #3389 Move quantiles.hpp + group_quantiles.hpp files to legacy -- PR #3397 Port unary cast to libcudf++ -- PR #3398 Move reshape.hpp files to legacy -- PR #3395 Port NVStrings regex extract to cudf strings column -- PR #3423 Port NVStrings htoi to cudf strings column -- PR #3425 Strings column copy_if_else implementation -- PR #3422 Move utilities to legacy -- PR #3201 Define and implement new datetime_ops APIs -- PR #3421 Port NVStrings find_multiple to cudf strings column -- PR #3448 Port scatter_to_tables to libcudf++ -- PR #3458 Update strings sections in the transition guide -- PR #3462 Add `make_empty_column` and update `empty_like`. -- PR #3465 Port `aggregation` traits and utilities. -- PR #3214 Define and implement new unary operations APIs -- PR #3475 Add `bitmask_to_host` column utility -- PR #3487 Add is_boolean trait and random timestamp generator for testing -- PR #3492 Small cleanup (remove std::abs) and comment -- PR #3407 Allow multiple row-groups per task in dask_cudf read_parquet -- PR #3512 Remove unused CUDA conda labels -- PR #3500 cudf::fill()/cudf::repeat() support for strings columns. -- PR #3438 Update scalar and scalar_device_view to better support strings -- PR #3414 Add copy_range function for strings column -- PR #3685 Add string support to contiguous_split. -- PR #3471 Add scalar/column, column/scalar and scalar/scalar overloads to copy_if_else.
-- PR #3451 Add support for implicit typecasting of join columns - -## Bug Fixes - -- PR #2895 Fixed dask_cudf group_split behavior to handle upstream rearrange_by_divisions -- PR #3048 Support for zero columned tables -- PR #3030 Fix snappy decoding regression in PR #3014 -- PR #3041 Fixed exp to experimental namespace name change issue -- PR #3056 Add additional cmake hint for finding local build of RMM files -- PR #3060 Move copying.hpp includes to legacy -- PR #3139 Fixed java RMM auto initialization -- PR #3141 Java fix for relocated IO headers -- PR #3149 Rename column_wrapper.cuh to column_wrapper.hpp -- PR #3168 Fix mutable_column_device_view head const_cast -- PR #3199 Update JNI includes for legacy moves -- PR #3204 ORC writer: Fix ByteRLE encoding of NULLs -- PR #2994 Fix split_out-support bug with hash_object_dispatch -- PR #3212 Fix string to date casting when format is not specified -- PR #3218 Fixes `row_lexicographic_comparator` issue with handling two tables -- PR #3228 Default initialize RMM when Java native dependencies are loaded -- PR #3012 replacing instances of `to_gpu_array` with `mem` -- PR #3236 Fix Numba 0.46+/CuPy 6.3 interface compatibility -- PR #3276 Update JNI includes for legacy moves -- PR #3256 Fix orc writer crash with multiple string columns -- PR #3211 Fix breaking change caused by rapidsai/rmm#167 -- PR #3265 Fix dangling pointer in `is_sorted` -- PR #3267 ORC writer: fix incorrect ByteRLE encoding of long literal runs -- PR #3277 Fix invalid reference to deleted temporary in `is_sorted`. -- PR #3274 ORC writer: fix integer RLEv2 mode2 unsigned base value encoding -- PR #3279 Fix shutdown hang issues with pinned memory pool init executor -- PR #3280 Invalid children check in mutable_column_device_view -- PR #3289 fix java memory usage API for empty columns -- PR #3293 Fix loading of csv files zipped on MacOS (disabled zip min version check) -- PR #3295 Fix storing invalid RMM exec policies. -- PR #3307 Add pd.RangeIndex to from_pandas to fix dask_cudf meta_nonempty bug -- PR #3313 Fix public headers including non-public headers -- PR #3318 Revert arrow to 0.15.0 temporarily to unblock downstream projects CI -- PR #3317 Fix index-argument bug in dask_cudf parquet reader -- PR #3323 Fix `insert` non-assert test case -- PR #3341 Fix `Series` constructor converting NoneType to "None" -- PR #3326 Fix and test for detail::gather map iterator type inference -- PR #3334 Remove zero-size exception check from make_strings_column factories -- PR #3333 Fix compilation issues with `constexpr` functions not marked `__device__` -- PR #3340 Make all benchmarks use cudf base fixture to initialize RMM pool -- PR #3337 Fix Java to pad validity buffers to 64-byte boundary -- PR #3362 Fix `find_and_replace` upcasting series for python scalars and lists -- PR #3357 Disabling `column_view` iterators for non fixed-width types -- PR #3383 Fix: properly compute null counts for rolling_window. -- PR #3386 Removing external includes from `column_view.hpp` -- PR #3369 Add write_partition to dask_cudf to fix to_parquet bug -- PR #3388 Support getitem with bools when DataFrame has a MultiIndex -- PR #3408 Fix String and Column (De-)Serialization -- PR #3372 Fix dask-distributed scatter_by_map bug -- PR #3419 Fix a bug in parse_into_parts (incomplete input causing walking past the end of string).
-- PR #3413 Fix dask_cudf read_csv file-list bug
-- PR #3416 Fix memory leak in ColumnVector when pulling strings off the GPU
-- PR #3424 Fix benchmark build by adding libcudacxx to benchmark's CMakeLists.txt
-- PR #3435 Fix diff and shift for empty series
-- PR #3439 Fix index-name bug in StringColumn concat
-- PR #3445 Fix ORC Writer default stripe size
-- PR #3459 Fix printing of invalid entries
-- PR #3466 Fix gather null mask allocation for invalid index
-- PR #3468 Fix memory leak issue in `drop_duplicates`
-- PR #3474 Fix small doc error in capitalize Docs
-- PR #3491 Fix more doc errors in NVStrings
-- PR #3478 Fix as_index deep copy via Index.rename inplace arg
-- PR #3476 Fix ORC reader timezone conversion
-- PR #3188 Repr slices up large DataFrames
-- PR #3519 Fix strings column concatenate handling zero-sized columns
-- PR #3530 Fix copy_if_else test case fail issue
-- PR #3523 Fix lgenfe issue with debug build
-- PR #3532 Fix potential use-after-free in cudf parquet reader
-- PR #3540 Fix unary_op null_mask bug and add missing test cases
-- PR #3559 Use HighLevelGraph api in DataFrame constructor (Fix upstream compatibility)
-- PR #3572 Fix CI Issue with hypothesis tests that are flaky
-
-
-# cuDF 0.10.0 (16 Oct 2019)
-
-## New Features
-
-- PR #2423 Added `groupby.quantile()`
-- PR #2522 Add Java bindings for NVStrings backed upper and lower case mutators
-- PR #2605 Added Sort based groupby in libcudf
-- PR #2607 Add Java bindings for parsing JSON
-- PR #2629 Add dropna= parameter to groupby
-- PR #2585 ORC & Parquet Readers: Remove millisecond timestamp restriction
-- PR #2507 Add GPU-accelerated ORC Writer
-- PR #2559 Add Series.tolist()
-- PR #2653 Add Java bindings for rolling window operations
-- PR #2480 Merge `custreamz` codebase into `cudf` repo
-- PR #2674 Add __contains__ for Index/Series/Column
-- PR #2635 Add support to read from remote and cloud sources like s3, gcs, hdfs
-- PR #2722 Add Java bindings for NVTX ranges
-- PR #2702 Add make_bool to dataset generation functions
-- PR #2394 Move `rapidsai/custrings` into `cudf`
-- PR #2734 Final sync of custrings source into cudf
-- PR #2724 Add libcudf support for __contains__
-- PR #2777 Add python bindings for porter stemmer measure functionality
-- PR #2781 Add issorted to is_monotonic
-- PR #2685 Add cudf::scatter_to_tables and cython binding
-- PR #2743 Add Java bindings for NVStrings timestamp2long as part of String ColumnVector casting
-- PR #2785 Add nvstrings Python docs
-- PR #2786 Add benchmarks option to root build.sh
-- PR #2802 Add `cudf::repeat()` and `cudf.Series.repeat()`
-- PR #2773 Add Fisher's unbiased kurtosis and skew for Series/DataFrame
-- PR #2748 Parquet Reader: Add option to specify loading of PANDAS index
-- PR #2807 Add scatter_by_map to DataFrame python API
-- PR #2836 Add nvstrings.code_points method
-- PR #2844 Add Series/DataFrame notnull
-- PR #2858 Add GTest type list utilities
-- PR #2870 Add support for grouping by Series of arbitrary length
-- PR #2719 Series covariance and Pearson correlation
-- PR #2207 Beginning of libcudf overhaul: introduce new column and table types
-- PR #2869 Add `cudf.CategoricalDtype`
-- PR #2838 CSV Reader: Support ARROW_RANDOM_FILE input
-- PR #2655 CuPy-based Series and Dataframe .values property
-- PR #2803 Added `edit_distance_matrix()` function to calculate pairwise edit distance for each string on a given nvstrings object.
-- PR #2811 Start of cudf strings column work based on 2207
-- PR #2872 Add Java pinned memory pool allocator
-- PR #2969 Add findAndReplaceAll to ColumnVector
-- PR #2814 Add Datetimeindex.weekday
-- PR #2999 Add timestamp conversion support for string categories
-- PR #2918 Add cudf::column timestamp wrapper types
-
-## Improvements
-
-- PR #2578 Update legacy_groupby to use libcudf group_by_without_aggregation
-- PR #2581 Removed `managed` allocator from hash map classes.
-- PR #2571 Remove unnecessary managed memory from gdf_column_concat
-- PR #2648 Cython/Python reorg
-- PR #2588 Update Series.append documentation
-- PR #2632 Replace dask-cudf set_index code with upstream
-- PR #2682 Add cudf.set_allocator() function for easier allocator init
-- PR #2642 Improve null printing and testing
-- PR #2747 Add missing Cython headers / cudftestutil lib to conda package for cuspatial build
-- PR #2706 Compute CSV format in device code to speedup performance
-- PR #2673 Add support for np.longlong type
-- PR #2703 move dask serialization dispatch into cudf
-- PR #2728 Add YYMMDD to version tag for nightly conda packages
-- PR #2729 Handle file-handle input in to_csv
-- PR #2741 CSV Reader: Move kernel functions into its own file
-- PR #2766 Improve nvstrings python cmake flexibility
-- PR #2756 Add out_time_unit option to csv reader, support timestamp resolutions
-- PR #2771 Stopgap alias for to_gpu_matrix()
-- PR #2783 Support mapping input columns to function arguments in apply kernels
-- PR #2645 libcudf unique_count for Series.nunique
-- PR #2817 Dask-cudf: `read_parquet` support for remote filesystems
-- PR #2823 improve java data movement debugging
-- PR #2806 CSV Reader: Clean-up row offset operations
-- PR #2640 Add dask wait/persist example to 10 minute guide
-- PR #2828 Optimizations of kernel launch configuration for `DataFrame.apply_rows` and `DataFrame.apply_chunks`
-- PR #2831 Add `column` argument to `DataFrame.drop`
-- PR #2775 Various optimizations to improve __getitem__ and __setitem__ performance
-- PR #2810 cudf::allocate_like can optionally always allocate a mask.
-- PR #2833 Parquet reader: align page data allocation sizes to 4-bytes to satisfy cuda-memcheck
-- PR #2832 Using the new Python bindings for UCX
-- PR #2856 Update group_split_cudf to use scatter_by_map
-- PR #2890 Optionally keep serialized table data on the host.
-- PR #2778 Doc: Updated and fixed some docstrings that were formatted incorrectly.
-- PR #2830 Use YYMMDD tag in custreamz nightly build
-- PR #2875 Java: Remove synchronized from register methods in MemoryCleaner
-- PR #2887 Minor snappy decompression optimization
-- PR #2899 Use new RMM API based on Cython
-- PR #2788 Guide to Python UDFs
-- PR #2919 Change java API to use operators in groupby namespace
-- PR #2909 CSV Reader: Avoid row offsets host vector default init
-- PR #2834 DataFrame supports setting columns via attribute syntax `df.x = col`
-- PR #3147 DataFrame can be initialized from rows via list of tuples
-- PR #3539 Restrict CuPy to 6
-
-## Bug Fixes
-
-- PR #2584 ORC Reader: fix parsing of `DECIMAL` index positions
-- PR #2619 Fix groupby serialization/deserialization
-- PR #2614 Update Java version to match
-- PR #2601 Fixes nlargest(1) issue in Series and Dataframe
-- PR #2610 Fix a bug in index serialization (properly pass DeviceNDArray)
-- PR #2621 Fixes the floordiv issue of not promoting float type when rhs is 0
-- PR #2611 Types Test: fix static casting from negative int to string
-- PR #2618 IO Readers: Fix datasource memory map failure for multiple reads
-- PR #2628 groupby_without_aggregation non-nullable input table produces non-nullable output
-- PR #2615 fix string category partitioning in java API
-- PR #2641 fix string category and timeunit concat in the java API
-- PR #2649 Fix groupby issue resulting from column_empty bug
-- PR #2658 Fix astype() for null categorical columns
-- PR #2660 fix column string category and timeunit concat in the java API
-- PR #2664 ORC reader: fix `skip_rows` larger than first stripe
-- PR #2654 Allow Java gdfOrderBy to work with string categories
-- PR #2669 AVRO reader: fix non-deterministic output
-- PR #2668 Update Java bindings to specify timestamp units for ORC and Parquet readers
-- PR #2679 AVRO reader: fix cuda errors when decoding compressed streams
-- PR #2692 Add concatenation for data-frame with different headers (empty and non-empty)
-- PR #2651 Remove nvidia driver installation from ci/cpu/build.sh
-- PR #2697 Ensure csv reader sets datetime column time units
-- PR #2698 Return RangeIndex from contiguous slice of RangeIndex
-- PR #2672 Fix null and integer handling in round
-- PR #2704 Parquet Reader: Fix crash when loading string column with nulls
-- PR #2725 Fix Jitify issue with running on Turing using CUDA version < 10
-- PR #2731 Fix building of benchmarks
-- PR #2738 Fix java to find new NVStrings locations
-- PR #2736 Pin Jitify branch to v0.10 version
-- PR #2742 IO Readers: Fix possible silent failures when creating `NvStrings` instance
-- PR #2753 Fix java quantile API calls
-- PR #2762 Fix validity processing for time in java
-- PR #2796 Fix handling string slicing and other nvstrings delegated methods with dask
-- PR #2769 Fix link to API docs in README.md
-- PR #2772 Handle multiindex pandas Series
-- PR #2749 Fix apply_rows/apply_chunks pessimistic null mask to use in_cols null masks only
-- PR #2752 CSV Reader: Fix exception when there's no rows to process
-- PR #2716 Added Exception for `StringMethods` in string methods
-- PR #2787 Fix Broadcasting `None` to `cudf-series`
-- PR #2794 Fix async race in NVCategory::get_value and get_value_bounds
-- PR #2795 Fix java build/cast error
-- PR #2496 Fix improper merge of two dataframes when names differ
-- PR #2824 Fix issue with incorrect result when Numeric Series replace is called several times
-- PR #2751 Replace value with null
-- PR #2765 Fix Java inequality comparisons for string category
-- PR #2818 Fix java join API to use new C++ join API
-- PR #2841 Fix nvstrings.slice and slice_from for range (0,0)
-- PR #2837 Fix join benchmark
-- PR #2809 Add hash_df and group_split dispatch functions for dask
-- PR #2843 Parquet reader: fix skip_rows when not aligned with page or row_group boundaries
-- PR #2851 Deleted existing dask-cudf/record.txt
-- PR #2854 Fix column creation from ephemeral objects exposing __cuda_array_interface__
-- PR #2860 Fix boolean indexing when the result is a single row
-- PR #2859 Fix tail method issue for string columns
-- PR #2852 Fixed `cumsum()` and `cumprod()` on boolean series.
-- PR #2865 DaskIO: Fix `read_csv` and `read_orc` when input is list of files
-- PR #2750 Fixed casting values to cudf::bool8 so non-zero values always cast to true
-- PR #2873 Fixed dask_cudf read_partition bug by generating ParquetDatasetPiece
-- PR #2850 Fixes dask_cudf.read_parquet on partitioned datasets
-- PR #2896 Properly handle `axis` string keywords in `concat`
-- PR #2926 Update rounding algorithm to avoid using fmod
-- PR #2968 Fix Java dependency loading when using NVTX
-- PR #2963 Fix ORC writer uncompressed block indexing
-- PR #2928 CSV Reader: Fix using `byte_range` for large datasets
-- PR #2983 Fix sm_70+ race condition in gpu_unsnap
-- PR #2964 ORC Writer: Segfault when writing mixed numeric and string columns
-- PR #3007 Java: Remove unit test that frees RMM invalid pointer
-- PR #3009 Fix orc reader RLEv2 patch position regression from PR #2507
-- PR #3002 Fix CUDA invalid configuration errors reported after loading an ORC file without data
-- PR #3035 Update update-version.sh for new docs locations
-- PR #3038 Fix uninitialized stream parameter in device_table deleter
-- PR #3064 Fixes groupby performance issue
-- PR #3061 Add rmmInitialize to nvstrings gtests
-- PR #3058 Fix UDF doc markdown formatting
-- PR #3059 Add nvstrings python build instructions to contributing.md
-
-
-# cuDF 0.9.0 (21 Aug 2019)
-
-## New Features
-
-- PR #1993 Add CUDA-accelerated series aggregations: mean, var, std
-- PR #2111 IO Readers: Support memory buffer, file-like object, and URL inputs
-- PR #2012 Add `reindex()` to DataFrame and Series
-- PR #2097 Add GPU-accelerated AVRO reader
-- PR #2098 Support binary ops on DFs and Series with mismatched indices
-- PR #2160 Merge `dask-cudf` codebase into `cudf` repo
-- PR #2149 CSV Reader: Add `hex` dtype for explicit hexadecimal parsing
-- PR #2156 Add `upper_bound()` and `lower_bound()` for libcudf tables and `searchsorted()` for cuDF Series
-- PR #2158 CSV Reader: Support single, non-list/dict argument for `dtype`
-- PR #2177 CSV Reader: Add `parse_dates` parameter for explicit date inference
-- PR #1744 cudf::apply_boolean_mask and cudf::drop_nulls support for cudf::table inputs (multi-column)
-- PR #2196 Add `DataFrame.dropna()`
-- PR #2197 CSV Writer: add `chunksize` parameter for `to_csv`
-- PR #2215 `type_dispatcher` benchmark
-- PR #2179 Add Java quantiles
-- PR #2157 Add __array_function__ to DataFrame and Series
-- PR #2212 Java support for ORC reader
-- PR #2224 Add DataFrame isna, isnull, notna functions
-- PR #2236 Add Series.drop_duplicates
-- PR #2105 Add hash-based join benchmark
-- PR #2316 Add unique, nunique, and value_counts for datetime columns
-- PR #2337 Add Java support for slicing a ColumnVector
-- PR #2049 Add cudf::merge (sorted merge)
-- PR #2368 Full cudf+dask Parquet Support
-- PR #2380 New cudf::is_sorted checks whether cudf::table is sorted
-- PR #2356 Java column vector standard deviation support
-- PR #2221 MultiIndex full indexing - Support iloc and wildcards for loc
-- PR #2429 Java support for getting length of strings in a ColumnVector
-- PR #2415 Add `value_counts` for series of any type
-- PR #2446 Add __array_function__ for index
-- PR #2437 ORC reader: Add 'use_np_dtypes' option
-- PR #2382 Add CategoricalAccessor add, remove, rename, and ordering methods
-- PR #2464 Native implement `__cuda_array_interface__` for Series/Index/Column objects
-- PR #2425 Rolling window now accepts array-based user-defined functions
-- PR #2442 Add __setitem__
-- PR #2449 Java support for getting byte count of strings in a ColumnVector
-- PR #2492 Add groupby.size() method
-- PR #2358 Add cudf::nans_to_nulls: convert floating point column into bitmask
-- PR #2489 Add drop argument to set_index
-- PR #2491 Add Java bindings for ORC reader 'use_np_dtypes' option
-- PR #2213 Support s/ms/us/ns DatetimeColumn time unit resolutions
-- PR #2536 Add _constructor properties to Series and DataFrame
-
-## Improvements
-
-- PR #2103 Move old `column` and `bitmask` files into `legacy/` directory
-- PR #2109 added name to Python column classes
-- PR #1947 Cleanup serialization code
-- PR #2125 More aggregate in java API
-- PR #2127 Add in java Scalar tests
-- PR #2088 Refactor of Python groupby code
-- PR #2130 Java serialization and deserialization of tables.
-- PR #2131 Chunk rows logic added to csv_writer
-- PR #2129 Add functions in the Java API to support nullable column filtering
-- PR #2165 made changes to get_dummies api for it to be available in MethodCache
-- PR #2171 Add CodeCov integration, fix doc version, make --skip-tests work when invoking with source
-- PR #2184 handle remote orc files for dask-cudf
-- PR #2186 Add `getitem` and `getattr` style access to Rolling objects
-- PR #2168 Use cudf.Column for CategoricalColumn's categories instead of a tuple
-- PR #2193 DOC: cudf::type_dispatcher documentation for specializing dispatched functors
-- PR #2199 Better java support for appending strings
-- PR #2176 Added column dtype support for datetime, int8, int16 to csv_writer
-- PR #2209 Matching `get_dummies` & `select_dtypes` behavior to pandas
-- PR #2217 Updated Java bindings to use the new groupby API
-- PR #2214 DOC: Update doc instructions to build/install `cudf` and `dask-cudf`
-- PR #2220 Update Java bindings for reduction rename
-- PR #2232 Move CodeCov upload from build script to Jenkins
-- PR #2225 refactor to use libcudf for gathering columns in dataframes
-- PR #2293 Improve join performance (faster compute_join_output_size)
-- PR #2300 Create separate dask codeowners for dask-cudf codebase
-- PR #2304 gdf_group_by_without_aggregations returns gdf_column
-- PR #2309 Java readers: remove redundant copy of result pointers
-- PR #2307 Add `black` and `isort` to style checker script
-- PR #2345 Restore removal of old groupby implementation
-- PR #2342 Improve `astype()` to operate all ways
-- PR #2329 using libcudf cudf::copy for column deep copy
-- PR #2344 DOC: docs on code formatting for contributors
-- PR #2376 Add inoperative axis= and win_type= arguments to Rolling()
-- PR #2378 remove dask for (de-)serialization of cudf objects
-- PR #2353 Bump Arrow and Dask versions
-- PR #2377 Replace `standard_python_slice` with just `slice.indices()`
-- PR #2373 cudf.DataFrame enhancements & Series.values support
-- PR #2392 Remove dlpack submodule; make cuDF's Cython API externally accessible
-- PR #2430 Updated Java bindings to use the new unary API
-- PR #2406 Moved all existing `table` related files to a `legacy/` directory
-- PR #2350 Performance related changes to get_dummies
-- PR #2420 Remove `cudautils.astype` and replace with `typecast.apply_cast`
-- PR #2456 Small improvement to typecast utility
-- PR #2458 Fix handling of thirdparty packages in `isort` config
-- PR #2459 IO Readers: Consolidate all readers to use `datasource` class
-- PR #2475 Exposed type_dispatcher.hpp, nvcategory_util.hpp and wrapper_types.hpp in the include folder
-- PR #2484 Enabled building libcudf as a static library
-- PR #2453 Streamline CUDA_REL environment variable
-- PR #2483 Bundle Boost filesystem dependency in the Java jar
-- PR #2486 Java API hash functions
-- PR #2481 Adds the ignore_null_keys option to the java api
-- PR #2490 Java api: support multiple aggregates for the same column
-- PR #2510 Java api: uses table based apply_boolean_mask
-- PR #2432 Use pandas formatting for console, html, and latex output
-- PR #2573 Bump numba version to 0.45.1
-- PR #2606 Fix references to notebooks-contrib
-
-## Bug Fixes
-
-- PR #2086 Fixed quantile api behavior mismatch in series & dataframe
-- PR #2128 Add offset param to host buffer readers in java API.
-- PR #2145 Work around binops validity checks for java
-- PR #2146 Work around unary_math validity checks for java
-- PR #2151 Fixes bug in cudf::copy_range where null_count was invalid
-- PR #2139 matching to pandas describe behavior & fixing nan values issue
-- PR #2161 Implicitly convert unsigned to signed integer types in binops
-- PR #2154 CSV Reader: Fix bools misdetected as strings dtype
-- PR #2178 Fix bug in rolling bindings where a view of an ephemeral column was being taken
-- PR #2180 Fix issue with isort reordering `importorskip` below imports depending on them
-- PR #2187 fix to honor dtype when numpy arrays are passed to columnops.as_column
-- PR #2190 Fix issue in astype conversion of string column to 'str'
-- PR #2208 Fix issue with calling `head()` on one row dataframe
-- PR #2229 Propagate exceptions from Cython cdef functions
-- PR #2234 Fix issue with local build script not properly building
-- PR #2223 Fix CUDA invalid configuration errors reported after loading small compressed ORC files
-- PR #2162 Setting is_unique and is_monotonic-related attributes
-- PR #2244 Fix ORC RLEv2 delta mode decoding with nonzero residual delta width
-- PR #2297 Work around `var/std` unsupported only at debug build
-- PR #2302 Fixed java serialization corner case
-- PR #2355 Handle float16 in binary operations
-- PR #2311 Fix copy behaviour for GenericIndex
-- PR #2349 Fix issues with String filter in java API
-- PR #2323 Fix groupby on categoricals
-- PR #2328 Ensure order is preserved in CategoricalAccessor._set_categories
-- PR #2202 Fix issue with unary ops mishandling empty input
-- PR #2326 Fix for bug in DLPack when reading multiple columns
-- PR #2324 Fix cudf Docker build
-- PR #2325 Fix ORC RLEv2 patched base mode decoding with nonzero patch width
-- PR #2235 Fix get_dummies to be compatible with dask
-- PR #2332 Zero initialize gdf_dtype_extra_info
-- PR #2360 Fix missing dtype handling in cudf.Series & columnops.as_column
-- PR #2364 Fix quantile api and other trivial issues around it
-- PR #2361 Fixed issue with `codes` of CategoricalIndex
-- PR #2357 Fixed inconsistent type of index created with from_pandas vs direct construction
-- PR #2389 Fixed Rolling __getattr__ and __getitem__ for offset based windows
-- PR #2402 Fixed bug in valid mask computation in cudf::copy_if (apply_boolean_mask)
-- PR #2401 Fix to a scalar datetime(of type Days) issue
-- PR #2386 Correctly allocate output valids in groupby
-- PR #2411 Fixed failures on binary op on single element string column
-- PR #2422 Fix Pandas logical binary operation incompatibilities
-- PR #2447 Fix CodeCov posting build statuses temporarily
-- PR #2450 Fix erroneous null handling in `cudf.DataFrame`'s `apply_rows`
-- PR #2470 Fix issues with empty strings and string categories (Java)
-- PR #2471 Fix String Column Validity.
-- PR #2481 Fix java validity buffer serialization
-- PR #2485 Updated bytes calculation to use size_t to avoid overflow in column concat
-- PR #2461 Fix groupby multiple aggregations same column
-- PR #2514 Fix cudf::drop_nulls threshold handling in Cython
-- PR #2516 Fix utilities include paths and meta.yaml header paths
-- PR #2517 Fix device memory leak in to_dlpack tensor deleter
-- PR #2431 Fix local build generated file ownerships
-- PR #2511 Added import of orc, refactored exception handlers to not squash fatal exceptions
-- PR #2527 Fix index and column input handling in dask_cudf read_parquet
-- PR #2466 Fix `dataframe.query` returning null rows erroneously
-- PR #2548 Orc reader: fix non-deterministic data decoding at chunk boundaries
-- PR #2557 fix cudautils import in string.py
-- PR #2521 Fix casting datetimes from/to the same resolution
-- PR #2545 Fix MultiIndexes with datetime levels
-- PR #2560 Remove duplicate `dlpack` definition in conda recipe
-- PR #2567 Fix ColumnVector.fromScalar issues while dealing with null scalars
-- PR #2565 Orc reader: fix incorrect data decoding of int64 data types
-- PR #2577 Fix search benchmark compilation error by adding necessary header
-- PR #2604 Fix a bug in copying.pyx:_normalize_types that upcasted int32 to int64
-
-
-# cuDF 0.8.0 (27 June 2019)
-
-## New Features
-
-- PR #1524 Add GPU-accelerated JSON Lines parser with limited feature set
-- PR #1569 Add support for Json objects to the JSON Lines reader
-- PR #1622 Add Series.loc
-- PR #1654 Add cudf::apply_boolean_mask: faster replacement for gdf_apply_stencil
-- PR #1487 cython gather/scatter
-- PR #1310 Implemented the slice/split functionality.
-- PR #1630 Add Python layer to the GPU-accelerated JSON reader
-- PR #1745 Add rounding of numeric columns via Numba
-- PR #1772 JSON reader: add support for BytesIO and StringIO input
-- PR #1527 Support GDF_BOOL8 in readers and writers
-- PR #1819 Logical operators (AND, OR, NOT) for libcudf and cuDF
-- PR #1813 ORC Reader: Add support for stripe selection
-- PR #1828 JSON Reader: add support for bool8 columns
-- PR #1833 Add column iterator with/without nulls
-- PR #1665 Add the point-in-polygon GIS function
-- PR #1863 Series and Dataframe methods for all and any
-- PR #1908 cudf::copy_range and cudf::fill for copying/assigning an index or range to a constant
-- PR #1921 Add additional formats for typecasting to/from strings
-- PR #1807 Add Series.dropna()
-- PR #1987 Allow user defined functions in the form of ptx code to be passed to binops
-- PR #1948 Add operator functions like `Series.add()` to DataFrame and Series
-- PR #1954 Add skip test argument to GPU build script
-- PR #2018 Add bindings for new groupby C++ API
-- PR #1984 Add rolling window operations Series.rolling() and DataFrame.rolling()
-- PR #1542 Python method and bindings for to_csv
-- PR #1995 Add Java API
-- PR #1998 Add google benchmark to cudf
-- PR #1845 Add cudf::drop_duplicates, DataFrame.drop_duplicates
-- PR #1652 Added `Series.where()` feature
-- PR #2074 Java Aggregates, logical ops, and better RMM support
-- PR #2140 Add a `cudf::transform` function
-- PR #2068 Concatenation of different typed columns
-
-## Improvements
-
-- PR #1538 Replacing LesserRTTI with inequality_comparator
-- PR #1703 C++: Added non-aggregating `insert` to `concurrent_unordered_map` with specializations to store pairs with a single atomicCAS when possible.
-- PR #1422 C++: Added a RAII wrapper for CUDA streams
-- PR #1701 Added `unique` method for stringColumns
-- PR #1713 Add documentation for Dask-XGBoost
-- PR #1666 CSV Reader: Improve performance for files with large number of columns
-- PR #1725 Enable the ability to use a single column groupby as its own index
-- PR #1759 Add an example showing simultaneous rolling averages to `apply_grouped` documentation
-- PR #1746 C++: Remove unused code: `windowed_ops.cu`, `sorting.cu`, `hash_ops.cu`
-- PR #1748 C++: Add `bool` nullability flag to `device_table` row operators
-- PR #1764 Improve Numerical column: `mean_var` and `mean`
-- PR #1767 Speed up Python unit tests
-- PR #1770 Added build.sh script, updated CI scripts and documentation
-- PR #1739 ORC Reader: Add more pytest coverage
-- PR #1696 Added null support in `Series.replace()`.
-- PR #1390 Added some basic utility functions for `gdf_column`s
-- PR #1791 Added general column comparison code for testing
-- PR #1795 Add printing of git submodule info to `print_env.sh`
-- PR #1796 Removing old sort based group by code and gdf_filter
-- PR #1811 Added functions for copying/allocating `cudf::table`s
-- PR #1838 Improve columnops.column_empty so that it returns typed columns instead of a generic Column
-- PR #1890 Add utils.get_dummies- a pandas-like wrapper around one_hot-encoding
-- PR #1823 CSV Reader: default the column type to string for empty dataframes
-- PR #1827 Create bindings for scalar-vector binops, and update one_hot_encoding to use them
-- PR #1817 Operators now support different sized dataframes as long as they don't share different sized columns
-- PR #1855 Transition replace_nulls to new C++ API and update corresponding Cython/Python code
-- PR #1858 Add `std::initializer_list` constructor to `column_wrapper`
-- PR #1846 C++ type-erased gdf_equal_columns test util; fix gdf_equal_columns logic error
-- PR #1391 Tidy up bit-resolution-operation and bitmask class code
-- PR #1882 Add iloc functionality to MultiIndex dataframes
-- PR #1884 Rolling windows: general enhancements and better coverage for unit tests
-- PR #1886 support GDF_STRING_CATEGORY columns in apply_boolean_mask, drop_nulls and other libcudf functions
-- PR #1896 Improve performance of groupby with levels specified in dask-cudf
-- PR #1915 Improve iloc performance for non-contiguous row selection
-- PR #1859 Convert read_json into a C++ API
-- PR #1919 Rename libcudf namespace gdf to namespace cudf
-- PR #1850 Support left_on and right_on for DataFrame merge operator
-- PR #1930 Specialize constructor for `cudf::bool8` to cast argument to `bool`
-- PR #1938 Add default constructor for `column_wrapper`
-- PR #1952 consolidate libcudf public API headers in include/cudf
-- PR #1949 Improved selection with boolmask using libcudf `apply_boolean_mask`
-- PR #1956 Add support for nulls in `query()`
-- PR #1973 Update `std::tuple` to `std::pair` in top-most libcudf APIs and C++ transition guide
-- PR #1981 Convert read_csv into a C++ API
-- PR #1868 ORC Reader: Support row index for speed up on small/medium datasets
-- PR #1964 Added support for list-like types in Series.str.cat
-- PR #2005 Use HTML5 details tag in bug report issue template
-- PR #2003 Removed few redundant unit-tests from test_string.py::test_string_cat
-- PR #1944 Groupby design improvements
-- PR #2017 Convert `read_orc()` into a C++ API
-- PR #2011 Convert `read_parquet()` into a C++ API
-- PR #1756 Add documentation "10 Minutes to cuDF and dask_cuDF"
-- PR #2034 Adding support for string columns concatenation using "add" binary operator
-- PR #2042 Replace old "10 Minutes" guide with new guide for docs build process
-- PR #2036 Make library of common test utils to speed up tests compilation
-- PR #2022 Facilitating get_dummies to be a high level api too
-- PR #2050 Namespace IO readers and add back free-form `read_xxx` functions
-- PR #2104 Add a functional ``sort=`` keyword argument to groupby
-- PR #2108 Add `find_and_replace` for StringColumn for replacing single values
-- PR #1803 cuDF/CuPy interoperability documentation
-
-## Bug Fixes
-
-- PR #1465 Fix for test_orc.py and test_sparse_df.py test failures
-- PR #1583 Fix underlying issue in `as_index()` that was causing `Series.quantile()` to fail
-- PR #1680 Add errors= keyword to drop() to fix cudf-dask bug
-- PR #1651 Fix `query` function on empty dataframe
-- PR #1616 Fix CategoricalColumn to access categories by index instead of iteration
-- PR #1660 Fix bug in `loc` when indexing with a column name (a string)
-- PR #1683 ORC reader: fix timestamp conversion to UTC
-- PR #1613 Improve CategoricalColumn.fillna(-1) performance
-- PR #1642 Fix failure of CSV_TEST gdf_csv_test.SkiprowsNrows on multiuser systems
-- PR #1709 Fix handling of `datetime64[ms]` in `dataframe.select_dtypes`
-- PR #1704 CSV Reader: Add support for the plus sign in number fields
-- PR #1687 CSV reader: return an empty dataframe for zero size input
-- PR #1757 Concatenating columns with null columns
-- PR #1755 Add col_level keyword argument to melt
-- PR #1758 Fix df.set_index() when setting index from an empty column
-- PR #1749 ORC reader: fix long strings of NULL values resulting in incorrect data
-- PR #1742 Parquet Reader: Fix index column name to match PANDAS compat
-- PR #1782 Update libcudf doc version
-- PR #1783 Update conda dependencies
-- PR #1786 Maintain the original series name in series.unique output
-- PR #1760 CSV Reader: fix segfault when dtype list only includes columns from usecols list
-- PR #1831 build.sh: Assuming python is in PATH instead of using PYTHON env var
-- PR #1839 Raise an error instead of segfaulting when transposing a DataFrame with StringColumns
-- PR #1840 Retain index correctly during merge left_on right_on
-- PR #1825 cuDF: Multiaggregation Groupby Failures
-- PR #1789 CSV Reader: Fix missing support for specifying `int8` and `int16` dtypes
-- PR #1857 Cython Bindings: Handle `bool` columns while calling `column_view_from_NDArrays`
-- PR #1849 Allow DataFrame support methods to pass arguments to the methods
-- PR #1847 Fixed #1375 by moving the nvstring check into the wrapper function
-- PR #1864 Fixing cudf reduction for POWER platform
-- PR #1869 Parquet reader: fix Dask timestamps not matching with Pandas (convert to milliseconds)
-- PR #1876 add dtype=bool for `any`, `all` to treat integer column correctly
-- PR #1875 CSV reader: take NaN values into account in dtype detection
-- PR #1873 Add column dtype checking for the all/any methods
-- PR #1902 Bug with string iteration in _apply_basic_agg
-- PR #1887 Fix for initialization issue in pq_read_arg,orc_read_arg
-- PR #1867 JSON reader: add support for null/empty fields, including the 'null' literal
-- PR #1891 Fix bug #1750 in string column comparison
-- PR #1909 Support of `to_pandas()` of boolean series with null values
-- PR #1923 Use prefix removal when two aggs are called on a SeriesGroupBy
-- PR #1914 Zero initialize gdf_column local variables
-- PR #1959 Add support for comparing boolean Series to scalar
-- PR #1966 Ignore index fix in series append
-- PR #1967 Compute index __sizeof__ only once for DataFrame __sizeof__
-- PR #1977 Support CUDA installation in default system directories
-- PR #1982 Fixes incorrect index name after join operation
-- PR #1985 Implement `GDF_PYMOD`, a special modulo that follows python's sign rules
-- PR #1991 Parquet reader: fix decoding of NULLs
-- PR #1990 Fixes a rendering bug in the `apply_grouped` documentation
-- PR #1978 Fix for values being filled in an empty dataframe
-- PR #2001 Correctly create MultiColumn from Pandas MultiColumn
-- PR #2006 Handle empty dataframe groupby construction for dask
-- PR #1965 Parquet Reader: Fix duplicate index column when it's already in `use_cols`
-- PR #2033 Add pip to conda environment files to fix warning
-- PR #2028 CSV Reader: Fix reading of uncompressed files without a recognized file extension
-- PR #2073 Fix an issue when gathering columns with NVCategory and nulls
-- PR #2053 cudf::apply_boolean_mask return empty column for empty boolean mask
-- PR #2066 exclude `IteratorTest.mean_var_output` test from debug build
-- PR #2069 Fix JNI code to use read_csv and read_parquet APIs
-- PR #2071 Fix bug with unfound transitive dependencies for GTests in Ubuntu 18.04
-- PR #2089 Configure Sphinx to render params correctly
-- PR #2091 Fix another bug with unfound transitive dependencies for `cudftestutils` in Ubuntu 18.04
-- PR #2115 Just apply `--disable-new-dtags` instead of trying to define all the transitive dependencies
-- PR #2106 Fix errors in JitCache tests caused by sharing of device memory between processes
-- PR #2120 Fix errors in JitCache tests caused by running multiple threads on the same data
-- PR #2102 Fix memory leak in groupby
-- PR #2113 fixed typo in to_csv code example
-
-
-# cudf 0.7.2 (16 May 2019)
-
-## New Features
-
-- PR #1735 Added overload for atomicAdd on int64. Streamlined implementation of custom atomic overloads.
-- PR #1741 Add MultiIndex concatenation
-
-## Bug Fixes
-
-- PR #1718 Fix issue with SeriesGroupBy MultiIndex in dask-cudf
-- PR #1734 Python: fix performance regression for groupby count() aggregations
-- PR #1768 Cython: fix handling read only schema buffers in gpuarrow reader
-
-
-# cudf 0.7.1 (11 May 2019)
-
-## New Features
-
-- PR #1702 Lazy load MultiIndex to return groupby performance to near optimal.
-
-## Bug Fixes
-
-- PR #1708 Fix handling of `datetime64[ms]` in `dataframe.select_dtypes`
-
-
-# cuDF 0.7.0 (10 May 2019)
-
-## New Features
-
-- PR #982 Implement gdf_group_by_without_aggregations and gdf_unique_indices functions
-- PR #1142 Add `GDF_BOOL` column type
-- PR #1194 Implement overloads for CUDA atomic operations
-- PR #1292 Implemented Bitwise binary ops AND, OR, XOR (&, |, ^)
-- PR #1235 Add GPU-accelerated Parquet Reader
-- PR #1335 Added local_dict arg in `DataFrame.query()`.
-- PR #1282 Add Series and DataFrame.describe()
-- PR #1356 Rolling windows
-- PR #1381 Add DataFrame._get_numeric_data
-- PR #1388 Add CODEOWNERS file to auto-request reviews based on where changes are made
-- PR #1396 Add DataFrame.drop method
-- PR #1413 Add DataFrame.melt method
-- PR #1412 Add DataFrame.pop()
-- PR #1419 Initial CSV writer function
-- PR #1441 Add Series level cumulative ops (cumsum, cummin, cummax, cumprod)
-- PR #1420 Add script to build and test on a local gpuCI image
-- PR #1440 Add DatetimeColumn.min(), DatetimeColumn.max()
-- PR #1455 Add Series.Shift via Numba kernel
-- PR #1461 Add Python coverage test to gpu build
-- PR #1445 Parquet Reader: Add selective reading of rows and row group
-- PR #1532 Parquet Reader: Add support for INT96 timestamps
-- PR #1516 Add Series and DataFrame.ndim
-- PR #1556 Add libcudf C++ transition guide
-- PR #1466 Add GPU-accelerated ORC Reader
-- PR #1565 Add build script for nightly doc builds
-- PR #1508 Add Series isna, isnull, and notna
-- PR #1456 Add Series.diff() via Numba kernel
-- PR #1588 Add Index `astype` typecasting
-- PR #1301 MultiIndex support
-- PR #1599 Level keyword supported in groupby
-- PR #929 Add support operations to dataframe
-- PR #1609 Groupby accept list of Series
-- PR #1658 Support `group_keys=True` keyword in groupby method
-
-## Improvements
-
-- PR #1531 Refactor closures as private functions in gpuarrow
-- PR #1404 Parquet reader page data decoding speedup
-- PR #1076 Use `type_dispatcher` in join, quantiles, filter, segmented sort, radix sort and hash_groupby
-- PR #1202 Simplify README.md
-- PR #1149 CSV Reader: Change convertStrToValue() functions to `__device__` only
-- PR #1238 Improve performance of the CUDA trie used in the CSV reader
-- PR #1245 Use file cache for JIT kernels
-- PR #1278 Update CONTRIBUTING for new conda environment yml naming conventions
-- PR #1163 Refactored UnaryOps. Reduced API to two functions: `gdf_unary_math` and `gdf_cast`. Added `abs`, `-`, and `~` ops. Changed bindings to Cython
-- PR #1284 Update docs version
-- PR #1287 add exclude argument to cudf.select_dtype function
-- PR #1286 Refactor some of the CSV Reader kernels into generic utility functions
-- PR #1291 fillna in `Series.to_gpu_array()` and `Series.to_array()` can accept the scalar too now.
-- PR #1005 generic `reduction` and `scan` support
-- PR #1349 Replace modernGPU sort join with thrust.
-- PR #1363 Add a dataframe.mean(...) that raises NotImplementedError to satisfy `dask.dataframe.utils.is_dataframe_like`
-- PR #1319 CSV Reader: Use column wrapper for gdf_column output alloc/dealloc
-- PR #1376 Change series quantile default to linear
-- PR #1399 Replace CFFI bindings for NVTX functions with Cython bindings
-- PR #1389 Refactored `set_null_count()`
-- PR #1386 Added macros `GDF_TRY()`, `CUDF_TRY()` and `ASSERT_CUDF_SUCCEEDED()`
-- PR #1435 Rework CMake and conda recipes to depend on installed libraries
-- PR #1391 Tidy up bit-resolution-operation and bitmask class code
-- PR #1439 Add cmake variable to enable compiling CUDA code with -lineinfo
-- PR #1462 Add ability to read parquet files from arrow::io::RandomAccessFile
-- PR #1453 Convert CSV Reader CFFI to Cython
-- PR #1479 Convert Parquet Reader CFFI to Cython
-- PR #1397 Add a utility function for producing an overflow-safe kernel launch grid configuration
-- PR #1382 Add GPU parsing of nested brackets to cuIO parsing utilities
-- PR #1481 Add cudf::table constructor to allocate a set of `gdf_column`s
-- PR #1484 Convert GroupBy CFFI to Cython
-- PR #1463 Allow and default melt keyword argument var_name to be None
-- PR #1486 Parquet Reader: Use device_buffer rather than device_ptr
-- PR #1525 Add cudatoolkit conda dependency
-- PR #1520 Renamed `src/dataframe` to `src/table` and moved `table.hpp`. Made `types.hpp` to be type declarations only.
-- PR #1492 Convert transpose CFFI to Cython
-- PR #1495 Convert binary and unary ops CFFI to Cython
-- PR #1503 Convert sorting and hashing ops CFFI to Cython
-- PR #1522 Use latest release version in update-version CI script
-- PR #1533 Remove stale join CFFI, fix memory leaks in join Cython
-- PR #1521 Added `row_bitmask` to compute bitmask for rows of a table. Merged `valids_ops.cu` and `bitmask_ops.cu`
-- PR #1553 Overload `hash_row` to avoid using initial hash values. Updated `gdf_hash` to select between overloads
-- PR #1585 Updated `cudf::table` to maintain own copy of wrapped `gdf_column*`s
-- PR #1559 Add `except +` to all Cython function definitions to catch C++ exceptions properly
-- PR #1617 `has_nulls` and `column_dtypes` for `cudf::table`
-- PR #1590 Remove CFFI from the build / install process entirely
-- PR #1536 Convert gpuarrow CFFI to Cython
-- PR #1655 Add `Column._pointer` as a way to access underlying `gdf_column*` of a `Column`
-- PR #1655 Update readme conda install instructions for cudf version 0.6 and 0.7
-
-
-## Bug Fixes
-
-- PR #1233 Fix dtypes issue while adding the column to `str` dataframe.
-- PR #1254 CSV Reader: fix data type detection for floating-point numbers in scientific notation
-- PR #1289 Fix looping over each value instead of each category in concatenation
-- PR #1293 Fix Inaccurate error message in join.pyx
-- PR #1308 Add atomicCAS overload for `int8_t`, `int16_t`
-- PR #1317 Fix catch polymorphic exception by reference in ipc.cu
-- PR #1325 Fix dtype of null bitmasks to int8
-- PR #1326 Update build documentation to use -DCMAKE_CXX11_ABI=ON
-- PR #1334 Add "na_position" argument to CategoricalColumn sort_by_values
-- PR #1321 Fix out of bounds warning when checking Bzip2 header
-- PR #1359 Add atomicAnd/Or/Xor for integers
-- PR #1354 Fix `fillna()` behaviour when replacing values with different dtypes
-- PR #1347 Fixed core dump issue while passing dict_dtypes without column names in `cudf.read_csv()`
-- PR #1379 Fixed build failure caused due to error: 'col_dtype' may be used uninitialized
-- PR #1392 Update cudf Dockerfile and package_versions.sh
-- PR #1385 Added INT8 type to `_schema_to_dtype` for use in GpuArrowReader
-- PR #1393 Fixed a bug in `gdf_count_nonzero_mask()` for the case of 0 bits to count
-- PR #1395 Update CONTRIBUTING to use the environment variable CUDF_HOME
-- PR #1416 Fix bug at gdf_quantile_exact and gdf_quantile_appox
-- PR #1421 Fix remove creation of series multiple times during `add_column()`
-- PR #1405 CSV Reader: Fix memory leaks on read_csv() failure
-- PR #1328 Fix CategoricalColumn to_arrow() null mask
-- PR #1433 Fix NVStrings/categories includes
-- PR #1432 Update NVStrings to 0.7.* to coincide with 0.7 development
-- PR #1483 Modify CSV reader to avoid cropping blank quoted characters in non-string fields
-- PR #1446 Merge 1275 hotfix from master into branch-0.7
-- PR #1447 Fix legacy groupby apply docstring
-- PR #1451 Fix hash join estimated result size is not correct
-- PR #1454 Fix local build script improperly change directory permissions
-- PR #1490 Require Dask 1.1.0+ for `is_dataframe_like` test or skip otherwise.
-- PR #1491 Use more specific directories & groups in CODEOWNERS
-- PR #1497 Fix Thrust issue on CentOS caused by missing default constructor of host_vector elements
-- PR #1498 Add missing include guard to device_atomics.cuh and separated DEVICE_ATOMICS_TEST
-- PR #1506 Fix csv-write call to updated NVStrings method
-- PR #1510 Added nvstrings `fillna()` function
-- PR #1507 Parquet Reader: Default string data to GDF_STRING
-- PR #1535 Fix doc issue to ensure correct labelling of cudf.series
-- PR #1537 Fix `undefined reference` link error in HashPartitionTest
-- PR #1548 Fix ci/local/build.sh README from using an incorrect image example
-- PR #1551 CSV Reader: Fix integer column name indexing
-- PR #1586 Fix broken `scalar_wrapper::operator==`
-- PR #1591 ORC/Parquet Reader: Fix missing import for FileNotFoundError exception
-- PR #1573 Parquet Reader: Fix crash due to clash with ORC reader datasource
-- PR #1607 Revert change of `column.to_dense_buffer` always return by copy for performance concerns
-- PR #1618 ORC reader: fix assert & data output when nrows/skiprows isn't aligned to stripe boundaries
-- PR #1631 Fix failure of TYPES_TEST on some gcc-7 based systems.
-- PR #1641 CSV Reader: Fix skip_blank_lines behavior with Windows line terminators (\r\n)
-- PR #1648 ORC reader: fix non-deterministic output when skiprows is non-zero
-- PR #1676 Fix groupby `as_index` behaviour with `MultiIndex`
-- PR #1659 Fix bug caused by empty groupbys and multiindex slicing throwing exceptions
-- PR #1656 Correct Groupby failure in dask when un-aggregable columns are left in dataframe.
-- PR #1689 Fix groupby performance regression
-- PR #1694 Add Cython as a runtime dependency since it's required in `setup.py`
-
-
-# cuDF 0.6.1 (25 Mar 2019)
-
-## Bug Fixes
-
-- PR #1275 Fix CentOS exception in DataFrame.hash_partition from using value "returned" by a void function
-
-
-# cuDF 0.6.0 (22 Mar 2019)
-
-## New Features
-
-- PR #760 Raise `FileNotFoundError` instead of `GDF_FILE_ERROR` in `read_csv` if the file does not exist
-- PR #539 Add Python bindings for replace function
-- PR #823 Add Doxygen configuration to enable building HTML documentation for libcudf C/C++ API
-- PR #807 CSV Reader: Add byte_range parameter to specify the range in the input file to be read
-- PR #857 Add Tail method for Series/DataFrame and update Head method to use iloc
-- PR #858 Add series feature hashing support
-- PR #871 CSV Reader: Add support for NA values, including user specified strings
-- PR #893 Adds PyArrow based parquet readers / writers to Python, fix category dtype handling, fix arrow ingest buffer size issues
-- PR #867 CSV Reader: Add support for ignoring blank lines and comment lines
-- PR #887 Add Series digitize method
-- PR #895 Add Series groupby
-- PR #898 Add DataFrame.groupby(level=0) support
-- PR #920 Add feather, JSON, HDF5 readers / writers from PyArrow / Pandas
-- PR #888 CSV Reader: Add prefix parameter for column names, used when parsing without a header
-- PR #913 Add DLPack support: convert between cuDF DataFrame and DLTensor
-- PR #939 Add ORC reader from PyArrow
-- PR #918 Add Series.groupby(level=0) support
-- PR #906 Add binary and comparison ops to DataFrame
-- PR #958 Support unary and binary ops on indexes
-- PR #964 Add `rename` method to `DataFrame`, `Series`, and `Index`
-- PR #985 Add `Series.to_frame` method
-- PR #985 Add `drop=` keyword to reset_index method
-- PR #994 Remove references to pygdf
-- PR #990 Add external series groupby support
-- PR #988 Add top-level merge function to cuDF
-- PR #992 Add comparison binaryops to DateTime columns
-- PR #996 Replace relative path imports with absolute paths in tests
-- PR #995 CSV Reader: Add index_col parameter to specify the column name or index to be used as row labels
-- PR #1004 Add `from_gpu_matrix` method to DataFrame
-- PR #997 Add property index setter
-- PR #1007 Replace relative path imports with absolute paths in cudf
-- PR #1013 select columns with df.columns
-- PR #1016 Rename Series.unique_count() to nunique() to match pandas API
-- PR #947 Prefixsum to handle nulls and float types
-- PR #1029 Remove rest of relative path imports
-- PR #1021 Add filtered selection with assignment for Dataframes
-- PR #872 Adding NVCategory support to cudf apis
-- PR #1052 Add left/right_index and left/right_on keywords to merge
-- PR #1091 Add `indicator=` and `suffixes=` keywords to merge
-- PR #1107 Add unsupported keywords to Series.fillna
-- PR #1032 Add string support to cuDF python
-- PR #1136 Removed `gdf_concat`
-- PR #1153 Added function for getting the padded allocation size for valid bitmask
-- PR #1148 Add cudf.sqrt for dataframes and Series
-- PR #1159 Add Python bindings for libcudf dlpack functions
-- PR #1155 Add __array_ufunc__ for DataFrame and Series for sqrt
-- PR #1168 to_frame for series accepts a name argument
-
-
-## Improvements
-
-- PR #1218 Add dask-cudf page to API docs
-- PR #892 Add support for heterogeneous types in binary ops with JIT
-- PR #730 Improve performance of `gdf_table` constructor
-- PR #561 Add Doxygen style comments to Join CUDA functions
-- PR #813 unified libcudf API functions by replacing gpu_ with gdf_
-- PR #822 Add support for `__cuda_array_interface__` for ingest
-- PR #756 Consolidate common helper functions from unordered map and multimap
-- PR #753 Improve performance of groupby sum and average, especially for cases with few groups.
-- PR #836 Add ingest support for arrow chunked arrays in Column, Series, DataFrame creation
-- PR #763 Format doxygen comments for csv_read_arg struct
-- PR #532 CSV Reader: Use type dispatcher instead of switch block
-- PR #694 Unit test utilities improvements
-- PR #878 Add better indexing to Groupby
-- PR #554 Add `empty` method and `is_monotonic` attribute to `Index`
-- PR #1040 Fixed up Doxygen comment tags
-- PR #909 CSV Reader: Avoid host->device->host copy for header row data
-- PR #916 Improved unit testing and error checking for `gdf_column_concat`
-- PR #941 Replace `numpy` call in `Series.hash_encode` with `numba`
-- PR #942 Added increment/decrement operators for wrapper types
-- PR #943 Updated `count_nonzero_mask` to return `num_rows` when the mask is null
-- PR #952 Added trait to map C++ type to `gdf_dtype`
-- PR #966 Updated RMM submodule.
-- PR #998 Add IO reader/writer modules to API docs, fix for missing cudf.Series docs
-- PR #1017 concatenate along columns for Series and DataFrames
-- PR #1002 Support indexing a dataframe with another boolean dataframe
-- PR #1018 Better concatenation for Series and Dataframes
-- PR #1036 Use Numpydoc style docstrings
-- PR #1047 Adding gdf_dtype_extra_info to gdf_column_view_augmented
-- PR #1054 Added default ctor to SerialTrieNode to overcome Thrust issue in CentOS7 + CUDA10
-- PR #1024 CSV Reader: Add support for hexadecimal integers in integral-type columns
-- PR #1033 Update `fillna()` to use libcudf function `gdf_replace_nulls`
-- PR #1066 Added inplace assignment for columns and select_dtypes for dataframes
-- PR #1026 CSV Reader: Change the meaning and type of the quoting parameter to match Pandas
-- PR #1100 Adds `CUDF_EXPECTS` error-checking macro
-- PR #1092 Fix select_dtype docstring
-- PR #1111 Added cudf::table
-- PR #1108 Sorting for datetime columns
-- PR #1120 Return a `Series` (not a `Column`) from `Series.cat.set_categories()`
-- PR #1128 CSV Reader: The last data row does not need to be line terminated
-- PR #1183 Bump Arrow version to 0.12.1
-- PR #1208 Default to CXX11_ABI=ON
-- PR #1252 Fix NVStrings dependencies for cuda 9.2 and 10.0
-- PR #2037 Optimize the existing `gather` and `scatter` routines in `libcudf`
-
-## Bug Fixes
-
-- PR #821 Fix flake8 issues revealed by flake8 update
-- PR #808 Resolved renamed `d_columns_valids` variable name
-- PR #820 CSV Reader: fix the issue where reader adds additional rows when file uses \r as a line terminator
-- PR #780 CSV Reader: Fix scientific notation parsing and null values for empty quotes
-- PR #815 CSV Reader: Fix data parsing when tabs are present in the input CSV file
-- PR #850 Fix bug where left joins where the left df has 0 rows causes a crash
-- PR #861 Fix memory leak by preserving the boolean mask index
-- PR #875 Handle unnamed indexes in to/from arrow functions
-- PR #877 Fix ingest of 1 row arrow tables in from arrow function
-- PR #876 Added missing `` include
-- PR #889 Deleted test_rmm.py which has now moved to RMM repo
-- PR #866 Merge v0.5.1 numpy ABI hotfix into 0.6
-- PR #917 value_counts return int type on empty columns
-- PR #611 Renamed `gdf_reduce_optimal_output_size()` -> `gdf_reduction_get_intermediate_output_size()`
-- PR #923 fix index for negative slicing for cudf dataframe and series
-- PR #927 CSV Reader: Fix category GDF_CATEGORY hashes not being computed properly
-- PR #921 CSV Reader: Fix parsing errors with delim_whitespace, quotations in the header row, unnamed columns
-- PR #933 Fix handling objects of all nulls in series creation
-- PR #940 CSV Reader: Fix an issue where the last data row is missing when using byte_range
-- PR #945 CSV Reader: Fix incorrect datetime64 when milliseconds or space separator are used
-- PR #959 Groupby: Problem with column name lookup
-- PR #950 Converting dataframe/recarray with non-contiguous arrays
-- PR #963 CSV Reader: Fix another issue with missing data rows when using byte_range
-- PR #999 Fix 0 sized kernel launches and empty sort_index exception
-- PR #993 Fix dtype in selecting 0 rows from objects
-- PR #1009 Fix performance regression in `to_pandas` method on DataFrame
-- PR #1008 Remove custom dask communication approach
-- PR #1001 CSV Reader: Fix a memory access error when reading a large (>2GB) file with date columns
-- PR #1019 Binary Ops: Fix error when one input column has null mask but other doesn't
-- PR #1014 CSV Reader: Fix false positives in bool value detection
-- PR #1034 CSV Reader: Fix parsing floating point precision and leading zero exponents
-- PR #1044 CSV Reader: Fix a segfault when byte range aligns with a page
-- PR #1058 Added support for `DataFrame.loc[scalar]`
-- PR #1060 Fix column creation with all valid nan values
-- PR #1073 CSV Reader: Fix an issue where a column name includes the return character
-- PR #1090 Updating Doxygen Comments
-- PR #1080 Fix dtypes returned from loc / iloc because of lists
-- PR #1102 CSV Reader: Minor fixes and memory usage improvements
-- PR #1174: Fix release script typo
-- PR #1137 Add prebuild script for CI
-- PR #1118 Enhanced the `DataFrame.from_records()` feature
-- PR #1129 Fix join performance with index parameter from using numpy array
-- PR #1145 Issue with .agg call on multi-column dataframes
-- PR #908 Some testing code cleanup
-- PR #1167 Fix issue with null_count not being set after inplace fillna()
-- PR #1184 Fix iloc performance regression
-- PR #1185 Support left_on/right_on and also on=str in merge
-- PR #1200 Fix allocating bitmasks with numba instead of rmm in allocate_mask function
-- PR #1213 Fix bug with csv reader requesting subset of columns using wrong datatype
-- PR #1223 gpuCI: Fix label on rapidsai channel on gpu build scripts
-- PR #1242 Add explicit Thrust exec policy to fix NVCATEGORY_TEST segfault on some platforms
-- PR #1246 Fix categorical tests that failed due to bad implicit type conversion
-- PR #1255 Fix overwriting conda package main label uploads
-- PR #1259 Add dlpack includes to pip build
-
-
-# cuDF 0.5.1 (05 Feb 2019)
-
-## Bug Fixes
-
-- PR #842 Avoid using numpy via cimport to prevent ABI issues in Cython compilation
-
-
-# cuDF 0.5.0 (28 Jan 2019)
-
-## New Features
-
-- PR #722 Add bzip2 decompression support to `read_csv()`
-- PR #693 add ZLIB-based GZIP/ZIP support to `read_csv_strings()`
-- PR #411 added null support to gdf_order_by (new API) and cudf_table::sort
-- PR #525 Added GitHub Issue templates for bugs, documentation, new features, and questions
-- PR #501 CSV Reader: Add support for user-specified decimal point and thousands separator to read_csv_strings()
-- PR #455 CSV Reader: Add support for user-specified decimal point and thousands separator to read_csv()
-- PR #439 add `DataFrame.drop` method similar to pandas
-- PR #356 add `DataFrame.transpose` method and `DataFrame.T` property similar to pandas
-- PR #505 CSV Reader: Add support for user-specified boolean values
-- PR #350 Implemented Series replace function
-- PR #490 Added print_env.sh script to gather relevant environment details when reporting cuDF issues
-- PR #474 add ZLIB-based GZIP/ZIP support to `read_csv()`
-- PR #547 Added melt similar to `pandas.melt()`
-- PR #491 Add CI test script to check for updates to CHANGELOG.md in PRs
-- PR #550 Add CI test script to check for style issues in PRs
-- PR #558 Add CI scripts for cpu-based conda and gpu-based test builds
-- PR #524 Add Boolean Indexing
-- PR #564 Update python `sort_values` method to use updated libcudf `gdf_order_by` API
-- PR #509 CSV Reader: Input CSV file can now be passed in as a text or a binary buffer
-- PR #607 Add `__iter__` and iteritems to DataFrame class
-- PR #643 added a new api gdf_replace_nulls that allows a user to replace nulls in a column
-
-## Improvements
-
-- PR #426 Removed sort-based groupby and refactored existing groupby APIs. Also improves C++/CUDA compile time.
-- PR #461 Add `CUDF_HOME` variable in README.md to replace relative pathing.
-- PR #472 RMM: Created centralized rmm::device_vector alias and rmm::exec_policy
-- PR #500 Improved the concurrent hash map class to support partitioned (multi-pass) hash table building.
-- PR #454 Improve CSV reader docs and examples
-- PR #465 Added templated C++ API for RMM to avoid explicit cast to `void**`
-- PR #513 `.gitignore` tweaks
-- PR #521 Add `assert_eq` function for testing
-- PR #502 Simplify Dockerfile for local dev, eliminate old conda/pip envs
-- PR #549 Adds `-rdynamic` compiler flag to nvcc for Debug builds
-- PR #577 Added external C++ API for scatter/gather functions
-- PR #583 Updated `gdf_size_type` to `int`
-- PR #617 Added .dockerignore file. Prevents adding stale cmake cache files to the docker container
-- PR #658 Reduced `JOIN_TEST` time by isolating overflow test of hash table size computation
-- PR #664 Added Debugging instructions to README
-- PR #651 Remove noqa marks in `__init__.py` files
-- PR #671 CSV Reader: uncompressed buffer input can be parsed without explicitly specifying compression as None
-- PR #684 Make RMM a submodule
-- PR #718 Ensure sum, product, min, max methods pandas compatibility on empty datasets
-- PR #720 Refactored Index classes to make them more Pandas-like, added CategoricalIndex
-- PR #749 Improve to_arrow and from_arrow Pandas compatibility
-- PR #766 Remove TravisCI references, remove unused variables from CMake, fix ARROW_VERSION in Cmake
-- PR #773 Add build-args back to Dockerfile and handle dependencies based on environment yml file
-- PR #781 Move thirdparty submodules to root and symlink in /cpp
-- PR #843 Fix broken cudf/python API examples, add new methods to the API index
-
-## Bug Fixes
-
-- PR #569 CSV Reader: Fix days being off-by-one when parsing some dates
-- PR #531 CSV Reader: Fix incorrect parsing of quoted numbers
-- PR #465 Added templated C++ API for RMM to avoid explicit cast to `void**`
-- PR #473 Added missing include
-- PR #478 CSV Reader: Add api support for auto column detection, header, mangle_dupe_cols, usecols
-- PR #495 Updated README to correct where cffi pytest should be executed
-- PR #501 Fix the intermittent segfault caused by the `thousands` and `compression` parameters in the csv reader
-- PR #502 Simplify Dockerfile for local dev, eliminate old conda/pip envs
-- PR #512 fix bug for `on` parameter in `DataFrame.merge` to allow for None or single column name
-- PR #511 Updated python/cudf/bindings/join.pyx to fix cudf merge printing out dtypes
-- PR #513 `.gitignore` tweaks
-- PR #521 Add `assert_eq` function for testing
-- PR #537 Fix CMAKE_CUDA_STANDARD_REQURIED typo in CMakeLists.txt
-- PR #447 Fix silent failure in initializing DataFrame from generator
-- PR #545 Temporarily disable csv reader thousands test to prevent segfault (test re-enabled in PR #501)
-- PR #559 Fix Assertion error while using `applymap` to change the output dtype
-- PR #575 Update `print_env.sh` script to better handle missing commands
-- PR #612 Prevent an exception from occurring with true division on integer series.
-- PR #630 Fix deprecation warning for `pd.core.common.is_categorical_dtype`
-- PR #622 Fix Series.append() behaviour when appending values with different numeric dtype
-- PR #603 Fix error while creating an empty column using None.
-- PR #673 Fix array of strings not being caught in from_pandas
-- PR #644 Fix return type and column support of dataframe.quantile()
-- PR #634 Fix create `DataFrame.from_pandas()` with numeric column names
-- PR #654 Add resolution check for GDF_TIMESTAMP in Join
-- PR #648 Enforce one-to-one copy required when using `numba>=0.42.0`
-- PR #645 Fix cmake build type handling not setting debug options when CMAKE_BUILD_TYPE=="Debug"
-- PR #669 Fix GIL deadlock when launching multiple python threads that make Cython calls
-- PR #665 Reworked the hash map to add a way to report the destination partition for a key
-- PR #670 CMAKE: Fix env include path taking precedence over libcudf source headers
-- PR #674 Check for gdf supported column types
-- PR #677 Fix 'gdf_csv_test_Dates' gtest failure due to missing nrows parameter
-- PR #604 Fix the parsing errors while reading a csv file using `sep` instead of `delimiter`.
-- PR #686 Fix converting nulls to NaT values when converting Series to Pandas/Numpy -- PR #689 CSV Reader: Fix behavior with skiprows+header to match pandas implementation -- PR #691 Fixes Join on empty input DFs -- PR #706 CSV Reader: Fix broken dtype inference when whitespace is in data -- PR #717 CSV reader: fix behavior when parsing a csv file with no data rows -- PR #724 CSV Reader: fix build issue due to parameter type mismatch in a std::max call -- PR #734 Prevents reading undefined memory in gpu_expand_mask_bits numba kernel -- PR #747 CSV Reader: fix an issue where CUDA allocations fail with some large input files -- PR #750 Fix race condition for handling NVStrings in CMake -- PR #719 Fix merge column ordering -- PR #770 Fix issue where RMM submodule pointed to wrong branch and pin other to correct branches -- PR #778 Fix hard coded ABI off setting -- PR #784 Update RMM submodule commit-ish and pip paths -- PR #794 Update `rmm::exec_policy` usage to fix segmentation faults when used as temporary allocator. -- PR #800 Point git submodules to branches of forks instead of exact commits - - -# cuDF 0.4.0 (05 Dec 2018) - -## New Features - -- PR #398 add pandas-compatible `DataFrame.shape()` and `Series.shape()` -- PR #394 New documentation feature "10 Minutes to cuDF" -- PR #361 CSV Reader: Add support for strings with delimiters - -## Improvements - - - PR #436 Improvements for type_dispatcher and wrapper structs - - PR #429 Add CHANGELOG.md (this file) - - PR #266 use faster CUDA-accelerated DataFrame column/Series concatenation. - - PR #379 new C++ `type_dispatcher` reduces code complexity in supporting many data types. - - PR #349 Improve performance for creating columns from memoryview objects - - PR #445 Update reductions to use type_dispatcher. Adds integer types support to sum_of_squares. - - PR #448 Improve installation instructions in README.md - - PR #456 Change default CMake build to Release, and added option for disabling compilation of tests - -## Bug Fixes - - - PR #444 Fix csv_test CUDA too many resources requested fail. - - PR #396 added missing output buffer in validity tests for groupbys. - - PR #408 Dockerfile updates for source reorganization - - PR #437 Add cffi to Dockerfile conda env, fixes "cannot import name 'librmm'" - - PR #417 Fix `map_test` failure with CUDA 10 - - PR #414 Fix CMake installation include file paths - - PR #418 Properly cast string dtypes to programmatic dtypes when instantiating columns - - PR #427 Fix and tests for Concatenation illegal memory access with nulls - - -# cuDF 0.3.0 (23 Nov 2018) - -## New Features - - - PR #336 CSV Reader string support - -## Improvements - - - PR #354 source code refactored for better organization. CMake build system overhaul. Beginning of transition to Cython bindings. - - PR #290 Add support for typecasting to/from datetime dtype - - PR #323 Add handling pyarrow boolean arrays in input/out, add tests - - PR #325 GDF_VALIDITY_UNSUPPORTED now returned for algorithms that don't support non-empty valid bitmasks - - PR #381 Faster InputTooLarge Join test completes in ms rather than minutes. 
- - PR #373 .gitignore improvements - - PR #367 Doc cleanup & examples for DataFrame methods - - PR #333 Add Rapids Memory Manager documentation - - PR #321 Rapids Memory Manager adds file/line location logging and convenience macros - - PR #334 Implement DataFrame `__copy__` and `__deepcopy__` - - PR #271 Add NVTX ranges to pygdf - - PR #311 Document system requirements for conda install - -## Bug Fixes - - - PR #337 Retain index on `scale()` function - - PR #344 Fix test failure due to PyArrow 0.11 Boolean handling - - PR #364 Remove noexcept from managed_allocator; CMakeLists fix for NVstrings - - PR #357 Fix bug that made all series be considered booleans for indexing - - PR #351 replace conda env configuration for developers - - PRs #346 #360 Fix CSV reading of negative numbers - - PR #342 Fix CMake to use conda-installed nvstrings - - PR #341 Preserve categorical dtype after groupby aggregations - - PR #315 ReadTheDocs build update to fix missing libcuda.so - - PR #320 FIX out-of-bounds access error in reductions.cu - - PR #319 Fix out-of-bounds memory access in libcudf count_valid_bits - - PR #303 Fix printing empty dataframe - - -# cuDF 0.2.0 and cuDF 0.1.0 - -These were initial releases of cuDF based on previously separate pyGDF and libGDF libraries. diff --git a/README.md b/README.md index fd8b0365807..f1b010394d6 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ conda install -c rapidsai -c conda-forge -c nvidia \ We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD of our latest development branch. -Note: cuDF is supported only on Linux, and with Python versions 3.9 and later. +Note: cuDF is supported only on Linux, and with Python versions 3.10 and later. See the [RAPIDS installation guide](https://docs.rapids.ai/install) for more OS and version info. diff --git a/build.sh b/build.sh index 52bb1e64d16..211e1db9fbf 100755 --- a/build.sh +++ b/build.sh @@ -17,11 +17,12 @@ ARGS=$* # script, and that this script resides in the repo dir! 
 REPODIR=$(cd $(dirname $0); pwd)
 
-VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings"
-HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"\\\"]
+VALIDARGS="clean libcudf pylibcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings"
+HELP="$0 [clean] [libcudf] [pylibcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"\\\"]
 clean - remove all existing build artifacts and configuration (start over)
 libcudf - build the cudf C++ code only
+ pylibcudf - build the pylibcudf Python package
 cudf - build the cudf Python package
 cudfjar - build cudf JAR with static libcudf using devtoolset toolchain
 dask_cudf - build the dask_cudf Python package
@@ -53,10 +54,11 @@ KAFKA_LIB_BUILD_DIR=${KAFKA_LIB_BUILD_DIR:=${REPODIR}/cpp/libcudf_kafka/build}
 CUDF_KAFKA_BUILD_DIR=${REPODIR}/python/cudf_kafka/build
 CUDF_BUILD_DIR=${REPODIR}/python/cudf/build
 DASK_CUDF_BUILD_DIR=${REPODIR}/python/dask_cudf/build
+PYLIBCUDF_BUILD_DIR=${REPODIR}/python/pylibcudf/build
 CUSTREAMZ_BUILD_DIR=${REPODIR}/python/custreamz/build
 CUDF_JAR_JAVA_BUILD_DIR="$REPODIR/java/target"
 
-BUILD_DIRS="${LIB_BUILD_DIR} ${CUDF_BUILD_DIR} ${DASK_CUDF_BUILD_DIR} ${KAFKA_LIB_BUILD_DIR} ${CUDF_KAFKA_BUILD_DIR} ${CUSTREAMZ_BUILD_DIR} ${CUDF_JAR_JAVA_BUILD_DIR}"
+BUILD_DIRS="${LIB_BUILD_DIR} ${CUDF_BUILD_DIR} ${DASK_CUDF_BUILD_DIR} ${KAFKA_LIB_BUILD_DIR} ${CUDF_KAFKA_BUILD_DIR} ${CUSTREAMZ_BUILD_DIR} ${CUDF_JAR_JAVA_BUILD_DIR} ${PYLIBCUDF_BUILD_DIR}"
 
 # Set defaults for vars modified by flags to this script
 VERBOSE_FLAG=""
@@ -268,7 +270,7 @@ fi
 
 ################################################################################
 # Configure, build, and install libcudf
-if buildAll || hasArg libcudf || hasArg cudf || hasArg cudfjar; then
+if buildAll || hasArg libcudf || hasArg pylibcudf || hasArg cudf || hasArg cudfjar; then
     if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
         CUDF_CMAKE_CUDA_ARCHITECTURES="${CUDF_CMAKE_CUDA_ARCHITECTURES:-NATIVE}"
         if [[ "$CUDF_CMAKE_CUDA_ARCHITECTURES" == "NATIVE" ]]; then
@@ -340,6 +342,14 @@ if buildAll || hasArg libcudf; then
     fi
 fi
 
+# Build and install the pylibcudf Python package
+if buildAll || hasArg pylibcudf; then
+
+    cd ${REPODIR}/python/pylibcudf
+    SKBUILD_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX};-DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR};-DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES};${EXTRA_CMAKE_ARGS}" \
+        python ${PYTHON_ARGS_FOR_INSTALL} .
+fi
+
 # Build and install the cudf Python package
 if buildAll || hasArg cudf; then
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 14dc7a59048..c67d127e635 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -29,7 +29,7 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
   --channel "${PYTHON_CHANNEL}" \
-  libcudf cudf dask-cudf
+  libcudf pylibcudf cudf dask-cudf
 
 export RAPIDS_DOCS_DIR="$(mktemp -d)"
diff --git a/ci/build_python.sh b/ci/build_python.sh
index 79e09432779..2e3f70ba767 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -22,9 +22,16 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 # TODO: Remove `--no-test` flag once importing on a CPU
 # node works correctly
 # With boa installed conda build forwards to the boa builder
+
+RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
+  --no-test \
+  --channel "${CPP_CHANNEL}" \
+  conda/recipes/pylibcudf
+
 RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
   --no-test \
   --channel "${CPP_CHANNEL}" \
+  --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
   conda/recipes/cudf
 
 RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh
index 1b563bc499c..e5565c4b53c 100755
--- a/ci/build_wheel_cudf.sh
+++ b/ci/build_wheel_cudf.sh
@@ -5,12 +5,27 @@ set -euo pipefail
 
 package_dir="python/cudf"
 
-export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON"
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
-./ci/build_wheel.sh ${package_dir}
+# Download the libcudf and pylibcudf wheels from this current build, so that the
+# 'cudf' wheel always builds against the 'libcudf' and 'pylibcudf' just built in the same CI run.
+#
+# Using the env variable PIP_CONSTRAINT is necessary to ensure the constraints
+# are used when creating the isolated build environment.
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcudf_dist
+RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python /tmp/pylibcudf_dist
+echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt
+echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" >> /tmp/constraints.txt
+export PIP_CONSTRAINT="/tmp/constraints.txt"
 
-python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/*
+./ci/build_wheel.sh ${package_dir}
 
+python -m auditwheel repair \
+    --exclude libcudf.so \
+    --exclude libnvcomp.so \
+    --exclude libnvcomp_bitcomp.so \
+    --exclude libnvcomp_gdeflate.so \
+    -w ${package_dir}/final_dist \
+    ${package_dir}/dist/*
 
-RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
+RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist
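The exported PIP_CONSTRAINT variable is the load-bearing piece here: a plain `pip install --constraint` flag does not propagate into the isolated environment pip creates for the build backend, so the constraint file must come in via the environment. A minimal sketch of the mechanism, with a hypothetical wheel path and CUDA suffix:

    # hypothetical local wheel; CI derives the real name via rapids-wheel-ctk-name-gen
    echo "libcudf-cu12 @ file:///tmp/libcudf_dist/libcudf_cu12-24.10.0-py3-none-linux_x86_64.whl" > /tmp/constraints.txt
    export PIP_CONSTRAINT=/tmp/constraints.txt
    # every resolution below, including inside the isolated build env,
    # now pins libcudf-cu12 to the local file rather than an index release
    python -m pip wheel python/cudf --wheel-dir dist
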
diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh
new file mode 100755
index 00000000000..8975381ceba
--- /dev/null
+++ b/ci/build_wheel_libcudf.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+package_dir="python/libcudf"
+
+./ci/build_wheel.sh ${package_dir}
+
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
+mkdir -p ${package_dir}/final_dist
+python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/*
+
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist
diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh
new file mode 100755
index 00000000000..0e4745bda28
--- /dev/null
+++ b/ci/build_wheel_pylibcudf.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+package_dir="python/pylibcudf"
+
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
+# Download the libcudf wheel from this current build, so that the 'pylibcudf'
+# wheel always builds against the 'libcudf' just built in the same CI run.
+#
+# Using the env variable PIP_CONSTRAINT is necessary to ensure the constraints
+# are used when creating the isolated build environment.
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcudf_dist
+echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt
+export PIP_CONSTRAINT="/tmp/constraints.txt"
+
+./ci/build_wheel.sh ${package_dir}
+
+python -m auditwheel repair \
+    --exclude libcudf.so \
+    --exclude libnvcomp.so \
+    --exclude libnvcomp_bitcomp.so \
+    --exclude libnvcomp_gdeflate.so \
+    -w ${package_dir}/final_dist \
+    ${package_dir}/dist/*
+
+RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
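Both wheel scripts pass `--exclude libcudf.so` (and the nvcomp libraries) to `auditwheel repair` so the shared library ships exactly once, in the libcudf wheel, rather than being vendored into every dependent wheel. A quick sanity check one could run on a repaired wheel (wheel filename hypothetical):

    # a repaired pylibcudf wheel should contain no bundled copy of libcudf.so
    unzip -l final_dist/pylibcudf_cu12-*.whl | grep libcudf.so \
      && echo "unexpected vendored copy" || echo "ok: resolved from the libcudf wheel"
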
diff --git a/ci/cudf_pandas_scripts/fetch_pandas_versions.py b/ci/cudf_pandas_scripts/fetch_pandas_versions.py
new file mode 100644
index 00000000000..b6913f947e8
--- /dev/null
+++ b/ci/cudf_pandas_scripts/fetch_pandas_versions.py
@@ -0,0 +1,24 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import requests
+from packaging.version import Version
+from packaging.specifiers import SpecifierSet
+import argparse
+
+def get_pandas_versions(pandas_range):
+    url = "https://pypi.org/pypi/pandas/json"
+    response = requests.get(url)
+    data = response.json()
+    versions = [Version(v) for v in data['releases']]
+    # Strip the leading "pandas" package name to leave a bare specifier set
+    specifier = SpecifierSet(pandas_range.lstrip("pandas"))
+    matching_versions = [v for v in versions if v in specifier]
+    matching_minors = sorted(set(".".join((str(v.major), str(v.minor))) for v in matching_versions), key=Version)
+    return matching_minors
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Filter pandas versions by range.")
+    parser.add_argument("pandas_range", type=str, help="The version range to filter by.")
+    args = parser.parse_args()
+
+    versions = get_pandas_versions(args.pandas_range)
+    print(','.join(versions))
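For reference, the helper can be exercised by hand; the range below is taken from dependencies.yaml-style pins, but the printed output depends on what is on PyPI at the time:

    $ python ci/cudf_pandas_scripts/fetch_pandas_versions.py "pandas>=2.0,<2.2.3dev0"
    2.0,2.1,2.2
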
"pandas>=\d+\.\d+,\<\d+\.\d+\.\d+dev\d+" $DEPENDENCIES_PATH) + # Function to display script usage function display_usage { - echo "Usage: $0 [--no-cudf]" + echo "Usage: $0 [--no-cudf] [pandas-version]" } # Default value for the --no-cudf option no_cudf=false +PANDAS_VERSION="" # Parse command-line arguments while [[ $# -gt 0 ]]; do @@ -25,9 +32,14 @@ while [[ $# -gt 0 ]]; do shift ;; *) - echo "Error: Unknown option $1" - display_usage - exit 1 + if [[ -z "$PANDAS_VERSION" ]]; then + PANDAS_VERSION=$1 + shift + else + echo "Error: Unknown option $1" + display_usage + exit 1 + fi ;; esac done @@ -36,13 +48,55 @@ if [ "$no_cudf" = true ]; then echo "Skipping cudf install" else RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" - RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep - python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,cudf-pandas-tests] + + # Download the cudf, libcudf, and pylibcudf built in the previous step + RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist + RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist + RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist + + echo "" > ./constraints.txt + if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then + # `test_python` constraints are for `[test]` not `[cudf-pandas-tests]` + rapids-dependency-file-generator \ + --output requirements \ + --file-key test_python \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ + | tee ./constraints.txt + fi + + python -m pip install \ + -v \ + --constraint ./constraints.txt \ + "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,cudf-pandas-tests]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ + "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" fi +python -m pip install ipykernel +python -m ipykernel install --user --name python3 + +# The third-party integration tests are ignored because they are run nightly in seperate CI job python -m pytest -p cudf.pandas \ + --ignore=./python/cudf/cudf_pandas_tests/third_party_integration_tests/ \ --cov-config=./python/cudf/.coveragerc \ --cov=cudf \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \ --cov-report=term \ ./python/cudf/cudf_pandas_tests/ + +output=$(python ci/cudf_pandas_scripts/fetch_pandas_versions.py $pandas_version_constraint) + +# Convert the comma-separated list into an array +IFS=',' read -r -a versions <<< "$output" + +for version in "${versions[@]}"; do + echo "Installing pandas version: ${version}" + python -m pip install "numpy>=1.23,<2.0a0" "pandas==${version}" + python -m pytest -p cudf.pandas \ + --ignore=./python/cudf/cudf_pandas_tests/third_party_integration_tests/ \ + --cov-config=./python/cudf/.coveragerc \ + --cov=cudf \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \ + --cov-report=term \ + ./python/cudf/cudf_pandas_tests/ +done diff --git a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh new file mode 100755 index 00000000000..d44d25d658c --- /dev/null +++ b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. 
diff --git a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh
new file mode 100755
index 00000000000..d44d25d658c
--- /dev/null
+++ b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+cleanup() {
+    rm ${TEST_DIR}/results-*.pickle
+}
+
+trap cleanup EXIT
+
+runtest() {
+    local lib=$1
+    local mode=$2
+
+    local plugin=""
+    if [ "$mode" = "cudf" ]; then
+        plugin="-p cudf.pandas"
+    fi
+
+    pytest \
+        $plugin \
+        -v \
+        --continue-on-collection-errors \
+        --cache-clear \
+        --numprocesses=${NUM_PROCESSES} \
+        --dist=worksteal \
+        ${TEST_DIR}/test_${lib}*.py
+}
+
+main() {
+    local lib=$1
+
+    # generation phase
+    runtest ${lib} "gold"
+    runtest ${lib} "cudf"
+
+    # assertion phase
+    pytest \
+        --compare \
+        -p cudf.pandas \
+        -v \
+        --continue-on-collection-errors \
+        --cache-clear \
+        --numprocesses=${NUM_PROCESSES} \
+        --dist=worksteal \
+        ${TEST_DIR}/test_${lib}*.py
+}
+
+main $@
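run-library-tests.sh takes the library name as its only argument and reads TEST_DIR and NUM_PROCESSES from the environment; the two plain runs write the results-*.pickle files that the final `--compare` run asserts against. A hypothetical manual invocation (library name assumed):

    TEST_DIR=python/cudf/cudf_pandas_tests/third_party_integration_tests/tests \
    NUM_PROCESSES=4 \
        ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh ibis
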
diff --git a/ci/cudf_pandas_scripts/third-party-integration/test.sh b/ci/cudf_pandas_scripts/third-party-integration/test.sh
new file mode 100755
index 00000000000..f8ddbaba0f3
--- /dev/null
+++ b/ci/cudf_pandas_scripts/third-party-integration/test.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+# Common setup steps shared by Python test jobs
+
+set -euo pipefail
+
+write_output() {
+    local key="$1"
+    local value="$2"
+    echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}"
+}
+
+extract_lib_from_dependencies_yaml() {
+    local file=$1
+    # Parse all keys in dependencies.yaml under the "files" section,
+    # select the keys that start with "test_", and strip that prefix
+    local extracted_libs="$(yq -o json $file | jq -rc '.files | with_entries(select(.key | contains("test_"))) | keys | map(sub("^test_"; ""))')"
+    echo $extracted_libs
+}
+
+main() {
+    local dependencies_yaml="$1"
+
+    LIBS=$(extract_lib_from_dependencies_yaml "$dependencies_yaml")
+    LIBS=${LIBS#[}
+    LIBS=${LIBS%]}
+
+    for lib in ${LIBS//,/ }; do
+        lib=$(echo "$lib" | tr -d '""')
+        echo "Running tests for library $lib"
+
+        CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
+
+        . /opt/conda/etc/profile.d/conda.sh
+
+        rapids-logger "Generate Python testing dependencies"
+        rapids-dependency-file-generator \
+            --config "$dependencies_yaml" \
+            --output conda \
+            --file-key test_${lib} \
+            --matrix "cuda=${CUDA_MAJOR};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
+
+        rapids-mamba-retry env create --yes -f env.yaml -n test
+
+        # Temporarily allow unbound variables for conda activation.
+        set +u
+        conda activate test
+        set -u
+
+        repo_root=$(git rev-parse --show-toplevel)
+        TEST_DIR=${repo_root}/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests
+
+        rapids-print-env
+
+        rapids-logger "Check GPU usage"
+        nvidia-smi
+
+        EXITCODE=0
+        trap "EXITCODE=1" ERR
+        set +e
+
+        rapids-logger "pytest ${lib}"
+
+        NUM_PROCESSES=8
+        serial_libraries=(
+            "tensorflow"
+        )
+        for serial_library in "${serial_libraries[@]}"; do
+            if [ "${lib}" = "${serial_library}" ]; then
+                NUM_PROCESSES=1
+            fi
+        done
+
+        TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh ${lib}
+
+        rapids-logger "Test script exiting with value: ${EXITCODE}"
+    done
+
+    exit ${EXITCODE}
+}
+
+main "$@"
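The `yq | jq` pipeline in `extract_lib_from_dependencies_yaml` simply maps `files.test_<lib>` keys to library names. A toy reproduction (file contents hypothetical):

    cat > /tmp/deps.yaml <<'EOF'
    files:
      checks: {}
      test_ibis: {}
      test_tensorflow: {}
    EOF
    yq -o json /tmp/deps.yaml \
      | jq -rc '.files | with_entries(select(.key | contains("test_"))) | keys | map(sub("^test_"; ""))'
    # -> ["ibis","tensorflow"]
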
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index ad96aff3930..be55b49870f 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -18,18 +18,16 @@ CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}')
 CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}')
 CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR}
 
-#Get <major>.<minor> for next version
+# Get <major>.<minor> for next version
 NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}')
 NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
 NEXT_PATCH=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[3]}')
 NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
-NEXT_UCX_PY_VERSION="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG}).*"
 
 # Need to distutils-normalize the versions for some use cases
 CURRENT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${CURRENT_SHORT_TAG}'))")
 NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
 PATCH_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_PATCH}'))")
-echo "current is ${CURRENT_SHORT_TAG_PEP440}, next is ${NEXT_SHORT_TAG_PEP440}"
 
 echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"
 
@@ -51,8 +49,10 @@ DEPENDENCIES=(
   dask-cuda
   dask-cudf
   kvikio
+  libcudf
   libkvikio
   librmm
+  pylibcudf
   rapids-dask-dependency
   rmm
 )
@@ -61,7 +61,7 @@ for DEP in "${DEPENDENCIES[@]}"; do
     sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}"
   done
   for FILE in python/*/pyproject.toml; do
-    sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" ${FILE}
+    sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" "${FILE}"
  done
 done
 
@@ -77,9 +77,9 @@ sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_
 # CI files
 for FILE in .github/workflows/*.yaml .github/workflows/*.yml; do
   sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
-  sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE};
+  sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
 done
-sed_runner "s/branch-[0-9]+\.[0-9]+/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_cudf_polars.sh
+sed_runner "s/branch-[0-9]\+\.[0-9]\+/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_cudf_polars.sh
 
 # Java files
 NEXT_FULL_JAVA_TAG="${NEXT_SHORT_TAG}.${PATCH_PEP440}-SNAPSHOT"
diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh
index e8849588aa5..d0675b0431a 100755
--- a/ci/test_python_common.sh
+++ b/ci/test_python_common.sh
@@ -14,7 +14,8 @@ ENV_YAML_DIR="$(mktemp -d)"
 rapids-dependency-file-generator \
   --output conda \
   --file-key test_python \
-  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
+  | tee "${ENV_YAML_DIR}/env.yaml"
 
 rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test
diff --git a/ci/test_python_cudf.sh b/ci/test_python_cudf.sh
index 217dd2fd9a8..ae34047e87f 100755
--- a/ci/test_python_cudf.sh
+++ b/ci/test_python_cudf.sh
@@ -15,7 +15,7 @@ trap "EXITCODE=1" ERR
 set +e
 
 rapids-logger "pytest pylibcudf"
-pushd python/cudf/cudf/pylibcudf_tests
+pushd python/pylibcudf/pylibcudf/tests
 python -m pytest \
   --cache-clear \
   --dist=worksteal \
diff --git a/ci/test_wheel_cudf.sh b/ci/test_wheel_cudf.sh
index fdb61278d36..28ded2f8e0f 100755
--- a/ci/test_wheel_cudf.sh
+++ b/ci/test_wheel_cudf.sh
@@ -4,10 +4,31 @@ set -eou pipefail
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
+
+# Download the cudf, libcudf, and pylibcudf wheels built in the previous step
+RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
+RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
+
+rapids-logger "Install cudf, pylibcudf, and test requirements"
+
+# Constrain to minimum dependency versions if job is set up as "oldest"
+echo "" > ./constraints.txt
+if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
+  rapids-dependency-file-generator \
+    --output requirements \
+    --file-key py_test_cudf \
+    --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
+  | tee ./constraints.txt
+fi
 
 # echo to expand wildcard before adding `[extra]` requires for pip
-python -m pip install $(echo ./dist/cudf*.whl)[test]
+python -m pip install \
+  -v \
+  --constraint ./constraints.txt \
+  "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
+  "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
+  "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]"
 
 RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
 RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
@@ -15,7 +36,7 @@ mkdir -p "${RAPIDS_TESTS_DIR}"
 
 rapids-logger "pytest pylibcudf"
-pushd python/cudf/cudf/pylibcudf_tests
+pushd python/pylibcudf/pylibcudf/tests
 python -m pytest \
   --cache-clear \
   --dist=worksteal \
diff --git a/ci/test_wheel_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh
index cc9f5788685..9844090258a 100755
--- a/ci/test_wheel_cudf_polars.sh
+++ b/ci/test_wheel_cudf_polars.sh
@@ -10,7 +10,7 @@ set -eou pipefail
 # files in cudf_polars/pylibcudf", rather than "are there changes
 # between upstream and this branch which touch cudf_polars/pylibcudf"
 # TODO: is the target branch exposed anywhere in an environment variable?
-if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ];
+if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/pylibcudf/)" ];
 then
   HAS_CHANGES=1
 else
@@ -18,14 +18,30 @@ else
 fi
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
+RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 python ./dist
 
-# Download the cudf built in the previous step
-RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
-python -m pip install ./local-cudf-dep/cudf*.whl
+# Download the libcudf and pylibcudf wheels built in the previous step
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
+RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
 
-rapids-logger "Install cudf_polars"
-python -m pip install $(echo ./dist/cudf_polars*.whl)[test]
+rapids-logger "Installing cudf_polars and its dependencies"
+
+# Constrain to minimum dependency versions if job is set up as "oldest"
+echo "" > ./constraints.txt
+if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
+  rapids-dependency-file-generator \
+    --output requirements \
+    --file-key py_test_cudf_polars \
+    --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
+  | tee ./constraints.txt
+fi
+
+# echo to expand wildcard before adding `[extra]` requires for pip
+python -m pip install \
+  -v \
+  --constraint ./constraints.txt \
+  "$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
+  "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
+  "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"
 
 rapids-logger "Run cudf_polars tests"
diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh
index c3800d3cc25..0d39807d56c 100755
--- a/ci/test_wheel_dask_cudf.sh
+++ b/ci/test_wheel_dask_cudf.sh
@@ -4,14 +4,32 @@ set -eou pipefail
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
+RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 python ./dist
 
-# Download the cudf built in the previous step
-RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
-python -m pip install ./local-cudf-dep/cudf*.whl
+# Download the cudf, libcudf, and pylibcudf wheels built in the previous step
+RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
+RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
+
+rapids-logger "Install dask_cudf, cudf, pylibcudf, and test requirements"
+
+# Constrain to minimum dependency versions if job is set up as "oldest"
+echo "" > ./constraints.txt
+if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
+  rapids-dependency-file-generator \
+    --output requirements \
+    --file-key py_test_dask_cudf \
+    --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
+  | tee ./constraints.txt
+fi
 
 # echo to expand wildcard before adding `[extra]` requires for pip
-python -m pip install $(echo ./dist/dask_cudf*.whl)[test]
+python -m pip install \
+  -v \
+  --constraint ./constraints.txt \
+  "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
+  "$(echo ./dist/dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
+  "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
+  "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"
 
 RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
 RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index d04804cafaf..7f6967d7287 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -37,37 +37,37 @@ dependencies:
 - hypothesis
 - identify>=2.5.20
 - ipython
-- libarrow-acero==16.1.0.*
-- libarrow-dataset==16.1.0.*
-- libarrow==16.1.0.*
+- jupyter_client
 - libcufile-dev=1.4.0.31
 - libcufile=1.4.0.31
 - libcurand-dev=10.3.0.86
 - libcurand=10.3.0.86
 - libkvikio==24.10.*,>=0.0.0a0
-- libparquet==16.1.0.*
 - librdkafka>=1.9.0,<1.10.0a0
 - librmm==24.10.*,>=0.0.0a0
 - make
 - moto>=4.0.8
 - msgpack-python
 - myst-nb
+- nbconvert
+- nbformat
 - nbsphinx
 - ninja
 - notebook
 - numba>=0.57
-- numpy>=1.23,<2.0a0
+- numpy>=1.23,<3.0a0
 - numpydoc
 - nvcc_linux-64=11.8
 - nvcomp==3.0.6
 - nvtx>=0.2.1
+- openpyxl
 - packaging
+- pandas
 - pandas>=2.0,<2.2.3dev0
 - pandoc
-- pip
 - pre-commit
 - ptxcompiler
-- pyarrow==16.1.0.*
+- pyarrow>=14.0.0,<18.0.0a0
 - pydata-sphinx-theme!=0.14.2
 - pytest-benchmark
 - pytest-cases>=3.8.2
@@ -75,14 +75,14 @@ dependencies:
 - pytest-xdist
 - pytest<8
 - python-confluent-kafka>=1.9.0,<1.10.0a0
-- python>=3.9,<3.12
+- python>=3.10,<3.12
 - pytorch>=2.1.0
 - rapids-build-backend>=0.3.0,<0.4.0.dev0
 - rapids-dask-dependency==24.10.*,>=0.0.0a0
 - rich
 - rmm==24.10.*,>=0.0.0a0
 - s3fs>=2022.3.0
-- scikit-build-core>=0.7.0
+- scikit-build-core>=0.10.0
 - scipy
 - spdlog>=1.12.0,<1.13
 - sphinx
@@ -97,6 +97,4 @@ dependencies:
 - transformers==4.39.3
 - typing_extensions>=4.0.0
 - zlib>=1.2.13
-- pip:
-  - git+https://github.com/python-streamz/streamz.git@master
 name: all_cuda-118_arch-x86_64
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index e2c3558030d..c1315e73f16 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -38,33 +38,33 @@ dependencies:
 - hypothesis
 - identify>=2.5.20
 - ipython
-- libarrow-acero==16.1.0.*
-- libarrow-dataset==16.1.0.*
-- libarrow==16.1.0.*
+- jupyter_client
 - libcufile-dev
 - libcurand-dev
 - libkvikio==24.10.*,>=0.0.0a0
-- libparquet==16.1.0.*
 - librdkafka>=1.9.0,<1.10.0a0
 - librmm==24.10.*,>=0.0.0a0
 - make
 - moto>=4.0.8
 - msgpack-python
 - myst-nb
+- nbconvert
+- nbformat
 - nbsphinx
 - ninja
 - notebook
 - numba>=0.57
-- numpy>=1.23,<2.0a0
+- numpy>=1.23,<3.0a0
 - numpydoc
 - nvcomp==3.0.6
 - nvtx>=0.2.1
+- openpyxl
 - packaging
+- pandas
 - pandas>=2.0,<2.2.3dev0
 - pandoc
-- pip
 - pre-commit
-- pyarrow==16.1.0.*
+- pyarrow>=14.0.0,<18.0.0a0
 - pydata-sphinx-theme!=0.14.2
 - pynvjitlink>=0.0.0a0
 - pytest-benchmark
@@ -73,14 +73,14 @@ dependencies:
 - pytest-xdist
 - pytest<8
 - python-confluent-kafka>=1.9.0,<1.10.0a0
-- python>=3.9,<3.12
+- python>=3.10,<3.12
 - pytorch>=2.1.0
 - rapids-build-backend>=0.3.0,<0.4.0.dev0
 - rapids-dask-dependency==24.10.*,>=0.0.0a0
 - rich
 - rmm==24.10.*,>=0.0.0a0
 - s3fs>=2022.3.0
-- scikit-build-core>=0.7.0
+- scikit-build-core>=0.10.0
 - scipy
 - spdlog>=1.12.0,<1.13
 - sphinx
@@ -95,6 +95,4 @@ dependencies:
 - transformers==4.39.3
 - typing_extensions>=4.0.0
 - zlib>=1.2.13
-- pip:
-  - git+https://github.com/python-streamz/streamz.git@master
 name: all_cuda-125_arch-x86_64
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 9137f099ad1..e22b4a4eddc 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -62,12 +62,10 @@ requirements:
     - python
    - cython >=3.0.3
     - rapids-build-backend >=0.3.0,<0.4.0.dev0
-    - scikit-build-core >=0.7.0
+    - scikit-build-core >=0.10.0
     - dlpack >=0.8,<1.0
-    # TODO: Change to `2.0` for NumPy 2
-    - numpy 1.23
-    - pyarrow ==16.1.0.*
     - libcudf ={{ version }}
+    - pylibcudf ={{ version }}
     - rmm ={{ minor_version }}
   {% if cuda_major == "11" %}
     - cudatoolkit
@@ -83,10 +81,10 @@ requirements:
     - pandas >=2.0,<2.2.3dev0
     - cupy >=12.0.0
     - numba >=0.57
-    # TODO: Update `numpy` in `host` when dropping `<2.0a0`
-    - numpy >=1.23,<2.0a0
-    - {{ pin_compatible('pyarrow', max_pin='x.x') }}
+    - numpy >=1.23,<3.0a0
+    - pyarrow>=14.0.0,<18.0.0a0
     - libcudf ={{ version }}
+    - pylibcudf ={{ version }}
     - {{ pin_compatible('rmm', max_pin='x.x') }}
     - fsspec >=0.6.0
   {% if cuda_major == "11" %}
diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml
index 1b0e0e2c236..d04d9b21a46 100644
--- a/conda/recipes/cudf_kafka/meta.yaml
+++ b/conda/recipes/cudf_kafka/meta.yaml
@@ -58,10 +58,10 @@ requirements:
     - python
     - cython >=3.0.3
     - cuda-version ={{ cuda_version }}
-    - cudf ={{ version }}
+    - pylibcudf ={{ version }}
     - libcudf_kafka ={{ version }}
     - rapids-build-backend >=0.3.0,<0.4.0.dev0
-    - scikit-build-core >=0.7.0
+    - scikit-build-core >=0.10.0
   {% if cuda_major != "11" %}
     - cuda-cudart-dev
   {% endif %}
@@ -69,7 +69,7 @@ requirements:
     - python
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
     - libcudf_kafka ={{ version }}
-    - cudf ={{ version }}
+    - pylibcudf ={{ version }}
   {% if cuda_major != "11" %}
     - cuda-cudart
   {% endif %}
diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml
index ff7458caf82..4b1c4cca828 100644
--- a/conda/recipes/libcudf/conda_build_config.yaml
+++ b/conda/recipes/libcudf/conda_build_config.yaml
@@ -19,9 +19,6 @@ c_stdlib_version:
 cmake_version:
   - ">=3.26.4,!=3.30.0"
 
-libarrow_version:
-  - "==16.1.0"
-
 dlpack_version:
   - ">=0.8,<1.0"
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index aa1c94a4bca..1c2e9e8dd98 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -64,7 +64,6 @@ requirements:
   {% endif %}
     - cuda-version ={{ cuda_version }}
     - nvcomp {{ nvcomp_version }}
-    - libarrow {{ libarrow_version }}
     - dlpack {{ dlpack_version }}
     - librdkafka {{ librdkafka_version }}
     - fmt {{ fmt_version }}
@@ -92,7 +91,6 @@ outputs:
       - cmake {{ cmake_version }}
     host:
       - cuda-version ={{ cuda_version }}
-      - libarrow {{ libarrow_version }}
     run:
       - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
   {% if cuda_major == "11" %}
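The recipes below complete the conda side of the split: pylibcudf builds first against the C++ channel, and the cudf build then adds the local conda-bld output as an extra channel so its `pylibcudf ={{ version }}` host pin can resolve. Condensed from ci/build_python.sh earlier in this diff (channel variables as defined there):

    rapids-conda-retry mambabuild --no-test --channel "${CPP_CHANNEL}" conda/recipes/pylibcudf
    rapids-conda-retry mambabuild --no-test --channel "${CPP_CHANNEL}" \
        --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" conda/recipes/cudf
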
diff --git a/conda/recipes/pylibcudf/build.sh b/conda/recipes/pylibcudf/build.sh
new file mode 100644
index 00000000000..483346504db
--- /dev/null
+++ b/conda/recipes/pylibcudf/build.sh
@@ -0,0 +1,4 @@
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+
+# This assumes the script is executed from the root of the repo directory
+./build.sh pylibcudf
diff --git a/conda/recipes/pylibcudf/conda_build_config.yaml b/conda/recipes/pylibcudf/conda_build_config.yaml
new file mode 100644
index 00000000000..af894cccda0
--- /dev/null
+++ b/conda/recipes/pylibcudf/conda_build_config.yaml
@@ -0,0 +1,20 @@
+c_compiler_version:
+  - 11
+
+cxx_compiler_version:
+  - 11
+
+c_stdlib:
+  - sysroot
+
+c_stdlib_version:
+  - "2.17"
+
+cmake_version:
+  - ">=3.26.4,!=3.30.0"
+
+cuda_compiler:
+  - cuda-nvcc
+
+cuda11_compiler:
+  - nvcc
diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml
new file mode 100644
index 00000000000..7c1efa0176c
--- /dev/null
+++ b/conda/recipes/pylibcudf/meta.yaml
@@ -0,0 +1,104 @@
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
+{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
+{% set py_version = environ['CONDA_PY'] %}
+{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
+{% set cuda_major = cuda_version.split('.')[0] %}
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
+
+package:
+  name: pylibcudf
+  version: {{ version }}
+
+source:
+  path: ../../..
+
+build:
+  number: {{ GIT_DESCRIBE_NUMBER }}
+  string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+  script_env:
+    - AWS_ACCESS_KEY_ID
+    - AWS_SECRET_ACCESS_KEY
+    - AWS_SESSION_TOKEN
+    - CMAKE_C_COMPILER_LAUNCHER
+    - CMAKE_CUDA_COMPILER_LAUNCHER
+    - CMAKE_CXX_COMPILER_LAUNCHER
+    - CMAKE_GENERATOR
+    - PARALLEL_LEVEL
+    - SCCACHE_BUCKET
+    - SCCACHE_IDLE_TIMEOUT
+    - SCCACHE_REGION
+    - SCCACHE_S3_KEY_PREFIX=pylibcudf-aarch64 # [aarch64]
+    - SCCACHE_S3_KEY_PREFIX=pylibcudf-linux64 # [linux64]
+    - SCCACHE_S3_USE_SSL
+    - SCCACHE_S3_NO_CREDENTIALS
+  ignore_run_exports:
+    # libcudf's run_exports pinning is looser than we would like
+    - libcudf
+  ignore_run_exports_from:
+    {% if cuda_major == "11" %}
+    - {{ compiler('cuda11') }}
+    {% else %}
+    - {{ compiler('cuda') }}
+    - cuda-cudart-dev
+    - libcufile-dev # [linux64]
+    {% endif %}
+
+requirements:
+  build:
+    - cmake {{ cmake_version }}
+    - ninja
+    - {{ compiler('c') }}
+    - {{ compiler('cxx') }}
+    {% if cuda_major == "11" %}
+    - {{ compiler('cuda11') }} ={{ cuda_version }}
+    {% else %}
+    - {{ compiler('cuda') }}
+    {% endif %}
+    - cuda-version ={{ cuda_version }}
+    - {{ stdlib("c") }}
+  host:
+    - python
+    - cython >=3.0.3
+    - rapids-build-backend >=0.3.0,<0.4.0.dev0
+    - scikit-build-core >=0.10.0
+    - dlpack >=0.8,<1.0
+    - libcudf ={{ version }}
+    - rmm ={{ minor_version }}
+    {% if cuda_major == "11" %}
+    - cudatoolkit
+    {% else %}
+    - cuda-cudart-dev
+    - cuda-nvrtc
+    - libcufile-dev # [linux64]
+    {% endif %}
+    - cuda-version ={{ cuda_version }}
+  run:
+    - python
+    - typing_extensions >=4.0.0
+    - pandas >=2.0,<2.2.3dev0
+    - numpy >=1.23,<3.0a0
+    - pyarrow>=14.0.0,<18.0.0a0
+    - {{ pin_compatible('rmm', max_pin='x.x') }}
+    - fsspec >=0.6.0
+    {% if cuda_major == "11" %}
+    - cuda-python >=11.7.1,<12.0a0
+    {% else %}
+    - cuda-python >=12.0,<13.0a0
+    {% endif %}
+    - nvtx >=0.2.1
+    - packaging
+
+test:
+  requires:
+    - cuda-version ={{ cuda_version }}
+  imports:
+    - pylibcudf
+
+about:
+  home: https://rapids.ai/
+  license: Apache-2.0
+  license_family: APACHE
+  license_file: LICENSE
+  summary: pylibcudf library
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 310bc99b279..1040fcb7b91 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -54,11 +54,6 @@ mark_as_advanced(CUDF_BUILD_TESTUTIL)
 option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON)
 option(CUDF_LARGE_STRINGS_DISABLED "Build with large string support disabled" OFF)
 mark_as_advanced(CUDF_LARGE_STRINGS_DISABLED)
-option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF)
-option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF)
-option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF)
-option(CUDF_ENABLE_ARROW_PARQUET "Find (or build) Arrow with Parquet support" OFF)
-option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem support" OFF)
 option(
   CUDF_USE_PER_THREAD_DEFAULT_STREAM
   "Build cuDF with per-thread default stream, including passing the per-thread default
@@ -81,8 +76,6 @@ option(CUDA_ENABLE_LINEINFO
 option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
 # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
 option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
-option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
-mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
 
 set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
 if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS)
@@ -100,8 +93,6 @@ message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
 message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}")
 message(VERBOSE "CUDF: Build cuDF shared libraries: ${BUILD_SHARED_LIBS}")
 message(VERBOSE "CUDF: Use a file cache for JIT compiled kernels: ${JITIFY_USE_CACHE}")
-message(VERBOSE "CUDF: Build and statically link Arrow libraries: ${CUDF_USE_ARROW_STATIC}")
-message(VERBOSE "CUDF: Build and enable S3 filesystem support for Arrow: ${CUDF_ENABLE_ARROW_S3}")
 message(VERBOSE "CUDF: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}")
 message(
   VERBOSE
@@ -192,8 +183,6 @@ include(cmake/thirdparty/get_nvcomp.cmake)
 include(cmake/thirdparty/get_cccl.cmake)
 # find rmm
 include(cmake/thirdparty/get_rmm.cmake)
-# find arrow
-include(cmake/thirdparty/get_arrow.cmake)
 # find flatbuffers
 include(cmake/thirdparty/get_flatbuffers.cmake)
 # find dlpack
@@ -363,17 +352,14 @@ add_library(
   src/hash/sha512_hash.cu
   src/hash/xxhash_64.cu
   src/interop/dlpack.cpp
-  src/interop/from_arrow.cu
   src/interop/arrow_utilities.cpp
   src/interop/decimal_conversion_utilities.cu
-  src/interop/to_arrow.cu
   src/interop/to_arrow_device.cu
   src/interop/to_arrow_host.cu
   src/interop/from_arrow_device.cu
   src/interop/from_arrow_host.cu
   src/interop/from_arrow_stream.cu
   src/interop/to_arrow_schema.cpp
-  src/interop/detail/arrow_allocator.cpp
   src/io/avro/avro.cpp
   src/io/avro/avro_gpu.cu
   src/io/avro/reader_impl.cu
@@ -392,7 +378,6 @@ add_library(
   src/io/csv/reader_impl.cu
   src/io/csv/writer_impl.cu
   src/io/functions.cpp
-  src/io/json/byte_range_info.cu
   src/io/json/json_column.cu
   src/io/json/json_normalization.cu
   src/io/json/json_tree.cu
@@ -439,7 +424,6 @@ add_library(
   src/io/text/bgzip_data_chunk_source.cu
   src/io/text/bgzip_utils.cpp
   src/io/text/multibyte_split.cu
-  src/io/utilities/arrow_io_source.cpp
   src/io/utilities/base64_utilities.cpp
   src/io/utilities/column_buffer.cpp
   src/io/utilities/column_buffer_strings.cu
@@ -671,6 +655,7 @@ add_library(
   src/unary/math_ops.cu
   src/unary/nan_ops.cu
   src/unary/null_ops.cu
+  src/utilities/cuda.cpp
   src/utilities/cuda_memcpy.cu
   src/utilities/default_stream.cpp
   src/utilities/host_memory.cpp
@@ -811,7 +796,7 @@ add_dependencies(cudf jitify_preprocess_run)
 # Specify the target module library dependencies
 target_link_libraries(
   cudf
-  PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm $
+  PUBLIC CCCL::CCCL rmm::rmm $
   PRIVATE $ cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio $
           nanoarrow
 )
@@ -1060,37 +1045,12 @@ following IMPORTED GLOBAL targets:
 ]=]
 )
 
-if(CUDF_ENABLE_ARROW_PARQUET)
-  string(
-    APPEND
-    install_code_string
-    [=[
-    if(NOT Parquet_DIR)
-      set(Parquet_DIR "${Arrow_DIR}")
-    endif()
-    set(ArrowDataset_DIR "${Arrow_DIR}")
-    find_dependency(ArrowDataset)
-    ]=]
-  )
-endif()
-
-string(
-  APPEND
-  install_code_string
-  [=[
-if(testing IN_LIST cudf_FIND_COMPONENTS)
-  enable_language(CUDA)
-endif()
-]=]
-)
-
 rapids_export(
   INSTALL cudf EXPORT_SET cudf-exports ${_components_export_string} GLOBAL_TARGETS cudf cudftestutil
   NAMESPACE cudf:: DOCUMENTATION doc_string
-  FINAL_CODE_BLOCK install_code_string
 )
 
 # ##################################################################################################
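With the Arrow acquisition logic gone, none of the removed `CUDF_*_ARROW_*` options are recognized any longer; interop now goes through nanoarrow. A hypothetical minimal configure for libcudf after this change, using only options that remain in the file above:

    cmake -S cpp -B cpp/build -GNinja \
        -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=ON
    cmake --build cpp/build
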
ConfigureNVBench( - REDUCTION_NVBENCH reduction/rank.cpp reduction/scan_structs.cpp reduction/segmented_reduce.cpp + REDUCTION_NVBENCH + reduction/anyall.cpp + reduction/dictionary.cpp + reduction/minmax.cpp + reduction/rank.cpp + reduction/reduce.cpp + reduction/scan.cpp + reduction/scan_structs.cpp + reduction/segmented_reduce.cpp ) # ################################################################################################## @@ -320,7 +346,6 @@ ConfigureBench( string/filter.cpp string/repeat_strings.cpp string/replace.cpp - string/slice.cpp string/translate.cpp string/url_decode.cu ) @@ -341,6 +366,7 @@ ConfigureNVBench( string/like.cpp string/replace_re.cpp string/reverse.cpp + string/slice.cpp string/split.cpp string/split_re.cpp ) @@ -353,6 +379,11 @@ ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader ConfigureNVBench(JSON_READER_OPTION_NVBENCH io/json/json_reader_option.cpp) ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp) +# ################################################################################################## +# * multi buffer memset benchmark +# ---------------------------------------------------------------------- +ConfigureNVBench(BATCHED_MEMSET_BENCH io/utilities/batched_memset_bench.cpp) + # ################################################################################################## # * io benchmark --------------------------------------------------------------------- ConfigureNVBench(MULTIBYTE_SPLIT_NVBENCH io/text/multibyte_split.cpp) diff --git a/cpp/benchmarks/common/tpch_data_generator/random_column_generator.cu b/cpp/benchmarks/common/tpch_data_generator/random_column_generator.cu new file mode 100644 index 00000000000..4246bd1a83b --- /dev/null +++ b/cpp/benchmarks/common/tpch_data_generator/random_column_generator.cu @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "random_column_generator.hpp" + +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +namespace cudf::datagen { + +namespace { + +// Functor for generating random strings +struct random_string_generator { + char* chars; + thrust::default_random_engine engine; + thrust::uniform_int_distribution char_dist; + + CUDF_HOST_DEVICE random_string_generator(char* c) : chars(c), char_dist(44, 122) {} + + __device__ void operator()(thrust::tuple str_begin_end) + { + auto begin = thrust::get<0>(str_begin_end); + auto end = thrust::get<1>(str_begin_end); + engine.discard(begin); + for (auto i = begin; i < end; ++i) { + auto ch = char_dist(engine); + if (i == end - 1 && ch >= '\x7F') ch = ' '; // last element ASCII only. 
+ if (ch >= '\x7F') // x7F is at the top edge of ASCII + chars[i++] = '\xC4'; // these characters are assigned two bytes + chars[i] = static_cast(ch + (ch >= '\x7F')); + } + } +}; + +// Functor for generating random numbers +template +struct random_number_generator { + T lower; + T upper; + + CUDF_HOST_DEVICE random_number_generator(T lower, T upper) : lower(lower), upper(upper) {} + + __device__ T operator()(const int64_t idx) const + { + if constexpr (cudf::is_integral()) { + thrust::default_random_engine engine; + thrust::uniform_int_distribution dist(lower, upper); + engine.discard(idx); + return dist(engine); + } else { + thrust::default_random_engine engine; + thrust::uniform_real_distribution dist(lower, upper); + engine.discard(idx); + return dist(engine); + } + } +}; + +} // namespace + +std::unique_ptr generate_random_string_column(cudf::size_type lower, + cudf::size_type upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto offsets_begin = cudf::detail::make_counting_transform_iterator( + 0, random_number_generator(lower, upper)); + auto [offsets_column, computed_bytes] = cudf::strings::detail::make_offsets_child_column( + offsets_begin, offsets_begin + num_rows, stream, mr); + rmm::device_uvector chars(computed_bytes, stream); + + auto const offset_itr = + cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view()); + + // We generate the strings in parallel into the `chars` vector using the + // offsets vector generated above. + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_zip_iterator(offset_itr, offset_itr + 1), + num_rows, + random_string_generator(chars.data())); + + return cudf::make_strings_column( + num_rows, std::move(offsets_column), chars.release(), 0, rmm::device_buffer{}); +} + +template +std::unique_ptr generate_random_numeric_column(T lower, + T upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto col = cudf::make_numeric_column( + cudf::data_type{cudf::type_to_id()}, num_rows, cudf::mask_state::UNALLOCATED, stream, mr); + cudf::size_type begin = 0; + cudf::size_type end = num_rows; + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(begin), + thrust::make_counting_iterator(end), + col->mutable_view().begin(), + random_number_generator(lower, upper)); + return col; +} + +template std::unique_ptr generate_random_numeric_column( + int8_t lower, + int8_t upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template std::unique_ptr generate_random_numeric_column( + int16_t lower, + int16_t upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template std::unique_ptr generate_random_numeric_column( + cudf::size_type lower, + cudf::size_type upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template std::unique_ptr generate_random_numeric_column( + double lower, + double upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +std::unique_ptr generate_primary_key_column(cudf::scalar const& start, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return cudf::sequence(num_rows, start, stream, mr); +} + +std::unique_ptr generate_repeat_string_column(std::string 
const& value, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const scalar = cudf::string_scalar(value); + return cudf::make_column_from_scalar(scalar, num_rows, stream, mr); +} + +std::unique_ptr generate_random_string_column_from_set( + cudf::host_span set, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Build a gather map of random strings to choose from + // The size of the string sets always fits within 16-bit integers + auto const indices = + generate_primary_key_column(cudf::numeric_scalar(0), set.size(), stream, mr); + auto const keys = cudf::test::strings_column_wrapper(set.begin(), set.end()).release(); + auto const gather_map = cudf::table_view({indices->view(), keys->view()}); + + // Build a column of random keys to gather from the set + auto const gather_keys = + generate_random_numeric_column(0, set.size() - 1, num_rows, stream, mr); + + // Perform the gather operation + auto const gathered_table = cudf::gather( + gather_map, gather_keys->view(), cudf::out_of_bounds_policy::DONT_CHECK, stream, mr); + auto gathered_table_columns = gathered_table->release(); + return std::move(gathered_table_columns[1]); +} + +template +std::unique_ptr generate_repeat_sequence_column(T seq_length, + bool zero_indexed, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto pkey = + generate_primary_key_column(cudf::numeric_scalar(0), num_rows, stream, mr); + auto repeat_seq_zero_indexed = cudf::binary_operation(pkey->view(), + cudf::numeric_scalar(seq_length), + cudf::binary_operator::MOD, + cudf::data_type{cudf::type_to_id()}, + stream, + mr); + if (zero_indexed) { return repeat_seq_zero_indexed; } + return cudf::binary_operation(repeat_seq_zero_indexed->view(), + cudf::numeric_scalar(1), + cudf::binary_operator::ADD, + cudf::data_type{cudf::type_to_id()}, + stream, + mr); +} + +template std::unique_ptr generate_repeat_sequence_column( + int8_t seq_length, + bool zero_indexed, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template std::unique_ptr generate_repeat_sequence_column( + cudf::size_type seq_length, + bool zero_indexed, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +} // namespace cudf::datagen diff --git a/cpp/benchmarks/common/tpch_data_generator/random_column_generator.hpp b/cpp/benchmarks/common/tpch_data_generator/random_column_generator.hpp new file mode 100644 index 00000000000..3e254f49805 --- /dev/null +++ b/cpp/benchmarks/common/tpch_data_generator/random_column_generator.hpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include + +namespace cudf::datagen { + +/** + * @brief Generate a column of random strings + * + * @param lower The lower bound of the length of the strings + * @param upper The upper bound of the length of the strings + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_random_string_column( + cudf::size_type lower, + cudf::size_type upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Generate a column of random numbers + * + * Example: + * + * lower = 10 + * upper = 15 + * num_rows = 10 + * result = [10, 11, 14, 14, 13, 12, 11, 11, 12, 14] + + * + * @param lower The lower bound of the random numbers + * @param upper The upper bound of the random numbers + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +template +std::unique_ptr generate_random_numeric_column( + T lower, + T upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Generate a primary key column + * + * Example: + * + * start = 1 + * num_rows = 10 + * result = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + * + * @param start The starting value of the primary key + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_primary_key_column( + cudf::scalar const& start, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Generate a column where all the rows have the same string value + * + * Example: + * + * value = "abc" + * num_rows = 5 + * result = ["abc", "abc", "abc", "abc", "abc"] + * + * @param value The string value to fill the column with + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_repeat_string_column( + std::string const& value, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Generate a column by randomly choosing from set of strings + * + * Example: + * + * set = {"s1", "s2", "s3"} + * num_rows = 10 + * result = ["s1", "s2", "s2", "s1", "s3", "s3", "s3", "s2", "s1", "s1"] + * + * @param set The set of strings to choose from + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_random_string_column_from_set( + cudf::host_span set, + cudf::size_type num_rows, + 
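// Illustrative use (names hypothetical): draw 1,000 rows from a fixed vocabulary, e.g.
+  //   auto col = cudf::datagen::generate_random_string_column_from_set(
+  //     cudf::host_span<const char* const>(vocab.data(), vocab.size()), 1000);
+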
rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Generate a column consisting of a repeating sequence of integers + * + * Example: + * + * seq_length = 3 + * zero_indexed = false + * num_rows = 10 + * result = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1] + * + * @param seq_length The length of the repeating sequence + * @param zero_indexed Whether the sequence is zero or one indexed + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +template +std::unique_ptr generate_repeat_sequence_column( + T seq_length, + bool zero_indexed, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +} // namespace cudf::datagen diff --git a/cpp/benchmarks/common/tpch_data_generator/table_helpers.cpp b/cpp/benchmarks/common/tpch_data_generator/table_helpers.cpp new file mode 100644 index 00000000000..36bf9c49cea --- /dev/null +++ b/cpp/benchmarks/common/tpch_data_generator/table_helpers.cpp @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "table_helpers.hpp" + +#include "random_column_generator.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace cudf::datagen { + +/** + * @brief Add a column of days to a column of timestamp_days + * + * @param timestamp_days The column of timestamp_days + * @param days The column of days to add + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr add_calendrical_days(cudf::column_view const& timestamp_days, + cudf::column_view const& days, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const days_duration_type = cudf::cast(days, cudf::data_type{cudf::type_id::DURATION_DAYS}); + auto const data_type = cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}; + return cudf::binary_operation( + timestamp_days, days_duration_type->view(), cudf::binary_operator::ADD, data_type, stream, mr); +} + +/** + * @brief Perform a left join operation between two tables + * + * @param left_input The left table + * @param right_input The right table + * @param left_on The indices of the columns to join on in the left table + * @param right_on The indices of the columns to join on in the right table + * @param compare_nulls The null equality comparison + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned table's device memory + */ +std::unique_ptr perform_left_join(cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + constexpr auto oob_policy = cudf::out_of_bounds_policy::NULLIFY; + auto const left_selected = left_input.select(left_on); + auto const right_selected = right_input.select(right_on); + auto const [left_join_indices, right_join_indices] = + cudf::left_join(left_selected, right_selected, cudf::null_equality::EQUAL, mr); + + auto const left_indices_span = cudf::device_span{*left_join_indices}; + auto const right_indices_span = cudf::device_span{*right_join_indices}; + + auto const left_indices_col = cudf::column_view{left_indices_span}; + auto const right_indices_col = cudf::column_view{right_indices_span}; + + auto const left_result = cudf::gather(left_input, left_indices_col, oob_policy, stream, mr); + auto const right_result = cudf::gather(right_input, right_indices_col, oob_policy, stream, mr); + + auto joined_cols = left_result->release(); + auto right_cols = right_result->release(); + joined_cols.insert(joined_cols.end(), + std::make_move_iterator(right_cols.begin()), + std::make_move_iterator(right_cols.end())); + return std::make_unique(std::move(joined_cols)); +} + +/** + * @brief Generate the `p_retailprice` column of the `part` table + * + * @param p_partkey The `p_partkey` column of the `part` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_p_retailprice( + cudf::column_view const& p_partkey, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Expression: 
(90000 + ((p_partkey/10) modulo 20001) + 100 * (p_partkey modulo 1000)) / 100 + auto table = cudf::table_view({p_partkey}); + auto p_partkey_col_ref = cudf::ast::column_reference(0); + + auto scalar_10 = cudf::numeric_scalar(10); + auto scalar_100 = cudf::numeric_scalar(100); + auto scalar_1000 = cudf::numeric_scalar(1000); + auto scalar_20001 = cudf::numeric_scalar(20001); + auto scalar_90000 = cudf::numeric_scalar(90000); + + auto literal_10 = cudf::ast::literal(scalar_10); + auto literal_100 = cudf::ast::literal(scalar_100); + auto literal_1000 = cudf::ast::literal(scalar_1000); + auto literal_20001 = cudf::ast::literal(scalar_20001); + auto literal_90000 = cudf::ast::literal(scalar_90000); + + auto expr_a = cudf::ast::operation(cudf::ast::ast_operator::DIV, p_partkey_col_ref, literal_10); + auto expr_b = cudf::ast::operation(cudf::ast::ast_operator::MOD, expr_a, literal_20001); + auto expr_c = cudf::ast::operation(cudf::ast::ast_operator::MOD, p_partkey_col_ref, literal_1000); + auto expr_d = cudf::ast::operation(cudf::ast::ast_operator::MUL, expr_c, literal_100); + auto expr_e = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_b, expr_d); + auto expr_f = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_e, literal_90000); + auto final_expr = cudf::ast::operation(cudf::ast::ast_operator::TRUE_DIV, expr_f, literal_100); + + // Execute the AST expression + return cudf::compute_column(table, final_expr, stream, mr); +} + +/** + * @brief Generate the `l_suppkey` column of the `lineitem` table + * + * @param l_partkey The `l_partkey` column of the `lineitem` table + * @param scale_factor The scale factor to use + * @param num_rows The number of rows in the `lineitem` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_l_suppkey(cudf::column_view const& l_partkey, + cudf::size_type scale_factor, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Expression: (l_partkey + (i * (s/4 + (int)(l_partkey - 1)/s))) % s + 1 + + // Generate the `s` col + auto s_empty = cudf::make_numeric_column( + cudf::data_type{cudf::type_id::INT32}, num_rows, cudf::mask_state::UNALLOCATED, stream); + + auto s = cudf::fill(s_empty->view(), + 0, + num_rows, + cudf::numeric_scalar(scale_factor * 10'000), + stream, + mr); + + // Generate the `i` col + auto i = generate_repeat_sequence_column(4, true, num_rows, stream, mr); + + // Create a table view out of `l_partkey`, `s`, and `i` + auto table = cudf::table_view({l_partkey, s->view(), i->view()}); + + // Create the AST expression + auto scalar_1 = cudf::numeric_scalar(1); + auto scalar_4 = cudf::numeric_scalar(4); + auto literal_1 = cudf::ast::literal(scalar_1); + auto literal_4 = cudf::ast::literal(scalar_4); + + auto l_partkey_col_ref = cudf::ast::column_reference(0); + auto s_col_ref = cudf::ast::column_reference(1); + auto i_col_ref = cudf::ast::column_reference(2); + + // (int)(l_partkey - 1)/s + auto expr_a = cudf::ast::operation(cudf::ast::ast_operator::SUB, l_partkey_col_ref, literal_1); + auto expr_b = cudf::ast::operation(cudf::ast::ast_operator::DIV, expr_a, s_col_ref); + + // s/4 + auto expr_c = cudf::ast::operation(cudf::ast::ast_operator::DIV, s_col_ref, literal_4); + + // (s/4 + (int)(l_partkey - 1)/s) + auto expr_d = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_c, expr_b); + + // (i * 
(s/4 + (int)(l_partkey - 1)/s)) + auto expr_e = cudf::ast::operation(cudf::ast::ast_operator::MUL, i_col_ref, expr_d); + + // (l_partkey + (i * (s/4 + (int)(l_partkey - 1)/s))) + auto expr_f = cudf::ast::operation(cudf::ast::ast_operator::ADD, l_partkey_col_ref, expr_e); + + // (l_partkey + (i * (s/4 + (int)(l_partkey - 1)/s))) % s + auto expr_g = cudf::ast::operation(cudf::ast::ast_operator::MOD, expr_f, s_col_ref); + + // (l_partkey + (i * (s/4 + (int)(l_partkey - 1)/s))) % s + 1 + auto final_expr = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_g, literal_1); + + // Execute the AST expression + return cudf::compute_column(table, final_expr, stream, mr); +} + +/** + * @brief Generate the `ps_suppkey` column of the `partsupp` table + * + * @param ps_partkey The `ps_partkey` column of the `partsupp` table + * @param scale_factor The scale factor to use + * @param num_rows The number of rows in the `partsupp` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_ps_suppkey( + cudf::column_view const& ps_partkey, + cudf::size_type scale_factor, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Expression: ps_suppkey = (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s + 1 + + // Generate the `s` col + auto s_empty = cudf::make_numeric_column( + cudf::data_type{cudf::type_id::INT32}, num_rows, cudf::mask_state::UNALLOCATED, stream); + + auto s = cudf::fill(s_empty->view(), + 0, + num_rows, + cudf::numeric_scalar(scale_factor * 10'000), + stream, + mr); + + // Generate the `i` col + auto i = generate_repeat_sequence_column(4, true, num_rows, stream, mr); + + // Create a table view out of `p_partkey`, `s`, and `i` + auto table = cudf::table_view({ps_partkey, s->view(), i->view()}); + + // Create the AST expression + auto scalar_1 = cudf::numeric_scalar(1); + auto scalar_4 = cudf::numeric_scalar(4); + auto literal_1 = cudf::ast::literal(scalar_1); + auto literal_4 = cudf::ast::literal(scalar_4); + + auto ps_partkey_col_ref = cudf::ast::column_reference(0); + auto s_col_ref = cudf::ast::column_reference(1); + auto i_col_ref = cudf::ast::column_reference(2); + + // (int)(ps_partkey - 1)/s + auto expr_a = cudf::ast::operation(cudf::ast::ast_operator::SUB, ps_partkey_col_ref, literal_1); + auto expr_b = cudf::ast::operation(cudf::ast::ast_operator::DIV, expr_a, s_col_ref); + + // s/4 + auto expr_c = cudf::ast::operation(cudf::ast::ast_operator::DIV, s_col_ref, literal_4); + + // (s/4 + (int)(ps_partkey - 1)/s) + auto expr_d = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_c, expr_b); + + // (i * (s/4 + (int)(ps_partkey - 1)/s)) + auto expr_e = cudf::ast::operation(cudf::ast::ast_operator::MUL, i_col_ref, expr_d); + + // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) + auto expr_f = cudf::ast::operation(cudf::ast::ast_operator::ADD, ps_partkey_col_ref, expr_e); + + // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s + auto expr_g = cudf::ast::operation(cudf::ast::ast_operator::MOD, expr_f, s_col_ref); + + // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s + 1 + auto final_expr = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_g, literal_1); + + // Execute the AST expression + return cudf::compute_column(table, final_expr, stream, mr); +} + +/** + * @brief Calculate the cardinality of the `lineitem` 
table + * + * @param o_rep_freqs The frequency of each `o_orderkey` value in the `lineitem` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] cudf::size_type calculate_l_cardinality(cudf::column_view const& o_rep_freqs, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const sum_agg = cudf::make_sum_aggregation(); + auto const l_num_rows_scalar = + cudf::reduce(o_rep_freqs, *sum_agg, cudf::data_type{cudf::type_id::INT32}, stream, mr); + return reinterpret_cast*>(l_num_rows_scalar.get()) + ->value(stream); +} + +/** + * @brief Calculate the charge column for the `lineitem` table + * + * @param extendedprice The `l_extendedprice` column + * @param tax The `l_tax` column + * @param discount The `l_discount` column + * @param stream The CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_charge(cudf::column_view const& extendedprice, + cudf::column_view const& tax, + cudf::column_view const& discount, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const one = cudf::numeric_scalar(1); + auto const one_minus_discount = cudf::binary_operation( + one, discount, cudf::binary_operator::SUB, cudf::data_type{cudf::type_id::FLOAT64}, stream, mr); + auto disc_price = cudf::binary_operation(extendedprice, + one_minus_discount->view(), + cudf::binary_operator::MUL, + cudf::data_type{cudf::type_id::FLOAT64}, + stream, + mr); + auto const one_plus_tax = + cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type(), stream, mr); + return cudf::binary_operation(disc_price->view(), + one_plus_tax->view(), + cudf::binary_operator::MUL, + cudf::data_type{cudf::type_id::FLOAT64}, + stream, + mr); +} + +/** + * @brief Generate a column of random addresses according to TPC-H specification clause 4.2.2.7 + * + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr generate_address_column( + cudf::size_type num_rows, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return generate_random_string_column(10, 40, num_rows, stream, mr); +} + +/** + * @brief Generate a phone number column according to TPC-H specification clause 4.2.2.9 + * + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr generate_phone_column(cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const part_a = cudf::strings::from_integers( + generate_random_numeric_column(10, 34, num_rows, stream, mr)->view()); + auto const part_b = cudf::strings::from_integers( + generate_random_numeric_column(100, 999, num_rows, stream, mr)->view()); + auto const part_c = cudf::strings::from_integers( + generate_random_numeric_column(100, 999, num_rows, stream, mr)->view()); + auto const part_d = cudf::strings::from_integers( + 
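/* 4-digit local segment; with the 2-digit country code (10-34) and the two 3-digit groups above this yields the CC-DDD-DDD-DDDD shape required by clause 4.2.2.9 */ +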
generate_random_numeric_column(1000, 9999, num_rows, stream, mr)->view()); + auto const phone_parts_table = + cudf::table_view({part_a->view(), part_b->view(), part_c->view(), part_d->view()}); + return cudf::strings::concatenate(phone_parts_table, + cudf::string_scalar("-"), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + stream, + mr); +} + +} // namespace cudf::datagen diff --git a/cpp/benchmarks/common/tpch_data_generator/table_helpers.hpp b/cpp/benchmarks/common/tpch_data_generator/table_helpers.hpp new file mode 100644 index 00000000000..11091689469 --- /dev/null +++ b/cpp/benchmarks/common/tpch_data_generator/table_helpers.hpp @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include + +namespace cudf::datagen { + +/** + * @brief Add a column of days to a column of timestamp_days + * + * @param timestamp_days The column of timestamp_days + * @param days The column of days to add + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr add_calendrical_days( + cudf::column_view const& timestamp_days, + cudf::column_view const& days, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Perform a left join operation between two tables + * + * @param left_input The left table + * @param right_input The right table + * @param left_on The indices of the columns to join on in the left table + * @param right_on The indices of the columns to join on in the right table + * @param compare_nulls The null equality comparison + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned table's device memory + */ +std::unique_ptr perform_left_join( + cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Generate the `p_retailprice` column of the `part` table + * + * @param p_partkey The `p_partkey` column of the `part` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_p_retailprice( + cudf::column_view const& p_partkey, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Generate the `l_suppkey` column of the `lineitem` table + * + * @param l_partkey The `l_partkey` column of the `lineitem` 
table + * @param scale_factor The scale factor to use + * @param num_rows The number of rows in the `lineitem` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_l_suppkey( + cudf::column_view const& l_partkey, + cudf::size_type scale_factor, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Generate the `ps_suppkey` column of the `partsupp` table + * + * @param ps_partkey The `ps_partkey` column of the `partsupp` table + * @param scale_factor The scale factor to use + * @param num_rows The number of rows in the `partsupp` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_ps_suppkey( + cudf::column_view const& ps_partkey, + cudf::size_type scale_factor, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); +/** + * @brief Calculate the cardinality of the `lineitem` table + * + * @param o_rep_freqs The frequency of each `o_orderkey` value in the `lineitem` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] cudf::size_type calculate_l_cardinality( + cudf::column_view const& o_rep_freqs, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); +/** + * @brief Calculate the charge column for the `lineitem` table + * + * @param extendedprice The `l_extendedprice` column + * @param tax The `l_tax` column + * @param discount The `l_discount` column + * @param stream The CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_charge( + cudf::column_view const& extendedprice, + cudf::column_view const& tax, + cudf::column_view const& discount, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Generate a column of random addresses according to TPC-H specification clause 4.2.2.7 + * + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr generate_address_column( + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Generate a phone number column according to TPC-H specification clause 4.2.2.9 + * + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr generate_phone_column( + cudf::size_type 
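/* rows to generate; values look like 27-345-678-9012, per clause 4.2.2.9 */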
num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +} // namespace cudf::datagen diff --git a/cpp/benchmarks/common/tpch_data_generator/tpch_data_generator.cpp b/cpp/benchmarks/common/tpch_data_generator/tpch_data_generator.cpp new file mode 100644 index 00000000000..9001c50c5a5 --- /dev/null +++ b/cpp/benchmarks/common/tpch_data_generator/tpch_data_generator.cpp @@ -0,0 +1,987 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tpch_data_generator.hpp" + +#include "random_column_generator.hpp" +#include "table_helpers.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace cudf::datagen { + +namespace { +constexpr std::array nations{ + "ALGERIA", "ARGENTINA", "BRAZIL", "CANADA", "EGYPT", "ETHIOPIA", "FRANCE", + "GERMANY", "INDIA", "INDONESIA", "IRAN", "IRAQ", "JAPAN", "JORDAN", + "KENYA", "MOROCCO", "MOZAMBIQUE", "PERU", "CHINA", "ROMANIA", "SAUDI ARABIA", + "VIETNAM", "RUSSIA", "UNITED KINGDOM", "UNITED STATES"}; + +constexpr std::array years{"1992", "1993", "1994", "1995", "1996", "1997", "1998"}; +constexpr std::array months{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"}; +constexpr std::array days{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", + "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", + "23", "24", "25", "26", "27", "28", "29", "30", "31"}; + +constexpr std::array vocab_p_name{ + "almond", "antique", "aquamarine", "azure", "beige", "bisque", "black", + "blanched", "blue", "blush", "brown", "burlywood", "burnished", "chartreuse", + "chiffon", "chocolate", "coral", "cornflower", "cornsilk", "cream", "cyan", + "dark", "deep", "dim", "dodger", "drab", "firebrick", "floral", + "forest", "frosted", "gainsboro", "ghost", "goldenrod", "green", "grey", + "honeydew", "hot", "indian", "ivory", "khaki", "lace", "lavender", + "lawn", "lemon", "light", "lime", "linen", "magenta", "maroon", + "medium", "metallic", "midnight", "mint", "misty", "moccasin", "navajo", + "navy", "olive", "orange", "orchid", "pale", "papaya", "peach", + "peru", "pink", "plum", "powder", "puff", "purple", "red", + "rose", "rosy", "royal", "saddle", "salmon", "sandy", "seashell", + "sienna", "sky", "slate", "smoke", "snow", "spring", "steel", + "tan", "thistle", "tomato", "turquoise", "violet", "wheat", "white", + "yellow"}; + +constexpr std::array vocab_modes{"REG AIR", "AIR", "RAIL", "SHIP", "TRUCK", "MAIL", "FOB"}; + +constexpr std::array vocab_instructions{ + "DELIVER IN PERSON", "COLLECT COD", "NONE", "TAKE BACK RETURN"}; + +constexpr std::array vocab_priorities{"1-URGENT", "2-HIGH", "3-MEDIUM", "4-NOT SPECIFIED", "5-LOW"}; + +constexpr std::array vocab_segments{ + "AUTOMOBILE", "BUILDING", "FURNITURE", "MACHINERY", "HOUSEHOLD"}; + +constexpr 
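/* 150 entries: six grades x five finishes x five metals, following the TPC-H p_type word lists */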
std::array vocab_types{ + "STANDARD ANODIZED TIN", "STANDARD ANODIZED NICKEL", "STANDARD ANODIZED BRASS", + "STANDARD ANODIZED STEEL", "STANDARD ANODIZED COPPER", "STANDARD BURNISHED TIN", + "STANDARD BURNISHED NICKEL", "STANDARD BURNISHED BRASS", "STANDARD BURNISHED STEEL", + "STANDARD BURNISHED COPPER", "STANDARD PLATED TIN", "STANDARD PLATED NICKEL", + "STANDARD PLATED BRASS", "STANDARD PLATED STEEL", "STANDARD PLATED COPPER", + "STANDARD POLISHED TIN", "STANDARD POLISHED NICKEL", "STANDARD POLISHED BRASS", + "STANDARD POLISHED STEEL", "STANDARD POLISHED COPPER", "STANDARD BRUSHED TIN", + "STANDARD BRUSHED NICKEL", "STANDARD BRUSHED BRASS", "STANDARD BRUSHED STEEL", + "STANDARD BRUSHED COPPER", "SMALL ANODIZED TIN", "SMALL ANODIZED NICKEL", + "SMALL ANODIZED BRASS", "SMALL ANODIZED STEEL", "SMALL ANODIZED COPPER", + "SMALL BURNISHED TIN", "SMALL BURNISHED NICKEL", "SMALL BURNISHED BRASS", + "SMALL BURNISHED STEEL", "SMALL BURNISHED COPPER", "SMALL PLATED TIN", + "SMALL PLATED NICKEL", "SMALL PLATED BRASS", "SMALL PLATED STEEL", + "SMALL PLATED COPPER", "SMALL POLISHED TIN", "SMALL POLISHED NICKEL", + "SMALL POLISHED BRASS", "SMALL POLISHED STEEL", "SMALL POLISHED COPPER", + "SMALL BRUSHED TIN", "SMALL BRUSHED NICKEL", "SMALL BRUSHED BRASS", + "SMALL BRUSHED STEEL", "SMALL BRUSHED COPPER", "MEDIUM ANODIZED TIN", + "MEDIUM ANODIZED NICKEL", "MEDIUM ANODIZED BRASS", "MEDIUM ANODIZED STEEL", + "MEDIUM ANODIZED COPPER", "MEDIUM BURNISHED TIN", "MEDIUM BURNISHED NICKEL", + "MEDIUM BURNISHED BRASS", "MEDIUM BURNISHED STEEL", "MEDIUM BURNISHED COPPER", + "MEDIUM PLATED TIN", "MEDIUM PLATED NICKEL", "MEDIUM PLATED BRASS", + "MEDIUM PLATED STEEL", "MEDIUM PLATED COPPER", "MEDIUM POLISHED TIN", + "MEDIUM POLISHED NICKEL", "MEDIUM POLISHED BRASS", "MEDIUM POLISHED STEEL", + "MEDIUM POLISHED COPPER", "MEDIUM BRUSHED TIN", "MEDIUM BRUSHED NICKEL", + "MEDIUM BRUSHED BRASS", "MEDIUM BRUSHED STEEL", "MEDIUM BRUSHED COPPER", + "LARGE ANODIZED TIN", "LARGE ANODIZED NICKEL", "LARGE ANODIZED BRASS", + "LARGE ANODIZED STEEL", "LARGE ANODIZED COPPER", "LARGE BURNISHED TIN", + "LARGE BURNISHED NICKEL", "LARGE BURNISHED BRASS", "LARGE BURNISHED STEEL", + "LARGE BURNISHED COPPER", "LARGE PLATED TIN", "LARGE PLATED NICKEL", + "LARGE PLATED BRASS", "LARGE PLATED STEEL", "LARGE PLATED COPPER", + "LARGE POLISHED TIN", "LARGE POLISHED NICKEL", "LARGE POLISHED BRASS", + "LARGE POLISHED STEEL", "LARGE POLISHED COPPER", "LARGE BRUSHED TIN", + "LARGE BRUSHED NICKEL", "LARGE BRUSHED BRASS", "LARGE BRUSHED STEEL", + "LARGE BRUSHED COPPER", "ECONOMY ANODIZED TIN", "ECONOMY ANODIZED NICKEL", + "ECONOMY ANODIZED BRASS", "ECONOMY ANODIZED STEEL", "ECONOMY ANODIZED COPPER", + "ECONOMY BURNISHED TIN", "ECONOMY BURNISHED NICKEL", "ECONOMY BURNISHED BRASS", + "ECONOMY BURNISHED STEEL", "ECONOMY BURNISHED COPPER", "ECONOMY PLATED TIN", + "ECONOMY PLATED NICKEL", "ECONOMY PLATED BRASS", "ECONOMY PLATED STEEL", + "ECONOMY PLATED COPPER", "ECONOMY POLISHED TIN", "ECONOMY POLISHED NICKEL", + "ECONOMY POLISHED BRASS", "ECONOMY POLISHED STEEL", "ECONOMY POLISHED COPPER", + "ECONOMY BRUSHED TIN", "ECONOMY BRUSHED NICKEL", "ECONOMY BRUSHED BRASS", + "ECONOMY BRUSHED STEEL", "ECONOMY BRUSHED COPPER", "PROMO ANODIZED TIN", + "PROMO ANODIZED NICKEL", "PROMO ANODIZED BRASS", "PROMO ANODIZED STEEL", + "PROMO ANODIZED COPPER", "PROMO BURNISHED TIN", "PROMO BURNISHED NICKEL", + "PROMO BURNISHED BRASS", "PROMO BURNISHED STEEL", "PROMO BURNISHED COPPER", + "PROMO PLATED TIN", "PROMO PLATED NICKEL", "PROMO PLATED BRASS", + "PROMO PLATED STEEL", 
"PROMO PLATED COPPER", "PROMO POLISHED TIN", + "PROMO POLISHED NICKEL", "PROMO POLISHED BRASS", "PROMO POLISHED STEEL", + "PROMO POLISHED COPPER", "PROMO BRUSHED TIN", "PROMO BRUSHED NICKEL", + "PROMO BRUSHED BRASS", "PROMO BRUSHED STEEL", "PROMO BRUSHED COPPER"}; + +constexpr std::array vocab_containers{ + "SM CASE", "SM BOX", "SM BAG", "SM JAR", "SM PKG", "SM PACK", "SM CAN", + "SM DRUM", "LG CASE", "LG BOX", "LG BAG", "LG JAR", "LG PKG", "LG PACK", + "LG CAN", "LG DRUM", "MED CASE", "MED BOX", "MED BAG", "MED JAR", "MED PKG", + "MED PACK", "MED CAN", "MED DRUM", "JUMBO CASE", "JUMBO BOX", "JUMBO BAG", "JUMBO JAR", + "JUMBO PKG", "JUMBO PACK", "JUMBO CAN", "JUMBO DRUM", "WRAP CASE", "WRAP BOX", "WRAP BAG", + "WRAP JAR", "WRAP PKG", "WRAP PACK", "WRAP CAN", "WRAP DRUM"}; + +} // namespace + +/** + * @brief Generate a table out of the independent columns of the `orders` table + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_orders_independent(double scale_factor, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + cudf::size_type const o_num_rows = scale_factor * 1'500'000; + + // Generate the `o_orderkey` column + auto o_orderkey = [&]() { + auto const o_orderkey_candidates = generate_primary_key_column( + cudf::numeric_scalar(1), 4 * o_num_rows, stream, mr); + auto const o_orderkey_unsorted = cudf::sample(cudf::table_view({o_orderkey_candidates->view()}), + o_num_rows, + cudf::sample_with_replacement::FALSE, + 0, + stream, + mr); + auto const sort_result = + cudf::sort_by_key(o_orderkey_unsorted->view(), + cudf::table_view({o_orderkey_unsorted->view().column(0)}), + {}, + {}, + stream, + mr); + return std::move(sort_result->release()[0]); + }(); + + // Generate the `o_custkey` column + auto o_custkey = [&]() { + auto const col = generate_random_numeric_column( + 1, scale_factor * 49'000, o_num_rows, stream, mr); + auto const col_mul_3 = cudf::binary_operation(col->view(), + cudf::numeric_scalar(3), + cudf::binary_operator::MUL, + cudf::data_type{cudf::type_id::INT32}, + stream, + mr); + return cudf::binary_operation(col_mul_3->view(), + cudf::numeric_scalar(1), + cudf::binary_operator::ADD, + cudf::data_type{cudf::type_id::INT32}, + stream, + mr); + }(); + + // Generate the `o_orderdate` column + auto o_orderdate_ts = [&]() { + auto const o_orderdate_year = generate_random_string_column_from_set( + cudf::host_span(years.data(), years.size()), o_num_rows, stream, mr); + auto const o_orderdate_month = generate_random_string_column_from_set( + cudf::host_span(months.data(), months.size()), o_num_rows, stream, mr); + auto const o_orderdate_day = generate_random_string_column_from_set( + cudf::host_span(days.data(), days.size()), o_num_rows, stream, mr); + auto const o_orderdate_str = cudf::strings::concatenate( + cudf::table_view( + {o_orderdate_year->view(), o_orderdate_month->view(), o_orderdate_day->view()}), + cudf::string_scalar("-"), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + stream, + mr); + + return cudf::strings::to_timestamps(o_orderdate_str->view(), + cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}, + std::string("%Y-%m-%d"), + stream, + mr); + }(); + + // Generate the `o_orderpriority` column + auto o_orderpriority = generate_random_string_column_from_set( + 
cudf::host_span(vocab_priorities.data(), vocab_priorities.size()), + o_num_rows, + stream, + mr); + + // Generate the `o_clerk` column + auto o_clerk = [&]() { + auto const clerk_repeat = generate_repeat_string_column("Clerk#", o_num_rows, stream, mr); + auto const random_c = generate_random_numeric_column( + 1, scale_factor * 1'000, o_num_rows, stream, mr); + auto const random_c_str = cudf::strings::from_integers(random_c->view(), stream, mr); + auto const random_c_str_padded = cudf::strings::zfill(random_c_str->view(), 9, stream, mr); + return cudf::strings::concatenate( + cudf::table_view({clerk_repeat->view(), random_c_str_padded->view()}), + cudf::string_scalar(""), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + stream, + mr); + }(); + + // Generate the `o_shippriority` column + auto o_shippriority = [&]() { + auto const empty = cudf::make_numeric_column( + cudf::data_type{cudf::type_id::INT8}, o_num_rows, cudf::mask_state::UNALLOCATED, stream); + return cudf::fill(empty->view(), 0, o_num_rows, cudf::numeric_scalar(0), stream, mr); + }(); + + // Generate the `o_comment` column + // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification + auto o_comment = generate_random_string_column(19, 78, o_num_rows, stream, mr); + + // Generate the `orders_independent` table + std::vector> columns; + columns.push_back(std::move(o_orderkey)); + columns.push_back(std::move(o_custkey)); + columns.push_back(std::move(o_orderdate_ts)); + columns.push_back(std::move(o_orderpriority)); + columns.push_back(std::move(o_clerk)); + columns.push_back(std::move(o_shippriority)); + columns.push_back(std::move(o_comment)); + return std::make_unique(std::move(columns)); +} + +/** + * @brief Generate the `lineitem` table partially + * + * @param orders_independent Table with the independent columns of the `orders` table + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_lineitem_partial(cudf::table_view const& orders_independent, + double scale_factor, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const o_num_rows = orders_independent.num_rows(); + // Generate the `lineitem` table. For each row in the `orders` table, + // we have a random number (between 1 and 7) of rows in the `lineitem` table + + // For each `o_orderkey`, generate a random number (between 1 and 7), + // which will be the number of rows in the `lineitem` table that will + // have the same `l_orderkey` + auto const o_rep_freqs = generate_random_numeric_column(1, 7, o_num_rows, stream, mr); + + // Sum up the `o_rep_freqs` to get the number of rows in the + // `lineitem` table. 
This is required to generate the independent columns + // in the `lineitem` table + auto const l_num_rows = calculate_l_cardinality(o_rep_freqs->view(), stream, mr); + + // We create a table out of `o_orderkey` and `o_orderdate_ts` by repeating + // the rows of `orders` according to the frequencies in `o_rep_freqs` + auto const o_orderkey = orders_independent.column(0); + auto const o_orderdate_ts = orders_independent.column(2); + auto const l_base = + cudf::repeat(cudf::table_view({o_orderkey, o_orderdate_ts}), o_rep_freqs->view(), stream, mr); + auto l_base_columns = l_base->release(); + + // Generate the `l_orderkey` column + auto l_orderkey = std::move(l_base_columns[0]); + + // Generate the `l_partkey` column + auto l_partkey = generate_random_numeric_column( + 1, scale_factor * 200'000, l_num_rows, stream, mr); + + // Generate the `l_suppkey` column + auto l_suppkey = calculate_l_suppkey(l_partkey->view(), scale_factor, l_num_rows, stream, mr); + + // Generate the `l_linenumber` column + auto l_linenumber = generate_repeat_sequence_column(7, false, l_num_rows, stream, mr); + + // Generate the `l_quantity` column + auto l_quantity = generate_random_numeric_column(1, 50, l_num_rows, stream, mr); + + // Generate the `l_discount` column + auto l_discount = [&]() { + auto const col = generate_random_numeric_column(0.00, 0.10, l_num_rows, stream, mr); + return cudf::round(col->view(), 2); + }(); + + // Generate the `l_tax` column + auto l_tax = [&]() { + auto const col = generate_random_numeric_column(0.00, 0.08, l_num_rows, stream, mr); + return cudf::round(col->view(), 2); + }(); + + // Get the orderdate column from the `l_base` table + auto const ol_orderdate_ts = std::move(l_base_columns[1]); + + // Generate the `l_shipdate` column + auto l_shipdate_ts = [&]() { + auto const l_shipdate_rand_add_days = + generate_random_numeric_column(1, 121, l_num_rows, stream, mr); + return add_calendrical_days( + ol_orderdate_ts->view(), l_shipdate_rand_add_days->view(), stream, mr); + }(); + + // Generate the `l_commitdate` column + auto l_commitdate_ts = [&]() { + auto const l_commitdate_rand_add_days = + generate_random_numeric_column(30, 90, l_num_rows, stream, mr); + return add_calendrical_days( + ol_orderdate_ts->view(), l_commitdate_rand_add_days->view(), stream, mr); + }(); + + // Generate the `l_receiptdate` column + auto l_receiptdate_ts = [&]() { + auto const l_receiptdate_rand_add_days = + generate_random_numeric_column(1, 30, l_num_rows, stream, mr); + return add_calendrical_days( + l_shipdate_ts->view(), l_receiptdate_rand_add_days->view(), stream, mr); + }(); + + // Define the current date as per clause 4.2.2.12 of the TPC-H specification + constexpr cudf::size_type current_date_days_since_epoch = 9'298; + auto current_date = + cudf::timestamp_scalar(current_date_days_since_epoch, true); + auto current_date_literal = cudf::ast::literal(current_date); + + // Generate the `l_returnflag` column + // if `l_receiptdate` <= current_date then "R" or "A" else "N" + auto l_returnflag = [&]() { + auto const col_ref = cudf::ast::column_reference(0); + auto const pred = + cudf::ast::operation(cudf::ast::ast_operator::LESS_EQUAL, col_ref, current_date_literal); + auto const binary_mask = + cudf::compute_column(cudf::table_view({l_receiptdate_ts->view()}), pred, stream, mr); + + auto const multiplier = + generate_repeat_sequence_column(2, false, l_num_rows, stream, mr); + auto const ternary_mask = cudf::binary_operation(binary_mask->view(), + multiplier->view(), + cudf::binary_operator::MUL, + 
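/* the 0/1 received-mask times the repeating 1,2 sequence encodes N=0, A=1, R=2 as INT8 gather keys for the lookup below */ +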
cudf::data_type{cudf::type_id::INT8},
+                                                    stream,
+                                                    mr);
+    auto const indices = cudf::test::fixed_width_column_wrapper<int8_t>({0, 1, 2}).release();
+    auto const keys    = cudf::test::strings_column_wrapper({"N", "A", "R"}).release();
+    auto const gather_map = cudf::table_view({indices->view(), keys->view()});
+    auto const gathered_table = cudf::gather(
+      gather_map, ternary_mask->view(), cudf::out_of_bounds_policy::DONT_CHECK, stream, mr);
+    return std::move(gathered_table->release()[1]);
+  }();
+
+  // Generate the `l_linestatus` column
+  // if `l_shipdate` > current_date then "F" else "O"
+  auto [l_linestatus, l_linestatus_mask] = [&]() {
+    auto const col_ref = cudf::ast::column_reference(0);
+    auto const pred =
+      cudf::ast::operation(cudf::ast::ast_operator::GREATER, col_ref, current_date_literal);
+    auto mask = cudf::compute_column(cudf::table_view({l_shipdate_ts->view()}), pred, stream, mr);
+    auto mask_index_type = cudf::cast(mask->view(), cudf::data_type{cudf::type_id::INT8});
+    auto const indices = cudf::test::fixed_width_column_wrapper<int8_t>({0, 1}).release();
+    auto const keys    = cudf::test::strings_column_wrapper({"O", "F"}).release();
+    auto const gather_map = cudf::table_view({indices->view(), keys->view()});
+    auto const gathered_table = cudf::gather(
+      gather_map, mask_index_type->view(), cudf::out_of_bounds_policy::DONT_CHECK, stream, mr);
+    return std::make_tuple(std::move(gathered_table->release()[1]), std::move(mask_index_type));
+  }();
+
+  // Generate the `l_shipinstruct` column
+  auto l_shipinstruct = generate_random_string_column_from_set(
+    cudf::host_span<const char* const>(vocab_instructions.data(), vocab_instructions.size()),
+    l_num_rows,
+    stream,
+    mr);
+
+  // Generate the `l_shipmode` column
+  auto l_shipmode = generate_random_string_column_from_set(
+    cudf::host_span<const char* const>(vocab_modes.data(), vocab_modes.size()),
+    l_num_rows,
+    stream,
+    mr);
+
+  // Generate the `l_comment` column
+  // NOTE: This column is not compliant with
+  // clause 4.2.2.10 of the TPC-H specification
+  auto l_comment = generate_random_string_column(10, 43, l_num_rows, stream, mr);
+
+  // Generate the `lineitem_partial` table
+  std::vector<std::unique_ptr<cudf::column>> columns;
+  columns.push_back(std::move(l_linestatus_mask));
+  columns.push_back(std::move(l_orderkey));
+  columns.push_back(std::move(l_partkey));
+  columns.push_back(std::move(l_suppkey));
+  columns.push_back(std::move(l_linenumber));
+  columns.push_back(std::move(l_quantity));
+  columns.push_back(std::move(l_discount));
+  columns.push_back(std::move(l_tax));
+  columns.push_back(std::move(l_shipdate_ts));
+  columns.push_back(std::move(l_commitdate_ts));
+  columns.push_back(std::move(l_receiptdate_ts));
+  columns.push_back(std::move(l_returnflag));
+  columns.push_back(std::move(l_linestatus));
+  columns.push_back(std::move(l_shipinstruct));
+  columns.push_back(std::move(l_shipmode));
+  columns.push_back(std::move(l_comment));
+  return std::make_unique<cudf::table>(std::move(columns));
+}
+
+std::unique_ptr<cudf::table> generate_orders_dependent(cudf::table_view const& lineitem,
+                                                       rmm::cuda_stream_view stream,
+                                                       rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  auto const l_linestatus_mask = lineitem.column(0);
+  auto const l_orderkey        = lineitem.column(1);
+  auto const l_discount        = lineitem.column(6);
+  auto const l_tax             = lineitem.column(7);
+  auto const l_extendedprice   = lineitem.column(16);
+
+  std::vector<std::unique_ptr<cudf::column>> orders_dependent_columns;
+
+  // Generate the `o_totalprice` column
+  // We calculate the `charge` column, which is a function of `l_extendedprice`,
+  // `l_tax`, and `l_discount` and then group by
`l_orderkey` and sum the `charge` + auto const l_charge = calculate_charge(l_extendedprice, l_tax, l_discount, stream, mr); + auto o_totalprice = [&]() { + auto const keys = cudf::table_view({l_orderkey}); + cudf::groupby::groupby gb(keys); + std::vector requests; + requests.push_back(cudf::groupby::aggregation_request()); + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + requests[0].values = l_charge->view(); + auto agg_result = gb.aggregate(requests); + return cudf::round(agg_result.second[0].results[0]->view(), 2); + }(); + orders_dependent_columns.push_back(std::move(o_totalprice)); + + // Generate the `o_orderstatus` column + auto o_orderstatus = [&]() { + auto const keys = cudf::table_view({l_orderkey}); + cudf::groupby::groupby gb(keys); + std::vector requests; + + // Perform a `count` aggregation on `l_orderkey` + requests.push_back(cudf::groupby::aggregation_request()); + requests[0].aggregations.push_back(cudf::make_count_aggregation()); + requests[0].values = l_orderkey; + + // Perform a `sum` aggregation on `l_linestatus_mask` + requests.push_back(cudf::groupby::aggregation_request()); + requests[1].aggregations.push_back(cudf::make_sum_aggregation()); + requests[1].values = l_linestatus_mask; + + // Perform the aggregations + auto agg_result = gb.aggregate(requests); + + // Create a `table_view` out of the `l_orderkey`, `count`, and `sum` columns + auto const count = std::move(agg_result.second[0].results[0]); + auto const sum = cudf::cast( + agg_result.second[1].results[0]->view(), cudf::data_type{cudf::type_id::INT32}, stream, mr); + + auto const table = + cudf::table_view({agg_result.first->get_column(0).view(), count->view(), sum->view()}); + + // Now on this table, + // if `sum` == `count` then "O", + // if `sum` == 0, then "F", + // else "P" + + // So, we first evaluate an expression `sum == count` and generate a boolean mask + auto const count_ref = cudf::ast::column_reference(1); + auto const sum_ref = cudf::ast::column_reference(2); + auto const expr_a = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, sum_ref, count_ref); + auto const mask_a = cudf::compute_column(table, expr_a); + auto const o_orderstatus_intermediate = + cudf::copy_if_else(cudf::string_scalar("O"), cudf::string_scalar("F"), mask_a->view()); + + // Then, we evaluate an expression `sum == 0` and generate a boolean mask + auto zero_scalar = cudf::numeric_scalar(0); + auto const zero_literal = cudf::ast::literal(zero_scalar); + auto const expr_b_left = + cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, sum_ref, count_ref); + auto const expr_b_right = + cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, sum_ref, zero_literal); + auto const expr_b = + cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_b_left, expr_b_right); + auto const mask_b = cudf::compute_column(table, expr_b); + return cudf::copy_if_else( + cudf::string_scalar("P"), o_orderstatus_intermediate->view(), mask_b->view()); + }(); + orders_dependent_columns.push_back(std::move(o_orderstatus)); + return std::make_unique(std::move(orders_dependent_columns)); +} + +/** + * @brief Generate the `partsupp` table + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_partsupp(double scale_factor, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // 
Define the number of rows in the `part` and `partsupp` tables + cudf::size_type const p_num_rows = scale_factor * 200'000; + cudf::size_type const ps_num_rows = scale_factor * 800'000; + + // Generate the `ps_partkey` column + auto ps_partkey = [&]() { + auto const p_partkey = + generate_primary_key_column(cudf::numeric_scalar(1), p_num_rows, stream, mr); + auto const rep_table = cudf::repeat(cudf::table_view({p_partkey->view()}), 4, stream, mr); + return std::move(rep_table->release()[0]); + }(); + + // Generate the `ps_suppkey` column + auto ps_suppkey = calculate_ps_suppkey(ps_partkey->view(), scale_factor, ps_num_rows, stream, mr); + + // Generate the `ps_availqty` column + auto ps_availqty = generate_random_numeric_column(1, 9999, ps_num_rows, stream, mr); + + // Generate the `ps_supplycost` column + auto ps_supplycost = [&]() { + auto const col = generate_random_numeric_column(1.00, 1000.00, ps_num_rows, stream, mr); + return cudf::round(col->view(), 2); + }(); + + // Generate the `ps_comment` column + // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification + auto ps_comment = generate_random_string_column(49, 198, ps_num_rows, stream, mr); + + // Create the `partsupp` table + std::vector> columns; + columns.push_back(std::move(ps_partkey)); + columns.push_back(std::move(ps_suppkey)); + columns.push_back(std::move(ps_availqty)); + columns.push_back(std::move(ps_supplycost)); + columns.push_back(std::move(ps_comment)); + return std::make_unique(std::move(columns)); +} + +/** + * @brief Generate the `part` table + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_part(double scale_factor, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + cudf::size_type const num_rows = scale_factor * 200'000; + + // Generate the `p_partkey` column + auto p_partkey = + generate_primary_key_column(cudf::numeric_scalar(1), num_rows, stream, mr); + + // Generate the `p_name` column + auto p_name = [&]() { + auto const p_name_a = generate_random_string_column_from_set( + cudf::host_span(vocab_p_name.data(), vocab_p_name.size()), + num_rows, + stream, + mr); + auto const p_name_b = generate_random_string_column_from_set( + cudf::host_span(vocab_p_name.data(), vocab_p_name.size()), + num_rows, + stream, + mr); + auto const p_name_c = generate_random_string_column_from_set( + cudf::host_span(vocab_p_name.data(), vocab_p_name.size()), + num_rows, + stream, + mr); + auto const p_name_d = generate_random_string_column_from_set( + cudf::host_span(vocab_p_name.data(), vocab_p_name.size()), + num_rows, + stream, + mr); + auto const p_name_e = generate_random_string_column_from_set( + cudf::host_span(vocab_p_name.data(), vocab_p_name.size()), + num_rows, + stream, + mr); + return cudf::strings::concatenate( + cudf::table_view( + {p_name_a->view(), p_name_b->view(), p_name_c->view(), p_name_d->view(), p_name_e->view()}), + cudf::string_scalar(" "), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + stream, + mr); + }(); + + // Generate the `p_mfgr` and `p_brand` columns + auto const random_values_m = generate_random_numeric_column(1, 5, num_rows, stream, mr); + auto const random_values_m_str = + cudf::strings::from_integers(random_values_m->view(), stream, mr); + + auto const random_values_n = 
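/* digit N: p_brand below is assembled as "Brand#MN" from this digit and the p_mfgr digit M */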
+
+/**
+ * @brief Generate the `part` table
+ *
+ * @param scale_factor The scale factor to generate
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<cudf::table> generate_part(double scale_factor,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  cudf::size_type const num_rows = scale_factor * 200'000;
+
+  // Generate the `p_partkey` column
+  auto p_partkey =
+    generate_primary_key_column(cudf::numeric_scalar<cudf::size_type>(1), num_rows, stream, mr);
+
+  // Generate the `p_name` column
+  auto p_name = [&]() {
+    auto const p_name_a = generate_random_string_column_from_set(
+      cudf::host_span<const char* const>(vocab_p_name.data(), vocab_p_name.size()),
+      num_rows, stream, mr);
+    auto const p_name_b = generate_random_string_column_from_set(
+      cudf::host_span<const char* const>(vocab_p_name.data(), vocab_p_name.size()),
+      num_rows, stream, mr);
+    auto const p_name_c = generate_random_string_column_from_set(
+      cudf::host_span<const char* const>(vocab_p_name.data(), vocab_p_name.size()),
+      num_rows, stream, mr);
+    auto const p_name_d = generate_random_string_column_from_set(
+      cudf::host_span<const char* const>(vocab_p_name.data(), vocab_p_name.size()),
+      num_rows, stream, mr);
+    auto const p_name_e = generate_random_string_column_from_set(
+      cudf::host_span<const char* const>(vocab_p_name.data(), vocab_p_name.size()),
+      num_rows, stream, mr);
+    return cudf::strings::concatenate(
+      cudf::table_view(
+        {p_name_a->view(), p_name_b->view(), p_name_c->view(), p_name_d->view(), p_name_e->view()}),
+      cudf::string_scalar(" "),
+      cudf::string_scalar("", false),
+      cudf::strings::separator_on_nulls::NO,
+      stream,
+      mr);
+  }();
+
+  // Generate the `p_mfgr` and `p_brand` columns
+  auto const random_values_m = generate_random_numeric_column(1, 5, num_rows, stream, mr);
+  auto const random_values_m_str =
+    cudf::strings::from_integers(random_values_m->view(), stream, mr);
+
+  auto const random_values_n = generate_random_numeric_column(1, 5, num_rows, stream, mr);
+  auto const random_values_n_str =
+    cudf::strings::from_integers(random_values_n->view(), stream, mr);
+
+  auto p_mfgr = [&]() {
+    auto const mfgr_repeat = generate_repeat_string_column("Manufacturer#", num_rows, stream, mr);
+    return cudf::strings::concatenate(
+      cudf::table_view({mfgr_repeat->view(), random_values_m_str->view()}),
+      cudf::string_scalar(""),
+      cudf::string_scalar("", false),
+      cudf::strings::separator_on_nulls::NO,
+      stream,
+      mr);
+  }();
+
+  auto p_brand = [&]() {
+    auto const brand_repeat = generate_repeat_string_column("Brand#", num_rows, stream, mr);
+    return cudf::strings::concatenate(
+      cudf::table_view(
+        {brand_repeat->view(), random_values_m_str->view(), random_values_n_str->view()}),
+      cudf::string_scalar(""),
+      cudf::string_scalar("", false),
+      cudf::strings::separator_on_nulls::NO,
+      stream,
+      mr);
+  }();
+
+  // Generate the `p_type` column
+  auto p_type = generate_random_string_column_from_set(
+    cudf::host_span<const char* const>(vocab_types.data(), vocab_types.size()),
+    num_rows, stream, mr);
+
+  // Generate the `p_size` column
+  auto p_size = generate_random_numeric_column(1, 50, num_rows, stream, mr);
+
+  // Generate the `p_container` column
+  auto p_container = generate_random_string_column_from_set(
+    cudf::host_span<const char* const>(vocab_containers.data(), vocab_containers.size()),
+    num_rows, stream, mr);
+
+  // Generate the `p_retailprice` column
+  auto p_retailprice = calculate_p_retailprice(p_partkey->view(), stream, mr);
+
+  // Generate the `p_comment` column
+  // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification
+  auto p_comment = generate_random_string_column(5, 22, num_rows, stream, mr);
+
+  // Create the `part` table
+  std::vector<std::unique_ptr<cudf::column>> columns;
+  columns.push_back(std::move(p_partkey));
+  columns.push_back(std::move(p_name));
+  columns.push_back(std::move(p_mfgr));
+  columns.push_back(std::move(p_brand));
+  columns.push_back(std::move(p_type));
+  columns.push_back(std::move(p_size));
+  columns.push_back(std::move(p_container));
+  columns.push_back(std::move(p_retailprice));
+  columns.push_back(std::move(p_comment));
+  return std::make_unique<cudf::table>(std::move(columns));
+}
+
+/**
+ * @brief Generate the `orders`, `lineitem`, and `part` tables
+ *
+ * @param scale_factor The scale factor to generate
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::tuple<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>>
+generate_orders_lineitem_part(double scale_factor,
+                              rmm::cuda_stream_view stream,
+                              rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  // Generate a table with the independent columns of the `orders` table
+  auto orders_independent = generate_orders_independent(scale_factor, stream, mr);
+
+  // Generate the `lineitem` table partially
+  auto lineitem_partial =
+    generate_lineitem_partial(orders_independent->view(), scale_factor, stream, mr);
+
+  // Generate the `part` table
+  auto part = generate_part(scale_factor, stream, mr);
+
+  // Join the `part` and partial `lineitem` tables, then calculate the `l_extendedprice` column,
+  // add the column to the `lineitem` table, and write the `lineitem` table to a parquet file
+  auto l_extendedprice = [&]() {
+    auto const left = cudf::table_view(
+      {lineitem_partial->get_column(2).view(), lineitem_partial->get_column(5).view()});
+    auto const right = cudf::table_view({part->get_column(0).view(), part->get_column(7).view()});
+    auto const joined_table   = perform_left_join(left, right, {0}, {0}, stream, mr);
+    auto joined_table_columns = joined_table->release();
+    auto const l_quantity     = std::move(joined_table_columns[1]);
+    auto const l_quantity_fp =
+      cudf::cast(l_quantity->view(), cudf::data_type{cudf::type_id::FLOAT64});
+    auto const p_retailprice = std::move(joined_table_columns[3]);
+    auto const col           = cudf::binary_operation(l_quantity_fp->view(),
+                                                      p_retailprice->view(),
+                                                      cudf::binary_operator::MUL,
+                                                      cudf::data_type{cudf::type_id::FLOAT64},
+                                                      stream,
+                                                      mr);
+    return cudf::round(col->view(), 2);
+  }();
+
+  auto lineitem_partial_columns = lineitem_partial->release();
+  lineitem_partial_columns.push_back(std::move(l_extendedprice));
+  auto lineitem_temp = std::make_unique<cudf::table>(std::move(lineitem_partial_columns));
+
+  // Generate the dependent columns of the `orders` table
+  // and merge them with the independent columns
+  auto orders_dependent = generate_orders_dependent(lineitem_temp->view(), stream, mr);
+
+  auto orders_independent_columns = orders_independent->release();
+  auto orders_dependent_columns   = orders_dependent->release();
+  orders_independent_columns.insert(orders_independent_columns.end(),
+                                    std::make_move_iterator(orders_dependent_columns.begin()),
+                                    std::make_move_iterator(orders_dependent_columns.end()));
+
+  // Create the `orders` table
+  auto orders = std::make_unique<cudf::table>(std::move(orders_independent_columns));
+
+  // Create the `lineitem` table
+  auto lineitem_temp_columns = lineitem_temp->release();
+  lineitem_temp_columns.erase(lineitem_temp_columns.begin());
+  auto lineitem = std::make_unique<cudf::table>(std::move(lineitem_temp_columns));
+
+  return std::make_tuple(std::move(orders), std::move(lineitem), std::move(part));
+}
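The `l_extendedprice` computation above is a keyed lookup (left join on `p_partkey`) followed by a multiply and a round to two decimals. A host-side sketch of the same arithmetic, in plain C++ with illustrative names:

#include <cmath>
#include <cstddef>
#include <unordered_map>
#include <vector>

std::vector<double> extended_price(std::vector<int> const& l_partkey,
                                   std::vector<int> const& l_quantity,
                                   std::unordered_map<int, double> const& p_retailprice)
{
  std::vector<double> out;
  out.reserve(l_partkey.size());
  for (std::size_t i = 0; i < l_partkey.size(); ++i) {
    // quantity * the joined part's retail price
    double const price = l_quantity[i] * p_retailprice.at(l_partkey[i]);
    // round to 2 decimal places, as cudf::round(..., 2) does on device
    out.push_back(std::round(price * 100.0) / 100.0);
  }
  return out;
}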
+
+/**
+ * @brief Generate the `supplier` table
+ *
+ * @param scale_factor The scale factor to generate
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<cudf::table> generate_supplier(double scale_factor,
+                                               rmm::cuda_stream_view stream,
+                                               rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  // Calculate the number of rows based on the scale factor
+  cudf::size_type const num_rows = scale_factor * 10'000;
+
+  // Generate the `s_suppkey` column
+  auto s_suppkey =
+    generate_primary_key_column(cudf::numeric_scalar<cudf::size_type>(1), num_rows, stream, mr);
+
+  // Generate the `s_name` column
+  auto s_name = [&]() {
+    auto const supplier_repeat = generate_repeat_string_column("Supplier#", num_rows, stream, mr);
+    auto const s_suppkey_str   = cudf::strings::from_integers(s_suppkey->view(), stream, mr);
+    auto const s_suppkey_str_padded = cudf::strings::zfill(s_suppkey_str->view(), 9, stream, mr);
+    return cudf::strings::concatenate(
+      cudf::table_view({supplier_repeat->view(), s_suppkey_str_padded->view()}),
+      cudf::string_scalar(""),
+      cudf::string_scalar("", false),
+      cudf::strings::separator_on_nulls::NO,
+      stream,
+      mr);
+  }();
+
+  // Generate the `s_address` column
+  auto s_address = generate_address_column(num_rows, stream, mr);
+
+  // Generate the `s_nationkey` column
+  auto s_nationkey = generate_random_numeric_column(0, 24, num_rows, stream, mr);
+
+  // Generate the `s_phone` column
+  auto s_phone = generate_phone_column(num_rows, stream, mr);
+
+  // Generate the `s_acctbal` column
+  auto s_acctbal = [&]() {
+    auto const col = generate_random_numeric_column(-999.99, 9999.99, num_rows, stream, mr);
+    return cudf::round(col->view(), 2);
+  }();
+
+  // Generate the `s_comment` column
+  // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification
+  auto s_comment = generate_random_string_column(25, 100, num_rows, stream, mr);
+
+  // Create the `supplier` table
+  std::vector<std::unique_ptr<cudf::column>> columns;
+  columns.push_back(std::move(s_suppkey));
+  columns.push_back(std::move(s_name));
+  columns.push_back(std::move(s_address));
+  columns.push_back(std::move(s_nationkey));
+  columns.push_back(std::move(s_phone));
+  columns.push_back(std::move(s_acctbal));
+  columns.push_back(std::move(s_comment));
+  return std::make_unique<cudf::table>(std::move(columns));
+}
+
+/**
+ * @brief Generate the `customer` table
+ *
+ * @param scale_factor The scale factor to generate
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<cudf::table> generate_customer(double scale_factor,
+                                               rmm::cuda_stream_view stream,
+                                               rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  // Calculate the number of rows based on the scale factor
+  cudf::size_type const num_rows = scale_factor * 150'000;
+
+  // Generate the `c_custkey` column
+  auto c_custkey =
+    generate_primary_key_column(cudf::numeric_scalar<cudf::size_type>(1), num_rows, stream, mr);
+
+  // Generate the `c_name` column
+  auto c_name = [&]() {
+    auto const customer_repeat = generate_repeat_string_column("Customer#", num_rows, stream, mr);
+    auto const c_custkey_str   = cudf::strings::from_integers(c_custkey->view(), stream, mr);
+    auto const c_custkey_str_padded = cudf::strings::zfill(c_custkey_str->view(), 9, stream, mr);
+    return cudf::strings::concatenate(
+      cudf::table_view({customer_repeat->view(), c_custkey_str_padded->view()}),
+      cudf::string_scalar(""),
+      cudf::string_scalar("", false),
+      cudf::strings::separator_on_nulls::NO,
+      stream,
+      mr);
+  }();
+
+  // Generate the `c_address` column
+  auto c_address = generate_address_column(num_rows, stream, mr);
+
+  // Generate the `c_nationkey` column
+  auto c_nationkey = generate_random_numeric_column(0, 24, num_rows, stream, mr);
+
+  // Generate the `c_phone` column
+  auto c_phone = generate_phone_column(num_rows, stream, mr);
+
+  // Generate the `c_acctbal` column
+  auto c_acctbal = [&]() {
+    auto const col = generate_random_numeric_column(-999.99, 9999.99, num_rows, stream, mr);
+    return cudf::round(col->view(), 2);
+  }();
+
+  // Generate the `c_mktsegment` column
+  auto c_mktsegment = generate_random_string_column_from_set(
+    cudf::host_span<const char* const>(vocab_segments.data(), vocab_segments.size()),
+    num_rows, stream, mr);
+
+  // Generate the `c_comment` column
+  // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification
+  auto c_comment = generate_random_string_column(29, 116, num_rows, stream, mr);
+
+  // Create the `customer` table
+  std::vector<std::unique_ptr<cudf::column>> columns;
+  columns.push_back(std::move(c_custkey));
+  columns.push_back(std::move(c_name));
+  columns.push_back(std::move(c_address));
+  columns.push_back(std::move(c_nationkey));
+  columns.push_back(std::move(c_phone));
+  columns.push_back(std::move(c_acctbal));
+  columns.push_back(std::move(c_mktsegment));
+  columns.push_back(std::move(c_comment));
+  return std::make_unique<cudf::table>(std::move(columns));
+}
+
+/**
+ * @brief Generate the `nation` table
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<cudf::table> generate_nation(rmm::cuda_stream_view stream,
+                                             rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  // Define the number of rows
+  constexpr cudf::size_type num_rows = 25;
+
+  // Generate the `n_nationkey` column
+  auto n_nationkey =
+    generate_primary_key_column(cudf::numeric_scalar<cudf::size_type>(0), num_rows, stream, mr);
+
+  // Generate the `n_name` column
+  auto n_name = cudf::test::strings_column_wrapper(nations.begin(), nations.end()).release();
+
+  // Generate the `n_regionkey` column
+  std::vector<int8_t> region_keys{0, 1, 1, 1, 4, 0, 3, 3, 2, 2, 4, 4, 2,
+                                  4, 0, 0, 0, 1, 2, 3, 4, 2, 3, 3, 1};
+  auto n_regionkey =
+    cudf::test::fixed_width_column_wrapper<int8_t>(region_keys.begin(), region_keys.end())
+      .release();
+
+  // Generate the `n_comment` column
+  // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification
+  auto n_comment = generate_random_string_column(31, 114, num_rows, stream, mr);
+
+  // Create the `nation` table
+  std::vector<std::unique_ptr<cudf::column>> columns;
+  columns.push_back(std::move(n_nationkey));
+  columns.push_back(std::move(n_name));
+  columns.push_back(std::move(n_regionkey));
+  columns.push_back(std::move(n_comment));
+  return std::make_unique<cudf::table>(std::move(columns));
+}
+
+/**
+ * @brief Generate the `region` table
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<cudf::table> generate_region(rmm::cuda_stream_view stream,
+                                             rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  // Define the number of rows
+  constexpr cudf::size_type num_rows = 5;
+
+  // Generate the `r_regionkey` column
+  auto r_regionkey =
+    generate_primary_key_column(cudf::numeric_scalar<cudf::size_type>(0), num_rows, stream, mr);
+
+  // Generate the `r_name` column
+  auto r_name =
+    cudf::test::strings_column_wrapper({"AFRICA", "AMERICA", "ASIA", "EUROPE", "MIDDLE EAST"})
+      .release();
+
+  // Generate the `r_comment` column
+  // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification
+  auto r_comment = generate_random_string_column(31, 115, num_rows, stream, mr);
+
+  // Create the `region` table
+  std::vector<std::unique_ptr<cudf::column>> columns;
+  columns.push_back(std::move(r_regionkey));
+  columns.push_back(std::move(r_name));
+  columns.push_back(std::move(r_comment));
+  return std::make_unique<cudf::table>(std::move(columns));
+}
+
+}  // namespace cudf::datagen
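Taken together, the generators above can be driven from a handful of calls. A minimal sketch, assuming the default `stream`/`mr` arguments declared in the header added below; the wrapper function and the exact include path are illustrative, not part of this change:

#include "tpch_data_generator.hpp"  // header added below; include path depends on the build setup

void make_sf1_tables()
{
  double const scale_factor = 1.0;
  // generate_orders_lineitem_part returns all three tables at once because
  // orders/lineitem columns depend on each other and on part
  auto [orders, lineitem, part] = cudf::datagen::generate_orders_lineitem_part(scale_factor);
  auto partsupp                 = cudf::datagen::generate_partsupp(scale_factor);
  auto supplier                 = cudf::datagen::generate_supplier(scale_factor);
  auto customer                 = cudf::datagen::generate_customer(scale_factor);
  auto nation                   = cudf::datagen::generate_nation();  // fixed 25 rows
  auto region                   = cudf::datagen::generate_region();  // fixed 5 rows
}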
diff --git a/cpp/benchmarks/common/tpch_data_generator/tpch_data_generator.hpp b/cpp/benchmarks/common/tpch_data_generator/tpch_data_generator.hpp
new file mode 100644
index 00000000000..a6286dd8dba
--- /dev/null
+++ b/cpp/benchmarks/common/tpch_data_generator/tpch_data_generator.hpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+
+namespace CUDF_EXPORT cudf {
+namespace datagen {
+
+/**
+ * @brief Generate the `orders`, `lineitem`, and `part` tables
+ *
+ * @param scale_factor The scale factor to generate
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::tuple<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>>
+generate_orders_lineitem_part(
+  double scale_factor,
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Generate the `partsupp` table
+ *
+ * @param scale_factor The scale factor to generate
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<cudf::table> generate_partsupp(
+  double scale_factor,
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Generate the `supplier` table
+ *
+ * @param scale_factor The scale factor to generate
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<cudf::table> generate_supplier(
+  double scale_factor,
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Generate the `customer` table
+ *
+ * @param scale_factor The scale factor to generate
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<cudf::table> generate_customer(
+  double scale_factor,
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Generate the `nation` table
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<cudf::table> generate_nation(
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Generate the `region` table
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<cudf::table> generate_region(
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
+
+}  // namespace datagen
+}  // namespace CUDF_EXPORT cudf
diff --git a/cpp/benchmarks/groupby/group_max.cpp b/cpp/benchmarks/groupby/group_max.cpp
index f41285008c4..b9a701a71f4 100644
--- a/cpp/benchmarks/groupby/group_max.cpp
+++ b/cpp/benchmarks/groupby/group_max.cpp
@@ -101,4 +101,5 @@ NVBENCH_BENCH_TYPES(bench_groupby_max,
 NVBENCH_BENCH_TYPES(bench_groupby_max_cardinality, NVBENCH_TYPE_AXES(nvbench::type_list<int32_t>))
   .set_name("groupby_max_cardinality")
+  .add_int64_axis("num_aggregations", {1})
   .add_int64_axis("cardinality",
                   {10, 20, 50, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000});
diff --git a/cpp/benchmarks/io/utilities/batched_memset_bench.cpp b/cpp/benchmarks/io/utilities/batched_memset_bench.cpp
new file mode 100644
index 00000000000..2905895a63b
--- /dev/null
+++ b/cpp/benchmarks/io/utilities/batched_memset_bench.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
+// run on most GPUs, but large enough to allow highest throughput
+constexpr size_t data_size = 512 << 20;
+
+void parquet_read_common(cudf::size_type num_rows_to_read,
+                         cudf::size_type num_cols_to_read,
+                         cuio_source_sink_pair& source_sink,
+                         nvbench::state& state)
+{
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
+
+  auto mem_stats_logger = cudf::memory_stats_logger();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  state.exec(
+    nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
+      try_drop_l3_cache();
+
+      timer.start();
+      auto const result = cudf::io::read_parquet(read_opts);
+      timer.stop();
+
+      CUDF_EXPECTS(result.tbl->num_columns() == num_cols_to_read, "Unexpected number of columns");
+      CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows");
+    });
+
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+  state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
+}
+
+template <data_type DataType>
+void bench_batched_memset(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
+{
+  auto const d_type      = get_type_or_group(static_cast<int32_t>(DataType));
+  auto const num_cols    = static_cast<cudf::size_type>(state.get_int64("num_cols"));
+  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
+  auto const run_length  = static_cast<cudf::size_type>(state.get_int64("run_length"));
+  auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
+  auto const compression = cudf::io::compression_type::NONE;
+  cuio_source_sink_pair source_sink(source_type);
+  auto const tbl =
+    create_random_table(cycle_dtypes(d_type, num_cols),
+                        table_size_bytes{data_size},
+                        data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
+  auto const view = tbl->view();
+
+  cudf::io::parquet_writer_options write_opts =
+    cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
+      .compression(compression);
+  cudf::io::write_parquet(write_opts);
+  auto const num_rows = view.num_rows();
+
+  parquet_read_common(num_rows, num_cols, source_sink, state);
+}
+
+using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
+                                            data_type::FLOAT,
+                                            data_type::DECIMAL,
+                                            data_type::TIMESTAMP,
+                                            data_type::DURATION,
+                                            data_type::STRING,
+                                            data_type::LIST,
+                                            data_type::STRUCT>;
+
+NVBENCH_BENCH_TYPES(bench_batched_memset, NVBENCH_TYPE_AXES(d_type_list))
+  .set_name("batched_memset")
+  .set_type_axes_names({"data_type"})
+  .add_int64_axis("num_cols", {1000})
+  .add_string_axis("io_type", {"DEVICE_BUFFER"})
+  .set_min_samples(4)
+  .add_int64_axis("cardinality", {0, 1000})
+  .add_int64_axis("run_length", {1, 32});
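The `bytes_per_second` element count recorded in `parquet_read_common` above is simply the raw table size divided by nvbench's mean cold GPU time. The arithmetic as a standalone sketch (the time value is illustrative):

#include <cstdio>

int main()
{
  double const data_size  = 512.0 * (1 << 20);  // 512 MiB, as in the benchmark
  double const gpu_time_s = 0.25;               // example "nv/cold/time/gpu/mean" value
  std::printf("bytes_per_second = %.0f\n", data_size / gpu_time_s);
  return 0;
}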
diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu
index ada7a9bd73d..fd0cebb12ea 100644
--- a/cpp/benchmarks/iterator/iterator.cu
+++ b/cpp/benchmarks/iterator/iterator.cu
@@ -30,7 +30,6 @@
 #include
 #include
 #include
-#include
 
 #include
 #include
@@ -161,68 +160,6 @@ void BM_iterator(benchmark::State& state)
                   sizeof(TypeParam));
 }
 
-// operator+ defined for pair iterator reduction
-template <typename T>
-__device__ thrust::pair<T, bool> operator+(thrust::pair<T, bool> lhs, thrust::pair<T, bool> rhs)
-{
-  return thrust::pair<T, bool>{lhs.first * lhs.second + rhs.first * rhs.second,
-                               lhs.second + rhs.second};
-}
-// -----------------------------------------------------------------------------
-template <typename T>
-void pair_iterator_bench_cub(cudf::column_view& col,
-                             rmm::device_uvector<thrust::pair<T, bool>>& result)
-{
-  thrust::pair<T, bool> init{0, false};
-  auto d_col    = cudf::column_device_view::create(col);
-  int num_items = col.size();
-  auto begin    = d_col->pair_begin<T, true>();
-  reduce_by_cub(result.begin(), begin, num_items, init);
-}
-
-template <typename T>
-void pair_iterator_bench_thrust(cudf::column_view& col,
-                                rmm::device_uvector<thrust::pair<T, bool>>& result)
-{
-  thrust::pair<T, bool> init{0, false};
-  auto d_col = cudf::column_device_view::create(col);
-  auto d_in  = d_col->pair_begin<T, true>();
-  auto d_end = d_in + col.size();
-  thrust::reduce(thrust::device, d_in, d_end, init, cudf::DeviceSum{});
-}
-
-template <typename TypeParam, bool cub_or_thrust>
-void BM_pair_iterator(benchmark::State& state)
-{
-  cudf::size_type const column_size{(cudf::size_type)state.range(0)};
-  using T       = TypeParam;
-  auto num_gen  = thrust::counting_iterator<cudf::size_type>(0);
-  auto null_gen =
-    thrust::make_transform_iterator(num_gen, [](cudf::size_type row) { return row % 2 == 0; });
-
-  cudf::test::fixed_width_column_wrapper<T> wrap_hasnull_F(num_gen, num_gen + column_size);
-  cudf::test::fixed_width_column_wrapper<T> wrap_hasnull_T(
-    num_gen, num_gen + column_size, null_gen);
-  cudf::column_view hasnull_F = wrap_hasnull_F;
-  cudf::column_view hasnull_T = wrap_hasnull_T;
-
-  // Initialize dev_result to false
-  auto dev_result = cudf::detail::make_zeroed_device_uvector_sync<thrust::pair<T, bool>>(
-    1, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
-    if (cub_or_thrust) {
-      pair_iterator_bench_cub<T>(hasnull_T,
-                                 dev_result);  // driven by pair iterator with nulls
-    } else {
-      pair_iterator_bench_thrust<T>(hasnull_T,
-                                    dev_result);  // driven by pair iterator with nulls
-    }
-  }
-  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * column_size *
-                          sizeof(TypeParam));
-}
-
 #define ITER_BM_BENCHMARK_DEFINE(name, type, cub_or_thrust, raw_or_iterator) \
   BENCHMARK_DEFINE_F(Iterator, name)(::benchmark::State & state)             \
   {                                                                          \
@@ -238,17 +175,3 @@ ITER_BM_BENCHMARK_DEFINE(double_cub_raw, double, true, true);
 ITER_BM_BENCHMARK_DEFINE(double_cub_iter, double, true, false);
 ITER_BM_BENCHMARK_DEFINE(double_thrust_raw, double, false, true);
 ITER_BM_BENCHMARK_DEFINE(double_thrust_iter, double, false, false);
-
-#define PAIRITER_BM_BENCHMARK_DEFINE(name, type, cub_or_thrust)  \
-  BENCHMARK_DEFINE_F(Iterator, name)(::benchmark::State & state) \
-  {                                                              \
-    BM_pair_iterator<type, cub_or_thrust>(state);                \
-  }                                                              \
-  BENCHMARK_REGISTER_F(Iterator, name)                           \
-    ->RangeMultiplier(10)                                        \
-    ->Range(1000, 10000000)                                      \
-    ->UseManualTime()                                            \
-    ->Unit(benchmark::kMillisecond);
-
-PAIRITER_BM_BENCHMARK_DEFINE(double_cub_pair, double, true);
-PAIRITER_BM_BENCHMARK_DEFINE(double_thrust_pair, double, false);
diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh
index f7984b29d6b..75bbe8174d3 100644
--- a/cpp/benchmarks/join/generate_input_tables.cuh
+++ b/cpp/benchmarks/join/generate_input_tables.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include
+#include
 #include
 #include
 #include
@@ -150,13 +151,8 @@ void generate_input_tables(key_type* const build_tbl,
   CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
     &num_blocks_init_probe_tbl, init_probe_tbl, block_size, 0));
 
-  int dev_id{-1};
-  CUDF_CUDA_TRY(cudaGetDevice(&dev_id));
-
-  int num_sms{-1};
-  CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));
-
-  int const num_states =
+  auto const num_sms    = cudf::detail::num_multiprocessors();
+  auto const num_states =
     num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size;
 
   rmm::device_uvector<curandState> devStates(num_states, cudf::get_default_stream());
diff --git a/cpp/benchmarks/reduction/anyall.cpp b/cpp/benchmarks/reduction/anyall.cpp
index e9d23881764..1e578fab181 100644
--- a/cpp/benchmarks/reduction/anyall.cpp
+++ b/cpp/benchmarks/reduction/anyall.cpp
@@ -16,65 +16,51 @@
 
 #include
 #include
-#include
-#include
-#include
+#include
 
 #include
 #include
 #include
-#include
+#include
 
-class Reduction : public cudf::benchmark {};
+#include
 
-template <typename type>
-void BM_reduction_anyall(benchmark::State& state,
-                         std::unique_ptr<cudf::reduce_aggregation> const& agg)
+template <typename DataType>
+static void reduction_anyall(nvbench::state& state, nvbench::type_list<DataType>)
 {
-  cudf::size_type const column_size{static_cast<cudf::size_type>(state.range(0))};
-  auto const dtype           = cudf::type_to_id<type>();
-  data_profile const profile = data_profile_builder().no_validity().distribution(
-    dtype, distribution_id::UNIFORM, 0, agg->kind == cudf::aggregation::ANY ? 0 : 100);
-  auto const values = create_random_column(dtype, row_count{column_size}, profile);
+  auto const size     = static_cast<cudf::size_type>(state.get_int64("size"));
+  auto const kind_str = state.get_string("kind");
 
-  cudf::data_type output_dtype{cudf::type_id::BOOL8};
+  auto const input_type = cudf::type_to_id<DataType>();
+  auto const agg = kind_str == "any" ? cudf::make_any_aggregation<cudf::reduce_aggregation>()
                                      : cudf::make_all_aggregation<cudf::reduce_aggregation>();
 
-  for (auto _ : state) {
-    cuda_event_timer timer(state, true);
-    auto result = cudf::reduce(*values, *agg, output_dtype);
-  }
+  data_profile const profile =
+    data_profile_builder().no_validity().distribution(input_type,
+                                                      distribution_id::UNIFORM,
+                                                      (kind_str == "all" ? 1 : 0),
+                                                      (kind_str == "any" ? 0 : 100));
+  auto const values = create_random_column(input_type, row_count{size}, profile);
 
-  // The benchmark takes a column and produces one scalar.
-  set_items_processed(state, column_size + 1);
-  set_bytes_processed(state, estimate_size(values->view()) + cudf::size_of(output_dtype));
-}
+  auto const output_type = cudf::data_type{cudf::type_id::BOOL8};
+  auto stream            = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  state.add_element_count(size);
+  state.add_global_memory_reads<DataType>(size);
+  state.add_global_memory_writes<nvbench::int8_t>(1);
 
-#define concat(a, b, c) a##b##c
-#define get_agg(op) concat(cudf::make_, op, _aggregation<cudf::reduce_aggregation>())
+  state.exec(nvbench::exec_tag::sync, [&values, output_type, &agg](nvbench::launch& launch) {
+    cudf::reduce(*values, *agg, output_type);
+  });
 
-// TYPE, OP
-#define RBM_BENCHMARK_DEFINE(name, type, aggregation)             \
-  BENCHMARK_DEFINE_F(Reduction, name)(::benchmark::State & state) \
-  {                                                               \
-    BM_reduction_anyall<type>(state, get_agg(aggregation));       \
-  }                                                               \
-  BENCHMARK_REGISTER_F(Reduction, name)                           \
-    ->UseManualTime()                                             \
-    ->Arg(10000)      /* 10k */                                   \
-    ->Arg(100000)     /* 100k */                                  \
-    ->Arg(1000000)    /* 1M */                                    \
-    ->Arg(10000000)   /* 10M */                                   \
-    ->Arg(100000000); /* 100M */
+  set_throughputs(state);
+}
 
-#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \
-  RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation)
+using Types = nvbench::type_list<bool, int8_t, int32_t, float>;
 
-REDUCE_BENCHMARK_DEFINE(bool, all);
-REDUCE_BENCHMARK_DEFINE(int8_t, all);
-REDUCE_BENCHMARK_DEFINE(int32_t, all);
-REDUCE_BENCHMARK_DEFINE(float, all);
-REDUCE_BENCHMARK_DEFINE(bool, any);
-REDUCE_BENCHMARK_DEFINE(int8_t, any);
-REDUCE_BENCHMARK_DEFINE(int32_t, any);
-REDUCE_BENCHMARK_DEFINE(float, any);
+NVBENCH_BENCH_TYPES(reduction_anyall, NVBENCH_TYPE_AXES(Types))
+  .set_name("anyall")
+  .set_type_axes_names({"DataType"})
+  .add_string_axis("kind", {"any", "all"})
+  .add_int64_axis("size", {100'000, 1'000'000, 10'000'000, 100'000'000});
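The distribution bounds in the profile above are chosen so that neither reduction can short-circuit: for "all" every value is drawn from [1, 100] (never zero, so ALL stays true to the end), while for "any" every value is drawn from [0, 0] (always zero, so ANY stays false). The host analogues make the worst-case behaviour plain (plain C++, for illustration only):

#include <algorithm>
#include <vector>

// ALL over all-nonzero data and ANY over all-zero data both scan every element
bool host_all(std::vector<int> const& v)
{
  return std::all_of(v.begin(), v.end(), [](int x) { return x != 0; });
}
bool host_any(std::vector<int> const& v)
{
  return std::any_of(v.begin(), v.end(), [](int x) { return x != 0; });
}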
diff --git a/cpp/benchmarks/reduction/dictionary.cpp b/cpp/benchmarks/reduction/dictionary.cpp
index 5095337dbb3..1bdb50a539a 100644
--- a/cpp/benchmarks/reduction/dictionary.cpp
+++ b/cpp/benchmarks/reduction/dictionary.cpp
@@ -16,79 +16,84 @@
 
 #include
 #include
-#include
-#include
+#include
+#include
 
 #include
 #include
 #include
 #include
 
-class ReductionDictionary : public cudf::benchmark {};
+#include
 
-template <typename T>
-void BM_reduction_dictionary(benchmark::State& state,
-                             std::unique_ptr<cudf::reduce_aggregation> const& agg)
+template <cudf::reduce_aggregation::Kind kind>
+static std::unique_ptr<cudf::reduce_aggregation> make_reduce_aggregation()
 {
-  cudf::size_type const column_size{static_cast<cudf::size_type>(state.range(0))};
+  switch (kind) {
+    case cudf::reduce_aggregation::ANY:
+      return cudf::make_any_aggregation<cudf::reduce_aggregation>();
+    case cudf::reduce_aggregation::ALL:
+      return cudf::make_all_aggregation<cudf::reduce_aggregation>();
+    case cudf::reduce_aggregation::MIN:
+      return cudf::make_min_aggregation<cudf::reduce_aggregation>();
+    case cudf::reduce_aggregation::MAX:
+      return cudf::make_max_aggregation<cudf::reduce_aggregation>();
+    case cudf::reduce_aggregation::MEAN:
+      return cudf::make_mean_aggregation<cudf::reduce_aggregation>();
+    default: CUDF_FAIL("Unsupported reduce aggregation in this benchmark");
+  }
+}
+
+template <typename DataType, cudf::reduce_aggregation::Kind kind>
+static void reduction_dictionary(nvbench::state& state,
+                                 nvbench::type_list<DataType, nvbench::enum_type<kind>>)
+{
+  cudf::size_type const size{static_cast<cudf::size_type>(state.get_int64("size"))};
 
-  // int column and encoded dictionary column
   data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution(
     cudf::type_to_id<int32_t>(),
     distribution_id::UNIFORM,
-    (agg->kind == cudf::aggregation::ALL ? 1 : 0),
-    (agg->kind == cudf::aggregation::ANY ? 0 : 100));
-  auto int_column = create_random_column(cudf::type_to_id<int32_t>(), row_count{column_size}, profile);
-  auto number_col = cudf::cast(*int_column, cudf::data_type{cudf::type_to_id<T>()});
+    (kind == cudf::aggregation::ALL ? 1 : 0),
+    (kind == cudf::aggregation::ANY ? 0 : 100));
+  auto int_column = create_random_column(cudf::type_to_id<int32_t>(), row_count{size}, profile);
+  auto number_col = cudf::cast(*int_column, cudf::data_type{cudf::type_to_id<DataType>()});
   auto values     = cudf::dictionary::encode(*number_col);
 
-  cudf::data_type output_dtype = [&] {
-    if (agg->kind == cudf::aggregation::ANY || agg->kind == cudf::aggregation::ALL)
+  cudf::data_type output_type = [&] {
+    if (kind == cudf::aggregation::ANY || kind == cudf::aggregation::ALL) {
       return cudf::data_type{cudf::type_id::BOOL8};
-    if (agg->kind == cudf::aggregation::MEAN) return cudf::data_type{cudf::type_id::FLOAT64};
-    return cudf::data_type{cudf::type_to_id<T>()};
+    }
+    if (kind == cudf::aggregation::MEAN) { return cudf::data_type{cudf::type_id::FLOAT64}; }
+    return cudf::data_type{cudf::type_to_id<DataType>()};
  }();
 
-  for (auto _ : state) {
-    cuda_event_timer timer(state, true);
-    auto result = cudf::reduce(*values, *agg, output_dtype);
+  auto agg = make_reduce_aggregation<kind>();
+
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  state.add_element_count(size);
+  state.add_global_memory_reads<DataType>(size);
+  if (kind == cudf::aggregation::ANY || kind == cudf::aggregation::ALL) {
+    state.add_global_memory_writes<nvbench::int8_t>(1);  // BOOL8s
+  } else {
+    state.add_global_memory_writes<DataType>(1);
   }
 
-  // The benchmark takes a column and produces two scalars.
-  set_items_processed(state, column_size + 1);
+  state.exec(nvbench::exec_tag::sync, [&values, output_type, &agg](nvbench::launch& launch) {
+    cudf::reduce(*values, *agg, output_type);
+  });
 
-  // We don't set the metrics for the size read/written as row_bit_count() doesn't
-  // support the dictionary type yet (and so is estimate_size()).
-  // See https://github.com/rapidsai/cudf/issues/16121 for details.
+  set_throughputs(state);
 }
 
-#define concat(a, b, c) a##b##c
-#define get_agg(op) concat(cudf::make_, op, _aggregation<cudf::reduce_aggregation>())
-
-// TYPE, OP
-#define RBM_BENCHMARK_DEFINE(name, type, aggregation)                       \
-  BENCHMARK_DEFINE_F(ReductionDictionary, name)(::benchmark::State & state) \
-  {                                                                         \
-    BM_reduction_dictionary<type>(state, get_agg(aggregation));             \
-  }                                                                         \
-  BENCHMARK_REGISTER_F(ReductionDictionary, name)                           \
-    ->UseManualTime()                                                       \
-    ->Arg(10000)      /* 10k */                                             \
-    ->Arg(100000)     /* 100k */                                            \
-    ->Arg(1000000)    /* 1M */                                              \
-    ->Arg(10000000)   /* 10M */                                             \
-    ->Arg(100000000); /* 100M */
-
-#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \
-  RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation)
+using Types    = nvbench::type_list<int32_t, float>;
+using AggKinds = nvbench::enum_type_list<cudf::reduce_aggregation::ANY,
                                          cudf::reduce_aggregation::ALL,
                                          cudf::reduce_aggregation::MIN,
                                          cudf::reduce_aggregation::MAX,
                                          cudf::reduce_aggregation::MEAN>;
 
-REDUCE_BENCHMARK_DEFINE(int32_t, all);
-REDUCE_BENCHMARK_DEFINE(float, all);
-REDUCE_BENCHMARK_DEFINE(int32_t, any);
-REDUCE_BENCHMARK_DEFINE(float, any);
-REDUCE_BENCHMARK_DEFINE(int32_t, min);
-REDUCE_BENCHMARK_DEFINE(float, min);
-REDUCE_BENCHMARK_DEFINE(int32_t, max);
-REDUCE_BENCHMARK_DEFINE(float, max);
-REDUCE_BENCHMARK_DEFINE(int32_t, mean);
-REDUCE_BENCHMARK_DEFINE(float, mean);
+NVBENCH_BENCH_TYPES(reduction_dictionary, NVBENCH_TYPE_AXES(Types, AggKinds))
+  .set_name("reduction_dictionary")
+  .set_type_axes_names({"DataType", "AggKinds"})
+  .add_int64_axis("size", {100'000, 1'000'000, 10'000'000, 100'000'000});
diff --git a/cpp/benchmarks/reduction/minmax.cpp b/cpp/benchmarks/reduction/minmax.cpp
index 050f2887221..636de303cc4 100644
--- a/cpp/benchmarks/reduction/minmax.cpp
+++ b/cpp/benchmarks/reduction/minmax.cpp
@@ -16,55 +16,42 @@
 
 #include
 #include
-#include
-#include
-#include
+#include
 
 #include
 #include
 #include
 
-class Reduction : public cudf::benchmark {};
+#include
 
-template <typename T>
-void BM_reduction(benchmark::State& state)
+template <typename DataType>
+static void reduction_minmax(nvbench::state& state, nvbench::type_list<DataType>)
 {
-  cudf::size_type const column_size{(cudf::size_type)state.range(0)};
-  auto const dtype_id = cudf::type_to_id<T>();
-  auto const input_column =
-    create_random_column(dtype_id, row_count{column_size}, data_profile_builder().no_validity());
+  auto const size = static_cast<cudf::size_type>(state.get_int64("size"));
 
-  for (auto _ : state) {
-    cuda_event_timer timer(state, true);
-    auto result = cudf::minmax(*input_column);
-  }
+  auto const input_type = cudf::type_to_id<DataType>();
 
-  // The benchmark takes a column and produces two scalars.
-  set_items_processed(state, column_size + 2);
-  cudf::data_type dtype = cudf::data_type{dtype_id};
-  set_bytes_processed(state, estimate_size(input_column->view()) + 2 * cudf::size_of(dtype));
-}
+  data_profile const profile =
+    data_profile_builder().no_validity().distribution(input_type, distribution_id::UNIFORM, 0, 100);
+  auto const input_column = create_random_column(input_type, row_count{size}, profile);
+
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  state.add_element_count(size);
+  state.add_global_memory_reads<DataType>(size);
+  state.add_global_memory_writes<DataType>(2);
 
-#define concat(a, b, c) a##b##c
-#define get_agg(op) concat(cudf::make_, op, _aggregation())
+  state.exec(nvbench::exec_tag::sync,
+             [&input_column](nvbench::launch& launch) { cudf::minmax(*input_column); });
+
+  set_throughputs(state);
+}
 
-// TYPE, OP
-#define RBM_BENCHMARK_DEFINE(name, type, aggregation)                                        \
-  BENCHMARK_DEFINE_F(Reduction, name)(::benchmark::State & state) { BM_reduction<type>(state); } \
-  BENCHMARK_REGISTER_F(Reduction, name)                                                      \
-    ->UseManualTime()                                                                        \
-    ->Arg(10000)      /* 10k */                                                              \
-    ->Arg(100000)     /* 100k */                                                             \
-    ->Arg(1000000)    /* 1M */                                                               \
-    ->Arg(10000000)   /* 10M */                                                              \
-    ->Arg(100000000); /* 100M */
+NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");
 
-#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \
-  RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation)
+using Types = nvbench::type_list<bool, int8_t, int32_t, cudf::timestamp_ms, float>;
 
-REDUCE_BENCHMARK_DEFINE(bool, minmax);
-REDUCE_BENCHMARK_DEFINE(int8_t, minmax);
-REDUCE_BENCHMARK_DEFINE(int32_t, minmax);
-using cudf::timestamp_ms;
-REDUCE_BENCHMARK_DEFINE(timestamp_ms, minmax);
-REDUCE_BENCHMARK_DEFINE(float, minmax);
+NVBENCH_BENCH_TYPES(reduction_minmax, NVBENCH_TYPE_AXES(Types))
+  .set_name("minmax")
+  .set_type_axes_names({"DataType"})
+  .add_int64_axis("size", {100'000, 1'000'000, 10'000'000, 100'000'000});
diff --git a/cpp/benchmarks/reduction/reduce.cpp b/cpp/benchmarks/reduction/reduce.cpp
index 63c96f4fe9e..a30c27c519c 100644
--- a/cpp/benchmarks/reduction/reduce.cpp
+++ b/cpp/benchmarks/reduction/reduce.cpp
@@ -16,82 +16,82 @@
 
 #include
 #include
-#include
-#include
-#include
+#include
 
 #include
 #include
 #include
 #include
 
+#include
+
 #include
 
-class Reduction : public cudf::benchmark {};
+template <cudf::reduce_aggregation::Kind kind>
+static std::unique_ptr<cudf::reduce_aggregation> make_reduce_aggregation()
+{
+  switch (kind) {
+    case cudf::reduce_aggregation::MIN:
+      return cudf::make_min_aggregation<cudf::reduce_aggregation>();
+    case cudf::reduce_aggregation::SUM:
+      return cudf::make_sum_aggregation<cudf::reduce_aggregation>();
+    case cudf::reduce_aggregation::MEAN:
+      return cudf::make_mean_aggregation<cudf::reduce_aggregation>();
+    case cudf::reduce_aggregation::PRODUCT:
+      return cudf::make_product_aggregation<cudf::reduce_aggregation>();
+    case cudf::reduce_aggregation::VARIANCE:
+      return cudf::make_variance_aggregation<cudf::reduce_aggregation>();
+    case cudf::reduce_aggregation::STD:
+      return cudf::make_std_aggregation<cudf::reduce_aggregation>();
+    default: CUDF_FAIL("Unsupported reduce aggregation in this benchmark");
+  }
+}
 
-template <typename type>
-void BM_reduction(benchmark::State& state, std::unique_ptr<cudf::reduce_aggregation> const& agg)
+template <typename DataType, cudf::reduce_aggregation::Kind kind>
+static void reduction(nvbench::state& state, nvbench::type_list<DataType, nvbench::enum_type<kind>>)
 {
-  cudf::size_type const column_size{(cudf::size_type)state.range(0)};
-  auto const dtype = cudf::type_to_id<type>();
+  auto const size = static_cast<cudf::size_type>(state.get_int64("size"));
+  if (cudf::is_chrono<DataType>() && kind != cudf::aggregation::MIN) {
+    state.skip("Skip chrono types for some aggregations");
+  }
+
+  auto const input_type = cudf::type_to_id<DataType>();
   data_profile const profile =
-    data_profile_builder().no_validity().distribution(dtype, distribution_id::UNIFORM, 0, 100);
-  auto const input_column = create_random_column(dtype, row_count{column_size}, profile);
+    data_profile_builder().no_validity().distribution(input_type, distribution_id::UNIFORM, 0, 100);
+  auto const input_column = create_random_column(input_type, row_count{size}, profile);
 
-  cudf::data_type output_dtype =
-    (agg->kind == cudf::aggregation::MEAN || agg->kind == cudf::aggregation::VARIANCE ||
-     agg->kind == cudf::aggregation::STD)
+  cudf::data_type output_type =
+    (kind == cudf::aggregation::MEAN || kind == cudf::aggregation::VARIANCE ||
+     kind == cudf::aggregation::STD)
       ? cudf::data_type{cudf::type_id::FLOAT64}
      : input_column->type();
 
-  for (auto _ : state) {
-    cuda_event_timer timer(state, true);
-    auto result = cudf::reduce(*input_column, *agg, output_dtype);
-  }
+  auto agg = make_reduce_aggregation<kind>();
 
-  // The benchmark takes a column and produces two scalars.
-  set_items_processed(state, column_size + 1);
-  set_bytes_processed(state, estimate_size(input_column->view()) + cudf::size_of(output_dtype));
-}
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  state.add_element_count(size);
+  state.add_global_memory_reads<DataType>(size);
+  state.add_global_memory_writes<DataType>(1);
 
-#define concat(a, b, c) a##b##c
-#define get_agg(op) concat(cudf::make_, op, _aggregation<cudf::reduce_aggregation>())
+  state.exec(nvbench::exec_tag::sync, [&input_column, output_type, &agg](nvbench::launch& launch) {
+    cudf::reduce(*input_column, *agg, output_type);
+  });
 
-// TYPE, OP
-#define RBM_BENCHMARK_DEFINE(name, type, aggregation)             \
-  BENCHMARK_DEFINE_F(Reduction, name)(::benchmark::State & state) \
-  {                                                               \
-    BM_reduction<type>(state, get_agg(aggregation));              \
-  }                                                               \
-  BENCHMARK_REGISTER_F(Reduction, name)                           \
-    ->UseManualTime()                                             \
-    ->Arg(10000)      /* 10k */                                   \
-    ->Arg(100000)     /* 100k */                                  \
-    ->Arg(1000000)    /* 1M */                                    \
-    ->Arg(10000000)   /* 10M */                                   \
-    ->Arg(100000000); /* 100M */
+  set_throughputs(state);
+}
 
-#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \
-  RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation)
+NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");
 
-#define REDUCE_BENCHMARK_NUMERIC(aggregation)       \
-  REDUCE_BENCHMARK_DEFINE(bool, aggregation);       \
-  REDUCE_BENCHMARK_DEFINE(int8_t, aggregation);     \
-  REDUCE_BENCHMARK_DEFINE(int32_t, aggregation);    \
-  REDUCE_BENCHMARK_DEFINE(int64_t, aggregation);    \
-  REDUCE_BENCHMARK_DEFINE(float, aggregation);      \
-  REDUCE_BENCHMARK_DEFINE(double, aggregation);
+using Types    = nvbench::type_list<bool, int8_t, int32_t, int64_t, float, double, cudf::timestamp_ms>;
+using AggKinds = nvbench::enum_type_list<cudf::reduce_aggregation::MIN,
                                          cudf::reduce_aggregation::SUM,
                                          cudf::reduce_aggregation::MEAN,
                                          cudf::reduce_aggregation::PRODUCT,
                                          cudf::reduce_aggregation::VARIANCE,
                                          cudf::reduce_aggregation::STD>;
 
-REDUCE_BENCHMARK_NUMERIC(sum);
-REDUCE_BENCHMARK_DEFINE(int32_t, product);
-REDUCE_BENCHMARK_DEFINE(float, product);
-REDUCE_BENCHMARK_DEFINE(int64_t, min);
-REDUCE_BENCHMARK_DEFINE(double, min);
-using cudf::timestamp_ms;
-REDUCE_BENCHMARK_DEFINE(timestamp_ms, min);
-REDUCE_BENCHMARK_DEFINE(int8_t, mean);
-REDUCE_BENCHMARK_DEFINE(float, mean);
-REDUCE_BENCHMARK_DEFINE(int32_t, variance);
-REDUCE_BENCHMARK_DEFINE(double, variance);
-REDUCE_BENCHMARK_DEFINE(int64_t, std);
-REDUCE_BENCHMARK_DEFINE(float, std);
+NVBENCH_BENCH_TYPES(reduction, NVBENCH_TYPE_AXES(Types, AggKinds))
+  .set_name("reduction")
+  .set_type_axes_names({"DataType", "AggKinds"})
+  .add_int64_axis("size", {100'000, 1'000'000, 10'000'000, 100'000'000});
diff --git a/cpp/benchmarks/reduction/scan.cpp b/cpp/benchmarks/reduction/scan.cpp
index dc05aad9807..f3d67a79498 100644
--- a/cpp/benchmarks/reduction/scan.cpp
+++ b/cpp/benchmarks/reduction/scan.cpp
@@ -16,9 +16,7 @@
 
 #include
 #include
-#include
-#include
-#include
+#include
 
 #include
 #include
@@ -26,43 +24,38 @@
 #include
 #include
 
-class ReductionScan : public cudf::benchmark {};
+#include
 
-template <typename type>
-static void BM_reduction_scan(benchmark::State& state, bool include_nulls)
+template <typename DataType>
+static void reduction_scan(nvbench::state& state, nvbench::type_list<DataType>)
 {
-  cudf::size_type const n_rows{(cudf::size_type)state.range(0)};
-  auto const dtype  = cudf::type_to_id<type>();
-  auto const column = create_random_column(dtype, row_count{n_rows});
-  if (!include_nulls) column->set_null_mask(rmm::device_buffer{}, 0);
+  auto const size       = static_cast<cudf::size_type>(state.get_int64("size"));
+  auto const nulls      = state.get_float64("nulls");
+  auto const input_type = cudf::type_to_id<DataType>();
 
-  std::unique_ptr<cudf::column> result = nullptr;
-  for (auto _ : state) {
-    cuda_event_timer timer(state, true);
-    result = cudf::scan(
-      *column, *cudf::make_min_aggregation<cudf::scan_aggregation>(), cudf::scan_type::INCLUSIVE);
-  }
+  data_profile const profile = data_profile_builder().null_probability(nulls).distribution(
+    input_type, distribution_id::UNIFORM, 0, 100);
+  auto const input_column = create_random_column(input_type, row_count{size}, profile);
 
-  // The benchmark takes a column and produces a new column of the same size as input.
-  set_items_processed(state, n_rows * 2);
-  set_bytes_processed(state, estimate_size(column->view()) + estimate_size(result->view()));
+  auto agg = cudf::make_min_aggregation<cudf::scan_aggregation>();
+
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  state.add_element_count(size);
+  state.add_global_memory_reads<DataType>(size);
+  state.add_global_memory_writes<DataType>(1);
+
+  state.exec(nvbench::exec_tag::sync, [&input_column, &agg](nvbench::launch& launch) {
+    cudf::scan(*input_column, *agg, cudf::scan_type::INCLUSIVE);
+  });
+
+  set_throughputs(state);
 }
 
-#define SCAN_BENCHMARK_DEFINE(name, type, nulls)                  \
-  BENCHMARK_DEFINE_F(ReductionScan, name)                         \
-  (::benchmark::State & state) { BM_reduction_scan<type>(state, nulls); } \
-  BENCHMARK_REGISTER_F(ReductionScan, name)                       \
-    ->UseManualTime()                                             \
-    ->Arg(10000)      /* 10k */                                   \
-    ->Arg(100000)     /* 100k */                                  \
-    ->Arg(1000000)    /* 1M */                                    \
-    ->Arg(10000000)   /* 10M */                                   \
-    ->Arg(100000000); /* 100M */
+using Types = nvbench::type_list<int8_t, int16_t, int32_t, uint32_t, uint64_t, float, double>;
 
-SCAN_BENCHMARK_DEFINE(int8_no_nulls, int8_t, false);
-SCAN_BENCHMARK_DEFINE(int32_no_nulls, int32_t, false);
-SCAN_BENCHMARK_DEFINE(uint64_no_nulls, uint64_t, false);
-SCAN_BENCHMARK_DEFINE(float_no_nulls, float, false);
-SCAN_BENCHMARK_DEFINE(int16_nulls, int16_t, true);
-SCAN_BENCHMARK_DEFINE(uint32_nulls, uint32_t, true);
-SCAN_BENCHMARK_DEFINE(double_nulls, double, true);
+NVBENCH_BENCH_TYPES(reduction_scan, NVBENCH_TYPE_AXES(Types))
+  .set_name("scan")
+  .set_type_axes_names({"DataType"})
+  .add_float64_axis("nulls", {0.0, 0.1})
+  .add_int64_axis("size", {100'000, 1'000'000, 10'000'000, 100'000'000});
diff --git a/cpp/benchmarks/sort/rank_lists.cpp b/cpp/benchmarks/sort/rank_lists.cpp
index fbdb40b3537..7015fe08089 100644
--- a/cpp/benchmarks/sort/rank_lists.cpp
+++ b/cpp/benchmarks/sort/rank_lists.cpp
@@ -37,6 +37,8 @@ void nvbench_rank_lists(nvbench::state& state, nvbench::type_list<nvbench::enum_type<method>>)
+#include
 #include
+#include
diff --git a/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp b/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp
--- a/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp
+++ b/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp
-#include
-#include
+#include
 
 namespace {
-constexpr cudf::size_type hundredM      = 1e8;
-constexpr cudf::size_type tenM          = 1e7;
-constexpr cudf::size_type tenK          = 1e4;
-constexpr cudf::size_type fifty_percent = 50;
-
-void percent_range(benchmark::internal::Benchmark* b)
-{
-  b->Unit(benchmark::kMillisecond);
-  for (int percent = 0; percent <= 100; percent += 10)
-    b->Args({hundredM, percent});
-}
-
-void size_range(benchmark::internal::Benchmark* b)
-{
-  b->Unit(benchmark::kMillisecond);
-  for (int size = tenK; size <= hundredM; size *= 10)
-    b->Args({size, fifty_percent});
-}
-
 template <typename T>
-void calculate_bandwidth(benchmark::State& state, cudf::size_type num_columns)
+void calculate_bandwidth(nvbench::state& state)
 {
-  cudf::size_type const column_size{static_cast<cudf::size_type>(state.range(0))};
-  cudf::size_type const percent_true{static_cast<cudf::size_type>(state.range(1))};
-
-  float const fraction                  = percent_true / 100.f;
-  cudf::size_type const column_size_out = fraction * column_size;
-  int64_t const mask_size =
-    sizeof(bool) * column_size + cudf::bitmask_allocation_size_bytes(column_size);
-  int64_t const validity_bytes_in = (fraction >= 1.0f / 32)
-                                      ? cudf::bitmask_allocation_size_bytes(column_size)
-                                      : 4 * column_size_out;
-  int64_t const validity_bytes_out = cudf::bitmask_allocation_size_bytes(column_size_out);
-  int64_t const column_bytes_out   = sizeof(T) * column_size_out;
+  auto const n_rows       = static_cast<cudf::size_type>(state.get_int64("rows"));
+  auto const n_cols       = static_cast<cudf::size_type>(state.get_int64("columns"));
+  auto const percent_true = static_cast<cudf::size_type>(state.get_int64("hits_%"));
+
+  double const fraction             = percent_true / 100.0;
+  cudf::size_type const output_size = fraction * n_rows;
+  int64_t const mask_size = sizeof(bool) * n_rows + cudf::bitmask_allocation_size_bytes(n_rows);
+  int64_t const validity_bytes_in =
+    (fraction >= 1.0 / 32) ? cudf::bitmask_allocation_size_bytes(n_rows) : 4 * output_size;
+  int64_t const validity_bytes_out = cudf::bitmask_allocation_size_bytes(output_size);
+  int64_t const column_bytes_out   = sizeof(T) * output_size;
   int64_t const column_bytes_in    = column_bytes_out;  // we only read unmasked inputs
 
-  int64_t const bytes_read =
-    (column_bytes_in + validity_bytes_in) * num_columns +  // reading columns
-    mask_size;                                             // reading boolean mask
+  int64_t const bytes_read = (column_bytes_in + validity_bytes_in) * n_cols +  // reading columns
+                             mask_size;  // reading boolean mask
   int64_t const bytes_written =
-    (column_bytes_out + validity_bytes_out) * num_columns;  // writing columns
+    (column_bytes_out + validity_bytes_out) * n_cols;  // writing columns
 
-  state.SetItemsProcessed(state.iterations() * column_size * num_columns);
-  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * (bytes_read + bytes_written));
+  state.add_element_count(n_rows * n_cols);
+  state.add_global_memory_reads(bytes_read);
+  state.add_global_memory_writes(bytes_written);
 }
 
 }  // namespace
 
-template <class T>
-void BM_apply_boolean_mask(benchmark::State& state, cudf::size_type num_columns)
+template <typename DataType>
+void apply_boolean_mask_benchmark(nvbench::state& state, nvbench::type_list<DataType>)
 {
-  cudf::size_type const column_size{static_cast<cudf::size_type>(state.range(0))};
-  cudf::size_type const percent_true{static_cast<cudf::size_type>(state.range(1))};
+  auto const n_rows       = static_cast<cudf::size_type>(state.get_int64("rows"));
+  auto const n_cols       = static_cast<cudf::size_type>(state.get_int64("columns"));
+  auto const percent_true = static_cast<cudf::size_type>(state.get_int64("hits_%"));
 
-  data_profile profile = data_profile_builder().cardinality(0).null_probability(0.0).distribution(
-    cudf::type_to_id<T>(), distribution_id::UNIFORM, 0, 100);
+  auto const input_type = cudf::type_to_id<DataType>();
+  data_profile profile  = data_profile_builder().cardinality(0).no_validity().distribution(
+    input_type, distribution_id::UNIFORM, 0, 20);
 
-  auto source_table = create_random_table(
-    cycle_dtypes({cudf::type_to_id<T>()}, num_columns), row_count{column_size}, profile);
+  auto source_table =
+    create_random_table(cycle_dtypes({input_type}, n_cols), row_count{n_rows}, profile);
 
   profile.set_bool_probability_true(percent_true / 100.0);
   profile.set_null_probability(std::nullopt);  // no null mask
-  auto mask = create_random_column(cudf::type_id::BOOL8, row_count{column_size}, profile);
+  auto mask = create_random_column(cudf::type_id::BOOL8, row_count{n_rows}, profile);
+
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  calculate_bandwidth<DataType>(state);
 
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    auto result = cudf::apply_boolean_mask(*source_table, mask->view());
-  }
+  state.exec(nvbench::exec_tag::sync, [&source_table, &mask](nvbench::launch& launch) {
+    cudf::apply_boolean_mask(*source_table, mask->view());
+  });
 
-  calculate_bandwidth<T>(state, num_columns);
+  set_throughputs(state);
 }
 
-template <typename T>
-class ApplyBooleanMask : public cudf::benchmark {
- public:
-  using TypeParam = T;
-};
-
-#define ABM_BENCHMARK_DEFINE(name, type, n_columns)                                    \
-  BENCHMARK_TEMPLATE_DEFINE_F(ApplyBooleanMask, name, type)(::benchmark::State & st)   \
-  {                                                                                    \
-    BM_apply_boolean_mask<type>(st, n_columns);                                        \
-  }
-
-ABM_BENCHMARK_DEFINE(float_1_col, float, 1);
-ABM_BENCHMARK_DEFINE(float_2_col, float, 2);
-ABM_BENCHMARK_DEFINE(float_4_col, float, 4);
-
-// shmoo 1, 2, 4 column float across percentage true
-BENCHMARK_REGISTER_F(ApplyBooleanMask, float_1_col)->Apply(percent_range);
-BENCHMARK_REGISTER_F(ApplyBooleanMask, float_2_col)->Apply(percent_range);
-BENCHMARK_REGISTER_F(ApplyBooleanMask, float_4_col)->Apply(percent_range);
-
-// shmoo 1, 2, 4 column float across column sizes with 50% true
-BENCHMARK_REGISTER_F(ApplyBooleanMask, float_1_col)->Apply(size_range);
-BENCHMARK_REGISTER_F(ApplyBooleanMask, float_2_col)->Apply(size_range);
-BENCHMARK_REGISTER_F(ApplyBooleanMask, float_4_col)->Apply(size_range);
-
-// spot benchmark other types
-ABM_BENCHMARK_DEFINE(int8_1_col, int8_t, 1);
-ABM_BENCHMARK_DEFINE(int16_1_col, int16_t, 1);
-ABM_BENCHMARK_DEFINE(int32_1_col, int32_t, 1);
-ABM_BENCHMARK_DEFINE(int64_1_col, int64_t, 1);
-ABM_BENCHMARK_DEFINE(double_1_col, double, 1);
-BENCHMARK_REGISTER_F(ApplyBooleanMask, int8_1_col)->Args({tenM, fifty_percent});
-BENCHMARK_REGISTER_F(ApplyBooleanMask, int16_1_col)->Args({tenM, fifty_percent});
-BENCHMARK_REGISTER_F(ApplyBooleanMask, int32_1_col)->Args({tenM, fifty_percent});
-BENCHMARK_REGISTER_F(ApplyBooleanMask, int64_1_col)->Args({tenM, fifty_percent});
-BENCHMARK_REGISTER_F(ApplyBooleanMask, double_1_col)->Args({tenM, fifty_percent});
+using data_type = nvbench::type_list<float, int8_t, int16_t, int32_t, int64_t, double>;
+NVBENCH_BENCH_TYPES(apply_boolean_mask_benchmark, NVBENCH_TYPE_AXES(data_type))
+  .set_name("apply_boolean_mask")
+  .set_type_axes_names({"type"})
+  .add_int64_axis("columns", {1, 4})
+  .add_int64_axis("rows", {100'000, 1'000'000, 10'000'000})
+  .add_int64_axis("hits_%", {10, 50, 100});
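The input-validity accounting in `calculate_bandwidth` above switches models at a 1/32 hit rate: at or above it the whole input null bitmask is charged, below it four bytes per surviving row are counted instead. As a standalone sketch of that branch (the helper name is hypothetical):

#include <cstdint>

// bitmask_bytes would come from cudf::bitmask_allocation_size_bytes(n_rows)
int64_t validity_bytes_in(double fraction, int64_t output_size, int64_t bitmask_bytes)
{
  // dense hits: the whole bitmask is effectively read;
  // sparse hits: roughly one 4-byte word per surviving row
  return (fraction >= 1.0 / 32) ? bitmask_bytes : 4 * output_size;
}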
diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp
index c04b6516903..d7deebca89a 100644
--- a/cpp/benchmarks/stream_compaction/distinct.cpp
+++ b/cpp/benchmarks/stream_compaction/distinct.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
  */
 
 #include
+#include
 
 #include
 #include
@@ -23,15 +24,29 @@
 
 #include
 
+#include
+
 NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");
 
 template <typename Type>
 void nvbench_distinct(nvbench::state& state, nvbench::type_list<Type>)
 {
-  cudf::size_type const num_rows = state.get_int64("NumRows");
+  cudf::size_type const num_rows    = state.get_int64("NumRows");
+  auto const keep                   = get_keep(state.get_string("keep"));
+  cudf::size_type const cardinality = state.get_int64("cardinality");
+
+  if (cardinality > num_rows) {
+    state.skip("cardinality > num_rows");
+    return;
+  }
 
-  data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution(
-    cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 100);
+  data_profile profile = data_profile_builder()
+                           .cardinality(cardinality)
+                           .null_probability(0.01)
+                           .distribution(cudf::type_to_id<Type>(),
+                                         distribution_id::UNIFORM,
+                                         static_cast<Type>(0),
+                                         std::numeric_limits<Type>::max());
 
   auto source_column = create_random_column(cudf::type_to_id<Type>(), row_count{num_rows}, profile);
@@ -40,20 +55,19 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list<Type>)
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
 
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    auto result = cudf::distinct(input_table,
-                                 {0},
-                                 cudf::duplicate_keep_option::KEEP_ANY,
-                                 cudf::null_equality::EQUAL,
-                                 cudf::nan_equality::ALL_EQUAL);
+    auto result = cudf::distinct(
+      input_table, {0}, keep, cudf::null_equality::EQUAL, cudf::nan_equality::ALL_EQUAL);
   });
 }
 
-using data_type = nvbench::type_list<bool, int8_t, int32_t, int64_t, float, cudf::timestamp_ms>;
+using data_type = nvbench::type_list<int32_t, int64_t>;
 
 NVBENCH_BENCH_TYPES(nvbench_distinct, NVBENCH_TYPE_AXES(data_type))
   .set_name("distinct")
   .set_type_axes_names({"Type"})
-  .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000});
+  .add_string_axis("keep", {"any", "first", "last", "none"})
+  .add_int64_axis("cardinality", {100, 100'000, 10'000'000, 1'000'000'000})
+  .add_int64_axis("NumRows", {100, 100'000, 10'000'000, 1'000'000'000});
 
 template <typename Type>
 void nvbench_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
@@ -61,6 +75,7 @@ void nvbench_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
   auto const size               = state.get_int64("ColumnSize");
   auto const dtype              = cudf::type_to_id<Type>();
   double const null_probability = state.get_float64("null_probability");
+  auto const keep               = get_keep(state.get_string("keep"));
 
   auto builder = data_profile_builder().null_probability(null_probability);
   if (dtype == cudf::type_id::LIST) {
@@ -80,11 +95,8 @@ void nvbench_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
 
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    auto result = cudf::distinct(*table,
-                                 {0},
-                                 cudf::duplicate_keep_option::KEEP_ANY,
-                                 cudf::null_equality::EQUAL,
-                                 cudf::nan_equality::ALL_EQUAL);
+    auto result =
+      cudf::distinct(*table, {0}, keep, cudf::null_equality::EQUAL, cudf::nan_equality::ALL_EQUAL);
   });
 }
 
@@ -92,5 +104,6 @@ NVBENCH_BENCH_TYPES(nvbench_distinct_list,
                     NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, cudf::list_view>))
   .set_name("distinct_list")
   .set_type_axes_names({"Type"})
+  .add_string_axis("keep", {"any", "first", "last", "none"})
   .add_float64_axis("null_probability", {0.0, 0.1})
   .add_int64_axis("ColumnSize", {100'000'000});
diff --git a/cpp/benchmarks/stream_compaction/stable_distinct.cpp b/cpp/benchmarks/stream_compaction/stable_distinct.cpp
index bcee3048013..0a8836c0583 100644
--- a/cpp/benchmarks/stream_compaction/stable_distinct.cpp
+++ b/cpp/benchmarks/stream_compaction/stable_distinct.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
  */
 
 #include
+#include
 
 #include
 #include
@@ -23,15 +24,29 @@
 
 #include
 
+#include
+
 NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");
 
 template <typename Type>
 void nvbench_stable_distinct(nvbench::state& state, nvbench::type_list<Type>)
 {
-  cudf::size_type const num_rows = state.get_int64("NumRows");
+  cudf::size_type const num_rows    = state.get_int64("NumRows");
+  auto const keep                   = get_keep(state.get_string("keep"));
+  cudf::size_type const cardinality = state.get_int64("cardinality");
+
+  if (cardinality > num_rows) {
+    state.skip("cardinality > num_rows");
+    return;
+  }
 
-  data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution(
-    cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 100);
+  data_profile profile = data_profile_builder()
+                           .cardinality(cardinality)
+                           .null_probability(0.01)
+                           .distribution(cudf::type_to_id<Type>(),
+                                         distribution_id::UNIFORM,
+                                         static_cast<Type>(0),
+                                         std::numeric_limits<Type>::max());
 
   auto source_column = create_random_column(cudf::type_to_id<Type>(), row_count{num_rows}, profile);
@@ -40,20 +55,19 @@ void nvbench_stable_distinct(nvbench::state& state, nvbench::type_list<Type>)
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
 
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    auto result = cudf::stable_distinct(input_table,
-                                        {0},
-                                        cudf::duplicate_keep_option::KEEP_ANY,
-                                        cudf::null_equality::EQUAL,
-                                        cudf::nan_equality::ALL_EQUAL);
+    auto result = cudf::stable_distinct(
+      input_table, {0}, keep, cudf::null_equality::EQUAL, cudf::nan_equality::ALL_EQUAL);
   });
 }
 
-using data_type = nvbench::type_list<bool, int8_t, int32_t, int64_t, float, cudf::timestamp_ms>;
+using data_type = nvbench::type_list<int32_t, int64_t>;
 
 NVBENCH_BENCH_TYPES(nvbench_stable_distinct, NVBENCH_TYPE_AXES(data_type))
   .set_name("stable_distinct")
   .set_type_axes_names({"Type"})
-  .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000});
+  .add_string_axis("keep", {"any", "first", "last", "none"})
+  .add_int64_axis("cardinality", {100, 100'000, 10'000'000, 1'000'000'000})
+  .add_int64_axis("NumRows", {100, 100'000, 10'000'000, 1'000'000'000});
 
 template <typename Type>
 void nvbench_stable_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
@@ -61,6 +75,7 @@ void nvbench_stable_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
   auto const size               = state.get_int64("ColumnSize");
   auto const dtype              = cudf::type_to_id<Type>();
   double const null_probability = state.get_float64("null_probability");
+  auto const keep               = get_keep(state.get_string("keep"));
 
   auto builder = data_profile_builder().null_probability(null_probability);
   if (dtype == cudf::type_id::LIST) {
@@ -80,11 +95,8 @@ void nvbench_stable_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
 
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    auto result = cudf::stable_distinct(*table,
-                                        {0},
-                                        cudf::duplicate_keep_option::KEEP_ANY,
-                                        cudf::null_equality::EQUAL,
-                                        cudf::nan_equality::ALL_EQUAL);
+    auto result = cudf::stable_distinct(
+      *table, {0}, keep, cudf::null_equality::EQUAL, cudf::nan_equality::ALL_EQUAL);
   });
 }
 
@@ -92,5 +104,6 @@ NVBENCH_BENCH_TYPES(nvbench_stable_distinct_list,
                     NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, cudf::list_view>))
   .set_name("stable_distinct_list")
   .set_type_axes_names({"Type"})
+  .add_string_axis("keep", {"any", "first", "last", "none"})
   .add_float64_axis("null_probability", {0.0, 0.1})
   .add_int64_axis("ColumnSize", {100'000'000});
NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +cudf::duplicate_keep_option get_keep(std::string const& keep_str) +{ + if (keep_str == "any") { + return cudf::duplicate_keep_option::KEEP_ANY; + } else if (keep_str == "first") { + return cudf::duplicate_keep_option::KEEP_FIRST; + } else if (keep_str == "last") { + return cudf::duplicate_keep_option::KEEP_LAST; + } else if (keep_str == "none") { + return cudf::duplicate_keep_option::KEEP_NONE; + } else { + CUDF_FAIL("Unsupported keep option."); + } +} diff --git a/cpp/benchmarks/stream_compaction/stream_compaction_common.hpp b/cpp/benchmarks/stream_compaction/stream_compaction_common.hpp new file mode 100644 index 00000000000..d1ef2b10f41 --- /dev/null +++ b/cpp/benchmarks/stream_compaction/stream_compaction_common.hpp @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +cudf::duplicate_keep_option get_keep(std::string const& keep_str); diff --git a/cpp/benchmarks/string/slice.cpp b/cpp/benchmarks/string/slice.cpp index 0f973a7c8b5..1898f0340b6 100644 --- a/cpp/benchmarks/string/slice.cpp +++ b/cpp/benchmarks/string/slice.cpp @@ -14,11 +14,8 @@ * limitations under the License. 
*/ -#include "string_bench_args.hpp" - #include -#include -#include +#include #include @@ -29,56 +26,56 @@ #include +#include + #include -class StringSlice : public cudf::benchmark {}; +static void bench_slice(nvbench::state& state) +{ + auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows")); + auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width")); + auto const stype = state.get_string("type"); -enum slice_type { position, multi_position }; + if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >= + static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } -static void BM_slice(benchmark::State& state, slice_type rt) -{ - cudf::size_type const n_rows{static_cast<cudf::size_type>(state.range(0))}; - cudf::size_type const max_str_length{static_cast<cudf::size_type>(state.range(1))}; data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); - auto starts_itr = thrust::constant_iterator<cudf::size_type>(max_str_length / 3); - auto stops_itr = thrust::constant_iterator<cudf::size_type>(max_str_length / 2); - cudf::test::fixed_width_column_wrapper<cudf::size_type> starts(starts_itr, starts_itr + n_rows); - cudf::test::fixed_width_column_wrapper<cudf::size_type> stops(stops_itr, stops_itr + n_rows); + auto starts_itr = thrust::constant_iterator<cudf::size_type>(row_width / 4); + auto starts = + cudf::test::fixed_width_column_wrapper<cudf::size_type>(starts_itr, starts_itr + num_rows); + auto stops_itr = thrust::constant_iterator<cudf::size_type>(row_width / 3); + auto stops = + cudf::test::fixed_width_column_wrapper<cudf::size_type>(stops_itr, stops_itr + num_rows); - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - switch (rt) { - case position: - cudf::strings::slice_strings(input, max_str_length / 3, max_str_length / 2); - break; - case multi_position: cudf::strings::slice_strings(input, starts, stops); break; - } + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + // gather some throughput statistics as well + auto chars_size = input.chars_size(stream); + state.add_element_count(chars_size, "chars_size"); // number of bytes + state.add_global_memory_reads<nvbench::int8_t>(chars_size); // all bytes are read + auto output_size = (row_width / 3 - row_width / 4) * num_rows; + state.add_global_memory_writes<nvbench::int8_t>(output_size); + + if (stype == "multi") { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::strings::slice_strings(input, starts, stops, stream); + }); + } else { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::strings::slice_strings(input, row_width / 4, row_width / 3, 1, stream); + }); } - state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); + set_throughputs(state); } -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 << 5; - int const max_rowlen = 1 << 13; - int const len_mult = 2; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); -} - -#define STRINGS_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(StringSlice, name) \ - (::benchmark::State & st) { BM_slice(st, slice_type::name); } \ - BENCHMARK_REGISTER_F(StringSlice, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -STRINGS_BENCHMARK_DEFINE(position) -STRINGS_BENCHMARK_DEFINE(multi_position) +NVBENCH_BENCH(bench_slice) .set_name("slice") .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) .add_int64_axis("num_rows", {262144, 2097152, 16777216}) .add_string_axis("type", {"position", "multi"}); diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 0afdc526981..07cbf5150f4 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -22,82 +22,8 @@ include_guard(GLOBAL) -# Generate a FindArrow module for the case where we need to search for arrow within a pip install -# pyarrow. -function(find_libarrow_in_python_wheel PYARROW_VERSION) - string(REPLACE "." ";" PYARROW_VER_COMPONENTS "${PYARROW_VERSION}") - list(GET PYARROW_VER_COMPONENTS 0 PYARROW_MAJOR_VER) - list(GET PYARROW_VER_COMPONENTS 1 PYARROW_MINOR_VER) - - # Ensure that the major and minor versions are two digits long - string(LENGTH ${PYARROW_MAJOR_VER} PYARROW_MAJOR_LENGTH) - string(LENGTH ${PYARROW_MINOR_VER} PYARROW_MINOR_LENGTH) - if(${PYARROW_MAJOR_LENGTH} EQUAL 1) - set(PYARROW_MAJOR_VER "0${PYARROW_MAJOR_VER}") - endif() - if(${PYARROW_MINOR_LENGTH} EQUAL 1) - set(PYARROW_MINOR_VER "0${PYARROW_MINOR_VER}") - endif() - - set(PYARROW_LIB "libarrow.so.${PYARROW_MAJOR_VER}${PYARROW_MINOR_VER}") - - string( - APPEND - initial_code_block - [=[ -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) -execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" - OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR - OUTPUT_STRIP_TRAILING_WHITESPACE - COMMAND_ERROR_IS_FATAL ANY -) -list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") -]=] - ) - string( - APPEND - final_code_block - [=[ -list(POP_BACK CMAKE_PREFIX_PATH) -]=] - ) - rapids_find_generate_module( - Arrow NO_CONFIG - VERSION "${PYARROW_VERSION}" - LIBRARY_NAMES "${PYARROW_LIB}" - BUILD_EXPORT_SET cudf-exports - INSTALL_EXPORT_SET cudf-exports - HEADER_NAMES arrow/python/arrow_to_pandas.h INITIAL_CODE_BLOCK initial_code_block - FINAL_CODE_BLOCK final_code_block - ) - - find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL) - add_library(arrow_shared ALIAS Arrow::Arrow) - - rapids_export_package(BUILD Arrow cudf-exports) - rapids_export_package(INSTALL Arrow cudf-exports) -endfunction() - # This function finds arrow and sets any additional necessary environment variables.
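For context on the stream-compaction benchmark changes above, here is a minimal sketch of how the new `keep` string axis flows through the `get_keep` helper into `cudf::stable_distinct`; the wrapper function below is illustrative only and not part of the patch:

```cpp
#include <cudf/stream_compaction.hpp>
#include <cudf/table/table_view.hpp>

#include <memory>
#include <string>

// Illustrative only: resolve the nvbench axis string and run the benchmarked call.
// get_keep() is the helper declared in stream_compaction_common.hpp above; it maps
// "any"/"first"/"last"/"none" to the corresponding cudf::duplicate_keep_option.
std::unique_ptr<cudf::table> run_stable_distinct(cudf::table_view const& input_table,
                                                 std::string const& keep_str)
{
  auto const keep = get_keep(keep_str);
  // Deduplicate on key column 0, treating nulls and NaNs as equal, as the benchmark does.
  return cudf::stable_distinct(
    input_table, {0}, keep, cudf::null_equality::EQUAL, cudf::nan_equality::ALL_EQUAL);
}
```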
-function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON - ENABLE_PARQUET PYARROW_LIBARROW -) - - if(PYARROW_LIBARROW) - # Generate a FindArrow.cmake to find pyarrow's libarrow.so - find_libarrow_in_python_wheel(${VERSION}) - set(ARROW_FOUND - TRUE - PARENT_SCOPE - ) - set(ARROW_LIBRARIES - arrow_shared - PARENT_SCOPE - ) - return() - endif() - +function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_PARQUET) if(BUILD_STATIC) if(TARGET arrow_static) set(ARROW_FOUND @@ -124,10 +50,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endif() endif() - if(NOT ARROW_ARMV8_ARCH) - set(ARROW_ARMV8_ARCH "armv8-a") - endif() - if(NOT ARROW_SIMD_LEVEL) set(ARROW_SIMD_LEVEL "NONE") endif() @@ -150,14 +72,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB set(ARROW_OPENSSL_USE_SHARED ON) endif() - set(ARROW_PYTHON_OPTIONS "") - if(ENABLE_PYTHON) - list(APPEND ARROW_PYTHON_OPTIONS "ARROW_PYTHON ON") - # Arrow's logic to build Boost from source is busted, so we have to get it from the system. - list(APPEND ARROW_PYTHON_OPTIONS "BOOST_SOURCE SYSTEM") - list(APPEND ARROW_PYTHON_OPTIONS "ARROW_DEPENDENCY_SOURCE AUTO") - endif() - set(ARROW_PARQUET_OPTIONS "") if(ENABLE_PARQUET) # Arrow's logic to build Boost from source is busted, so we have to get it from the system. @@ -174,6 +88,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB GIT_REPOSITORY https://github.com/apache/arrow.git GIT_TAG apache-arrow-${VERSION} GIT_SHALLOW TRUE SOURCE_SUBDIR cpp + EXCLUDE_FROM_ALL ${EXCLUDE_FROM_ALL} OPTIONS "CMAKE_VERBOSE_MAKEFILE ON" "ARROW_ACERO ON" "ARROW_IPC ON" @@ -181,16 +96,14 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB "ARROW_WITH_BACKTRACE ON" "ARROW_CXXFLAGS -w" "ARROW_JEMALLOC OFF" - "ARROW_S3 ${ENABLE_S3}" - "ARROW_ORC ${ENABLE_ORC}" - # e.g. 
needed by blazingsql-io + "ARROW_S3 OFF" + "ARROW_ORC OFF" ${ARROW_PARQUET_OPTIONS} "ARROW_PARQUET ${ENABLE_PARQUET}" "ARROW_FILESYSTEM ON" - ${ARROW_PYTHON_OPTIONS} + "ARROW_PYTHON OFF" # Arrow modifies CMake's GLOBAL RULE_LAUNCH_COMPILE unless this is off "ARROW_USE_CCACHE OFF" - "ARROW_ARMV8_ARCH ${ARROW_ARMV8_ARCH}" "ARROW_SIMD_LEVEL ${ARROW_SIMD_LEVEL}" "ARROW_BUILD_STATIC ${ARROW_BUILD_STATIC}" "ARROW_BUILD_SHARED ${ARROW_BUILD_SHARED}" @@ -269,7 +182,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endif() if(Arrow_ADDED) - set(arrow_code_string [=[ if (TARGET cudf::arrow_shared AND (NOT TARGET arrow_shared)) @@ -324,101 +236,106 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB get_target_property(interface_libs arrow_static INTERFACE_LINK_LIBRARIES) endif() endif() - rapids_export( - BUILD Arrow - VERSION ${VERSION} - EXPORT_SET arrow_targets - GLOBAL_TARGETS arrow_shared arrow_static - NAMESPACE cudf:: - FINAL_CODE_BLOCK arrow_code_string - ) - - if(ENABLE_PARQUET) - - set(arrow_acero_code_string - [=[ - if (TARGET cudf::arrow_acero_shared AND (NOT TARGET arrow_acero_shared)) - add_library(arrow_acero_shared ALIAS cudf::arrow_acero_shared) - endif() - if (TARGET cudf::arrow_acero_static AND (NOT TARGET arrow_acero_static)) - add_library(arrow_acero_static ALIAS cudf::arrow_acero_static) - endif() - ]=] - ) + include(rapids-export) + if(NOT EXCLUDE_FROM_ALL) rapids_export( - BUILD ArrowAcero + BUILD Arrow VERSION ${VERSION} - EXPORT_SET arrow_acero_targets - GLOBAL_TARGETS arrow_acero_shared arrow_acero_static + EXPORT_SET arrow_targets + GLOBAL_TARGETS arrow_shared arrow_static NAMESPACE cudf:: - FINAL_CODE_BLOCK arrow_acero_code_string + FINAL_CODE_BLOCK arrow_code_string ) - set(arrow_dataset_code_string - [=[ - if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared)) - add_library(arrow_dataset_shared ALIAS cudf::arrow_dataset_shared) - endif() - if (TARGET cudf::arrow_dataset_static AND (NOT TARGET arrow_dataset_static)) - add_library(arrow_dataset_static ALIAS cudf::arrow_dataset_static) - endif() - ]=] - ) + if(ENABLE_PARQUET) + set(arrow_acero_code_string + [=[ + if (TARGET cudf::arrow_acero_shared AND (NOT TARGET arrow_acero_shared)) + add_library(arrow_acero_shared ALIAS cudf::arrow_acero_shared) + endif() + if (TARGET cudf::arrow_acero_static AND (NOT TARGET arrow_acero_static)) + add_library(arrow_acero_static ALIAS cudf::arrow_acero_static) + endif() + ]=] + ) - rapids_export( - BUILD ArrowDataset - VERSION ${VERSION} - EXPORT_SET arrow_dataset_targets - GLOBAL_TARGETS arrow_dataset_shared arrow_dataset_static - NAMESPACE cudf:: - FINAL_CODE_BLOCK arrow_dataset_code_string - ) + rapids_export( + BUILD ArrowAcero + VERSION ${VERSION} + EXPORT_SET arrow_acero_targets + GLOBAL_TARGETS arrow_acero_shared arrow_acero_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK arrow_acero_code_string + ) - set(parquet_code_string - [=[ - if (TARGET cudf::parquet_shared AND (NOT TARGET parquet_shared)) - add_library(parquet_shared ALIAS cudf::parquet_shared) - endif() - if (TARGET cudf::parquet_static AND (NOT TARGET parquet_static)) - add_library(parquet_static ALIAS cudf::parquet_static) - endif() - ]=] - ) + set(arrow_dataset_code_string + [=[ + if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared)) + add_library(arrow_dataset_shared ALIAS cudf::arrow_dataset_shared) + endif() + if (TARGET cudf::arrow_dataset_static AND (NOT TARGET arrow_dataset_static)) + 
add_library(arrow_dataset_static ALIAS cudf::arrow_dataset_static) + endif() + ]=] + ) - rapids_export( - BUILD ArrowDataset - VERSION ${VERSION} - EXPORT_SET arrow_dataset_targets - GLOBAL_TARGETS arrow_dataset_shared arrow_dataset_static - NAMESPACE cudf:: - FINAL_CODE_BLOCK arrow_dataset_code_string - ) + rapids_export( + BUILD ArrowDataset + VERSION ${VERSION} + EXPORT_SET arrow_dataset_targets + GLOBAL_TARGETS arrow_dataset_shared arrow_dataset_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK arrow_dataset_code_string + ) + set(parquet_code_string + [=[ + if (TARGET cudf::parquet_shared AND (NOT TARGET parquet_shared)) + add_library(parquet_shared ALIAS cudf::parquet_shared) + endif() + if (TARGET cudf::parquet_static AND (NOT TARGET parquet_static)) + add_library(parquet_static ALIAS cudf::parquet_static) + endif() + ]=] + ) + + rapids_export( + BUILD Parquet + VERSION ${VERSION} + EXPORT_SET parquet_targets + GLOBAL_TARGETS parquet_shared parquet_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK parquet_code_string + ) + endif() endif() endif() - # We generate the arrow-configfiles when we built arrow locally, so always do `find_dependency` - rapids_export_package(BUILD Arrow cudf-exports) - rapids_export_package(INSTALL Arrow cudf-exports) - if(ENABLE_PARQUET) - rapids_export_package(BUILD Parquet cudf-exports) - rapids_export_package(BUILD ArrowDataset cudf-exports) - endif() + if(NOT EXCLUDE_FROM_ALL) + # We generate the arrow-configfiles when we built arrow locally, so always do `find_dependency` + rapids_export_package(BUILD Arrow cudf-exports) + rapids_export_package(INSTALL Arrow cudf-exports) - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root( - BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports - ) - rapids_export_find_package_root( - BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] - EXPORT_SET cudf-exports - CONDITION ENABLE_PARQUET - ) - rapids_export_find_package_root( - BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] - EXPORT_SET cudf-exports - CONDITION ENABLE_PARQUET - ) + if(ENABLE_PARQUET) + rapids_export_package(BUILD Parquet cudf-exports) + rapids_export_package(BUILD ArrowDataset cudf-exports) + endif() + + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root( + BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports + ) + rapids_export_find_package_root( + BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] + EXPORT_SET cudf-exports + CONDITION ENABLE_PARQUET + ) + rapids_export_find_package_root( + BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] + EXPORT_SET cudf-exports + CONDITION ENABLE_PARQUET + ) + endif() set(ARROW_LIBRARIES "${ARROW_LIBRARIES}" @@ -435,7 +352,21 @@ if(NOT DEFINED CUDF_VERSION_Arrow) ) endif() +# Default to static arrow builds +if(NOT DEFINED CUDF_USE_ARROW_STATIC) + set(CUDF_USE_ARROW_STATIC ON) +endif() + +# CUDF_EXCLUDE_ARROW_FROM_ALL controls whether the Arrow targets are excluded from the `all` build +# and from installation (useful when Arrow is linked privately and statically); it defaults to OFF.
+if(NOT DEFINED CUDF_EXCLUDE_ARROW_FROM_ALL) + set(CUDF_EXCLUDE_ARROW_FROM_ALL OFF) +endif() + +if(NOT DEFINED CUDF_ENABLE_ARROW_PARQUET) + set(CUDF_ENABLE_ARROW_PARQUET OFF) +endif() + find_and_configure_arrow( - ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC} - ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} ${USE_LIBARROW_FROM_PYARROW} + ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_EXCLUDE_ARROW_FROM_ALL} + ${CUDF_ENABLE_ARROW_PARQUET} ) diff --git a/cpp/examples/build.sh b/cpp/examples/build.sh index dce81fb1677..2d6f6f316c7 100755 --- a/cpp/examples/build.sh +++ b/cpp/examples/build.sh @@ -61,3 +61,4 @@ build_example tpch build_example strings build_example nested_types build_example parquet_io +build_example interop diff --git a/cpp/examples/interop/CMakeLists.txt b/cpp/examples/interop/CMakeLists.txt new file mode 100644 index 00000000000..2816f613d3d --- /dev/null +++ b/cpp/examples/interop/CMakeLists.txt @@ -0,0 +1,27 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +cmake_minimum_required(VERSION 3.26.4) + +include(../set_cuda_architecture.cmake) + +rapids_cuda_init_architectures(interop_example) +rapids_cuda_set_architectures(RAPIDS) + +project( + interop_example + VERSION 0.0.1 + LANGUAGES CXX CUDA +) + +include(../fetch_dependencies.cmake) + +# The Arrow CMake is currently broken if the build type is not set +set(CMAKE_BUILD_TYPE Release) +# No need to install Arrow libs when only the final example executable is shipped. +set(CUDF_EXCLUDE_ARROW_FROM_ALL ON) +include(../../cmake/thirdparty/get_arrow.cmake) + +add_executable(interop interop.cpp) +target_link_libraries(interop PRIVATE cudf::cudf) +target_compile_features(interop PRIVATE cxx_std_17) +target_link_libraries(interop PRIVATE ${ARROW_LIBRARIES}) diff --git a/cpp/examples/interop/interop.cpp b/cpp/examples/interop/interop.cpp new file mode 100644 index 00000000000..8271c3836e4 --- /dev/null +++ b/cpp/examples/interop/interop.cpp @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include + +#include +#include + +// Helper functions to create StringViews +inline arrow::StringViewType::c_type to_inline_string_view(const void* data, int32_t const& size) +{ + arrow::StringViewType::c_type out; + out.inlined = {size, {}}; + memcpy(&out.inlined.data, data, size); + return out; +} +inline arrow::StringViewType::c_type to_inline_string_view(std::string_view const& v) +{ + return to_inline_string_view(v.data(), static_cast<int32_t>(v.size())); +} +inline arrow::StringViewType::c_type to_string_view(const void* data, + int32_t const& size, + int32_t const& buffer_index, + int32_t const& offset) +{ + if (size <= arrow::StringViewType::kInlineSize) { return to_inline_string_view(data, size); } + arrow::StringViewType::c_type out; + out.ref = {size, {}, buffer_index, offset}; + memcpy(&out.ref.prefix, data, sizeof(out.ref.prefix)); + return out; +} +inline arrow::StringViewType::c_type to_string_view(std::string_view const& v, + int32_t const& buffer_index, + int32_t const& offset) +{ + return to_string_view(v.data(), static_cast<int32_t>(v.size()), buffer_index, offset); +} + +/** + * @brief Create a StringViewArray + * + * @param data_buffers The data buffers + * @param views The string views + * @param validate Whether to validate the array + */ +arrow::Result<std::shared_ptr<arrow::StringViewArray>> make_string_view_array( + arrow::BufferVector const& data_buffers, + std::vector<arrow::StringViewType::c_type> const& views, + bool validate = true) +{ + auto const length = static_cast<int64_t>(views.size()); + auto const arr = std::make_shared<arrow::StringViewArray>( + arrow::utf8_view(), length, arrow::Buffer::FromVector(views), std::move(data_buffers)); + if (validate) { RETURN_NOT_OK(arr->ValidateFull()); } + return arr; +} + +/** + * @brief Convert a vector of strings into a vector of the + * constituent chars and a vector of offsets.
+ * + * @param strings The vector of strings + */ +auto make_chars_and_offsets(std::vector<std::string> const& strings) +{ + std::vector<char> chars{}; + std::vector<cudf::size_type> offsets(1, 0); + for (auto& str : strings) { + chars.insert(chars.end(), std::cbegin(str), std::cend(str)); + auto const last_offset = static_cast<std::size_t>(offsets.back()); + auto const next_offset = last_offset + str.length(); + CUDF_EXPECTS( + next_offset < static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()), + "Cannot use arrow_string_view_to_cudf_column to build a large strings column"); + offsets.push_back(static_cast<cudf::size_type>(next_offset)); + } + return std::make_tuple(std::move(chars), std::move(offsets)); +}; + +/** + * @brief Convert an Arrow StringViewArray to a cudf::column + * + * @param array The Arrow StringViewArray + * @param stream The CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr<cudf::column> arrow_string_view_to_cudf_column( + std::shared_ptr<arrow::StringViewArray> const& array, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) +{ + // Convert the string views into chars and offsets + std::vector<std::string> strings; + for (auto i = 0; i < array->length(); i++) { + strings.push_back(array->GetString(i)); + } + auto const [chars, offsets] = make_chars_and_offsets(strings); + + // Copy the chars vector to the device + rmm::device_uvector<char> d_chars(chars.size(), stream, mr); + CUDF_CUDA_TRY(cudaMemcpyAsync( + d_chars.data(), chars.data(), chars.size() * sizeof(char), cudaMemcpyDefault, stream.value())); + + // Copy the offsets vector to the device + // and wrap it in a cudf::column + rmm::device_uvector<cudf::size_type> d_offsets(offsets.size(), stream, mr); + CUDF_CUDA_TRY(cudaMemcpyAsync(d_offsets.data(), + offsets.data(), + offsets.size() * sizeof(cudf::size_type), + cudaMemcpyDefault, + stream.value())); + auto offsets_col = + std::make_unique<cudf::column>(std::move(d_offsets), rmm::device_buffer{0, stream, mr}, 0); + + // Create a string column out of the chars and offsets + return cudf::make_strings_column(array->length(), + std::move(offsets_col), + d_chars.release(), + 0, + rmm::device_buffer{0, stream, mr}); +} + +int main(int argc, char** argv) +{ + std::vector<std::shared_ptr<arrow::Buffer>> data_buffers; + std::vector<arrow::StringViewType::c_type> views; + + // Define the data buffers and string views + auto const buffer_a = + arrow::Buffer::FromString("hello rapids teamapache arrow interopnvidiacudf"); + data_buffers.push_back(buffer_a); + views.push_back(to_string_view("hello rapids team", 0, 0)); + views.push_back(to_string_view("apache arrow interop", 0, 17)); + views.push_back(to_inline_string_view("nvidia")); + views.push_back(to_inline_string_view("cudf")); + + // Create a StringViewArray + auto const string_view_col = make_string_view_array(data_buffers, views, true).ValueOrDie(); + std::cout << string_view_col->ToString() << std::endl; + + // Convert the StringViewArray to a cudf::column + auto const cudf_col = arrow_string_view_to_cudf_column(string_view_col); + + // Write the cudf::column as CSV + auto const tbl_view = cudf::table_view({cudf_col->view()}); + std::vector<std::string> const names = {"col_a"}; + + std::vector<char> h_buffer; + cudf::io::csv_writer_options writer_options = + cudf::io::csv_writer_options::builder(cudf::io::sink_info(&h_buffer), tbl_view) + .include_header(not names.empty()) + .names(names); + + cudf::io::write_csv(writer_options); + auto const result = std::string(h_buffer.data(), h_buffer.size()); + std::cout << result << std::endl; + + return 0;
+} diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index 1ea71ae9824..8c046c3f1e8 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -1,38 +1,39 @@ -# TPC-H Inspired Examples +# TPC-H Derived Examples Implements TPC-H queries using `libcudf`. We leverage the data generator (a wrapper around the official TPC-H datagen) from [Apache Datafusion](https://github.com/apache/datafusion) for generating data in Parquet format. ## Requirements - Rust +- [libcudf](https://github.com/rapidsai/cudf/blob/branch-24.08/CONTRIBUTING.md#setting-up-your-build-environment) -## Generating the Dataset ## Running Queries -1. Clone the datafusion repository. +1. Build the `libcudf` examples. ```bash -git clone git@github.com:apache/datafusion.git +cd cudf/cpp/examples +./build.sh ``` +The TPC-H query binaries will be built inside `tpch/build`. -2. Run the data generator. The data will be placed in a `data/` subdirectory. +2. Generate the dataset. ```bash -cd datafusion/benchmarks/ -./bench.sh data tpch - -# for scale factor 10, -./bench.sh data tpch10 +cd tpch/datagen +./datagen.sh [scale factor (1/10)] ``` -## Running Queries +The Parquet files will be generated in `tpch/datagen/datafusion/benchmarks/data/tpch_sf[scale factor]`. -1. Build the examples. +3. Set these environment variables for optimized runtime performance. ```bash -cd cpp/examples -./build.sh +export KVIKIO_COMPAT_MODE="on" +export LIBCUDF_CUFILE_POLICY="KVIKIO" +export CUDA_MODULE_LOADING="EAGER" ``` -The TPC-H query binaries would be built inside `examples/tpch/build`. -2. Execute the queries. +4. Execute the queries. ```bash -./tpch/build/tpch_q1 +./tpch/build/tpch_q[query no] [path to dataset] [memory resource type (cuda/pool/managed/managed_pool)] ``` -A parquet file named `q1.parquet` would be generated holding the results of the query. + +A Parquet file named `q[query no].parquet` will be generated, containing the results of the query. diff --git a/cpp/examples/tpch/datagen/correct_datatypes.py b/cpp/examples/tpch/datagen/correct_datatypes.py new file mode 100644 index 00000000000..8564774647b --- /dev/null +++ b/cpp/examples/tpch/datagen/correct_datatypes.py @@ -0,0 +1,60 @@ +# Copyright (c) 2024, NVIDIA CORPORATION.
+ +import os +import sys + +import pyarrow as pa +import pyarrow.parquet as pq +import pandas as pd + +if __name__ == "__main__": + dataset_path = str(sys.argv[1]) + tables = ["lineitem", "part", "partsupp", "orders", "supplier", "customer", "nation", "region"] + for table in tables: + filepath = os.path.join(dataset_path, f"{table}.parquet") + print("Reading file ", filepath) + + if filepath.endswith("lineitem.parquet"): + df = pd.read_parquet(filepath) + df["l_linenumber"] = df["l_linenumber"].astype("int64") + df["l_quantity"] = df["l_quantity"].astype("int64") + df["l_extendedprice"] = df["l_extendedprice"].astype("float64") + df["l_discount"] = df["l_discount"].astype("float64") + df["l_tax"] = df["l_tax"].astype("float64") + pq.write_table(pa.Table.from_pandas(df), filepath, compression="snappy") + + elif filepath.endswith("part.parquet"): + df = pd.read_parquet(filepath) + df["p_size"] = df["p_size"].astype("int64") + df["p_retailprice"] = df["p_retailprice"].astype("float64") + pq.write_table(pa.Table.from_pandas(df), filepath, compression="snappy") + + elif filepath.endswith("partsupp.parquet"): + df = pd.read_parquet(filepath) + df["ps_availqty"] = df["ps_availqty"].astype("int64") + df["ps_supplycost"] = df["ps_supplycost"].astype("float64") + pq.write_table(pa.Table.from_pandas(df), filepath, compression="snappy") + + elif filepath.endswith("orders.parquet"): + df = pd.read_parquet(filepath) + df["o_totalprice"] = df["o_totalprice"].astype("float64") + df["o_shippriority"] = df["o_shippriority"].astype("int64") + pq.write_table(pa.Table.from_pandas(df), filepath, compression="snappy") + + elif filepath.endswith("supplier.parquet"): + df = pd.read_parquet(filepath) + df["s_acctbal"] = df["s_acctbal"].astype("float64") + pq.write_table(pa.Table.from_pandas(df), filepath, compression="snappy") + + elif filepath.endswith("customer.parquet"): + df = pd.read_parquet(filepath) + df["c_acctbal"] = df["c_acctbal"].astype("float64") + pq.write_table(pa.Table.from_pandas(df), filepath, compression="snappy") + + elif filepath.endswith("nation.parquet"): + df = pd.read_parquet(filepath) + pq.write_table(pa.Table.from_pandas(df), filepath, compression="snappy") + + elif filepath.endswith("region.parquet"): + df = pd.read_parquet(filepath) + pq.write_table(pa.Table.from_pandas(df), filepath, compression="snappy") diff --git a/cpp/examples/tpch/datagen/datagen.sh b/cpp/examples/tpch/datagen/datagen.sh new file mode 100755 index 00000000000..0b03753daea --- /dev/null +++ b/cpp/examples/tpch/datagen/datagen.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
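Once `datagen.sh` and `correct_datatypes.py` have produced the Snappy-compressed Parquet files, the query binaries load them through libcudf's Parquet reader. A hedged sketch of that read path follows; the helper name and directory layout are assumptions for illustration, not code from this patch:

```cpp
#include <cudf/io/parquet.hpp>

#include <string>

// Illustrative only: read one generated TPC-H table into device memory.
cudf::io::table_with_metadata read_tpch_table(std::string const& dataset_dir,
                                              std::string const& table_name)
{
  // e.g. dataset_dir = "tpch/datagen/datafusion/benchmarks/data/tpch_sf1"
  auto const source  = cudf::io::source_info(dataset_dir + "/" + table_name + ".parquet");
  auto const options = cudf::io::parquet_reader_options::builder(source).build();
  return cudf::io::read_parquet(options);
}
```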
+ +set -e + +scale_factor=$1 +script_dir=$(pwd) + +# Clone the datafusion repository and apply a patch +# for single threaded data generation so that a +# single parquet file is generated for each table +rm -rf datafusion +git clone https://github.com/apache/datafusion.git datafusion +cd datafusion/ +git checkout 679a85f +git apply ${script_dir}/tpch.patch +cd benchmarks/ + +# Generate the data +# Currently, we support only scale factor 1 and 10 +if [ ${scale_factor} -eq 1 ]; then + ./bench.sh data tpch +elif [ ${scale_factor} -eq 10 ]; then + ./bench.sh data tpch10 +else + echo "Unsupported scale factor" + exit 1 +fi + +# Correct the datatypes of the parquet files +python3 ${script_dir}/correct_datatypes.py data/tpch_sf${scale_factor} diff --git a/cpp/examples/tpch/datagen/tpch.patch b/cpp/examples/tpch/datagen/tpch.patch new file mode 100644 index 00000000000..42727aa9904 --- /dev/null +++ b/cpp/examples/tpch/datagen/tpch.patch @@ -0,0 +1,33 @@ +diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh +index 3b854f6dc..f000f09c0 100755 +--- a/benchmarks/bench.sh ++++ b/benchmarks/bench.sh +@@ -311,6 +311,15 @@ data_tpch() { + $CARGO_COMMAND --bin tpch -- convert --input "${TPCH_DIR}" --output "${TPCH_DIR}" --format parquet + popd > /dev/null + fi ++ ++ cp ${TPCH_DIR}/lineitem/part-0.parquet ${TPCH_DIR}/lineitem.parquet ++ cp ${TPCH_DIR}/orders/part-0.parquet ${TPCH_DIR}/orders.parquet ++ cp ${TPCH_DIR}/part/part-0.parquet ${TPCH_DIR}/part.parquet ++ cp ${TPCH_DIR}/partsupp/part-0.parquet ${TPCH_DIR}/partsupp.parquet ++ cp ${TPCH_DIR}/customer/part-0.parquet ${TPCH_DIR}/customer.parquet ++ cp ${TPCH_DIR}/supplier/part-0.parquet ${TPCH_DIR}/supplier.parquet ++ cp ${TPCH_DIR}/nation/part-0.parquet ${TPCH_DIR}/nation.parquet ++ cp ${TPCH_DIR}/region/part-0.parquet ${TPCH_DIR}/region.parquet + } + + # Runs the tpch benchmark +diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs +index b5204b343..84fd2e78d 100644 +--- a/datafusion/common/src/config.rs ++++ b/datafusion/common/src/config.rs +@@ -250,7 +250,7 @@ config_namespace! { + /// concurrency. + /// + /// Defaults to the number of CPU cores on the system +- pub target_partitions: usize, default = num_cpus::get() ++ pub target_partitions: usize, default = 1 + + /// The default time zone + /// diff --git a/cpp/include/cudf/ast/detail/expression_evaluator.cuh b/cpp/include/cudf/ast/detail/expression_evaluator.cuh index 105d87ff96f..9d8762555d7 100644 --- a/cpp/include/cudf/ast/detail/expression_evaluator.cuh +++ b/cpp/include/cudf/ast/detail/expression_evaluator.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,8 +29,6 @@ #include -#include - namespace cudf { namespace ast { @@ -278,7 +276,7 @@ struct expression_evaluator { detail::device_data_reference const& input_reference, IntermediateDataType* thread_intermediate_storage, cudf::size_type left_row_index, - thrust::optional right_row_index = {}) const + cudf::size_type right_row_index = {}) const { // TODO: Everywhere in the code assumes that the table reference is either // left or right. Should we error-check somewhere to prevent @@ -291,7 +289,7 @@ struct expression_evaluator { // any case where input_reference.table_source == table_reference::RIGHT. // Otherwise, behavior is undefined. 
auto const row_index = - (input_reference.table_source == table_reference::LEFT) ? left_row_index : *right_row_index; + (input_reference.table_source == table_reference::LEFT) ? left_row_index : right_row_index; if constexpr (has_nulls) { return table.column(input_reference.data_index).is_valid(row_index) ? ReturnType(table.column(input_reference.data_index).element(row_index)) @@ -329,7 +327,7 @@ struct expression_evaluator { detail::device_data_reference const& device_data_reference, IntermediateDataType* thread_intermediate_storage, cudf::size_type left_row_index, - thrust::optional right_row_index = {}) const + cudf::size_type right_row_index = {}) const { CUDF_UNREACHABLE("Unsupported type in resolve_input."); } diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 89fe59bfeaa..c3238cb94fd 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -32,9 +32,9 @@ #include +#include #include #include -#include #include #include @@ -614,7 +614,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { /** * @brief Return an optional iterator to the first element of the column. * - * Dereferencing the returned iterator returns a `thrust::optional`. + * Dereferencing the returned iterator returns a `cuda::std::optional`. * * The element of this iterator contextually converts to bool. The conversion returns true * if the object contains a value and false if it does not contain a value. @@ -739,7 +739,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { /** * @brief Return an optional iterator to the element following the last element of the column. * - * The returned iterator represents a `thrust::optional` element. + * The returned iterator represents a `cuda::std::optional` element. * * This function does not participate in overload resolution if * `column_device_view::has_element_accessor()` is false. @@ -1272,21 +1272,21 @@ struct value_accessor { * @brief optional accessor of a column * * - * The optional_accessor always returns a `thrust::optional` of `column[i]`. The validity + * The optional_accessor always returns a `cuda::std::optional` of `column[i]`. The validity * of the optional is determined by the `Nullate` parameter which may be one of the following: * * - `nullate::YES` means that the column supports nulls and the optional returned * might be valid or invalid. * * - `nullate::NO` means the caller attests that the column has no null values, - * no checks will occur and `thrust::optional{column[i]}` will be + * no checks will occur and `cuda::std::optional{column[i]}` will be * return for each `i`. * * - `nullate::DYNAMIC` defers the assumption of nullability to runtime and the caller * specifies if the column has nulls at runtime. - * For `DYNAMIC{true}` the return value will be `thrust::optional{column[i]}` if - * element `i` is not null and `thrust::optional{}` if element `i` is null. - * For `DYNAMIC{false}` the return value will always be `thrust::optional{column[i]}`. + * For `DYNAMIC{true}` the return value will be `cuda::std::optional{column[i]}` if + * element `i` is not null and `cuda::std::optional{}` if element `i` is null. + * For `DYNAMIC{false}` the return value will always be `cuda::std::optional{column[i]}`. * * @throws cudf::logic_error if column datatype and template T type mismatch. 
* @throws cudf::logic_error if the column is not nullable and `with_nulls` evaluates to true @@ -1312,19 +1312,19 @@ struct optional_accessor { } /** - * @brief Returns a `thrust::optional` of `column[i]`. + * @brief Returns a `cuda::std::optional` of `column[i]`. * * @param i The index of the element to return - * @return A `thrust::optional` that contains the value of `column[i]` is not null. If that + * @return A `cuda::std::optional` that contains the value of `column[i]` is not null. If that * element is null, the resulting optional will not contain a value. */ - __device__ inline thrust::optional operator()(cudf::size_type i) const + __device__ inline cuda::std::optional operator()(cudf::size_type i) const { if (has_nulls) { - return (col.is_valid_nocheck(i)) ? thrust::optional{col.element(i)} - : thrust::optional{thrust::nullopt}; + return (col.is_valid_nocheck(i)) ? cuda::std::optional{col.element(i)} + : cuda::std::optional{cuda::std::nullopt}; } - return thrust::optional{col.element(i)}; + return cuda::std::optional{col.element(i)}; } Nullate has_nulls{}; ///< Indicates if the `col` should be checked for nulls. diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index c1f295b7ea8..b2dcb25acb5 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -469,7 +469,7 @@ std::unique_ptr make_strings_column(size_type num_strings, * offsets (depth 1) {0, 2, 5, 7} * data (depth 1) * offsets (depth 2) - * data (depth 1) {1, 2, 3, 4, 5, 6, 7} + * data (depth 2) {1, 2, 3, 4, 5, 6, 7} * @endcode * * @param[in] num_rows The number of lists the column represents. diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index b6310e6cd2f..4071fa01fb2 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh index 8418e279ce7..d260a4591b7 100644 --- a/cpp/include/cudf/detail/copy_if_else.cuh +++ b/cpp/include/cudf/detail/copy_if_else.cuh @@ -25,8 +25,8 @@ #include #include +#include #include -#include namespace cudf { namespace detail { @@ -70,7 +70,7 @@ __launch_bounds__(block_size) CUDF_KERNEL while (warp_cur <= warp_end) { auto const index = static_cast(tidx); auto const opt_value = - (index < end) ? (filter(index) ? lhs[index] : rhs[index]) : thrust::nullopt; + (index < end) ? (filter(index) ? lhs[index] : rhs[index]) : cuda::std::nullopt; if (opt_value) { out.element(index) = static_cast(*opt_value); } // update validity @@ -156,7 +156,7 @@ std::unique_ptr copy_if_else(bool nullable, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - // This is the type of the thrust::optional element in the passed iterators + // This is the type of the cuda::std::optional element in the passed iterators using Element = typename thrust::iterator_traits::value_type::value_type; size_type size = std::distance(lhs_begin, lhs_end); diff --git a/cpp/include/cudf/detail/cuco_helpers.hpp b/cpp/include/cudf/detail/cuco_helpers.hpp index dca5a39bece..926df921715 100644 --- a/cpp/include/cudf/detail/cuco_helpers.hpp +++ b/cpp/include/cudf/detail/cuco_helpers.hpp @@ -36,19 +36,10 @@ static double constexpr CUCO_DESIRED_LOAD_FACTOR = 0.5; * later expects a standard C++ `Allocator` interface. 
This allocator helper provides a simple way * to handle cuco memory allocation/deallocation with the given `stream` and the rmm default memory resource. + * + * @tparam T The allocator's value type. */ -class cuco_allocator - : public rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<char>> { - /// Default stream-ordered allocator type - using default_allocator = rmm::mr::polymorphic_allocator<char>; - /// The base allocator adaptor type - using base_type = rmm::mr::stream_allocator_adaptor<default_allocator>; - - public: - /** - * @brief Constructs the allocator adaptor with the given `stream` - */ - cuco_allocator(rmm::cuda_stream_view stream) : base_type{default_allocator{}, stream} {} -}; +template <typename T> +using cuco_allocator = rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<T>>; } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/distinct_hash_join.cuh b/cpp/include/cudf/detail/distinct_hash_join.cuh index c3bc3ad89fa..0b3d7ac58bf 100644 --- a/cpp/include/cudf/detail/distinct_hash_join.cuh +++ b/cpp/include/cudf/detail/distinct_hash_join.cuh @@ -99,7 +99,7 @@ struct distinct_hash_join { cuda::thread_scope_device, comparator_adapter, probing_scheme_type, - cudf::detail::cuco_allocator, + cudf::detail::cuco_allocator<char>, cuco_storage_type>; bool _has_nulls; ///< true if nulls are present in either build table or probe table diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 41f5494f78f..df6fe6e6ccb 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -609,7 +609,7 @@ void gather_bitmask(table_view const& source, stream); // Copy the valid counts into each column - auto const valid_counts = make_std_vector_sync(d_valid_counts, stream); + auto const valid_counts = make_host_vector_sync(d_valid_counts, stream); for (size_t i = 0; i < target.size(); ++i) { if (target[i]->nullable()) { auto const null_count = target_rows - valid_counts[i]; diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh index dfe79646167..7a1e38eefe0 100644 --- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh +++ b/cpp/include/cudf/detail/hash_reduce_by_row.cuh @@ -32,7 +32,7 @@ namespace cudf::detail { using hash_map_type = cuco::legacy:: - static_map<size_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>; + static_map<size_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator<char>>; /** * @brief The base struct for customized reduction functor to perform reduce-by-key with keys are diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index b5d57da6cd5..f0510c86c3a 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -22,9 +22,9 @@ #include #include +#include #include #include -#include #include namespace cudf { @@ -93,7 +93,7 @@ struct input_indexalator : base_normalator<input_indexalator> { */ __device__ inline cudf::size_type operator[](size_type idx) const { - void const* tp = p_ + (idx * this->width_); + void const* tp = p_ + (static_cast<std::ptrdiff_t>(idx) * this->width_); return type_dispatcher(this->dtype_, normalize_type{}, tp); } @@ -109,7 +109,7 @@ struct input_indexalator : base_normalator<input_indexalator> { CUDF_HOST_DEVICE input_indexalator(void const* data, data_type dtype, cudf::size_type offset = 0) : base_normalator<input_indexalator>(dtype), p_{static_cast<char const*>(data)} { - p_ += offset * this->width_; + p_ += static_cast<std::ptrdiff_t>(offset) * this->width_; } protected: @@ -165,7 +165,7 @@ struct output_indexalator : base_normalator<output_indexalator> __device__ inline output_indexalator const operator[](size_type idx) const { output_indexalator tmp{*this}; - tmp.p_ += (idx * this->width_); + tmp.p_ += static_cast<std::ptrdiff_t>(idx) * this->width_; return tmp; } @@
-376,10 +376,10 @@ struct indexalator_factory { iter = make_input_iterator(col); } - __device__ thrust::optional operator()(size_type i) const + __device__ cuda::std::optional operator()(size_type i) const { - return has_nulls && !bit_is_set(null_mask, i + offset) ? thrust::nullopt - : thrust::make_optional(iter[i]); + return has_nulls && !bit_is_set(null_mask, i + offset) ? cuda::std::nullopt + : cuda::std::make_optional(iter[i]); } }; @@ -400,9 +400,9 @@ struct indexalator_factory { iter = indexalator_factory::make_input_iterator(input); } - __device__ thrust::optional operator()(size_type) const + __device__ cuda::std::optional operator()(size_type) const { - return is_null ? thrust::nullopt : thrust::make_optional(*iter); + return is_null ? cuda::std::nullopt : cuda::std::make_optional(*iter); } }; diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 0b9319ba663..0d8f078c9d1 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -16,29 +16,13 @@ #pragma once -// We disable warning 611 because the `arrow::TableBatchReader` only partially -// override the `ReadNext` method of `arrow::RecordBatchReader::ReadNext` -// triggering warning 611-D from nvcc. -#ifdef __CUDACC__ -#pragma nv_diag_suppress 611 -#pragma nv_diag_suppress 2810 -#endif -#include - -#include -#ifdef __CUDACC__ -#pragma nv_diag_default 611 -#pragma nv_diag_default 2810 -#endif - #include #include #include #include #include - -#include +#include namespace CUDF_EXPORT cudf { namespace detail { @@ -61,89 +45,6 @@ DLManagedTensor* to_dlpack(table_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -// Creating arrow as per given type_id and buffer arguments -template -std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... 
args) -{ - switch (id) { - case type_id::BOOL8: return std::make_shared(std::forward(args)...); - case type_id::INT8: return std::make_shared(std::forward(args)...); - case type_id::INT16: return std::make_shared(std::forward(args)...); - case type_id::INT32: return std::make_shared(std::forward(args)...); - case type_id::INT64: return std::make_shared(std::forward(args)...); - case type_id::UINT8: return std::make_shared(std::forward(args)...); - case type_id::UINT16: return std::make_shared(std::forward(args)...); - case type_id::UINT32: return std::make_shared(std::forward(args)...); - case type_id::UINT64: return std::make_shared(std::forward(args)...); - case type_id::FLOAT32: return std::make_shared(std::forward(args)...); - case type_id::FLOAT64: return std::make_shared(std::forward(args)...); - case type_id::TIMESTAMP_DAYS: - return std::make_shared(std::make_shared(), - std::forward(args)...); - case type_id::TIMESTAMP_SECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::SECOND), - std::forward(args)...); - case type_id::TIMESTAMP_MILLISECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::MILLI), - std::forward(args)...); - case type_id::TIMESTAMP_MICROSECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::MICRO), - std::forward(args)...); - case type_id::TIMESTAMP_NANOSECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::NANO), - std::forward(args)...); - case type_id::DURATION_SECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::SECOND), - std::forward(args)...); - case type_id::DURATION_MILLISECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::MILLI), - std::forward(args)...); - case type_id::DURATION_MICROSECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::MICRO), - std::forward(args)...); - case type_id::DURATION_NANOSECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::NANO), - std::forward(args)...); - default: CUDF_FAIL("Unsupported type_id conversion to arrow"); - } -} - -// Converting arrow type to cudf type -data_type arrow_to_cudf_type(arrow::DataType const& arrow_type); - -/** - * @copydoc cudf::to_arrow(table_view input, std::vector const& metadata, - * rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr) - */ -std::shared_ptr to_arrow(table_view input, - std::vector const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr); - -/** - * @copydoc cudf::to_arrow(cudf::scalar const& input, column_metadata const& metadata, - * rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr) - */ -std::shared_ptr to_arrow(cudf::scalar const& input, - column_metadata const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr); -/** - * @copydoc cudf::from_arrow(arrow::Table const& input_table, rmm::cuda_stream_view stream, - * rmm::device_async_resource_ref mr) - */ -std::unique_ptr from_arrow(arrow::Table const& input_table, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::from_arrow(arrow::Scalar const& input, rmm::cuda_stream_view stream, - * rmm::device_async_resource_ref mr) - */ -std::unique_ptr from_arrow(arrow::Scalar const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - /** * @brief Return a maximum precision for a given type. 
* diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 9e6227ec19b..4349e1b70fd 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,10 +37,10 @@ #include #include +#include #include #include #include -#include #include #include @@ -186,7 +186,7 @@ auto make_null_replacement_iterator(column_device_view const& column, /** * @brief Constructs an optional iterator over a column's values and its validity. * - * Dereferencing the returned iterator returns a `thrust::optional`. + * Dereferencing the returned iterator returns a `cuda::std::optional`. * * The element of this iterator contextually converts to bool. The conversion returns true * if the object contains a value and false if it does not contain a value. @@ -237,7 +237,7 @@ auto make_null_replacement_iterator(column_device_view const& column, * @param column The column to iterate * @param has_nulls Indicates whether `column` is checked for nulls. * @return Iterator that returns valid column elements and the validity of the - * element in a `thrust::optional` + * element in a `cuda::std::optional` */ template auto make_optional_iterator(column_device_view const& column, Nullate has_nulls) @@ -393,7 +393,7 @@ auto inline make_scalar_iterator(scalar const& scalar_value) /** * @brief Optional accessor for a scalar * - * The `scalar_optional_accessor` always returns a `thrust::optional` of the scalar. + * The `scalar_optional_accessor` always returns a `cuda::std::optional` of the scalar. * The validity of the optional is determined by the `Nullate` parameter which may * be one of the following: * @@ -401,14 +401,14 @@ auto inline make_scalar_iterator(scalar const& scalar_value) * will contain a value only if the scalar is valid. * * - `nullate::NO` means the caller attests that the scalar will always be valid, - * no checks will occur and `thrust::optional{column[i]}` will return a value + * no checks will occur and `cuda::std::optional{column[i]}` will return a value * for each `i`. * * - `nullate::DYNAMIC` defers the assumption of nullability to runtime and the caller * specifies if the scalar may be valid or invalid. - * For `DYNAMIC{true}` the return value will be a `thrust::optional{scalar}` when the - * scalar is valid and a `thrust::optional{}` when the scalar is invalid. - * For `DYNAMIC{false}` the return value will always be a `thrust::optional{scalar}`. + * For `DYNAMIC{true}` the return value will be a `cuda::std::optional{scalar}` when the + * scalar is valid and a `cuda::std::optional{}` when the scalar is invalid. + * For `DYNAMIC{false}` the return value will always be a `cuda::std::optional{scalar}`. * * @throws `cudf::logic_error` if scalar datatype and Element type mismatch. 
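A short sketch of consuming these optional iterators, patterned on the usage the documentation above describes (names are illustrative; with this patch each dereference yields a `cuda::std::optional` rather than a `thrust::optional`):

```cpp
#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/iterator.cuh>

// Illustrative only: build optional iterators over a column and a scalar.
template <typename T>
void use_optional_iterators(cudf::column_view const& col,
                            cudf::scalar const& scalar_value,
                            bool col_has_nulls)
{
  auto d_col  = cudf::column_device_view::create(col);
  auto col_it = cudf::detail::make_optional_iterator<T>(
    *d_col, cudf::nullate::DYNAMIC{col_has_nulls});
  auto scalar_it = cudf::detail::make_optional_iterator<T>(
    scalar_value, cudf::nullate::DYNAMIC{scalar_value.is_valid()});
  // Each dereference is a cuda::std::optional<T>, engaged only for valid elements.
}
```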
* @@ -418,7 +418,7 @@ template <typename Element, typename Nullate> struct scalar_optional_accessor : public scalar_value_accessor<Element> { using super_t = scalar_value_accessor<Element>; - using value_type = thrust::optional<Element>; + using value_type = cuda::std::optional<Element>; scalar_optional_accessor(scalar const& scalar_value, Nullate with_nulls) : scalar_value_accessor<Element>(scalar_value), has_nulls{with_nulls} @@ -427,7 +427,7 @@ struct scalar_optional_accessor : public scalar_value_accessor<Element> { __device__ inline value_type const operator()(size_type) const { - if (has_nulls && !super_t::dscalar.is_valid()) { return value_type{thrust::nullopt}; } + if (has_nulls && !super_t::dscalar.is_valid()) { return value_type{cuda::std::nullopt}; } if constexpr (cudf::is_fixed_point<Element>()) { using namespace numeric; @@ -519,7 +519,7 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor`. + * Dereferencing the returned iterator returns a `cuda::std::optional`. * * The element of this iterator contextually converts to bool. The conversion returns true * if the object contains a value and false if it does not contain a value. @@ -575,7 +575,7 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor auto inline make_optional_iterator(scalar const& scalar_value, Nullate has_nulls) diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index ff7da4462a2..af46dd79cdb 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -59,7 +59,7 @@ struct hash_join { cuco::static_multimap, cuco::legacy::double_hashing>; hash_join() = delete; diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index 05194148a70..85d2ee9790f 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -29,9 +29,7 @@ namespace CUDF_EXPORT cudf { namespace detail { /** * @copydoc cudf::drop_nulls(table_view const&, std::vector<size_type> const&, - * cudf::size_type, rmm::device_async_resource_ref) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * cudf::size_type, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr<table>
drop_nulls(table_view const& input, std::vector<size_type> const& keys, @@ -41,9 +39,7 @@ std::unique_ptr<table>
drop_nulls(table_view const& input, /** * @copydoc cudf::drop_nans(table_view const&, std::vector<size_type> const&, - * cudf::size_type, rmm::device_async_resource_ref) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * cudf::size_type, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr<table>
drop_nans(table_view const& input, std::vector<size_type> const& keys, @@ -53,8 +49,6 @@ std::unique_ptr<table>
drop_nans(table_view const& input, /** * @copydoc cudf::apply_boolean_mask - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr<table>
apply_boolean_mask(table_view const& input, column_view const& boolean_mask, @@ -63,8 +57,6 @@ std::unique_ptr<table>
apply_boolean_mask(table_view const& input, /** * @copydoc cudf::unique - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr<table>
unique(table_view const& input, std::vector<size_type> const& keys, @@ -75,8 +67,6 @@ std::unique_ptr<table>
unique(table_view const& input, /** * @copydoc cudf::distinct - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr<table>
distinct(table_view const& input, std::vector<size_type> const& keys, @@ -110,9 +100,7 @@ rmm::device_uvector<size_type> distinct_indices(table_view const& input, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::unique_count(column_view const&, null_policy, nan_policy) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::unique_count(column_view const&, null_policy, nan_policy, rmm::cuda_stream_view) */ cudf::size_type unique_count(column_view const& input, null_policy null_handling, @@ -120,18 +108,14 @@ cudf::size_type unique_count(column_view const& input, rmm::cuda_stream_view stream); /** - * @copydoc cudf::unique_count(table_view const&, null_equality) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::unique_count(table_view const&, null_equality, rmm::cuda_stream_view) */ cudf::size_type unique_count(table_view const& input, null_equality nulls_equal, rmm::cuda_stream_view stream); /** - * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy, rmm::cuda_stream_view) */ cudf::size_type distinct_count(column_view const& input, null_policy null_handling, @@ -139,9 +123,7 @@ cudf::size_type distinct_count(column_view const& input, rmm::cuda_stream_view stream); /** - * @copydoc cudf::distinct_count(table_view const&, null_equality) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::distinct_count(table_view const&, null_equality, rmm::cuda_stream_view) */ cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal, diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh index 5007af7f9f1..d31ca3d92d1 100644 --- a/cpp/include/cudf/detail/utilities/cuda.cuh +++ b/cpp/include/cudf/detail/utilities/cuda.cuh @@ -189,35 +189,6 @@ __device__ T single_lane_block_sum_reduce(T lane_value) return result; } -/** - * @brief Get the number of elements that can be processed per thread. - * - * @param[in] kernel The kernel for which the elements per thread needs to be assessed - * @param[in] total_size Number of elements - * @param[in] block_size Expected block size - * - * @return cudf::size_type Elements per thread that can be processed for given specification. - */ -template <typename Kernel> -cudf::size_type elements_per_thread(Kernel kernel, - cudf::size_type total_size, - cudf::size_type block_size, - cudf::size_type max_per_thread = 32) -{ - CUDF_FUNC_RANGE(); - - // calculate theoretical occupancy - int max_blocks = 0; - CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, kernel, block_size, 0)); - - int device = 0; - CUDF_CUDA_TRY(cudaGetDevice(&device)); - int num_sms = 0; - CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device)); - int per_thread = total_size / (max_blocks * num_sms * block_size); - return std::clamp(per_thread, 1, max_per_thread); -} - /** * @brief Finds the smallest value not less than `number_to_round` and modulo `modulus` is * zero. Expects modulus to be a power of 2.
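`elements_per_thread` moves into the new `cuda.hpp` below, with the device query factored out behind a host-only `num_multiprocessors()` declaration. The matching definition is not part of this hunk; a sketch of what it presumably looks like, mirroring the inline code removed from `cuda.cuh` above:

```cpp
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <cuda_runtime.h>

// Sketch only: host-side SM-count query, as the removed cuda.cuh code did inline.
cudf::size_type num_multiprocessors()
{
  int device = 0;
  CUDF_CUDA_TRY(cudaGetDevice(&device));
  int num_sms = 0;
  CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device));
  return num_sms;
}
```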
diff --git a/cpp/include/cudf/detail/utilities/cuda.hpp b/cpp/include/cudf/detail/utilities/cuda.hpp new file mode 100644 index 00000000000..58c7ae8ed6a --- /dev/null +++ b/cpp/include/cudf/detail/utilities/cuda.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +namespace CUDF_EXPORT cudf { +namespace detail { + +/** + * @brief Get the number of multiprocessors on the device + */ +cudf::size_type num_multiprocessors(); + +/** + * @brief Get the number of elements that can be processed per thread. + * + * @param[in] kernel The kernel for which the elements per thread needs to be assessed + * @param[in] total_size Number of elements + * @param[in] block_size Expected block size + * + * @return cudf::size_type Elements per thread that can be processed for given specification. + */ +template +cudf::size_type elements_per_thread(Kernel kernel, + cudf::size_type total_size, + cudf::size_type block_size, + cudf::size_type max_per_thread = 32) +{ + CUDF_FUNC_RANGE(); + + // calculate theoretical occupancy + int max_blocks = 0; + CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, kernel, block_size, 0)); + + int per_thread = total_size / (max_blocks * num_multiprocessors() * block_size); + return std::clamp(per_thread, 1, max_per_thread); +} + +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 9a8f87b4a46..0f52b0f7b31 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -16,21 +16,6 @@ #pragma once -// We disable warning 611 because the `arrow::TableBatchReader` only partially -// override the `ReadNext` method of `arrow::RecordBatchReader::ReadNext` -// triggering warning 611-D from nvcc. -#ifdef __CUDACC__ -#pragma nv_diag_suppress 611 -#pragma nv_diag_suppress 2810 -#endif -#include - -#include -#ifdef __CUDACC__ -#pragma nv_diag_default 611 -#pragma nv_diag_default 2810 -#endif - #include #include #include @@ -131,59 +116,6 @@ struct column_metadata { column_metadata() = default; }; -/** - * @brief Create `arrow::Table` from cudf table `input` - * - * Converts the `cudf::table_view` to `arrow::Table` with the provided - * metadata `column_names`. - * - * @deprecated Since 24.08. Use cudf::to_arrow_host instead. - * - * @throws cudf::logic_error if `column_names` size doesn't match with number of columns. 
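For code still on the deprecated overloads being removed here, the replacement path is the Arrow C Data Interface; a hedged sketch, assuming a populated `cudf::table_view tbl` (the variable name is illustrative):

// Sketch only: ArrowDeviceArray-based replacement for the removed to_arrow().
auto host_array = cudf::to_arrow_host(tbl);  // unique_device_array_t
// host_array->array is an ArrowArray backed by host memory, consumable by any
// Arrow implementation through the C Data Interface.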
- * - * @param input table_view that needs to be converted to arrow Table - * @param metadata Contains hierarchy of names of columns and children - * @param stream CUDA stream used for device memory operations and kernel launches - * @param ar_mr arrow memory pool to allocate memory for arrow Table - * @return arrow Table generated from `input` - * - * @note For decimals, since the precision is not stored for them in libcudf, - * it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type - * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision - * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be - * converted to Arrow decimal128 of the precision 38. - */ -[[deprecated("Use cudf::to_arrow_host")]] std::shared_ptr to_arrow( - table_view input, - std::vector const& metadata = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); - -/** - * @brief Create `arrow::Scalar` from cudf scalar `input` - * - * Converts the `cudf::scalar` to `arrow::Scalar`. - * - * @deprecated Since 24.08. - * - * @param input scalar that needs to be converted to arrow Scalar - * @param metadata Contains hierarchy of names of columns and children - * @param stream CUDA stream used for device memory operations and kernel launches - * @param ar_mr arrow memory pool to allocate memory for arrow Scalar - * @return arrow Scalar generated from `input` - * - * @note For decimals, since the precision is not stored for them in libcudf, - * it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type - * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision - * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be - * converted to Arrow decimal128 of the precision 38. - */ -[[deprecated("Use cudf::to_arrow_host")]] std::shared_ptr to_arrow( - cudf::scalar const& input, - column_metadata const& metadata = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); - /** * @brief typedef for a unique_ptr to an ArrowSchema with custom deleter * @@ -386,39 +318,6 @@ unique_device_array_t to_arrow_host( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); -/** - * @brief Create `cudf::table` from given arrow Table input - * - * @deprecated Since 24.08. Use cudf::from_arrow_host instead. - * - * @param input arrow:Table that needs to be converted to `cudf::table` - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate `cudf::table` - * @return cudf table generated from given arrow Table - */ -[[deprecated("Use cudf::from_arrow_host")]] std::unique_ptr
from_arrow( - arrow::Table const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Create `cudf::scalar` from given arrow Scalar input - * - * @deprecated Since 24.08. Use arrow's `MakeArrayFromScalar` on the - * input, followed by `ExportArray` to obtain something that can be - * consumed by `from_arrow_host`. Then use `cudf::get_element` to - * extract a device scalar from the column. - * - * @param input `arrow::Scalar` that needs to be converted to `cudf::scalar` - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate `cudf::scalar` - * @return cudf scalar generated from given arrow Scalar - */ -[[deprecated("See docstring for migration strategies")]] std::unique_ptr from_arrow( - arrow::Scalar const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); - /** * @brief Create `cudf::table` from given ArrowArray and ArrowSchema input * diff --git a/cpp/include/cudf/io/arrow_io_source.hpp b/cpp/include/cudf/io/arrow_io_source.hpp deleted file mode 100644 index ed5c839cbb4..00000000000 --- a/cpp/include/cudf/io/arrow_io_source.hpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "datasource.hpp" - -#include - -#include -#include - -#include -#include -#include - -namespace CUDF_EXPORT cudf { -namespace io { -/** - * @addtogroup io_datasources - * @{ - * @file - */ - -/** - * @brief Implementation class for reading from an Apache Arrow file. The file - * could be a memory-mapped file or other implementation supported by Arrow. - */ -class arrow_io_source : public datasource { - public: - /** - * @brief Constructs an object from an Apache Arrow Filesystem URI - * - * @param arrow_uri Apache Arrow Filesystem URI - */ - explicit arrow_io_source(std::string const& arrow_uri); - - /** - * @brief Constructs an object from an `arrow` source object. - * - * @param file The `arrow` object from which the data is read - */ - explicit arrow_io_source(std::shared_ptr file) - : arrow_file(std::move(file)) - { - } - - /** - * @brief Returns a buffer with a subset of data from the `arrow` source. - * - * @param offset The offset in bytes from which to read - * @param size The number of bytes to read - * @return A buffer with the read data - */ - std::unique_ptr host_read(size_t offset, size_t size) override; - - /** - * @brief Reads a selected range from the `arrow` source into a preallocated buffer. 
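With `arrow_io_source` deleted below, a file-backed input is obtained from the plain datasource factory instead; a minimal sketch (the path is illustrative):

// Sketch only: file-backed datasource without the Arrow filesystem dependency.
auto src = cudf::io::datasource::create("/tmp/example.parquet");
auto buf = src->host_read(0, 4096);  // read the first 4 KiB into host memory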
- * - * @param[in] offset The offset in bytes from which to read - * @param[in] size The number of bytes to read - * @param[out] dst The preallocated buffer to read into - * @return The number of bytes read - */ - size_t host_read(size_t offset, size_t size, uint8_t* dst) override; - /** - * @brief Returns the size of the data in the `arrow` source. - * - * @return The size of the data in the `arrow` source - */ - [[nodiscard]] size_t size() const override; - - private: - std::shared_ptr filesystem; - std::shared_ptr arrow_file; -}; - -/** @} */ // end of group -} // namespace io -} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/detail/batched_memset.hpp b/cpp/include/cudf/io/detail/batched_memset.hpp new file mode 100644 index 00000000000..d0922cc64ee --- /dev/null +++ b/cpp/include/cudf/io/detail/batched_memset.hpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace CUDF_EXPORT cudf { +namespace io::detail { + +/** + * @brief A helper function that takes in a vector of device spans and memsets them to the + * value provided using batches sent to the GPU. 
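Before the parameter documentation continues below, a hedged usage sketch for this helper, assuming `T` is `cudf::size_type` and `d_a`/`d_b` are existing `rmm::device_uvector<cudf::size_type>` buffers (both names are illustrative):

// Sketch only: zero-fill several device buffers with one batched submission.
std::vector<cudf::device_span<cudf::size_type>> bufs{
  {d_a.data(), d_a.size()}, {d_b.data(), d_b.size()}};
cudf::io::detail::batched_memset(bufs, cudf::size_type{0}, stream);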
+ * + * @param bufs Vector with device spans of data + * @param value Value to memset all device spans to + * @param stream Stream used for device memory operations and kernel launches + */ +template <typename T> +void batched_memset(std::vector<cudf::device_span<T>> const& bufs, + T const value, + rmm::cuda_stream_view stream) +{ + // define task and bytes parameters + auto const num_bufs = bufs.size(); + + // copy bufs into device memory and then get sizes + auto gpu_bufs = + cudf::detail::make_device_uvector_async(bufs, stream, rmm::mr::get_current_device_resource()); + + // get a vector with the sizes of all buffers + auto sizes = cudf::detail::make_counting_transform_iterator( + static_cast<std::size_t>(0), + cuda::proclaim_return_type<std::size_t>( + [gpu_bufs = gpu_bufs.data()] __device__(std::size_t i) { return gpu_bufs[i].size(); })); + + // get an iterator with a constant value to memset + auto iter_in = thrust::make_constant_iterator(thrust::make_constant_iterator(value)); + + // get an iterator pointing to each device span + auto iter_out = thrust::make_transform_iterator( + thrust::counting_iterator<std::size_t>(0), + cuda::proclaim_return_type<T*>( + [gpu_bufs = gpu_bufs.data()] __device__(std::size_t i) { return gpu_bufs[i].data(); })); + + size_t temp_storage_bytes = 0; + + cub::DeviceCopy::Batched(nullptr, temp_storage_bytes, iter_in, iter_out, sizes, num_bufs, stream); + + rmm::device_buffer d_temp_storage( + temp_storage_bytes, stream, rmm::mr::get_current_device_resource()); + + cub::DeviceCopy::Batched( + d_temp_storage.data(), temp_storage_bytes, iter_in, iter_out, sizes, num_bufs, stream); +} + +} // namespace io::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp index 42b10a78ce8..38ba4f675c3 100644 --- a/cpp/include/cudf/io/detail/json.hpp +++ b/cpp/include/cudf/io/detail/json.hpp @@ -61,7 +61,7 @@ void write_json(data_sink* sink, * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ -void normalize_single_quotes(datasource::owning_buffer<rmm::device_uvector<char>>& indata, +void normalize_single_quotes(datasource::owning_buffer<rmm::device_buffer>& indata, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); @@ -72,7 +72,7 @@ void normalize_single_quotes(datasource::owning_buffer * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ -void normalize_whitespace(datasource::owning_buffer<rmm::device_uvector<char>>& indata, +void normalize_whitespace(datasource::owning_buffer<rmm::device_buffer>& indata, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); } // namespace io::json::detail diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 0cb39d15cd5..fde1857cb7f 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -696,6 +696,8 @@ class json_writer_options_builder; class json_writer_options { // Specify the sink to use for writer output sink_info _sink; + // maximum number of rows to write in each chunk (limits memory use) + size_type _rows_per_chunk = std::numeric_limits<size_type>::max(); // Set of columns to output table_view _table; // string to use for null entries @@ -704,8 +706,6 @@ class json_writer_options { bool _include_nulls = false; // Indicates whether to use JSON lines for records format bool _lines = false; - // maximum number of rows to write in each chunk (limits memory use) - size_type _rows_per_chunk =
std::numeric_limits::max(); // string to use for values != 0 in INT8 types (default 'true') std::string _true_value = std::string{"true"}; // string to use for values == 0 in INT8 types (default 'false') @@ -720,7 +720,7 @@ class json_writer_options { * @param table Table to be written to output */ explicit json_writer_options(sink_info sink, table_view table) - : _sink(std::move(sink)), _table(std::move(table)), _rows_per_chunk(table.num_rows()) + : _sink(std::move(sink)), _rows_per_chunk(table.num_rows()), _table(std::move(table)) { } diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 12897ac77ef..64c37f9a9df 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -76,6 +76,8 @@ class parquet_reader_options { bool _use_pandas_metadata = true; // Whether to read and use ARROW schema bool _use_arrow_schema = true; + // Whether to allow reading matching select columns from mismatched Parquet files. + bool _allow_mismatched_pq_schemas = false; // Cast timestamp columns to a specific type data_type _timestamp_type{type_id::EMPTY}; @@ -138,6 +140,18 @@ class parquet_reader_options { */ [[nodiscard]] bool is_enabled_use_arrow_schema() const { return _use_arrow_schema; } + /** + * @brief Returns true/false depending on whether to read matching projected and filter columns + * from mismatched Parquet sources. + * + * @return `true` if mismatched projected and filter columns will be read from mismatched Parquet + * sources. + */ + [[nodiscard]] bool is_enabled_allow_mismatched_pq_schemas() const + { + return _allow_mismatched_pq_schemas; + } + /** * @brief Returns optional tree of metadata. * @@ -258,6 +272,15 @@ class parquet_reader_options { */ void enable_use_arrow_schema(bool val) { _use_arrow_schema = val; } + /** + * @brief Sets to enable/disable reading of matching projected and filter columns from mismatched + * Parquet sources. + * + * @param val Boolean value whether to read matching projected and filter columns from mismatched + * Parquet sources. + */ + void enable_allow_mismatched_pq_schemas(bool val) { _allow_mismatched_pq_schemas = val; } + /** * @brief Sets reader column schema. * @@ -382,6 +405,20 @@ class parquet_reader_options_builder { return *this; } + /** + * @brief Sets to enable/disable reading of matching projected and filter columns from mismatched + * Parquet sources. + * + * @param val Boolean value whether to read matching projected and filter columns from mismatched + * Parquet sources. + * @return this for chaining. + */ + parquet_reader_options_builder& allow_mismatched_pq_schemas(bool val) + { + options._allow_mismatched_pq_schemas = val; + return *this; + } + /** * @brief Sets reader metadata. 
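A hedged sketch of opting in to the new reader option through the builder (file names and column selection are illustrative):

// Sketch only: tolerate schema mismatches across a multi-file read.
auto const opts = cudf::io::parquet_reader_options::builder(
                    cudf::io::source_info{{"part0.parquet", "part1.parquet"}})
                    .columns({"a", "b"})
                    .allow_mismatched_pq_schemas(true)
                    .build();
auto const result = cudf::io::read_parquet(opts);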
* diff --git a/cpp/include/cudf/io/text/byte_range_info.hpp b/cpp/include/cudf/io/text/byte_range_info.hpp index 7e9256be1d3..5f3c91dc99c 100644 --- a/cpp/include/cudf/io/text/byte_range_info.hpp +++ b/cpp/include/cudf/io/text/byte_range_info.hpp @@ -16,7 +16,6 @@ #pragma once -#include #include #include @@ -40,53 +39,49 @@ class byte_range_info { int64_t _size{}; ///< size in bytes public: - constexpr byte_range_info() = default; + byte_range_info() = default; /** * @brief Constructs a byte_range_info object * * @param offset offset in bytes * @param size size in bytes */ - constexpr byte_range_info(int64_t offset, int64_t size) : _offset(offset), _size(size) - { - CUDF_EXPECTS(offset >= 0, "offset must be non-negative"); - CUDF_EXPECTS(size >= 0, "size must be non-negative"); - } + byte_range_info(int64_t offset, int64_t size); /** * @brief Copy constructor * * @param other byte_range_info object to copy */ - constexpr byte_range_info(byte_range_info const& other) noexcept = default; + byte_range_info(byte_range_info const& other) noexcept = default; /** * @brief Copy assignment operator * * @param other byte_range_info object to copy * @return this object after copying */ - constexpr byte_range_info& operator=(byte_range_info const& other) noexcept = default; + byte_range_info& operator=(byte_range_info const& other) noexcept = default; /** * @brief Get the offset in bytes * * @return Offset in bytes */ - [[nodiscard]] constexpr int64_t offset() { return _offset; } + [[nodiscard]] int64_t offset() const { return _offset; } /** * @brief Get the size in bytes * * @return Size in bytes */ - [[nodiscard]] constexpr int64_t size() { return _size; } + [[nodiscard]] int64_t size() const { return _size; } /** * @brief Returns whether the span is empty. * - * @return true iff the span is empty, i.e. `size() == 0` + * @return true iff the range is empty, i.e. `size() == 0` */ - [[nodiscard]] constexpr bool empty() { return size() == 0; } + [[nodiscard]] bool is_empty() const { return size() == 0; } }; /** diff --git a/cpp/include/cudf/io/text/multibyte_split.hpp b/cpp/include/cudf/io/text/multibyte_split.hpp index 8624a386d0f..3a1f9611324 100644 --- a/cpp/include/cudf/io/text/multibyte_split.hpp +++ b/cpp/include/cudf/io/text/multibyte_split.hpp @@ -96,26 +96,6 @@ std::unique_ptr multibyte_split( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); -/** - * @brief Splits the source text into a strings column using a multiple byte delimiter. - * - * @deprecated Since 24.08 - * - * @param source The source input data encoded in UTF-8 - * @param delimiter UTF-8 encoded string for which to find offsets in the source - * @param byte_range The position and size within `source` to produce the column from - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Memory resource to use for the device memory allocation - * @return The strings found by splitting the source by the delimiter within the relevant byte - * range. 
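The `byte_range_info` rework above drops `constexpr` (the validating constructor is now only declared in the header) and renames `empty()` to `is_empty()`; a minimal sketch of the revised host-side interface:

// Sketch only: construction plus the renamed emptiness check.
cudf::io::text::byte_range_info const range{0, 1024};
if (!range.is_empty()) {
  auto const end = range.offset() + range.size();  // exclusive end of the range
}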
- */ -[[deprecated]] std::unique_ptr multibyte_split( - data_chunk_source const& source, - std::string const& delimiter, - std::optional byte_range, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); - /** @} */ // end of group } // namespace text diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 3df737413fa..a34881942ce 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -54,7 +54,7 @@ namespace io { /** * @brief Compression algorithms */ -enum class compression_type { +enum class compression_type : int32_t { NONE, ///< No compression AUTO, ///< Automatically detect or select compression format SNAPPY, ///< Snappy format, using byte-oriented LZ77 @@ -72,7 +72,7 @@ enum class compression_type { /** * @brief Data source or destination types */ -enum class io_type { +enum class io_type : int32_t { FILEPATH, ///< Input/output is a file path HOST_BUFFER, ///< Input/output is a buffer in host memory DEVICE_BUFFER, ///< Input/output is a buffer in device memory @@ -83,7 +83,7 @@ enum class io_type { /** * @brief Behavior when handling quotations in field data */ -enum class quote_style { +enum class quote_style : int32_t { MINIMAL, ///< Quote only fields which contain special characters ALL, ///< Quote all fields NONNUMERIC, ///< Quote all non-numeric fields @@ -93,7 +93,7 @@ enum class quote_style { /** * @brief Column statistics granularity type for parquet/orc writers */ -enum statistics_freq { +enum statistics_freq : int32_t { STATISTICS_NONE = 0, ///< No column statistics STATISTICS_ROWGROUP = 1, ///< Per-Rowgroup column statistics STATISTICS_PAGE = 2, ///< Per-page column statistics @@ -103,7 +103,7 @@ enum statistics_freq { /** * @brief Valid encodings for use with `column_in_metadata::set_encoding()` */ -enum class column_encoding { +enum class column_encoding : int32_t { // Common encodings: USE_DEFAULT = -1, ///< No encoding has been requested, use default encoding DICTIONARY, ///< Use dictionary encoding @@ -222,7 +222,7 @@ class writer_compression_statistics { /** * @brief Control use of dictionary encoding for parquet writer */ -enum dictionary_policy { +enum dictionary_policy : int32_t { NEVER = 0, ///< Never use dictionary encoding ADAPTIVE = 1, ///< Use dictionary when it will not impact compression ALWAYS = 2 ///< Use dictionary regardless of impact on compression diff --git a/cpp/include/cudf/json/json.hpp b/cpp/include/cudf/json/json.hpp index 48d5dcf7727..403374c536d 100644 --- a/cpp/include/cudf/json/json.hpp +++ b/cpp/include/cudf/json/json.hpp @@ -22,8 +22,6 @@ #include #include -#include - namespace CUDF_EXPORT cudf { /** diff --git a/cpp/include/cudf/lists/detail/stream_compaction.hpp b/cpp/include/cudf/lists/detail/stream_compaction.hpp index c11e07cd190..be0bd27083c 100644 --- a/cpp/include/cudf/lists/detail/stream_compaction.hpp +++ b/cpp/include/cudf/lists/detail/stream_compaction.hpp @@ -26,10 +26,7 @@ namespace CUDF_EXPORT cudf { namespace lists::detail { /** - * @copydoc cudf::lists::apply_boolean_mask(lists_column_view const&, lists_column_view const&, - * rmm::device_async_resource_ref) - * - * @param stream CUDA stream used for device memory operations and kernel launches + * @copydoc cudf::lists::apply_boolean_mask */ std::unique_ptr apply_boolean_mask(lists_column_view const& input, lists_column_view const& boolean_mask, @@ -37,9 +34,7 @@ std::unique_ptr apply_boolean_mask(lists_column_view const& input, 
rmm::device_async_resource_ref mr); /** - * @copydoc cudf::list::distinct - * - * @param stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::lists::distinct */ std::unique_ptr<column> distinct(lists_column_view const& input, null_equality nulls_equal, diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp index cfe404ff6ab..ced8d5849d0 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/stream_compaction.hpp @@ -67,6 +67,7 @@ namespace CUDF_EXPORT cudf { * @param[in] keys vector of indices representing key columns from `input` * @param[in] keep_threshold The minimum number of non-null fields in a row * required to keep the row. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` with at least @p * keep_threshold non-null fields in @p keys. @@ -75,6 +76,7 @@ std::unique_ptr<table>
drop_nulls( table_view const& input, std::vector<size_type> const& keys, cudf::size_type keep_threshold, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -99,6 +101,7 @@ std::unique_ptr<table>
drop_nulls( * * @param[in] input The input `table_view` to filter * @param[in] keys vector of indices representing key columns from `input` + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` without nulls in the columns * of @p keys. @@ -106,6 +109,7 @@ std::unique_ptr<table>
drop_nulls( std::unique_ptr<table>
drop_nulls( table_view const& input, std::vector<size_type> const& keys, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -141,6 +145,7 @@ std::unique_ptr<table>
drop_nulls( * @param[in] keys vector of indices representing key columns from `input` * @param[in] keep_threshold The minimum number of non-NAN elements in a row * required to keep the row. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` with at least @p * keep_threshold non-NAN elements in @p keys. @@ -149,6 +154,7 @@ std::unique_ptr<table>
drop_nans( table_view const& input, std::vector<size_type> const& keys, cudf::size_type keep_threshold, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -174,6 +180,7 @@ std::unique_ptr<table>
drop_nans( * * @param[in] input The input `table_view` to filter * @param[in] keys vector of indices representing key columns from `input` + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` without NANs in the columns * of @p keys. @@ -181,6 +188,7 @@ std::unique_ptr<table>
drop_nans( std::unique_ptr<table>
drop_nans( table_view const& input, std::vector<size_type> const& keys, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -200,6 +208,7 @@ std::unique_ptr<table>
drop_nans( * @param[in] input The input table_view to filter * @param[in] boolean_mask A nullable column_view of type type_id::BOOL8 used * as a mask to filter the `input`. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing copy of all rows of @p input passing * the filter defined by @p boolean_mask. @@ -207,6 +216,7 @@ std::unique_ptr<table>
drop_nans(
apply_boolean_mask( table_view const& input, column_view const& boolean_mask, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -241,6 +251,7 @@ enum class duplicate_keep_option { * @param[in] keep keep any, first, last, or none of the found duplicates * @param[in] nulls_equal flag to denote nulls are equal if null_equality::EQUAL, nulls are not * equal if null_equality::UNEQUAL + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device * memory * * @@ -251,6 +262,7 @@ std::unique_ptr<table>
unique( std::vector<size_type> const& keys, duplicate_keep_option keep, null_equality nulls_equal = null_equality::EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -269,6 +281,7 @@ std::unique_ptr<table>
unique( * @param keep Copy any, first, last, or none of the found duplicates * @param nulls_equal Flag to specify whether null elements should be considered as equal * @param nans_equal Flag to specify whether NaN elements should be considered as equal + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table * @return Table with distinct rows in an unspecified order */ @@ -278,6 +291,7 @@ std::unique_ptr<table>
distinct( duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -346,12 +360,14 @@ std::unique_ptr<table>
stable_distinct( * @param[in] input The column_view whose consecutive groups of equivalent rows will be counted * @param[in] null_handling flag to include or ignore `null` while counting * @param[in] nan_handling flag to consider `NaN==null` or not + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of consecutive groups of equivalent rows in the column */ cudf::size_type unique_count(column_view const& input, null_policy null_handling, - nan_policy nan_handling); + nan_policy nan_handling, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Count the number of consecutive groups of equivalent rows in a table. @@ -359,11 +375,13 @@ cudf::size_type unique_count(column_view const& input, * @param[in] input Table whose consecutive groups of equivalent rows will be counted * @param[in] nulls_equal flag to denote if null elements should be considered equal * nulls are not equal if null_equality::UNEQUAL. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of consecutive groups of equivalent rows in the column */ cudf::size_type unique_count(table_view const& input, - null_equality nulls_equal = null_equality::EQUAL); + null_equality nulls_equal = null_equality::EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Count the distinct elements in the column_view. @@ -382,12 +400,14 @@ cudf::size_type unique_count(table_view const& input, * @param[in] input The column_view whose distinct elements will be counted * @param[in] null_handling flag to include or ignore `null` while counting * @param[in] nan_handling flag to consider `NaN==null` or not + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of distinct rows in the table */ cudf::size_type distinct_count(column_view const& input, null_policy null_handling, - nan_policy nan_handling); + nan_policy nan_handling, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Count the distinct rows in a table. @@ -395,11 +415,13 @@ cudf::size_type distinct_count(column_view const& input, * @param[in] input Table whose distinct rows will be counted * @param[in] nulls_equal flag to denote if null elements should be considered equal. * nulls are not equal if null_equality::UNEQUAL. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of distinct rows in the table */ cudf::size_type distinct_count(table_view const& input, - null_equality nulls_equal = null_equality::EQUAL); + null_equality nulls_equal = null_equality::EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/convert/convert_ipv4.hpp b/cpp/include/cudf/strings/convert/convert_ipv4.hpp index 04a04907c12..97d1dfee017 100644 --- a/cpp/include/cudf/strings/convert/convert_ipv4.hpp +++ b/cpp/include/cudf/strings/convert/convert_ipv4.hpp @@ -44,15 +44,12 @@ namespace strings { * No checking is done on the format. If a string is not in IPv4 format, the resulting * integer is undefined. * - * The resulting 32-bit integer is placed in an int64_t to avoid setting the sign-bit - * in an int32_t type. This could be changed if cudf supported a UINT32 type in the future. - * * Any null entries will result in corresponding null entries in the output column. 
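With the breaking change described above, the conversion now round-trips through UINT32 instead of INT64; a hedged sketch, assuming a populated `cudf::strings_column_view addrs` (the name is illustrative):

// Sketch only: dotted-quad strings <-> packed UINT32 values.
auto const ints = cudf::strings::ipv4_to_integers(addrs);        // UINT32 output
auto const strs = cudf::strings::integers_to_ipv4(ints->view()); // back to strings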
* * @param input Strings instance for this operation * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return New INT64 column converted from strings + * @return New UINT32 column converted from strings */ std::unique_ptr ipv4_to_integers( strings_column_view const& input, @@ -68,13 +65,11 @@ std::unique_ptr ipv4_to_integers( * Each input integer is dissected into four integers by dividing the input into 8-bit sections. * These sub-integers are then converted into [0-9] characters and placed between '.' characters. * - * No checking is done on the input integer value. Only the lower 32-bits are used. - * * Any null entries will result in corresponding null entries in the output column. * - * @throw cudf::logic_error if the input column is not INT64 type. + * @throw cudf::logic_error if the input column is not UINT32 type. * - * @param integers Integer (INT64) column to convert + * @param integers Integer (UINT32) column to convert * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New strings column diff --git a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh index 5f51da967d3..8440805960e 100644 --- a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh +++ b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh @@ -17,8 +17,8 @@ #include +#include #include -#include #include namespace cudf { @@ -88,7 +88,7 @@ __device__ inline thrust::pair parse_integer( * @return Integer value of the exponent */ template -__device__ thrust::optional parse_exponent(char const* iter, char const* iter_end) +__device__ cuda::std::optional parse_exponent(char const* iter, char const* iter_end) { constexpr uint32_t exponent_max = static_cast(std::numeric_limits::max()); @@ -105,12 +105,12 @@ __device__ thrust::optional parse_exponent(char const* iter, char const while (iter < iter_end) { auto const ch = *iter++; if (ch < '0' || ch > '9') { - if (check_only) { return thrust::nullopt; } + if (check_only) { return cuda::std::nullopt; } break; } uint32_t exp_check = static_cast(exp_ten * 10) + static_cast(ch - '0'); - if (check_only && (exp_check > exponent_max)) { return thrust::nullopt; } // check overflow + if (check_only && (exp_check > exponent_max)) { return cuda::std::nullopt; } // check overflow exp_ten = static_cast(exp_check); } diff --git a/cpp/include/cudf/strings/detail/copy_if_else.cuh b/cpp/include/cudf/strings/detail/copy_if_else.cuh index 4db7651330b..213a41ca596 100644 --- a/cpp/include/cudf/strings/detail/copy_if_else.cuh +++ b/cpp/include/cudf/strings/detail/copy_if_else.cuh @@ -25,8 +25,8 @@ #include #include +#include #include -#include #include namespace cudf { @@ -41,9 +41,9 @@ namespace detail { * ``` * * @tparam StringIterLeft A random access iterator whose value_type is - * `thrust::optional` where the `optional` has a value iff the element is valid. + * `cuda::std::optional` where the `optional` has a value iff the element is valid. * @tparam StringIterRight A random access iterator whose value_type is - * `thrust::optional` where the `optional` has a value iff the element is valid. + * `cuda::std::optional` where the `optional` has a value iff the element is valid. * @tparam Filter Functor that takes an index and returns a boolean. 
* * @param lhs_begin Start of first set of data. Used when `filter_fn` returns true. diff --git a/cpp/include/cudf/strings/replace.hpp b/cpp/include/cudf/strings/replace.hpp index 5b4ffb98f99..f450b77ad7a 100644 --- a/cpp/include/cudf/strings/replace.hpp +++ b/cpp/include/cudf/strings/replace.hpp @@ -160,18 +160,6 @@ std::unique_ptr replace_multiple( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); -/** - * @copydoc cudf::strings::replace_multiple - * - * @deprecated since 24.08 - */ -[[deprecated]] std::unique_ptr replace( - strings_column_view const& input, - strings_column_view const& targets, - strings_column_view const& repls, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); - /** @} */ // end of doxygen group } // namespace strings } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index f05e5f4ca5c..3f33c70c29a 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -211,7 +211,7 @@ struct sorting_physical_element_comparator { } }; -using optional_dremel_view = thrust::optional; +using optional_dremel_view = cuda::std::optional; // The has_nested_columns template parameter of the device_row_comparator is // necessary to help the compiler optimize our code. Without it, the list and @@ -223,12 +223,12 @@ using optional_dremel_view = thrust::optional; // std::optional> in the // preprocessed_table/device_row_comparator (which is always valid when // has_nested_columns and is otherwise invalid) that is then unpacked to a -// thrust::optional at the element_comparator level (which +// cuda::std::optional at the element_comparator level (which // is always valid for a list column and otherwise invalid). We cannot use an // additional template parameter for the element_comparator on a per-column // basis because we cannot conditionally define dremel_device_view member // variables without jumping through extra hoops with inheritance, so the -// thrust::optional member must be an optional rather than +// cuda::std::optional member must be an optional rather than // a raw dremel_device_view. /** * @brief Computes the lexicographic comparison between 2 rows. 
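The `thrust::optional` to `cuda::std::optional` migration in these device headers relies on libcu++'s optional being usable from device code; an illustrative sketch (the function is hypothetical, not part of the change):

// Sketch only: cuda::std::optional works inside __device__ code.
#include <cuda/std/optional>

__device__ cuda::std::optional<int> parse_digit(char ch)
{
  if (ch < '0' || ch > '9') { return cuda::std::nullopt; }
  return ch - '0';
}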
diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index adc5bdb2af8..f16214260f7 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -47,6 +47,7 @@ namespace CUDF_EXPORT cudf { * @param unary_udf The PTX/CUDA string of the unary function to apply * @param output_type The output type that is compatible with the output type in the UDF * @param is_ptx true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return The column resulting from applying the unary function to * every element of the input */ @@ -56,6 +57,7 @@ std::unique_ptr<column> transform( std::string const& unary_udf, data_type output_type, bool is_ptx, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -65,12 +67,14 @@ std::unique_ptr<column> transform( * @throws cudf::logic_error if `input.type()` is a non-floating type * * @param input An immutable view of the input column of floating-point type + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned bitmask * @return A pair containing a `device_buffer` with the new bitmask and its * null count obtained by replacing `NaN` in `input` with null. */ std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls( column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -83,12 +87,14 @@ std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls( * * @param table The table used for expression evaluation * @param expr The root of the expression tree + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource * @return Output column */ std::unique_ptr<column> compute_column( table_view const& table, ast::expression const& expr, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -101,6 +107,7 @@ std::unique_ptr<column> compute_column( * @throws cudf::logic_error if `input.type()` is a non-boolean type * * @param input Boolean elements to convert to a bitmask + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned bitmask * @return A pair containing a `device_buffer` with the new bitmask and its * null count obtained from input considering `true` represent `valid`/`1` and @@ -108,6 +115,7 @@ std::unique_ptr<column> compute_column( */ std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask( column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -130,12 +138,14 @@ std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask( * @endcode * * @param input Table containing values to be encoded + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory * @return A pair containing the distinct rows of the input table in sorted order, * and a column of integer indices representing the encoded rows.
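Each of these transform entry points now takes an explicit stream directly before the memory resource; a hedged sketch of the new call shape, assuming a floating-point `cudf::column_view col` and an `rmm::cuda_stream_view stream` (both illustrative):

// Sketch only: supplying a non-default stream explicitly.
auto [null_mask, null_count] = cudf::nans_to_nulls(col, stream);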
*/ std::pair, std::unique_ptr> encode( cudf::table_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -162,12 +172,14 @@ std::pair, std::unique_ptr> encode( * * @param input Column containing values to be encoded * @param categories Column containing categories + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory * @return A pair containing the owner to all encoded data and a table view into the data */ std::pair, table_view> one_hot_encode( column_view const& input, column_view const& categories, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -188,6 +200,7 @@ std::pair, table_view> one_hot_encode( * @param bitmask A device pointer to the bitmask which needs to be converted * @param begin_bit position of the bit from which the conversion should start * @param end_bit position of the bit before which the conversion should stop + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned columns' device memory * @return A boolean column representing the given mask from [begin_bit, end_bit) */ @@ -195,6 +208,7 @@ std::unique_ptr mask_to_bools( bitmask_type const* bitmask, size_type begin_bit, size_type end_bit, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -219,11 +233,14 @@ std::unique_ptr mask_to_bools( * row_bit_count(column(x)) >= row_bit_count(gather(column(x))) * * @param t The table view to perform the computation on + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned columns' device memory * @return A 32-bit integer column containing the per-row bit counts */ std::unique_ptr row_bit_count( - table_view const& t, rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + table_view const& t, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Returns an approximate cumulative size in bits of all columns in the `table_view` for @@ -240,12 +257,14 @@ std::unique_ptr row_bit_count( * * @param t The table view to perform the computation on * @param segment_length The number of rows in each segment for which the total size is computed + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned columns' device memory * @return A 32-bit integer column containing the bit counts for each segment of rows */ std::unique_ptr segmented_row_bit_count( table_view const& t, size_type segment_length, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index 4fcbca09d17..aeb5db57830 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -22,25 +22,6 @@ namespace CUDF_EXPORT cudf { -/** - * @brief Compare the types of two `column_view`s 
- * - * @deprecated Since 24.06. Use cudf::have_same_types instead. - * - * This function returns true if the type of `lhs` equals that of `rhs`. - * - For fixed point types, the scale is compared. - * - For dictionary types, the type of the keys are compared if both are - * non-empty columns. - * - For lists types, the type of child columns are compared recursively. - * - For struct types, the type of each field are compared in order. - * - For all other types, the `id` of `data_type` is compared. - * - * @param lhs The first `column_view` to compare - * @param rhs The second `column_view` to compare - * @return true if column types match - */ -[[deprecated]] bool column_types_equal(column_view const& lhs, column_view const& rhs); - /** * @brief Compare the type IDs of two `column_view`s * diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 4e504ec1d30..d00db222b62 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -1337,7 +1337,7 @@ class lists_column_wrapper : public detail::column_wrapper { lists_column_wrapper(std::initializer_list elements) : column_wrapper{} { build_from_non_nested( - std::move(cudf::test::fixed_width_column_wrapper(elements).release())); + cudf::test::fixed_width_column_wrapper(elements).release()); } /** @@ -1361,7 +1361,7 @@ class lists_column_wrapper : public detail::column_wrapper { lists_column_wrapper(InputIterator begin, InputIterator end) : column_wrapper{} { build_from_non_nested( - std::move(cudf::test::fixed_width_column_wrapper(begin, end).release())); + cudf::test::fixed_width_column_wrapper(begin, end).release()); } /** @@ -1386,7 +1386,7 @@ class lists_column_wrapper : public detail::column_wrapper { : column_wrapper{} { build_from_non_nested( - std::move(cudf::test::fixed_width_column_wrapper(elements, v).release())); + cudf::test::fixed_width_column_wrapper(elements, v).release()); } /** @@ -1413,8 +1413,8 @@ class lists_column_wrapper : public detail::column_wrapper { lists_column_wrapper(InputIterator begin, InputIterator end, ValidityIterator v) : column_wrapper{} { - build_from_non_nested(std::move( - cudf::test::fixed_width_column_wrapper(begin, end, v).release())); + build_from_non_nested( + cudf::test::fixed_width_column_wrapper(begin, end, v).release()); } /** @@ -1435,7 +1435,7 @@ class lists_column_wrapper : public detail::column_wrapper { lists_column_wrapper(std::initializer_list elements) : column_wrapper{} { build_from_non_nested( - std::move(cudf::test::strings_column_wrapper(elements.begin(), elements.end()).release())); + cudf::test::strings_column_wrapper(elements.begin(), elements.end()).release()); } /** @@ -1460,7 +1460,7 @@ class lists_column_wrapper : public detail::column_wrapper { : column_wrapper{} { build_from_non_nested( - std::move(cudf::test::strings_column_wrapper(elements.begin(), elements.end(), v).release())); + cudf::test::strings_column_wrapper(elements.begin(), elements.end(), v).release()); } /** diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 3ac8547baad..25b0f68aaa8 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -41,7 +41,7 @@ #include #include -#include +#include #include @@ -173,7 +173,7 @@ template void fixed_point_binary_operation_validation(binary_operator op, Lhs lhs, Rhs rhs, - thrust::optional output_type = {}) + cuda::std::optional output_type = {}) { CUDF_EXPECTS((is_fixed_point(lhs) or is_fixed_point(rhs)), "One of the inputs 
must have fixed_point data_type."); diff --git a/cpp/src/column/column_factories.cu b/cpp/src/column/column_factories.cu index bad20d6817c..ad9c5e4d3a0 100644 --- a/cpp/src/column/column_factories.cu +++ b/cpp/src/column/column_factories.cu @@ -20,11 +20,12 @@ #include #include #include -#include +#include #include #include +#include namespace cudf { @@ -57,15 +58,26 @@ std::unique_ptr column_from_scalar_dispatch::operator() const&>(value); + if (!value.is_valid(stream)) { + return make_strings_column( + size, + make_column_from_scalar(numeric_scalar(0), size + 1, stream, mr), + rmm::device_buffer{}, + size, + cudf::detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr)); + } - // fill the column with the scalar - auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr); + auto& ss = static_cast const&>(value); + auto const d_str = ss.value(stream); // no actual data is copied - return output; + // fill the column with the scalar + rmm::device_uvector indices(size, stream); + auto const row_value = + d_str.empty() ? cudf::strings::detail::string_index_pair{"", 0} + : cudf::strings::detail::string_index_pair{d_str.data(), d_str.size_bytes()}; + thrust::uninitialized_fill( + rmm::exec_policy_nosync(stream), indices.begin(), indices.end(), row_value); + return cudf::strings::detail::make_strings_column(indices.begin(), indices.end(), stream, mr); } template <> diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 98ee6aa8f68..bac8dbe5d95 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -143,6 +143,12 @@ std::unique_ptr empty_like(column_view const& input) { CUDF_FUNC_RANGE(); + // test_dataframe.py passes an EMPTY column type here; + // this causes is_nested to throw an error since it uses the type-dispatcher + if ((input.type().id() == type_id::EMPTY) || !cudf::is_nested(input.type())) { + return make_empty_column(input.type()); + } + std::vector> children; std::transform(input.child_begin(), input.child_end(), diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 5fe4a5eb30f..35161eada28 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -568,15 +568,16 @@ std::unique_ptr
groupby(table_view const& keys, cudf::detail::result_cache sparse_results(requests.size()); auto const comparator_helper = [&](auto const d_key_equal) { - auto const set = cuco::static_set{num_keys, - 0.5, // desired load factor - cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, - d_key_equal, - probing_scheme_type{d_row_hash}, - cuco::thread_scope_device, - cuco::storage<1>{}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto const set = cuco::static_set{ + num_keys, + 0.5, // desired load factor + cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, + d_key_equal, + probing_scheme_type{d_row_hash}, + cuco::thread_scope_device, + cuco::storage<1>{}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; // Compute all single pass aggs first compute_single_pass_aggs(keys, diff --git a/cpp/src/interop/arrow_utilities.cpp b/cpp/src/interop/arrow_utilities.cpp index 4292552a800..3776daf41aa 100644 --- a/cpp/src/interop/arrow_utilities.cpp +++ b/cpp/src/interop/arrow_utilities.cpp @@ -98,6 +98,7 @@ ArrowType id_to_arrow_type(cudf::type_id id) ArrowType id_to_arrow_storage_type(cudf::type_id id) { switch (id) { + case cudf::type_id::TIMESTAMP_DAYS: return NANOARROW_TYPE_INT32; case cudf::type_id::TIMESTAMP_SECONDS: case cudf::type_id::TIMESTAMP_MILLISECONDS: case cudf::type_id::TIMESTAMP_MICROSECONDS: diff --git a/cpp/src/interop/detail/arrow_allocator.cpp b/cpp/src/interop/detail/arrow_allocator.cpp deleted file mode 100644 index 2a19a5360fe..00000000000 --- a/cpp/src/interop/detail/arrow_allocator.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include - -#include - -namespace cudf { -namespace detail { - -/* - Enable Transparent Huge Pages (THP) for large (>4MB) allocations. - `buf` is returned untouched. - Enabling THP can improve performance of device-host memory transfers - significantly, see . -*/ -template -T enable_hugepage(T&& buf) -{ - if (buf->size() < (1u << 22u)) { // Smaller than 4 MB - return std::move(buf); - } - -#ifdef MADV_HUGEPAGE - auto const pagesize = sysconf(_SC_PAGESIZE); - void* addr = const_cast(buf->data()); - if (addr == nullptr) { return std::move(buf); } - auto length{static_cast(buf->size())}; - if (std::align(pagesize, pagesize, addr, length)) { - // Intentionally not checking for errors that may be returned by older kernel versions; - // optimistically tries enabling huge pages. - madvise(addr, length, MADV_HUGEPAGE); - } -#endif - return std::move(buf); -} - -std::unique_ptr allocate_arrow_buffer(int64_t const size, arrow::MemoryPool* ar_mr) -{ - /* - nvcc 11.0 generates Internal Compiler Error during codegen when arrow::AllocateBuffer - and `ValueOrDie` are used inside a CUDA compilation unit. 
- - To work around this issue we compile an allocation shim in C++ and use - that from our cuda sources - */ - arrow::Result> result = arrow::AllocateBuffer(size, ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow buffer"); - return enable_hugepage(std::move(result).ValueOrDie()); -} - -std::shared_ptr allocate_arrow_bitmap(int64_t const size, arrow::MemoryPool* ar_mr) -{ - /* - nvcc 11.0 generates Internal Compiler Error during codegen when arrow::AllocateBuffer - and `ValueOrDie` are used inside a CUDA compilation unit. - - To work around this issue we compile an allocation shim in C++ and use - that from our cuda sources - */ - arrow::Result> result = arrow::AllocateBitmap(size, ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow bitmap"); - return enable_hugepage(std::move(result).ValueOrDie()); -} - -} // namespace detail -} // namespace cudf diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu deleted file mode 100644 index 579820cbae3..00000000000 --- a/cpp/src/interop/from_arrow.cu +++ /dev/null @@ -1,524 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -namespace cudf { - -namespace detail { -data_type arrow_to_cudf_type(arrow::DataType const& arrow_type) -{ - switch (arrow_type.id()) { - case arrow::Type::NA: return data_type(type_id::EMPTY); - case arrow::Type::BOOL: return data_type(type_id::BOOL8); - case arrow::Type::INT8: return data_type(type_id::INT8); - case arrow::Type::INT16: return data_type(type_id::INT16); - case arrow::Type::INT32: return data_type(type_id::INT32); - case arrow::Type::INT64: return data_type(type_id::INT64); - case arrow::Type::UINT8: return data_type(type_id::UINT8); - case arrow::Type::UINT16: return data_type(type_id::UINT16); - case arrow::Type::UINT32: return data_type(type_id::UINT32); - case arrow::Type::UINT64: return data_type(type_id::UINT64); - case arrow::Type::FLOAT: return data_type(type_id::FLOAT32); - case arrow::Type::DOUBLE: return data_type(type_id::FLOAT64); - case arrow::Type::DATE32: return data_type(type_id::TIMESTAMP_DAYS); - case arrow::Type::TIMESTAMP: { - auto type = static_cast(&arrow_type); - switch (type->unit()) { - case arrow::TimeUnit::type::SECOND: return data_type(type_id::TIMESTAMP_SECONDS); - case arrow::TimeUnit::type::MILLI: return data_type(type_id::TIMESTAMP_MILLISECONDS); - case arrow::TimeUnit::type::MICRO: return data_type(type_id::TIMESTAMP_MICROSECONDS); - case arrow::TimeUnit::type::NANO: return data_type(type_id::TIMESTAMP_NANOSECONDS); - default: CUDF_FAIL("Unsupported timestamp unit in arrow"); - } - } - case arrow::Type::DURATION: { - auto type = static_cast(&arrow_type); - switch (type->unit()) { - case arrow::TimeUnit::type::SECOND: return 
data_type(type_id::DURATION_SECONDS); - case arrow::TimeUnit::type::MILLI: return data_type(type_id::DURATION_MILLISECONDS); - case arrow::TimeUnit::type::MICRO: return data_type(type_id::DURATION_MICROSECONDS); - case arrow::TimeUnit::type::NANO: return data_type(type_id::DURATION_NANOSECONDS); - default: CUDF_FAIL("Unsupported duration unit in arrow"); - } - } - case arrow::Type::STRING: return data_type(type_id::STRING); - case arrow::Type::LARGE_STRING: return data_type(type_id::STRING); - case arrow::Type::DICTIONARY: return data_type(type_id::DICTIONARY32); - case arrow::Type::LIST: return data_type(type_id::LIST); - case arrow::Type::DECIMAL: { - auto const type = static_cast(&arrow_type); - return data_type{type_id::DECIMAL128, -type->scale()}; - } - case arrow::Type::STRUCT: return data_type(type_id::STRUCT); - default: CUDF_FAIL("Unsupported type_id conversion to cudf"); - } -} - -namespace { -/** - * @brief Functor to return column for a corresponding arrow array. column - * is formed from buffer underneath the arrow array along with any offset and - * change in length that array has. - */ -struct dispatch_to_cudf_column { - /** - * @brief Returns mask from an array without any offsets. - */ - std::unique_ptr get_mask_buffer(arrow::Array const& array, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - if (array.null_bitmap_data() == nullptr) { - return std::make_unique(0, stream, mr); - } - auto const null_bitmap_size = array.null_bitmap()->size(); - auto const allocation_size = - bitmask_allocation_size_bytes(static_cast(null_bitmap_size * CHAR_BIT)); - auto mask = std::make_unique(allocation_size, stream, mr); - auto mask_buffer = array.null_bitmap(); - CUDF_CUDA_TRY(cudaMemcpyAsync(mask->data(), - reinterpret_cast(mask_buffer->address()), - null_bitmap_size, - cudaMemcpyDefault, - stream.value())); - // Zero-initialize trailing padding bytes - auto const num_trailing_bytes = allocation_size - null_bitmap_size; - if (num_trailing_bytes > 0) { - auto trailing_bytes = static_cast(mask->data()) + null_bitmap_size; - CUDF_CUDA_TRY(cudaMemsetAsync(trailing_bytes, 0, num_trailing_bytes, stream.value())); - } - return mask; - } - - template ())> - std::unique_ptr operator()( - arrow::Array const&, data_type, bool, rmm::cuda_stream_view, rmm::device_async_resource_ref) - { - CUDF_FAIL("Unsupported type in from_arrow."); - } - - template ())> - std::unique_ptr operator()(arrow::Array const& array, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - auto data_buffer = array.data()->buffers[1]; - size_type const num_rows = array.length(); - auto const has_nulls = skip_mask ? false : array.null_bitmap_data() != nullptr; - auto col = make_fixed_width_column(type, num_rows, mask_state::UNALLOCATED, stream, mr); - auto mutable_column_view = col->mutable_view(); - CUDF_CUDA_TRY(cudaMemcpyAsync( - mutable_column_view.data(), - reinterpret_cast(data_buffer->address()) + array.offset() * sizeof(T), - sizeof(T) * num_rows, - cudaMemcpyDefault, - stream.value())); - if (has_nulls) { - auto tmp_mask = get_mask_buffer(array, stream, mr); - - // If array is sliced, we have to copy whole mask and then take copy. - auto out_mask = (num_rows == static_cast(data_buffer->size() / sizeof(T))) - ? 
std::move(*tmp_mask) - : cudf::detail::copy_bitmask(static_cast(tmp_mask->data()), - array.offset(), - array.offset() + num_rows, - stream, - mr); - - col->set_null_mask(std::move(out_mask), array.null_count()); - } - - return col; - } -}; - -std::unique_ptr get_empty_type_column(size_type size) -{ - // this abomination is required by cuDF Python, which needs to handle - // [PyArrow null arrays](https://arrow.apache.org/docs/python/generated/pyarrow.NullArray.html) - // of finite length - return std::make_unique( - data_type(type_id::EMPTY), size, rmm::device_buffer{}, rmm::device_buffer{}, size); -} - -/** - * @brief Returns cudf column formed from given arrow array - * This has been introduced to take care of compiler error "error: explicit specialization of - * function must precede its first use" - */ -std::unique_ptr get_column(arrow::Array const& array, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - using DeviceType = __int128_t; - - auto data_buffer = array.data()->buffers[1]; - auto const num_rows = static_cast(array.length()); - auto col = make_fixed_width_column(type, num_rows, mask_state::UNALLOCATED, stream, mr); - auto mutable_column_view = col->mutable_view(); - - CUDF_CUDA_TRY(cudaMemcpyAsync( - mutable_column_view.data(), - reinterpret_cast(data_buffer->address()) + array.offset() * sizeof(DeviceType), - sizeof(DeviceType) * num_rows, - cudaMemcpyDefault, - stream.value())); - - auto null_mask = [&] { - if (not skip_mask and array.null_bitmap_data()) { - auto temp_mask = get_mask_buffer(array, stream, mr); - // If array is sliced, we have to copy whole mask and then take copy. - return (num_rows == static_cast(data_buffer->size() / sizeof(DeviceType))) - ? std::move(*temp_mask.release()) - : cudf::detail::copy_bitmask(static_cast(temp_mask->data()), - array.offset(), - array.offset() + num_rows, - stream, - mr); - } - return rmm::device_buffer{}; - }(); - - col->set_null_mask(std::move(null_mask), array.null_count()); - return col; -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()(arrow::Array const& array, - data_type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto data_buffer = array.data()->buffers[1]; - // mask-to-bools expects the mask to be bitmask_type aligned/padded - auto data = rmm::device_buffer( - cudf::bitmask_allocation_size_bytes(data_buffer->size() * CHAR_BIT), stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync(data.data(), - reinterpret_cast(data_buffer->address()), - data_buffer->size(), - cudaMemcpyDefault, - stream.value())); - auto out_col = mask_to_bools(static_cast(data.data()), - array.offset(), - array.offset() + array.length(), - stream, - mr); - - auto const has_nulls = skip_mask ? 
false : array.null_bitmap_data() != nullptr; - if (has_nulls) { - auto out_mask = - detail::copy_bitmask(static_cast(get_mask_buffer(array, stream, mr)->data()), - array.offset(), - array.offset() + array.length(), - stream, - mr); - - out_col->set_null_mask(std::move(out_mask), array.null_count()); - } - - return out_col; -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - if (array.length() == 0) { return make_empty_column(type_id::STRING); } - - std::unique_ptr offsets_column; - std::unique_ptr char_array; - - if (array.type_id() == arrow::Type::LARGE_STRING) { - auto str_array = static_cast(&array); - auto offset_array = std::make_unique( - str_array->value_offsets()->size() / sizeof(int64_t), str_array->value_offsets(), nullptr); - offsets_column = dispatch_to_cudf_column{}.operator()( - *offset_array, data_type(type_id::INT64), true, stream, mr); - char_array = std::make_unique( - str_array->value_data()->size(), str_array->value_data(), nullptr); - } else if (array.type_id() == arrow::Type::STRING) { - auto str_array = static_cast(&array); - auto offset_array = std::make_unique( - str_array->value_offsets()->size() / sizeof(int32_t), str_array->value_offsets(), nullptr); - offsets_column = dispatch_to_cudf_column{}.operator()( - *offset_array, data_type(type_id::INT32), true, stream, mr); - char_array = std::make_unique( - str_array->value_data()->size(), str_array->value_data(), nullptr); - } else { - throw std::runtime_error("Unsupported array type"); - } - - rmm::device_buffer chars(char_array->length(), stream, mr); - auto data_buffer = char_array->data()->buffers[1]; - CUDF_CUDA_TRY(cudaMemcpyAsync(chars.data(), - reinterpret_cast(data_buffer->address()), - chars.size(), - cudaMemcpyDefault, - stream.value())); - - auto const num_rows = offsets_column->size() - 1; - auto out_col = make_strings_column(num_rows, - std::move(offsets_column), - std::move(chars), - array.null_count(), - std::move(*get_mask_buffer(array, stream, mr))); - - return num_rows == array.length() - ? 
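// Both Arrow string layouts above map to the single cudf STRING type; only the
// offset width differs (STRING carries int32 offsets, LARGE_STRING int64). A
// minimal sketch of the rebuild step, assuming device-side `offsets` (an
// INT32/INT64 column) and `chars` (an rmm::device_buffer) copied as shown:
//   auto col = cudf::make_strings_column(
//     num_rows, std::move(offsets), std::move(chars), null_count, std::move(null_mask));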
std::move(out_col) - : std::make_unique( - cudf::detail::slice(out_col->view(), - static_cast(array.offset()), - static_cast(array.offset() + array.length()), - stream), - stream, - mr); -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto dict_array = static_cast(&array); - auto dict_type = arrow_to_cudf_type(*(dict_array->dictionary()->type())); - auto keys_column = get_column(*(dict_array->dictionary()), dict_type, true, stream, mr); - auto ind_type = arrow_to_cudf_type(*(dict_array->indices()->type())); - - auto indices_column = get_column(*(dict_array->indices()), ind_type, false, stream, mr); - // If index type is not of type uint32_t, then cast it to uint32_t - auto const dict_indices_type = data_type{type_id::UINT32}; - if (indices_column->type().id() != dict_indices_type.id()) - indices_column = cudf::detail::cast(indices_column->view(), dict_indices_type, stream, mr); - - // Child columns shouldn't have masks and we need the mask in main column - auto column_contents = indices_column->release(); - indices_column = std::make_unique(dict_indices_type, - static_cast(array.length()), - std::move(*(column_contents.data)), - rmm::device_buffer{}, - 0); - - return make_dictionary_column(std::move(keys_column), - std::move(indices_column), - std::move(*(column_contents.null_mask)), - array.null_count()); -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto struct_array = static_cast(&array); - std::vector> child_columns; - // Offsets have already been applied to child - arrow::ArrayVector array_children = struct_array->fields(); - std::transform(array_children.cbegin(), - array_children.cend(), - std::back_inserter(child_columns), - [&mr, &stream](auto const& child_array) { - auto type = arrow_to_cudf_type(*(child_array->type())); - return get_column(*child_array, type, false, stream, mr); - }); - - auto out_mask = std::move(*(get_mask_buffer(array, stream, mr))); - if (struct_array->null_bitmap_data() != nullptr) { - out_mask = detail::copy_bitmask(static_cast(out_mask.data()), - array.offset(), - array.offset() + array.length(), - stream, - mr); - } - - return make_structs_column( - array.length(), move(child_columns), array.null_count(), std::move(out_mask), stream, mr); -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto list_array = static_cast(&array); - auto offset_array = std::make_unique( - list_array->value_offsets()->size() / sizeof(int32_t), list_array->value_offsets(), nullptr); - auto offsets_column = dispatch_to_cudf_column{}.operator()( - *offset_array, data_type(type_id::INT32), true, stream, mr); - - auto child_type = arrow_to_cudf_type(*(list_array->values()->type())); - auto child_column = get_column(*(list_array->values()), child_type, false, stream, mr); - - auto const num_rows = offsets_column->size() - 1; - auto out_col = make_lists_column(num_rows, - std::move(offsets_column), - std::move(child_column), - array.null_count(), - std::move(*get_mask_buffer(array, stream, mr)), - stream, - mr); - - return num_rows == array.length() - ? 
std::move(out_col)
-           : std::make_unique<column>(
-               cudf::detail::slice(out_col->view(),
-                                   static_cast<size_type>(array.offset()),
-                                   static_cast<size_type>(array.offset() + array.length()),
-                                   stream),
-               stream,
-               mr);
-}
-
-std::unique_ptr<column> get_column(arrow::Array const& array,
-                                   data_type type,
-                                   bool skip_mask,
-                                   rmm::cuda_stream_view stream,
-                                   rmm::device_async_resource_ref mr)
-{
-  return type.id() != type_id::EMPTY
-           ? type_dispatcher(type, dispatch_to_cudf_column{}, array, type, skip_mask, stream, mr)
-           : get_empty_type_column(array.length());
-}
-
-}  // namespace
-
-std::unique_ptr<table>
from_arrow(arrow::Table const& input_table,
-                                  rmm::cuda_stream_view stream,
-                                  rmm::device_async_resource_ref mr)
-{
-  if (input_table.num_columns() == 0) { return std::make_unique<table>
(); }
-
-  std::vector<std::unique_ptr<column>> columns;
-  auto chunked_arrays = input_table.columns();
-  std::transform(chunked_arrays.begin(),
-                 chunked_arrays.end(),
-                 std::back_inserter(columns),
-                 [&mr, &stream](auto const& chunked_array) {
-                   std::vector<std::unique_ptr<column>> concat_columns;
-                   auto cudf_type    = arrow_to_cudf_type(*(chunked_array->type()));
-                   auto array_chunks = chunked_array->chunks();
-                   if (cudf_type.id() == type_id::EMPTY) {
-                     return get_empty_type_column(chunked_array->length());
-                   }
-                   std::transform(array_chunks.begin(),
-                                  array_chunks.end(),
-                                  std::back_inserter(concat_columns),
-                                  [&cudf_type, &mr, &stream](auto const& array_chunk) {
-                                    return get_column(*array_chunk, cudf_type, false, stream, mr);
-                                  });
-                   if (concat_columns.empty()) {
-                     return std::make_unique<column>(
-                       cudf_type, 0, rmm::device_buffer{}, rmm::device_buffer{}, 0);
-                   } else if (concat_columns.size() == 1) {
-                     return std::move(concat_columns[0]);
-                   }
-
-                   std::vector<column_view> column_views;
-                   std::transform(concat_columns.begin(),
-                                  concat_columns.end(),
-                                  std::back_inserter(column_views),
-                                  [](auto const& col) { return col->view(); });
-                   return cudf::detail::concatenate(column_views, stream, mr);
-                 });
-
-  return std::make_unique<table>
(std::move(columns)); -} - -std::unique_ptr from_arrow(arrow::Scalar const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto maybe_array = arrow::MakeArrayFromScalar(input, 1); - if (!maybe_array.ok()) { CUDF_FAIL("Failed to create array"); } - auto array = *maybe_array; - - auto field = arrow::field("", input.type); - - auto table = arrow::Table::Make(arrow::schema({field}), {array}); - - auto cudf_table = detail::from_arrow(*table, stream, mr); - - auto cv = cudf_table->view().column(0); - return get_element(cv, 0, stream); -} - -} // namespace detail - -std::unique_ptr
from_arrow(arrow::Table const& input_table, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - - return detail::from_arrow(input_table, stream, mr); -} - -std::unique_ptr from_arrow(arrow::Scalar const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - - return detail::from_arrow(input, stream, mr); -} -} // namespace cudf diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu deleted file mode 100644 index 3d41f856f4f..00000000000 --- a/cpp/src/interop/to_arrow.cu +++ /dev/null @@ -1,490 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "arrow_utilities.hpp" -#include "decimal_conversion_utilities.cuh" -#include "detail/arrow_allocator.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -namespace cudf { -namespace detail { -namespace { - -/** - * @brief Create arrow data buffer from given cudf column - */ -template -std::shared_ptr fetch_data_buffer(device_span input, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - int64_t const data_size_in_bytes = sizeof(T) * input.size(); - - auto data_buffer = allocate_arrow_buffer(data_size_in_bytes, ar_mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - input.data(), - data_size_in_bytes, - cudaMemcpyDefault, - stream.value())); - - return std::move(data_buffer); -} - -/** - * @brief Create arrow buffer of mask from given cudf column - */ -std::shared_ptr fetch_mask_buffer(column_view input_view, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - int64_t const mask_size_in_bytes = cudf::bitmask_allocation_size_bytes(input_view.size()); - - if (input_view.has_nulls()) { - auto mask_buffer = allocate_arrow_bitmap(static_cast(input_view.size()), ar_mr); - CUDF_CUDA_TRY(cudaMemcpyAsync( - mask_buffer->mutable_data(), - (input_view.offset() > 0) - ? 
cudf::detail::copy_bitmask(input_view, stream, rmm::mr::get_current_device_resource()) - .data() - : input_view.null_mask(), - mask_size_in_bytes, - cudaMemcpyDefault, - stream.value())); - - // Resets all padded bits to 0 - mask_buffer->ZeroPadding(); - - return mask_buffer; - } - - return nullptr; -} - -/** - * @brief Functor to convert cudf column to arrow array - */ -struct dispatch_to_arrow { - /** - * @brief Creates vector Arrays from given cudf column children - */ - std::vector> fetch_child_array( - column_view input_view, - std::vector const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) - { - std::vector> child_arrays; - std::transform( - input_view.child_begin(), - input_view.child_end(), - metadata.begin(), - std::back_inserter(child_arrays), - [&ar_mr, &stream](auto const& child, auto const& meta) { - return type_dispatcher( - child.type(), dispatch_to_arrow{}, child, child.type().id(), meta, ar_mr, stream); - }); - return child_arrays; - } - - template ())> - std::shared_ptr operator()( - column_view, cudf::type_id, column_metadata const&, arrow::MemoryPool*, rmm::cuda_stream_view) - { - CUDF_FAIL("Unsupported type for to_arrow."); - } - - template ())> - std::shared_ptr operator()(column_view input_view, - cudf::type_id id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) - { - return to_arrow_array( - id, - static_cast(input_view.size()), - fetch_data_buffer( - device_span(input_view.data(), input_view.size()), ar_mr, stream), - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); - } -}; - -// Convert decimal types from libcudf to arrow where those types are not -// directly supported by Arrow. These types must be fit into 128 bits, the -// smallest decimal resolution supported by Arrow. -template -std::shared_ptr unsupported_decimals_to_arrow(column_view input, - int32_t precision, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - auto buf = detail::convert_decimals_to_decimal128( - input, stream, rmm::mr::get_current_device_resource()); - - // Synchronize stream here to ensure the decimal128 buffer is ready. 
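// Arrow's narrowest decimal layout is 128-bit, so DECIMAL32/DECIMAL64 payloads
// are widened element-wise on the device before the host copy. A minimal sketch
// of that widening under the same assumption, compiled as CUDA with extended
// lambdas enabled (--extended-lambda); widen_decimal32 is an illustrative name:
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <thrust/transform.h>

rmm::device_uvector<__int128_t> widen_decimal32(rmm::device_uvector<int32_t> const& in,
                                                rmm::cuda_stream_view stream)
{
  rmm::device_uvector<__int128_t> out(in.size(), stream);
  // Scaled integer values are sign-extended; scale and precision metadata are
  // carried separately by the resulting arrow::Decimal128Type.
  thrust::transform(rmm::exec_policy(stream), in.begin(), in.end(), out.begin(),
                    [] __device__(int32_t v) { return static_cast<__int128_t>(v); });
  return out;
}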
- stream.synchronize(); - - auto const buf_size_in_bytes = buf->size(); - auto data_buffer = allocate_arrow_buffer(buf_size_in_bytes, ar_mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - buf->data(), - buf_size_in_bytes, - cudaMemcpyDefault, - stream.value())); - - auto type = arrow::decimal(precision, -input.type().scale()); - auto mask = fetch_mask_buffer(input, ar_mr, stream); - auto buffers = std::vector>{mask, std::move(data_buffer)}; - auto data = std::make_shared(type, input.size(), buffers); - - return std::make_shared(data); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - using DeviceType = int32_t; - return unsupported_decimals_to_arrow( - input, cudf::detail::max_precision(), ar_mr, stream); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - using DeviceType = int64_t; - return unsupported_decimals_to_arrow( - input, cudf::detail::max_precision(), ar_mr, stream); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - using DeviceType = __int128_t; - auto const max_precision = cudf::detail::max_precision(); - - rmm::device_uvector buf(input.size(), stream); - - thrust::copy(rmm::exec_policy(stream), // - input.begin(), - input.end(), - buf.begin()); - - auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType); - auto data_buffer = allocate_arrow_buffer(buf_size_in_bytes, ar_mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync( - data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value())); - - auto type = arrow::decimal(max_precision, -input.type().scale()); - auto mask = fetch_mask_buffer(input, ar_mr, stream); - auto buffers = std::vector>{mask, std::move(data_buffer)}; - auto data = std::make_shared(type, input.size(), buffers); - - return std::make_shared(data); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()(column_view input, - cudf::type_id id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - auto bitmask = bools_to_mask(input, stream, rmm::mr::get_current_device_resource()); - - auto data_buffer = allocate_arrow_buffer(static_cast(bitmask.first->size()), ar_mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - bitmask.first->data(), - bitmask.first->size(), - cudaMemcpyDefault, - stream.value())); - return to_arrow_array(id, - static_cast(input.size()), - std::move(data_buffer), - fetch_mask_buffer(input, ar_mr, stream), - static_cast(input.null_count())); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - std::unique_ptr tmp_column = - ((input.offset() != 0) or - ((input.num_children() == 1) and (input.child(0).size() - 1 != input.size()))) - ? std::make_unique(input, stream) - : nullptr; - - column_view input_view = (tmp_column != nullptr) ? 
tmp_column->view() : input; - auto child_arrays = fetch_child_array(input_view, {{}, {}}, ar_mr, stream); - if (child_arrays.empty()) { - // Empty string will have only one value in offset of 4 bytes - auto tmp_offset_buffer = allocate_arrow_buffer(sizeof(int32_t), ar_mr); - auto tmp_data_buffer = allocate_arrow_buffer(0, ar_mr); - memset(tmp_offset_buffer->mutable_data(), 0, sizeof(int32_t)); - - return std::make_shared( - 0, std::move(tmp_offset_buffer), std::move(tmp_data_buffer)); - } - auto offset_buffer = child_arrays[strings_column_view::offsets_column_index]->data()->buffers[1]; - auto const sview = strings_column_view{input_view}; - auto data_buffer = fetch_data_buffer( - device_span{sview.chars_begin(stream), - static_cast(sview.chars_size(stream))}, - ar_mr, - stream); - if (sview.offsets().type().id() == cudf::type_id::INT64) { - return std::make_shared(static_cast(input_view.size()), - offset_buffer, - data_buffer, - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); - } else { - return std::make_shared(static_cast(input_view.size()), - offset_buffer, - data_buffer, - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); - } -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - CUDF_EXPECTS(metadata.children_meta.size() == static_cast(input.num_children()), - "Number of field names and number of children doesn't match\n"); - std::unique_ptr tmp_column = nullptr; - - if (input.offset() != 0) { tmp_column = std::make_unique(input, stream); } - - column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; - auto child_arrays = fetch_child_array(input_view, metadata.children_meta, ar_mr, stream); - auto mask = fetch_mask_buffer(input_view, ar_mr, stream); - - std::vector> fields; - std::transform(child_arrays.cbegin(), - child_arrays.cend(), - metadata.children_meta.cbegin(), - std::back_inserter(fields), - [](auto const array, auto const meta) { - return std::make_shared( - meta.name, array->type(), array->null_count() > 0); - }); - auto dtype = std::make_shared(fields); - - return std::make_shared(dtype, - static_cast(input_view.size()), - child_arrays, - mask, - static_cast(input_view.null_count())); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - CUDF_EXPECTS(metadata.children_meta.empty() || - metadata.children_meta.size() == static_cast(input.num_children()), - "Number of field names and number of children do not match\n"); - std::unique_ptr tmp_column = nullptr; - if ((input.offset() != 0) or - ((input.num_children() == 2) and (input.child(0).size() - 1 != input.size()))) { - tmp_column = std::make_unique(input, stream); - } - - column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; - auto children_meta = - metadata.children_meta.empty() ? std::vector{{}, {}} : metadata.children_meta; - auto child_arrays = fetch_child_array(input_view, children_meta, ar_mr, stream); - if (child_arrays.empty() || child_arrays[0]->data()->length == 0) { - auto element_type = child_arrays.empty() ? 
arrow::null() : child_arrays[1]->type(); - auto result = arrow::MakeEmptyArray(arrow::list(element_type), ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to construct empty arrow list array\n"); - return result.ValueUnsafe(); - } - - auto offset_buffer = child_arrays[0]->data()->buffers[1]; - auto data = child_arrays[1]; - return std::make_shared(arrow::list(data->type()), - static_cast(input_view.size()), - offset_buffer, - data, - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - // Arrow dictionary requires indices to be signed integer - std::unique_ptr dict_indices = - detail::cast(cudf::dictionary_column_view(input).get_indices_annotated(), - cudf::data_type{type_id::INT32}, - stream, - rmm::mr::get_current_device_resource()); - auto indices = dispatch_to_arrow{}.operator()( - dict_indices->view(), dict_indices->type().id(), {}, ar_mr, stream); - auto dict_keys = cudf::dictionary_column_view(input).keys(); - auto dictionary = - type_dispatcher(dict_keys.type(), - dispatch_to_arrow{}, - dict_keys, - dict_keys.type().id(), - metadata.children_meta.empty() ? column_metadata{} : metadata.children_meta[0], - ar_mr, - stream); - - return std::make_shared( - arrow::dictionary(indices->type(), dictionary->type()), indices, dictionary); -} -} // namespace - -std::shared_ptr to_arrow(table_view input, - std::vector const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - CUDF_EXPECTS((metadata.size() == static_cast(input.num_columns())), - "columns' metadata should be equal to number of columns in table"); - - std::vector> arrays; - std::vector> fields; - - std::transform( - input.begin(), - input.end(), - metadata.begin(), - std::back_inserter(arrays), - [&](auto const& c, auto const& meta) { - return c.type().id() != type_id::EMPTY - ? type_dispatcher( - c.type(), detail::dispatch_to_arrow{}, c, c.type().id(), meta, ar_mr, stream) - : std::make_shared(c.size()); - }); - - std::transform( - arrays.begin(), - arrays.end(), - metadata.begin(), - std::back_inserter(fields), - [](auto const& array, auto const& meta) { return arrow::field(meta.name, array->type()); }); - - auto result = arrow::Table::Make(arrow::schema(fields), arrays); - - // synchronize the stream because after the return the data may be accessed from the host before - // the above `cudaMemcpyAsync` calls have completed their copies (especially if pinned host - // memory is used). 
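// A usage sketch of the interface this (deleted) file implemented, matching the
// public signature below; `tbl` is assumed to be an already-populated cudf::table:
//   std::vector<cudf::column_metadata> metadata{{"ints"}, {"strings"}};
//   std::shared_ptr<arrow::Table> out = cudf::to_arrow(
//     tbl.view(), metadata, cudf::get_default_stream(), arrow::default_memory_pool());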
- stream.synchronize(); - - return result; -} - -std::shared_ptr to_arrow(cudf::scalar const& input, - column_metadata const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - auto const column = cudf::make_column_from_scalar(input, 1, stream); - cudf::table_view const tv{{column->view()}}; - auto const arrow_table = detail::to_arrow(tv, {metadata}, stream, ar_mr); - auto const ac = arrow_table->column(0); - auto const maybe_scalar = ac->GetScalar(0); - if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); } - return maybe_scalar.ValueOrDie(); -} -} // namespace detail - -std::shared_ptr to_arrow(table_view input, - std::vector const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - CUDF_FUNC_RANGE(); - return detail::to_arrow(input, metadata, stream, ar_mr); -} - -std::shared_ptr to_arrow(cudf::scalar const& input, - column_metadata const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - CUDF_FUNC_RANGE(); - return detail::to_arrow(input, metadata, stream, ar_mr); -} -} // namespace cudf diff --git a/cpp/src/interop/to_arrow_device.cu b/cpp/src/interop/to_arrow_device.cu index cea7cdebcba..a5f3f9d87f5 100644 --- a/cpp/src/interop/to_arrow_device.cu +++ b/cpp/src/interop/to_arrow_device.cu @@ -200,7 +200,7 @@ int dispatch_to_arrow_device::operator()(cudf::column&& column, nanoarrow::UniqueArray tmp; NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_BOOL, column)); - auto bitmask = bools_to_mask(column.view(), stream, mr); + auto bitmask = detail::bools_to_mask(column.view(), stream, mr); auto contents = column.release(); NANOARROW_RETURN_NOT_OK(set_null_mask(contents, tmp.get())); NANOARROW_RETURN_NOT_OK( @@ -442,7 +442,7 @@ int dispatch_to_arrow_device_view::operator()(ArrowArray* out) const nanoarrow::UniqueArray tmp; NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_BOOL, column)); - auto bitmask = bools_to_mask(column, stream, mr); + auto bitmask = detail::bools_to_mask(column, stream, mr); NANOARROW_RETURN_NOT_OK( set_buffer(std::move(bitmask.first), fixed_width_data_buffer_idx, tmp.get())); NANOARROW_RETURN_NOT_OK(set_null_mask(column, tmp.get())); diff --git a/cpp/src/interop/to_arrow_host.cu b/cpp/src/interop/to_arrow_host.cu index 193b3a3b5a2..26f7c7e6e53 100644 --- a/cpp/src/interop/to_arrow_host.cu +++ b/cpp/src/interop/to_arrow_host.cu @@ -147,7 +147,7 @@ int dispatch_to_arrow_host::operator()(ArrowArray* out) const NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_BOOL, column)); NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); - auto bitmask = bools_to_mask(column, stream, mr); + auto bitmask = detail::bools_to_mask(column, stream, mr); NANOARROW_RETURN_NOT_OK(populate_data_buffer( device_span(reinterpret_cast(bitmask.first->data()), bitmask.first->size()), diff --git a/cpp/src/interop/to_arrow_schema.cpp b/cpp/src/interop/to_arrow_schema.cpp index b98ca8a7bed..5afed772656 100644 --- a/cpp/src/interop/to_arrow_schema.cpp +++ b/cpp/src/interop/to_arrow_schema.cpp @@ -170,8 +170,9 @@ int dispatch_to_arrow_type::operator()(column_view input, NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(out, NANOARROW_TYPE_LIST)); auto child = input.child(cudf::lists_column_view::child_column_index); ArrowSchemaInit(out->children[0]); - auto child_meta = - metadata.children_meta.empty() ? column_metadata{"element"} : metadata.children_meta[0]; + auto child_meta = metadata.children_meta.empty() + ? 
column_metadata{"element"} + : metadata.children_meta[cudf::lists_column_view::child_column_index]; out->flags = input.has_nulls() ? ARROW_FLAG_NULLABLE : 0; NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(out->children[0], child_meta.name.c_str())); diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu index 861820f47e7..72649dbe427 100644 --- a/cpp/src/io/comp/debrotli.cu +++ b/cpp/src/io/comp/debrotli.cu @@ -58,6 +58,7 @@ THE SOFTWARE. #include "gpuinflate.hpp" #include "io/utilities/block_utils.cuh" +#include #include #include @@ -2047,19 +2048,14 @@ CUDF_KERNEL void __launch_bounds__(block_size, 2) */ size_t __host__ get_gpu_debrotli_scratch_size(int max_num_inputs) { - int sm_count = 0; - int dev = 0; uint32_t max_fb_size, min_fb_size, fb_size; - CUDF_CUDA_TRY(cudaGetDevice(&dev)); - if (cudaSuccess == cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev)) { - // printf("%d SMs on device %d\n", sm_count, dev); - max_num_inputs = - min(max_num_inputs, sm_count * 3); // no more than 3 blocks/sm at most due to 32KB smem use - if (max_num_inputs <= 0) { - max_num_inputs = sm_count * 2; // Target 2 blocks/SM by default for scratch mem computation - } + auto const sm_count = cudf::detail::num_multiprocessors(); + // no more than 3 blocks/sm at most due to 32KB smem use + max_num_inputs = std::min(max_num_inputs, sm_count * 3); + if (max_num_inputs <= 0) { + max_num_inputs = sm_count * 2; // Target 2 blocks/SM by default for scratch mem computation } - max_num_inputs = min(max(max_num_inputs, 1), 512); + max_num_inputs = std::min(std::max(max_num_inputs, 1), 512); // Max fb size per block occurs if all huffman tables for all 3 group types fail local_alloc() // with num_htrees=256 (See HuffmanTreeGroupAlloc) max_fb_size = 256 * (630 + 1080 + 920) * 2; // 1.3MB diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 7a05d0aebaf..5a0c6decfda 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -794,7 +794,7 @@ device_span __host__ remove_blank_rows(cudf::io::parse_options_view co return row_offsets.subspan(0, new_end - row_offsets.begin()); } -std::vector detect_column_types( +cudf::detail::host_vector detect_column_types( cudf::io::parse_options_view const& options, device_span const data, device_span const column_flags, @@ -812,7 +812,7 @@ std::vector detect_column_types( data_type_detection<<>>( options, data, column_flags, row_starts, d_stats); - return detail::make_std_vector_sync(d_stats, stream); + return detail::make_host_vector_sync(d_stats, stream); } void decode_row_column_data(cudf::io::parse_options_view const& options, diff --git a/cpp/src/io/csv/csv_gpu.hpp b/cpp/src/io/csv/csv_gpu.hpp index 06c60319371..aa3d9f6c7b7 100644 --- a/cpp/src/io/csv/csv_gpu.hpp +++ b/cpp/src/io/csv/csv_gpu.hpp @@ -199,7 +199,7 @@ device_span remove_blank_rows(cudf::io::parse_options_view const& opti * * @return stats Histogram of each dtypes' occurrence for each column */ -std::vector detect_column_types( +cudf::detail::host_vector detect_column_types( cudf::io::parse_options_view const& options, device_span data, device_span column_flags, diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 40d4372ae9d..e27b06682bb 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -614,7 +614,7 @@ std::vector decode_data(parse_options const& parse_opts, d_valid_counts, stream); - auto const h_valid_counts = cudf::detail::make_std_vector_sync(d_valid_counts, stream); + auto const 
h_valid_counts = cudf::detail::make_host_vector_sync(d_valid_counts, stream); for (int i = 0; i < num_active_columns; ++i) { out_buffers[i].null_count() = num_records - h_valid_counts[i]; } diff --git a/cpp/src/io/json/byte_range_info.cu b/cpp/src/io/json/byte_range_info.cu deleted file mode 100644 index 258a40b0dd3..00000000000 --- a/cpp/src/io/json/byte_range_info.cu +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include -#include - -#include - -namespace cudf::io::json::detail { - -// Extract the first character position in the string. -size_type find_first_delimiter(device_span d_data, - char const delimiter, - rmm::cuda_stream_view stream) -{ - auto const first_delimiter_position = - thrust::find(rmm::exec_policy(stream), d_data.begin(), d_data.end(), delimiter); - return first_delimiter_position != d_data.end() ? first_delimiter_position - d_data.begin() : -1; -} - -} // namespace cudf::io::json::detail diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 17fa7abdffe..8d6890045be 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -77,16 +77,16 @@ void print_tree(host_span input, tree_meta_t const& d_gpu_tree, rmm::cuda_stream_view stream) { - print_vec(cudf::detail::make_std_vector_sync(d_gpu_tree.node_categories, stream), + print_vec(cudf::detail::make_host_vector_sync(d_gpu_tree.node_categories, stream), "node_categories", to_cat); - print_vec(cudf::detail::make_std_vector_sync(d_gpu_tree.parent_node_ids, stream), + print_vec(cudf::detail::make_host_vector_sync(d_gpu_tree.parent_node_ids, stream), "parent_node_ids", to_int); print_vec( - cudf::detail::make_std_vector_sync(d_gpu_tree.node_levels, stream), "node_levels", to_int); - auto node_range_begin = cudf::detail::make_std_vector_sync(d_gpu_tree.node_range_begin, stream); - auto node_range_end = cudf::detail::make_std_vector_sync(d_gpu_tree.node_range_end, stream); + cudf::detail::make_host_vector_sync(d_gpu_tree.node_levels, stream), "node_levels", to_int); + auto node_range_begin = cudf::detail::make_host_vector_sync(d_gpu_tree.node_range_begin, stream); + auto node_range_end = cudf::detail::make_host_vector_sync(d_gpu_tree.node_range_end, stream); print_vec(node_range_begin, "node_range_begin", to_int); print_vec(node_range_end, "node_range_end", to_int); for (int i = 0; i < int(node_range_begin.size()); i++) { @@ -373,9 +373,9 @@ std::vector copy_strings_to_host_sync( auto to_host = [stream](auto const& col) { if (col.is_empty()) return std::vector{}; auto const scv = cudf::strings_column_view(col); - auto const h_chars = cudf::detail::make_std_vector_async( + auto const h_chars = cudf::detail::make_host_vector_async( cudf::device_span(scv.chars_begin(stream), scv.chars_size(stream)), stream); - auto const h_offsets = cudf::detail::make_std_vector_async( + auto const h_offsets = cudf::detail::make_host_vector_async( 
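// The make_std_vector_* -> make_host_vector_* changes throughout this diff swap
// a pageable std::vector destination for cudf's pinned host_vector, which lets
// the device-to-host copies run truly asynchronously. The pattern, assuming a
// populated rmm::device_uvector<int> d_vals:
//   auto h_vals = cudf::detail::make_host_vector_sync(
//     cudf::device_span<int const>{d_vals.data(), d_vals.size()}, stream);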
cudf::device_span(scv.offsets().data() + scv.offset(), scv.size() + 1), stream); @@ -523,25 +523,23 @@ void make_device_json_column(device_span input, row_array_parent_col_id, stream); auto num_columns = d_unique_col_ids.size(); - auto unique_col_ids = cudf::detail::make_std_vector_async(d_unique_col_ids, stream); + auto unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); auto column_categories = - cudf::detail::make_std_vector_async(d_column_tree.node_categories, stream); - auto column_parent_ids = - cudf::detail::make_std_vector_async(d_column_tree.parent_node_ids, stream); + cudf::detail::make_host_vector_async(d_column_tree.node_categories, stream); + auto const column_parent_ids = + cudf::detail::make_host_vector_async(d_column_tree.parent_node_ids, stream); auto column_range_beg = - cudf::detail::make_std_vector_async(d_column_tree.node_range_begin, stream); - auto max_row_offsets = cudf::detail::make_std_vector_async(d_max_row_offsets, stream); + cudf::detail::make_host_vector_async(d_column_tree.node_range_begin, stream); + auto const max_row_offsets = cudf::detail::make_host_vector_async(d_max_row_offsets, stream); std::vector column_names = copy_strings_to_host_sync( input, d_column_tree.node_range_begin, d_column_tree.node_range_end, stream); - stream.synchronize(); // array of arrays column names if (is_array_of_arrays) { TreeDepthT const row_array_children_level = is_enabled_lines ? 1 : 2; auto values_column_indices = get_values_column_indices(row_array_children_level, tree, col_ids, num_columns, stream); auto h_values_column_indices = - cudf::detail::make_std_vector_async(values_column_indices, stream); - stream.synchronize(); + cudf::detail::make_host_vector_sync(values_column_indices, stream); std::transform(unique_col_ids.begin(), unique_col_ids.end(), column_names.begin(), @@ -567,22 +565,22 @@ void make_device_json_column(device_span input, thrust::uninitialized_fill(rmm::exec_policy_nosync(stream), v.begin(), v.end(), 0); }; - auto initialize_json_columns = [&](auto i, auto& col) { - if (column_categories[i] == NC_ERR || column_categories[i] == NC_FN) { + auto initialize_json_columns = [&](auto i, auto& col, auto column_category) { + if (column_category == NC_ERR || column_category == NC_FN) { return; - } else if (column_categories[i] == NC_VAL || column_categories[i] == NC_STR) { + } else if (column_category == NC_VAL || column_category == NC_STR) { col.string_offsets.resize(max_row_offsets[i] + 1, stream); col.string_lengths.resize(max_row_offsets[i] + 1, stream); init_to_zero(col.string_offsets); init_to_zero(col.string_lengths); - } else if (column_categories[i] == NC_LIST) { + } else if (column_category == NC_LIST) { col.child_offsets.resize(max_row_offsets[i] + 2, stream); init_to_zero(col.child_offsets); } col.num_rows = max_row_offsets[i] + 1; col.validity = cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); - col.type = to_json_col_type(column_categories[i]); + col.type = to_json_col_type(column_category); }; auto reinitialize_as_string = [&](auto i, auto& col) { @@ -611,11 +609,13 @@ void make_device_json_column(device_span input, return thrust::get<0>(a) < thrust::get<0>(b); }); - std::vector is_str_column_all_nulls{}; - if (is_enabled_mixed_types_as_string) { - is_str_column_all_nulls = cudf::detail::make_std_vector_sync( - is_all_nulls_each_column(input, d_column_tree, tree, col_ids, options, stream), stream); - } + auto const is_str_column_all_nulls = [&, &column_tree = d_column_tree]() { + if 
(is_enabled_mixed_types_as_string) { + return cudf::detail::make_host_vector_sync( + is_all_nulls_each_column(input, column_tree, tree, col_ids, options, stream), stream); + } + return cudf::detail::make_empty_host_vector(0, stream); + }(); // use hash map because we may skip field name's col_ids std::unordered_map> columns; @@ -764,21 +764,23 @@ void make_device_json_column(device_span input, } } + auto this_column_category = column_categories[this_col_id]; if (is_enabled_mixed_types_as_string) { - // get path of this column, check if it is a struct forced as string, and enforce it + // get path of this column, check if it is a struct/list forced as string, and enforce it auto const nt = tree_path.get_path(this_col_id); std::optional const user_dtype = get_path_data_type(nt, options); - if (column_categories[this_col_id] == NC_STRUCT and user_dtype.has_value() and - user_dtype.value().id() == type_id::STRING) { + if ((column_categories[this_col_id] == NC_STRUCT or + column_categories[this_col_id] == NC_LIST) and + user_dtype.has_value() and user_dtype.value().id() == type_id::STRING) { is_mixed_type_column[this_col_id] = 1; - column_categories[this_col_id] = NC_STR; + this_column_category = NC_STR; } } CUDF_EXPECTS(parent_col.child_columns.count(name) == 0, "duplicate column name: " + name); // move into parent device_json_column col(stream, mr); - initialize_json_columns(this_col_id, col); + initialize_json_columns(this_col_id, col, this_column_category); auto inserted = parent_col.child_columns.try_emplace(name, std::move(col)).second; CUDF_EXPECTS(inserted, "child column insertion failed, duplicate column name in the parent"); if (not replaced) parent_col.column_order.push_back(name); diff --git a/cpp/src/io/json/json_normalization.cu b/cpp/src/io/json/json_normalization.cu index 760b2214365..cb8b4e97ebb 100644 --- a/cpp/src/io/json/json_normalization.cu +++ b/cpp/src/io/json/json_normalization.cu @@ -298,7 +298,7 @@ struct TransduceToNormalizedWS { namespace detail { -void normalize_single_quotes(datasource::owning_buffer>& indata, +void normalize_single_quotes(datasource::owning_buffer& indata, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -311,22 +311,22 @@ void normalize_single_quotes(datasource::owning_buffer outbuf(indata.size() * 2, stream, mr); + rmm::device_buffer outbuf(indata.size() * 2, stream, mr); rmm::device_scalar outbuf_size(stream, mr); - parser.Transduce(indata.data(), + parser.Transduce(reinterpret_cast(indata.data()), static_cast(indata.size()), - outbuf.data(), + static_cast(outbuf.data()), thrust::make_discard_iterator(), outbuf_size.data(), normalize_quotes::start_state, stream); outbuf.resize(outbuf_size.value(stream), stream); - datasource::owning_buffer> outdata(std::move(outbuf)); + datasource::owning_buffer outdata(std::move(outbuf)); std::swap(indata, outdata); } -void normalize_whitespace(datasource::owning_buffer>& indata, +void normalize_whitespace(datasource::owning_buffer& indata, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -339,18 +339,18 @@ void normalize_whitespace(datasource::owning_buffer normalize_whitespace::TransduceToNormalizedWS{}), stream); - rmm::device_uvector outbuf(indata.size(), stream, mr); + rmm::device_buffer outbuf(indata.size(), stream, mr); rmm::device_scalar outbuf_size(stream, mr); - parser.Transduce(indata.data(), + parser.Transduce(reinterpret_cast(indata.data()), static_cast(indata.size()), - outbuf.data(), + static_cast(outbuf.data()), thrust::make_discard_iterator(), 
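// Both normalization passes follow the same transduce-then-swap pattern, in
// sketch form: run the FST into a scratch rmm::device_buffer, shrink it to the
// transduced size, and swap it in as the new owning input buffer:
//   outbuf.resize(outbuf_size.value(stream), stream);
//   datasource::owning_buffer<rmm::device_buffer> outdata(std::move(outbuf));
//   std::swap(indata, outdata);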
outbuf_size.data(), normalize_whitespace::start_state, stream); outbuf.resize(outbuf_size.value(stream), stream); - datasource::owning_buffer> outdata(std::move(outbuf)); + datasource::owning_buffer outdata(std::move(outbuf)); std::swap(indata, outdata); } diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index ad807b57766..ee6bc0b9f4b 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -545,15 +545,15 @@ rmm::device_uvector hash_node_type_with_field_name(device_span{d_hasher}, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto key_set = cuco::static_set{ + cuco::extent{compute_hash_table_size(num_fields, 40)}, // 40% occupancy + cuco::empty_key{empty_node_index_sentinel}, + d_equal, + cuco::linear_probing<1, hasher_type>{d_hasher}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; key_set.insert_if_async(iter, iter + num_nodes, thrust::counting_iterator(0), // stencil @@ -734,14 +734,15 @@ std::pair, rmm::device_uvector> hash_n constexpr size_type empty_node_index_sentinel = -1; using hasher_type = decltype(d_hashed_cache); - auto key_set = cuco::static_set{cuco::extent{compute_hash_table_size(num_nodes)}, - cuco::empty_key{empty_node_index_sentinel}, - d_equal, - cuco::linear_probing<1, hasher_type>{d_hashed_cache}, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto key_set = cuco::static_set{ + cuco::extent{compute_hash_table_size(num_nodes)}, + cuco::empty_key{empty_node_index_sentinel}, + d_equal, + cuco::linear_probing<1, hasher_type>{d_hashed_cache}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; // insert and convert node ids to unique set ids auto nodes_itr = thrust::make_counting_iterator(0); diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu index 590f70864b1..98e8e8d3c7e 100644 --- a/cpp/src/io/json/read_json.cu +++ b/cpp/src/io/json/read_json.cu @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -38,11 +39,14 @@ namespace cudf::io::json::detail { -size_t sources_size(host_span> const sources, - size_t range_offset, - size_t range_size) +namespace { + +// Return total size of sources enclosing the passed range +std::size_t sources_size(host_span> const sources, + std::size_t range_offset, + std::size_t range_size) { - return std::accumulate(sources.begin(), sources.end(), 0ul, [=](size_t sum, auto& source) { + return std::accumulate(sources.begin(), sources.end(), 0ul, [=](std::size_t sum, auto& source) { auto const size = source->size(); // TODO take care of 0, 0, or *, 0 case. return sum + @@ -50,109 +54,55 @@ size_t sources_size(host_span> const sources, }); } +// Return estimated size of subchunk using a heuristic involving the byte range size and the minimum +// subchunk size +std::size_t estimate_size_per_subchunk(std::size_t chunk_size) +{ + auto geometric_mean = [](double a, double b) { return std::sqrt(a * b); }; + // NOTE: heuristic for choosing subchunk size: geometric mean of minimum subchunk size (set to + // 10kb) and the byte range size + return geometric_mean(std::ceil(static_cast(chunk_size) / num_subchunks), + min_subchunk_size); +} + /** - * @brief Read from array of data sources into RMM buffer. The size of the returned device span - can be larger than the number of bytes requested from the list of sources when - the range to be read spans across multiple sources. 
This is due to the delimiter - characters inserted after the end of each accessed source. + * @brief Return the upper bound on the batch size for the JSON reader. * - * @param buffer Device span buffer to which data is read - * @param sources Array of data sources - * @param compression Compression format of source - * @param range_offset Number of bytes to skip from source start - * @param range_size Number of bytes to read from source - * @param stream CUDA stream used for device memory operations and kernel launches - * @returns A subspan of the input device span containing data read + * The datasources passed to the JSON reader are split into batches demarcated by byte range + * offsets and read iteratively. The batch size is capped at INT_MAX bytes, which is the + * default value returned by the function. This value can be overridden at runtime using the + * environment variable LIBCUDF_JSON_BATCH_SIZE + * + * @return size in bytes */ -device_span ingest_raw_input(device_span buffer, - host_span> sources, - compression_type compression, - size_t range_offset, - size_t range_size, - rmm::cuda_stream_view stream) +std::size_t get_batch_size_upper_bound() { - CUDF_FUNC_RANGE(); - // We append a line delimiter between two files to make sure the last line of file i and the first - // line of file i+1 don't end up on the same JSON line, if file i does not already end with a line - // delimiter. - auto constexpr num_delimiter_chars = 1; - - if (compression == compression_type::NONE) { - auto delimiter_map = cudf::detail::make_empty_host_vector(sources.size(), stream); - std::vector prefsum_source_sizes(sources.size()); - std::vector> h_buffers; - size_t bytes_read = 0; - std::transform_inclusive_scan(sources.begin(), - sources.end(), - prefsum_source_sizes.begin(), - std::plus{}, - [](std::unique_ptr const& s) { return s->size(); }); - auto upper = - std::upper_bound(prefsum_source_sizes.begin(), prefsum_source_sizes.end(), range_offset); - size_t start_source = std::distance(prefsum_source_sizes.begin(), upper); - - auto const total_bytes_to_read = - std::min(range_size, prefsum_source_sizes.back() - range_offset); - range_offset -= start_source ? 
prefsum_source_sizes[start_source - 1] : 0; - for (size_t i = start_source; i < sources.size() && bytes_read < total_bytes_to_read; i++) { - if (sources[i]->is_empty()) continue; - auto data_size = - std::min(sources[i]->size() - range_offset, total_bytes_to_read - bytes_read); - auto destination = reinterpret_cast(buffer.data()) + bytes_read + - (num_delimiter_chars * delimiter_map.size()); - if (sources[i]->is_device_read_preferred(data_size)) { - bytes_read += sources[i]->device_read(range_offset, data_size, destination, stream); - } else { - h_buffers.emplace_back(sources[i]->host_read(range_offset, data_size)); - auto const& h_buffer = h_buffers.back(); - CUDF_CUDA_TRY(cudaMemcpyAsync( - destination, h_buffer->data(), h_buffer->size(), cudaMemcpyHostToDevice, stream.value())); - bytes_read += h_buffer->size(); - } - range_offset = 0; - delimiter_map.push_back(bytes_read + (num_delimiter_chars * delimiter_map.size())); - } - // Removing delimiter inserted after last non-empty source is read - if (!delimiter_map.empty()) { delimiter_map.pop_back(); } - - // If this is a multi-file source, we scatter the JSON line delimiters between files - if (sources.size() > 1) { - static_assert(num_delimiter_chars == 1, - "Currently only single-character delimiters are supported"); - auto const delimiter_source = thrust::make_constant_iterator('\n'); - auto const d_delimiter_map = cudf::detail::make_device_uvector_async( - delimiter_map, stream, rmm::mr::get_current_device_resource()); - thrust::scatter(rmm::exec_policy_nosync(stream), - delimiter_source, - delimiter_source + d_delimiter_map.size(), - d_delimiter_map.data(), - buffer.data()); - } - stream.synchronize(); - return buffer.first(bytes_read + (delimiter_map.size() * num_delimiter_chars)); - } - // TODO: allow byte range reading from multiple compressed files. - auto remaining_bytes_to_read = std::min(range_size, sources[0]->size() - range_offset); - auto hbuffer = std::vector(remaining_bytes_to_read); - // Single read because only a single compressed source is supported - // Reading to host because decompression of a single block is much faster on the CPU - sources[0]->host_read(range_offset, remaining_bytes_to_read, hbuffer.data()); - auto uncomp_data = decompress(compression, hbuffer); - CUDF_CUDA_TRY(cudaMemcpyAsync(buffer.data(), - reinterpret_cast(uncomp_data.data()), - uncomp_data.size() * sizeof(char), - cudaMemcpyHostToDevice, - stream.value())); - stream.synchronize(); - return buffer.first(uncomp_data.size()); + auto const batch_size_str = std::getenv("LIBCUDF_JSON_BATCH_SIZE"); + int64_t const batch_size = batch_size_str != nullptr ? std::atol(batch_size_str) : 0L; + auto const batch_limit = static_cast(std::numeric_limits::max()); + auto const batch_size_upper_bound = static_cast( + (batch_size > 0 && batch_size < batch_limit) ? 
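// Worked example of the override (hypothetical value): setting
//   LIBCUDF_JSON_BATCH_SIZE=1073741824
// before the process starts caps each batch at ~1 GiB; unset, non-numeric,
// zero, negative, or >= INT_MAX values all fall back to the INT_MAX default.
//   setenv("LIBCUDF_JSON_BATCH_SIZE", "1073741824", 1);  // POSIX, before reading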
batch_size : batch_limit);
+  return batch_size_upper_bound;
 }
 
-size_t estimate_size_per_subchunk(size_t chunk_size)
+/**
+ * @brief Extract the first delimiter character position in the string
+ *
+ * @param d_data Device span in which to search for delimiter character
+ * @param delimiter Delimiter character to search for
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ *
+ * @return Position of first delimiter character in device array
+ */
+size_type find_first_delimiter(device_span<char const> d_data,
+                               char const delimiter,
+                               rmm::cuda_stream_view stream)
 {
-  auto geometric_mean = [](double a, double b) { return std::sqrt(a * b); };
-  // NOTE: heuristic for choosing subchunk size: geometric mean of minimum subchunk size (set to
-  // 10kb) and the byte range size
-  return geometric_mean(std::ceil((double)chunk_size / num_subchunks), min_subchunk_size);
+  auto const first_delimiter_position =
+    thrust::find(rmm::exec_policy(stream), d_data.begin(), d_data.end(), delimiter);
+  return first_delimiter_position != d_data.end()
+           ? static_cast<size_type>(thrust::distance(d_data.begin(), first_delimiter_position))
+           : -1;
 }
 
 /**
@@ -168,19 +118,19 @@ size_t estimate_size_per_subchunk(size_t chunk_size)
  * @param stream CUDA stream used for device memory operations and kernel launches
  * @returns Data source owning buffer enclosing the bytes read
  */
-datasource::owning_buffer<std::vector<char>> get_record_range_raw_input(
+datasource::owning_buffer<rmm::device_buffer> get_record_range_raw_input(
   host_span<std::unique_ptr<datasource>> sources,
   json_reader_options const& reader_opts,
   rmm::cuda_stream_view stream)
 {
   CUDF_FUNC_RANGE();
 
-  size_t const total_source_size = sources_size(sources, 0, 0);
+  std::size_t const total_source_size = sources_size(sources, 0, 0);
   auto constexpr num_delimiter_chars = 1;
   auto const num_extra_delimiters    = num_delimiter_chars * (sources.size() - 1);
   compression_type const reader_compression = reader_opts.get_compression();
-  size_t const chunk_offset = reader_opts.get_byte_range_offset();
-  size_t chunk_size         = reader_opts.get_byte_range_size();
+  std::size_t const chunk_offset = reader_opts.get_byte_range_offset();
+  std::size_t chunk_size         = reader_opts.get_byte_range_size();
 
   CUDF_EXPECTS(total_source_size ? chunk_offset < total_source_size : !chunk_offset,
                "Invalid offsetting",
@@ -188,20 +138,20 @@ datasource::owning_buffer<std::vector<char>> get_record_range_raw_input(
   auto should_load_all_sources = !chunk_size || chunk_size >= total_source_size - chunk_offset;
   chunk_size = should_load_all_sources ? total_source_size - chunk_offset : chunk_size;
 
-  int const num_subchunks_prealloced = should_load_all_sources ? 0 : max_subchunks_prealloced;
-  size_t const size_per_subchunk     = estimate_size_per_subchunk(chunk_size);
+  int num_subchunks_prealloced        = should_load_all_sources ? 0 : max_subchunks_prealloced;
+  std::size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size);
 
   // The allocation for single source compressed input is estimated by assuming a ~4:1
   // compression ratio. For uncompressed inputs, we can get a better estimate using the idea
   // of subchunks.
   auto constexpr header_size = 4096;
-  size_t const buffer_size =
+  std::size_t buffer_size =
     reader_compression != compression_type::NONE
      ?
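// Worked example of the subchunk heuristic, assuming the in-file constants
// num_subchunks = 10 and min_subchunk_size = 10000 bytes: for a 100 MB byte
// range, size_per_subchunk = sqrt(ceil(1e8 / 10) * 1e4) = sqrt(1e11) ~= 316 KB,
// so each speculative read past the range end fetches roughly 316 KB.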
total_source_size * estimated_compression_ratio + header_size : std::min(total_source_size, chunk_size + num_subchunks_prealloced * size_per_subchunk) + num_extra_delimiters; - rmm::device_uvector buffer(buffer_size, stream); - device_span bufspan(buffer); + rmm::device_buffer buffer(buffer_size, stream); + device_span bufspan(reinterpret_cast(buffer.data()), buffer.size()); // Offset within buffer indicating first read position std::int64_t buffer_offset = 0; @@ -213,43 +163,67 @@ datasource::owning_buffer> get_record_range_raw_input( chunk_offset == 0 ? 0 : find_first_delimiter(readbufspan, '\n', stream); if (first_delim_pos == -1) { // return empty owning datasource buffer - auto empty_buf = rmm::device_uvector(0, stream); - return datasource::owning_buffer>(std::move(empty_buf)); + auto empty_buf = rmm::device_buffer(0, stream); + return datasource::owning_buffer(std::move(empty_buf)); } else if (!should_load_all_sources) { // Find next delimiter - std::int64_t next_delim_pos = -1; - size_t next_subchunk_start = chunk_offset + chunk_size; - while (next_subchunk_start < total_source_size && next_delim_pos < buffer_offset) { - buffer_offset += readbufspan.size(); - readbufspan = ingest_raw_input(bufspan.last(buffer_size - buffer_offset), - sources, - reader_compression, - next_subchunk_start, - size_per_subchunk, - stream); - next_delim_pos = find_first_delimiter(readbufspan, '\n', stream) + buffer_offset; - if (next_delim_pos < buffer_offset) { next_subchunk_start += size_per_subchunk; } + std::int64_t next_delim_pos = -1; + std::size_t next_subchunk_start = chunk_offset + chunk_size; + while (next_delim_pos < buffer_offset) { + for (int subchunk = 0; + subchunk < num_subchunks_prealloced && next_delim_pos < buffer_offset && + next_subchunk_start < total_source_size; + subchunk++) { + buffer_offset += readbufspan.size(); + readbufspan = ingest_raw_input(bufspan.last(buffer_size - buffer_offset), + sources, + reader_compression, + next_subchunk_start, + size_per_subchunk, + stream); + next_delim_pos = find_first_delimiter(readbufspan, '\n', stream) + buffer_offset; + next_subchunk_start += size_per_subchunk; + } + if (next_delim_pos < buffer_offset) { + if (next_subchunk_start >= total_source_size) { + // If we have reached the end of source list but the source does not terminate with a + // newline character + next_delim_pos = buffer_offset + readbufspan.size(); + } else { + // Our buffer_size estimate is insufficient to read until the end of the line! We need to + // allocate more memory and try again! + num_subchunks_prealloced *= 2; + buffer_size = reader_compression != compression_type::NONE + ? 
2 * buffer_size + : std::min(total_source_size, + buffer_size + num_subchunks_prealloced * size_per_subchunk) + + num_extra_delimiters; + buffer.resize(buffer_size, stream); + bufspan = device_span(reinterpret_cast(buffer.data()), buffer.size()); + } + } } - if (next_delim_pos < buffer_offset) next_delim_pos = buffer_offset + readbufspan.size(); - return datasource::owning_buffer>( + return datasource::owning_buffer( std::move(buffer), reinterpret_cast(buffer.data()) + first_delim_pos + shift_for_nonzero_offset, next_delim_pos - first_delim_pos - shift_for_nonzero_offset); } - return datasource::owning_buffer>( + return datasource::owning_buffer( std::move(buffer), reinterpret_cast(buffer.data()) + first_delim_pos + shift_for_nonzero_offset, readbufspan.size() - first_delim_pos - shift_for_nonzero_offset); } +// Helper function to read the current batch using byte range offsets and size +// passed table_with_metadata read_batch(host_span> sources, json_reader_options const& reader_opts, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - datasource::owning_buffer> bufview = + datasource::owning_buffer bufview = get_record_range_raw_input(sources, reader_opts, stream); // If input JSON buffer has single quotes and option to normalize single quotes is enabled, @@ -270,6 +244,92 @@ table_with_metadata read_batch(host_span> sources, return device_parse_nested_json(buffer, reader_opts, stream, mr); } +} // anonymous namespace + +device_span ingest_raw_input(device_span buffer, + host_span> sources, + compression_type compression, + std::size_t range_offset, + std::size_t range_size, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + // We append a line delimiter between two files to make sure the last line of file i and the first + // line of file i+1 don't end up on the same JSON line, if file i does not already end with a line + // delimiter. + auto constexpr num_delimiter_chars = 1; + + if (compression == compression_type::NONE) { + auto delimiter_map = cudf::detail::make_empty_host_vector(sources.size(), stream); + std::vector prefsum_source_sizes(sources.size()); + std::vector> h_buffers; + std::size_t bytes_read = 0; + std::transform_inclusive_scan(sources.begin(), + sources.end(), + prefsum_source_sizes.begin(), + std::plus{}, + [](std::unique_ptr const& s) { return s->size(); }); + auto upper = + std::upper_bound(prefsum_source_sizes.begin(), prefsum_source_sizes.end(), range_offset); + std::size_t start_source = std::distance(prefsum_source_sizes.begin(), upper); + + auto const total_bytes_to_read = + std::min(range_size, prefsum_source_sizes.back() - range_offset); + range_offset -= start_source ? 
prefsum_source_sizes[start_source - 1] : 0; + for (std::size_t i = start_source; i < sources.size() && bytes_read < total_bytes_to_read; + i++) { + if (sources[i]->is_empty()) continue; + auto data_size = + std::min(sources[i]->size() - range_offset, total_bytes_to_read - bytes_read); + auto destination = reinterpret_cast(buffer.data()) + bytes_read + + (num_delimiter_chars * delimiter_map.size()); + if (sources[i]->is_device_read_preferred(data_size)) { + bytes_read += sources[i]->device_read(range_offset, data_size, destination, stream); + } else { + h_buffers.emplace_back(sources[i]->host_read(range_offset, data_size)); + auto const& h_buffer = h_buffers.back(); + CUDF_CUDA_TRY(cudaMemcpyAsync( + destination, h_buffer->data(), h_buffer->size(), cudaMemcpyHostToDevice, stream.value())); + bytes_read += h_buffer->size(); + } + range_offset = 0; + delimiter_map.push_back(bytes_read + (num_delimiter_chars * delimiter_map.size())); + } + // Removing delimiter inserted after last non-empty source is read + if (!delimiter_map.empty()) { delimiter_map.pop_back(); } + + // If this is a multi-file source, we scatter the JSON line delimiters between files + if (sources.size() > 1) { + static_assert(num_delimiter_chars == 1, + "Currently only single-character delimiters are supported"); + auto const delimiter_source = thrust::make_constant_iterator('\n'); + auto const d_delimiter_map = cudf::detail::make_device_uvector_async( + delimiter_map, stream, rmm::mr::get_current_device_resource()); + thrust::scatter(rmm::exec_policy_nosync(stream), + delimiter_source, + delimiter_source + d_delimiter_map.size(), + d_delimiter_map.data(), + buffer.data()); + } + stream.synchronize(); + return buffer.first(bytes_read + (delimiter_map.size() * num_delimiter_chars)); + } + // TODO: allow byte range reading from multiple compressed files. + auto remaining_bytes_to_read = std::min(range_size, sources[0]->size() - range_offset); + auto hbuffer = std::vector(remaining_bytes_to_read); + // Single read because only a single compressed source is supported + // Reading to host because decompression of a single block is much faster on the CPU + sources[0]->host_read(range_offset, remaining_bytes_to_read, hbuffer.data()); + auto uncomp_data = decompress(compression, hbuffer); + CUDF_CUDA_TRY(cudaMemcpyAsync(buffer.data(), + reinterpret_cast(uncomp_data.data()), + uncomp_data.size() * sizeof(char), + cudaMemcpyHostToDevice, + stream.value())); + stream.synchronize(); + return buffer.first(uncomp_data.size()); +} + table_with_metadata read_json(host_span> sources, json_reader_options const& reader_opts, rmm::cuda_stream_view stream, @@ -296,15 +356,16 @@ table_with_metadata read_json(host_span> sources, * Note that the batched reader does not work for compressed inputs or for regular * JSON inputs. */ - size_t const total_source_size = sources_size(sources, 0, 0); - size_t chunk_offset = reader_opts.get_byte_range_offset(); - size_t chunk_size = reader_opts.get_byte_range_size(); - chunk_size = !chunk_size ? total_source_size - chunk_offset - : std::min(chunk_size, total_source_size - chunk_offset); + std::size_t const total_source_size = sources_size(sources, 0, 0); + std::size_t chunk_offset = reader_opts.get_byte_range_offset(); + std::size_t chunk_size = reader_opts.get_byte_range_size(); + chunk_size = !chunk_size ? 
+  chunk_size = !chunk_size ? total_source_size - chunk_offset
+                           : std::min(chunk_size, total_source_size - chunk_offset);
 
-  size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size);
-  size_t const batch_size_ub =
-    std::numeric_limits<int>::max() - (max_subchunks_prealloced * size_per_subchunk);
+  std::size_t const size_per_subchunk      = estimate_size_per_subchunk(chunk_size);
+  std::size_t const batch_size_upper_bound = get_batch_size_upper_bound();
+  std::size_t const batch_size =
+    batch_size_upper_bound - (max_subchunks_prealloced * size_per_subchunk);
 
   /*
    * Identify the position (zero-indexed) of starting source file from which to begin
@@ -314,10 +375,10 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
    */
 
   // Prefix sum of source file sizes
-  size_t pref_source_size = 0;
+  std::size_t pref_source_size = 0;
   // Starting source file from which to begin batching evaluated using byte range offset
-  size_t const start_source = [chunk_offset, &sources, &pref_source_size]() {
-    for (size_t src_idx = 0; src_idx < sources.size(); ++src_idx) {
+  std::size_t const start_source = [chunk_offset, &sources, &pref_source_size]() {
+    for (std::size_t src_idx = 0; src_idx < sources.size(); ++src_idx) {
       if (pref_source_size + sources[src_idx]->size() > chunk_offset) { return src_idx; }
       pref_source_size += sources[src_idx]->size();
     }
@@ -329,16 +390,16 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
    * batch begins, and `end_bytes_size` gives the terminal bytes position after which reading
    * stops.
    */
-  size_t pref_bytes_size = chunk_offset;
-  size_t end_bytes_size  = chunk_offset + chunk_size;
-  std::vector<size_t> batch_offsets{pref_bytes_size};
-  for (size_t i = start_source; i < sources.size() && pref_bytes_size < end_bytes_size;) {
+  std::size_t pref_bytes_size = chunk_offset;
+  std::size_t end_bytes_size  = chunk_offset + chunk_size;
+  std::vector<std::size_t> batch_offsets{pref_bytes_size};
+  for (std::size_t i = start_source; i < sources.size() && pref_bytes_size < end_bytes_size;) {
     pref_source_size += sources[i]->size();
     // If the current source file can subsume multiple batches, we split the file until the
     // boundary of the last batch exceeds the end of the file (indexed by `pref_source_size`)
     while (pref_bytes_size < end_bytes_size &&
-           pref_source_size >= std::min(pref_bytes_size + batch_size_ub, end_bytes_size)) {
-      auto next_batch_size = std::min(batch_size_ub, end_bytes_size - pref_bytes_size);
+           pref_source_size >= std::min(pref_bytes_size + batch_size, end_bytes_size)) {
+      auto next_batch_size = std::min(batch_size, end_bytes_size - pref_bytes_size);
       batch_offsets.push_back(batch_offsets.back() + next_batch_size);
       pref_bytes_size += next_batch_size;
     }
@@ -356,7 +417,7 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
   // Dispatch individual batches to read_batch and push the resulting table into
   // partial_tables array. Note that the reader options need to be updated for each
   // batch to adjust byte range offset and byte range size.
-  for (size_t i = 0; i < batch_offsets.size() - 1; i++) {
+  for (std::size_t i = 0; i < batch_offsets.size() - 1; i++) {
     batched_reader_opts.set_byte_range_offset(batch_offsets[i]);
     batched_reader_opts.set_byte_range_size(batch_offsets[i + 1] - batch_offsets[i]);
     partial_tables.emplace_back(
diff --git a/cpp/src/io/json/read_json.hpp b/cpp/src/io/json/read_json.hpp
index 32de4ebabfa..7e3a920f00d 100644
--- a/cpp/src/io/json/read_json.hpp
+++ b/cpp/src/io/json/read_json.hpp
@@ -37,6 +37,20 @@ constexpr size_t min_subchunk_size = 10000;
 constexpr int estimated_compression_ratio = 4;
 constexpr int max_subchunks_prealloced    = 3;
 
+/**
+ * @brief Read from array of data sources into RMM buffer. The size of the returned device span
+ *        can be larger than the number of bytes requested from the list of sources when
+ *        the range to be read spans across multiple sources. This is due to the delimiter
+ *        characters inserted after the end of each accessed source.
+ *
+ * @param buffer Device span buffer to which data is read
+ * @param sources Array of data sources
+ * @param compression Compression format of source
+ * @param range_offset Number of bytes to skip from source start
+ * @param range_size Number of bytes to read from source
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @returns A subspan of the input device span containing data read
+ */
 device_span<char> ingest_raw_input(device_span<char> buffer,
                                    host_span<std::unique_ptr<datasource>> sources,
                                    compression_type compression,
@@ -44,14 +58,20 @@ device_span<char> ingest_raw_input(device_span<char> buffer,
                                    size_t range_size,
                                    rmm::cuda_stream_view stream);
 
+/**
+ * @brief Reads and returns the entire data set in batches.
+ *
+ * @param sources Input `datasource` objects to read the dataset from
+ * @param reader_opts Settings for controlling reading behavior
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource to use for device memory allocation
+ *
+ * @return cudf::table object that contains the array of cudf::column.
+ */
 table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
                               json_reader_options const& reader_opts,
                               rmm::cuda_stream_view stream,
                               rmm::device_async_resource_ref mr);
 
-size_type find_first_delimiter(device_span<char const> d_data,
-                               char const delimiter,
-                               rmm::cuda_stream_view stream);
-
 }  // namespace io::json::detail
 }  // namespace CUDF_EXPORT cudf
diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu
index c688c809e04..60bb2366e87 100644
--- a/cpp/src/io/json/write_json.cu
+++ b/cpp/src/io/json/write_json.cu
@@ -649,7 +649,7 @@ struct column_to_strings_fn {
     auto const list_child_string = make_lists_column(
       column.size(),
       std::move(new_offsets),
-      std::move(child_string_with_null()),
+      child_string_with_null(),
       column.null_count(),
       cudf::detail::copy_bitmask(column, stream_, rmm::mr::get_current_device_resource()),
       stream_);
diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp
index e1403acd455..790532c9d54 100644
--- a/cpp/src/io/orc/orc.hpp
+++ b/cpp/src/io/orc/orc.hpp
@@ -24,7 +24,7 @@
 #include
 #include
 
-#include <thrust/optional.h>
+#include <cuda/std/optional>
 
 #include
 #include
@@ -692,11 +692,12 @@ class metadata {
  * @brief `column_device_view` and additional, ORC specific, information on the column.
*/ struct orc_column_device_view : public column_device_view { - __device__ orc_column_device_view(column_device_view col, thrust::optional parent_idx) + __device__ orc_column_device_view(column_device_view col, + cuda::std::optional parent_idx) : column_device_view{col}, parent_index{parent_idx} { } - thrust::optional parent_index; + cuda::std::optional parent_index; bitmask_type const* pushdown_mask = nullptr; }; diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index f3b8cfbc836..ede9fd060b8 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,6 @@ #include #include #include -#include #include #include #include @@ -1831,7 +1831,7 @@ orc_table_view make_orc_table_view(table_view const& table, type_kinds, stream, rmm::mr::get_current_device_resource()); rmm::device_uvector d_orc_columns(orc_columns.size(), stream); - using stack_value_type = thrust::pair>; + using stack_value_type = thrust::pair>; rmm::device_uvector stack_storage(orc_columns.size(), stream); // pre-order append ORC device columns @@ -1847,7 +1847,7 @@ orc_table_view make_orc_table_view(table_view const& table, thrust::make_reverse_iterator(d_table.end()), thrust::make_reverse_iterator(d_table.begin()), [&stack](column_device_view const& c) { - stack.push({&c, thrust::nullopt}); + stack.push({&c, cuda::std::nullopt}); }); uint32_t idx = 0; @@ -1978,7 +1978,7 @@ encoder_decimal_info decimal_chunk_sizes(orc_table_view& orc_table, // Gather the row group sizes and copy to host auto d_tmp_rowgroup_sizes = rmm::device_uvector(segmentation.num_rowgroups(), stream); - std::map> rg_sizes; + std::map> rg_sizes; for (auto const& [col_idx, esizes] : elem_sizes) { // Copy last elem in each row group - equal to row group size thrust::tabulate(rmm::exec_policy(stream), @@ -1991,14 +1991,14 @@ encoder_decimal_info decimal_chunk_sizes(orc_table_view& orc_table, return src[rg_bounds[idx][col_idx].end - 1]; }); - rg_sizes[col_idx] = cudf::detail::make_std_vector_async(d_tmp_rowgroup_sizes, stream); + rg_sizes.emplace(col_idx, cudf::detail::make_host_vector_async(d_tmp_rowgroup_sizes, stream)); } return {std::move(elem_sizes), std::move(rg_sizes)}; } std::map decimal_column_sizes( - std::map> const& chunk_sizes) + std::map> const& chunk_sizes) { std::map column_sizes; std::transform(chunk_sizes.cbegin(), @@ -2056,7 +2056,7 @@ auto set_rowgroup_char_counts(orc_table_view& orc_table, orc_table.d_string_column_indices, stream); - auto const h_counts = cudf::detail::make_std_vector_sync(counts, stream); + auto const h_counts = cudf::detail::make_host_vector_sync(counts, stream); for (auto col_idx : orc_table.string_column_indices) { auto& str_column = orc_table.column(col_idx); diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index f5f8b3cfed9..cae849ee315 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -90,8 +90,9 @@ struct stripe_rowgroups { */ struct encoder_decimal_info { std::map> - elem_sizes; ///< Column index -> per-element size map - std::map> rg_sizes; ///< Column index -> per-rowgroup size map + elem_sizes; ///< Column index -> per-element size map + std::map> + rg_sizes; ///< Column index -> per-rowgroup size map }; /** diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu index a43c6d4cbb6..17ccb73c0a8 100644 --- a/cpp/src/io/parquet/chunk_dict.cu +++ b/cpp/src/io/parquet/chunk_dict.cu @@ -22,6 
+22,7 @@ #include +#include #include namespace cudf::io::parquet::detail { @@ -30,28 +31,14 @@ namespace { constexpr int DEFAULT_BLOCK_SIZE = 256; } -template -CUDF_KERNEL void __launch_bounds__(block_size) - initialize_chunk_hash_maps_kernel(device_span chunks) -{ - auto const chunk = chunks[blockIdx.x]; - auto const t = threadIdx.x; - // fut: Now that per-chunk dict is same size as ck.num_values, try to not use one block per chunk - for (thread_index_type i = 0; i < chunk.dict_map_size; i += block_size) { - if (t + i < chunk.dict_map_size) { - new (&chunk.dict_map_slots[t + i].first) map_type::atomic_key_type{KEY_SENTINEL}; - new (&chunk.dict_map_slots[t + i].second) map_type::atomic_mapped_type{VALUE_SENTINEL}; - } - } -} - template struct equality_functor { column_device_view const& col; - __device__ bool operator()(size_type lhs_idx, size_type rhs_idx) + __device__ bool operator()(key_type lhs_idx, key_type rhs_idx) const { - // We don't call this for nulls so this is fine - auto const equal = cudf::experimental::row::equality::nan_equal_physical_equality_comparator{}; + // We don't call this for nulls so this is fine. + auto constexpr equal = + cudf::experimental::row::equality::nan_equal_physical_equality_comparator{}; return equal(col.element(lhs_idx), col.element(rhs_idx)); } }; @@ -59,38 +46,167 @@ struct equality_functor { template struct hash_functor { column_device_view const& col; - __device__ auto operator()(size_type idx) const + uint32_t const seed = 0; + __device__ auto operator()(key_type idx) const { - return cudf::hashing::detail::MurmurHash3_x86_32{}(col.element(idx)); + return cudf::hashing::detail::MurmurHash3_x86_32{seed}(col.element(idx)); } }; +template struct map_insert_fn { - map_type::device_mutable_view& map; + storage_ref_type const& storage_ref; + EncColumnChunk* const& chunk; template - __device__ bool operator()(column_device_view const& col, size_type i) + __device__ void operator()(size_type const s_start_value_idx, size_type const end_value_idx) { if constexpr (column_device_view::has_element_accessor()) { - auto hash_fn = hash_functor{col}; - auto equality_fn = equality_functor{col}; - return map.insert(std::pair(i, i), hash_fn, equality_fn); + using block_reduce = cub::BlockReduce; + __shared__ typename block_reduce::TempStorage reduce_storage; + + auto const col = chunk->col_desc; + column_device_view const& data_col = *col->leaf_column; + __shared__ size_type total_num_dict_entries; + + using equality_fn_type = equality_functor; + using hash_fn_type = hash_functor; + // Choosing `linear_probing` over `double_hashing` for slighhhtly better performance seen in + // benchmarks. + using probing_scheme_type = cuco::linear_probing; + + // Make a view of the hash map. + auto hash_map_ref = cuco::static_map_ref{cuco::empty_key{KEY_SENTINEL}, + cuco::empty_value{VALUE_SENTINEL}, + equality_fn_type{data_col}, + probing_scheme_type{hash_fn_type{data_col}}, + cuco::thread_scope_block, + storage_ref}; + + // Create a map ref with `cuco::insert` operator + auto map_insert_ref = hash_map_ref.with_operators(cuco::insert); + auto const t = threadIdx.x; + + // Create atomic refs to the current chunk's num_dict_entries and uniq_data_size + cuda::atomic_ref const chunk_num_dict_entries{chunk->num_dict_entries}; + cuda::atomic_ref const chunk_uniq_data_size{chunk->uniq_data_size}; + + // Note: Adjust the following loop to use `cg::tile` if needed in the future. 
+ for (thread_index_type val_idx = s_start_value_idx + t; val_idx - t < end_value_idx; + val_idx += block_size) { + size_type is_unique = 0; + size_type uniq_elem_size = 0; + + // Check if this index is valid. + auto const is_valid = + val_idx < end_value_idx and val_idx < data_col.size() and data_col.is_valid(val_idx); + + // Insert tile_val_idx to hash map and count successful insertions. + if (is_valid) { + // Insert the keys using a single thread for best performance for now. + is_unique = map_insert_ref.insert(cuco::pair{val_idx, val_idx}); + uniq_elem_size = [&]() -> size_type { + if (not is_unique) { return 0; } + switch (col->physical_type) { + case Type::INT32: return 4; + case Type::INT64: return 8; + case Type::INT96: return 12; + case Type::FLOAT: return 4; + case Type::DOUBLE: return 8; + case Type::BYTE_ARRAY: { + auto const col_type = data_col.type().id(); + if (col_type == type_id::STRING) { + // Strings are stored as 4 byte length + string bytes + return 4 + data_col.element(val_idx).size_bytes(); + } else if (col_type == type_id::LIST) { + // Binary is stored as 4 byte length + bytes + return 4 + + get_element(data_col, val_idx).size_bytes(); + } + CUDF_UNREACHABLE( + "Byte array only supports string and list column types for dictionary " + "encoding!"); + } + case Type::FIXED_LEN_BYTE_ARRAY: + if (data_col.type().id() == type_id::DECIMAL128) { return sizeof(__int128_t); } + CUDF_UNREACHABLE( + "Fixed length byte array only supports decimal 128 column types for dictionary " + "encoding!"); + default: CUDF_UNREACHABLE("Unsupported type for dictionary encoding"); + } + }(); + } + // Reduce num_unique and uniq_data_size from all tiles. + auto num_unique = block_reduce(reduce_storage).Sum(is_unique); + __syncthreads(); + auto uniq_data_size = block_reduce(reduce_storage).Sum(uniq_elem_size); + // The first thread in the block atomically updates total num_unique and uniq_data_size + if (t == 0) { + total_num_dict_entries = + chunk_num_dict_entries.fetch_add(num_unique, cuda::std::memory_order_relaxed); + total_num_dict_entries += num_unique; + chunk_uniq_data_size.fetch_add(uniq_data_size, cuda::std::memory_order_relaxed); + } + __syncthreads(); + + // Check if the num unique values in chunk has already exceeded max dict size and early exit + if (total_num_dict_entries > MAX_DICT_SIZE) { return; } + } // for loop } else { CUDF_UNREACHABLE("Unsupported type to insert in map"); } } }; +template struct map_find_fn { - map_type::device_view& map; - + storage_ref_type const& storage_ref; + EncColumnChunk* const& chunk; template - __device__ map_type::device_view::iterator operator()(column_device_view const& col, size_type i) + __device__ void operator()(size_type const s_start_value_idx, + size_type const end_value_idx, + size_type const s_ck_start_val_idx) { if constexpr (column_device_view::has_element_accessor()) { - auto hash_fn = hash_functor{col}; - auto equality_fn = equality_functor{col}; - return map.find(i, hash_fn, equality_fn); + auto const col = chunk->col_desc; + column_device_view const& data_col = *col->leaf_column; + + using equality_fn_type = equality_functor; + using hash_fn_type = hash_functor; + // Choosing `linear_probing` over `double_hashing` for slighhhtly better performance seen in + // benchmarks. + using probing_scheme_type = cuco::linear_probing; + + // Make a view of the hash map. 
+ auto hash_map_ref = cuco::static_map_ref{cuco::empty_key{KEY_SENTINEL}, + cuco::empty_value{VALUE_SENTINEL}, + equality_fn_type{data_col}, + probing_scheme_type{hash_fn_type{data_col}}, + cuco::thread_scope_block, + storage_ref}; + + // Create a map ref with `cuco::find` operator + auto const map_find_ref = hash_map_ref.with_operators(cuco::find); + auto const t = threadIdx.x; + + // Note: Adjust the following loop to use `cg::tiles` if needed in the future. + for (thread_index_type val_idx = s_start_value_idx + t; val_idx < end_value_idx; + val_idx += block_size) { + // Find the key using a single thread for best performance for now. + if (data_col.is_valid(val_idx)) { + // No need for atomic as this is not going to be modified by any other thread. + chunk->dict_index[val_idx - s_ck_start_val_idx] = [&]() { + auto const found_slot = map_find_ref.find(val_idx); + + // Fail if we didn't find the previously inserted key. + cudf_assert(found_slot != map_find_ref.end() && + "Unable to find value in map in dictionary index construction"); + + // Return the found value. + return found_slot->second; + }(); + } + } } else { CUDF_UNREACHABLE("Unsupported type to find in map"); } @@ -99,124 +215,61 @@ struct map_find_fn { template CUDF_KERNEL void __launch_bounds__(block_size) - populate_chunk_hash_maps_kernel(cudf::detail::device_2dspan frags) + populate_chunk_hash_maps_kernel(device_span const map_storage, + cudf::detail::device_2dspan frags) { - auto col_idx = blockIdx.y; - auto block_x = blockIdx.x; - auto t = threadIdx.x; - auto frag = frags[col_idx][block_x]; - auto chunk = frag.chunk; - auto col = chunk->col_desc; + auto const col_idx = blockIdx.y; + auto const block_x = blockIdx.x; + auto const frag = frags[col_idx][block_x]; + auto chunk = frag.chunk; + auto col = chunk->col_desc; if (not chunk->use_dictionary) { return; } - using block_reduce = cub::BlockReduce; - __shared__ typename block_reduce::TempStorage reduce_storage; - size_type start_row = frag.start_row; size_type end_row = frag.start_row + frag.num_rows; - // Find the bounds of values in leaf column to be inserted into the map for current chunk + // Find the bounds of values in leaf column to be inserted into the map for current chunk. 
size_type const s_start_value_idx = row_to_value_idx(start_row, *col); size_type const end_value_idx = row_to_value_idx(end_row, *col); column_device_view const& data_col = *col->leaf_column; - - // Make a view of the hash map - auto hash_map_mutable = map_type::device_mutable_view(chunk->dict_map_slots, - chunk->dict_map_size, - cuco::empty_key{KEY_SENTINEL}, - cuco::empty_value{VALUE_SENTINEL}); - - __shared__ size_type total_num_dict_entries; - thread_index_type val_idx = s_start_value_idx + t; - while (val_idx - block_size < end_value_idx) { - auto const is_valid = - val_idx < end_value_idx and val_idx < data_col.size() and data_col.is_valid(val_idx); - - // insert element at val_idx to hash map and count successful insertions - size_type is_unique = 0; - size_type uniq_elem_size = 0; - if (is_valid) { - is_unique = - type_dispatcher(data_col.type(), map_insert_fn{hash_map_mutable}, data_col, val_idx); - uniq_elem_size = [&]() -> size_type { - if (not is_unique) { return 0; } - switch (col->physical_type) { - case Type::INT32: return 4; - case Type::INT64: return 8; - case Type::INT96: return 12; - case Type::FLOAT: return 4; - case Type::DOUBLE: return 8; - case Type::BYTE_ARRAY: { - auto const col_type = data_col.type().id(); - if (col_type == type_id::STRING) { - // Strings are stored as 4 byte length + string bytes - return 4 + data_col.element(val_idx).size_bytes(); - } else if (col_type == type_id::LIST) { - // Binary is stored as 4 byte length + bytes - return 4 + get_element(data_col, val_idx).size_bytes(); - } - CUDF_UNREACHABLE( - "Byte array only supports string and list column types for dictionary " - "encoding!"); - } - case Type::FIXED_LEN_BYTE_ARRAY: - if (data_col.type().id() == type_id::DECIMAL128) { return sizeof(__int128_t); } - CUDF_UNREACHABLE( - "Fixed length byte array only supports decimal 128 column types for dictionary " - "encoding!"); - default: CUDF_UNREACHABLE("Unsupported type for dictionary encoding"); - } - }(); - } - - auto num_unique = block_reduce(reduce_storage).Sum(is_unique); - __syncthreads(); - auto uniq_data_size = block_reduce(reduce_storage).Sum(uniq_elem_size); - if (t == 0) { - total_num_dict_entries = atomicAdd(&chunk->num_dict_entries, num_unique); - total_num_dict_entries += num_unique; - atomicAdd(&chunk->uniq_data_size, uniq_data_size); - } - __syncthreads(); - - // Check if the num unique values in chunk has already exceeded max dict size and early exit - if (total_num_dict_entries > MAX_DICT_SIZE) { return; } - - val_idx += block_size; - } // while + storage_ref_type const storage_ref{chunk->dict_map_size, + map_storage.data() + chunk->dict_map_offset}; + type_dispatcher(data_col.type(), + map_insert_fn{storage_ref, chunk}, + s_start_value_idx, + end_value_idx); } template CUDF_KERNEL void __launch_bounds__(block_size) - collect_map_entries_kernel(device_span chunks) + collect_map_entries_kernel(device_span const map_storage, + device_span chunks) { auto& chunk = chunks[blockIdx.x]; if (not chunk.use_dictionary) { return; } - auto t = threadIdx.x; - auto map = map_type::device_view(chunk.dict_map_slots, - chunk.dict_map_size, - cuco::empty_key{KEY_SENTINEL}, - cuco::empty_value{VALUE_SENTINEL}); - - __shared__ cuda::atomic counter; + auto t = threadIdx.x; + __shared__ cuda::atomic counter; using cuda::std::memory_order_relaxed; - if (t == 0) { new (&counter) cuda::atomic{0}; } + if (t == 0) { new (&counter) cuda::atomic{0}; } __syncthreads(); - for (size_type i = 0; i < chunk.dict_map_size; i += block_size) { - if (t + i < 
chunk.dict_map_size) { - auto* slot = reinterpret_cast(map.begin_slot() + t + i); - auto key = slot->first; + + // Iterate over all windows in the map. + for (; t < chunk.dict_map_size; t += block_size) { + auto window = map_storage.data() + chunk.dict_map_offset + t; + // Collect all slots from each window. + for (auto& slot : *window) { + auto const key = slot.first; if (key != KEY_SENTINEL) { - auto loc = counter.fetch_add(1, memory_order_relaxed); + auto const loc = counter.fetch_add(1, memory_order_relaxed); cudf_assert(loc < MAX_DICT_SIZE && "Number of filled slots exceeds max dict size"); chunk.dict_data[loc] = key; - // If sorting dict page ever becomes a hard requirement, enable the following statement and - // add a dict sorting step before storing into the slot's second field. - // chunk.dict_data_idx[loc] = t + i; - slot->second = loc; + // If sorting dict page ever becomes a hard requirement, enable the following statement + // and add a dict sorting step before storing into the slot's second field. + // chunk.dict_data_idx[loc] = idx; + slot.second = loc; } } } @@ -224,75 +277,60 @@ CUDF_KERNEL void __launch_bounds__(block_size) template CUDF_KERNEL void __launch_bounds__(block_size) - get_dictionary_indices_kernel(cudf::detail::device_2dspan frags) + get_dictionary_indices_kernel(device_span const map_storage, + cudf::detail::device_2dspan frags) { - auto col_idx = blockIdx.y; - auto block_x = blockIdx.x; - auto t = threadIdx.x; - auto frag = frags[col_idx][block_x]; - auto chunk = frag.chunk; - auto col = chunk->col_desc; + auto const col_idx = blockIdx.y; + auto const block_x = blockIdx.x; + auto const frag = frags[col_idx][block_x]; + auto chunk = frag.chunk; if (not chunk->use_dictionary) { return; } size_type start_row = frag.start_row; size_type end_row = frag.start_row + frag.num_rows; + auto const col = chunk->col_desc; // Find the bounds of values in leaf column to be searched in the map for current chunk auto const s_start_value_idx = row_to_value_idx(start_row, *col); auto const s_ck_start_val_idx = row_to_value_idx(chunk->start_row, *col); auto const end_value_idx = row_to_value_idx(end_row, *col); column_device_view const& data_col = *col->leaf_column; - - auto map = map_type::device_view(chunk->dict_map_slots, - chunk->dict_map_size, - cuco::empty_key{KEY_SENTINEL}, - cuco::empty_value{VALUE_SENTINEL}); - - thread_index_type val_idx = s_start_value_idx + t; - while (val_idx < end_value_idx) { - if (data_col.is_valid(val_idx)) { - auto found_slot = type_dispatcher(data_col.type(), map_find_fn{map}, data_col, val_idx); - cudf_assert(found_slot != map.end() && - "Unable to find value in map in dictionary index construction"); - if (found_slot != map.end()) { - // No need for atomic as this is not going to be modified by any other thread - auto* val_ptr = reinterpret_cast(&found_slot->second); - chunk->dict_index[val_idx - s_ck_start_val_idx] = *val_ptr; - } - } - - val_idx += block_size; - } -} - -void initialize_chunk_hash_maps(device_span chunks, rmm::cuda_stream_view stream) -{ - constexpr int block_size = 1024; - initialize_chunk_hash_maps_kernel - <<>>(chunks); + storage_ref_type const storage_ref{chunk->dict_map_size, + map_storage.data() + chunk->dict_map_offset}; + + type_dispatcher(data_col.type(), + map_find_fn{storage_ref, chunk}, + s_start_value_idx, + end_value_idx, + s_ck_start_val_idx); } -void populate_chunk_hash_maps(cudf::detail::device_2dspan frags, +void populate_chunk_hash_maps(device_span const map_storage, + cudf::detail::device_2dspan 
frags, rmm::cuda_stream_view stream) { dim3 const dim_grid(frags.size().second, frags.size().first); populate_chunk_hash_maps_kernel - <<>>(frags); + <<>>(map_storage, frags); } -void collect_map_entries(device_span chunks, rmm::cuda_stream_view stream) +void collect_map_entries(device_span const map_storage, + device_span chunks, + rmm::cuda_stream_view stream) { constexpr int block_size = 1024; - collect_map_entries_kernel<<>>(chunks); + collect_map_entries_kernel + <<>>(map_storage, chunks); } -void get_dictionary_indices(cudf::detail::device_2dspan frags, +void get_dictionary_indices(device_span const map_storage, + cudf::detail::device_2dspan frags, rmm::cuda_stream_view stream) { dim3 const dim_grid(frags.size().second, frags.size().first); get_dictionary_indices_kernel - <<>>(frags); + <<>>(map_storage, frags); } } // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index e13ed5e85e5..afcf6b373a9 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -304,10 +304,10 @@ class parquet_field_struct : public parquet_field { template class parquet_field_union_struct : public parquet_field { E& enum_val; - thrust::optional& val; // union structs are always wrapped in std::optional + cuda::std::optional& val; // union structs are always wrapped in std::optional public: - parquet_field_union_struct(int f, E& ev, thrust::optional& v) + parquet_field_union_struct(int f, E& ev, cuda::std::optional& v) : parquet_field(f), enum_val(ev), val(v) { } @@ -431,10 +431,10 @@ class parquet_field_struct_blob : public parquet_field { */ template class parquet_field_optional : public parquet_field { - thrust::optional& val; + cuda::std::optional& val; public: - parquet_field_optional(int f, thrust::optional& v) : parquet_field(f), val(v) {} + parquet_field_optional(int f, cuda::std::optional& v) : parquet_field(f), val(v) {} inline void operator()(CompactProtocolReader* cpr, int field_type) { diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index 8ee4c175e09..5d10472b0ae 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -20,7 +20,7 @@ #include -#include +#include #include #include @@ -94,10 +94,10 @@ struct LogicalType { BSON }; Type type; - thrust::optional decimal_type; - thrust::optional time_type; - thrust::optional timestamp_type; - thrust::optional int_type; + cuda::std::optional decimal_type; + cuda::std::optional time_type; + cuda::std::optional timestamp_type; + cuda::std::optional int_type; LogicalType(Type tp = UNDEFINED) : type(tp) {} LogicalType(DecimalType&& dt) : type(DECIMAL), decimal_type(dt) {} @@ -178,21 +178,21 @@ struct SchemaElement { // 5: nested fields int32_t num_children = 0; // 6: DEPRECATED: record the original type before conversion to parquet type - thrust::optional converted_type; + cuda::std::optional converted_type; // 7: DEPRECATED: record the scale for DECIMAL converted type int32_t decimal_scale = 0; // 8: DEPRECATED: record the precision for DECIMAL converted type int32_t decimal_precision = 0; // 9: save field_id from original schema - thrust::optional field_id; + cuda::std::optional field_id; // 10: replaces converted type - thrust::optional logical_type; + cuda::std::optional logical_type; // extra cudf specific fields bool output_as_byte_array = false; // cudf type determined from arrow:schema - thrust::optional arrow_type; + cuda::std::optional 
arrow_type; // The following fields are filled in later during schema initialization int max_definition_level = 0; @@ -259,21 +259,21 @@ struct SchemaElement { */ struct Statistics { // deprecated max value in signed comparison order - thrust::optional> max; + cuda::std::optional> max; // deprecated min value in signed comparison order - thrust::optional> min; + cuda::std::optional> min; // count of null values in the column - thrust::optional null_count; + cuda::std::optional null_count; // count of distinct values occurring - thrust::optional distinct_count; + cuda::std::optional distinct_count; // max value for column determined by ColumnOrder - thrust::optional> max_value; + cuda::std::optional> max_value; // min value for column determined by ColumnOrder - thrust::optional> min_value; + cuda::std::optional> min_value; // If true, max_value is the actual maximum value for a column - thrust::optional is_max_value_exact; + cuda::std::optional is_max_value_exact; // If true, min_value is the actual minimum value for a column - thrust::optional is_min_value_exact; + cuda::std::optional is_min_value_exact; }; /** @@ -282,7 +282,7 @@ struct Statistics { struct SizeStatistics { // Number of variable-width bytes stored for the page/chunk. Should not be set for anything // but the BYTE_ARRAY physical type. - thrust::optional unencoded_byte_array_data_bytes; + cuda::std::optional unencoded_byte_array_data_bytes; /** * When present, there is expected to be one element corresponding to each * repetition (i.e. size=max repetition_level+1) where each element @@ -291,14 +291,14 @@ struct SizeStatistics { * * This value should not be written if max_repetition_level is 0. */ - thrust::optional> repetition_level_histogram; + cuda::std::optional> repetition_level_histogram; /** * Same as repetition_level_histogram except for definition levels. * * This value should not be written if max_definition_level is 0 or 1. */ - thrust::optional> definition_level_histogram; + cuda::std::optional> definition_level_histogram; }; /** @@ -319,7 +319,7 @@ struct OffsetIndex { std::vector page_locations; // per-page size info. see description of the same field in SizeStatistics. only present for // columns with a BYTE_ARRAY physical type. - thrust::optional> unencoded_byte_array_data_bytes; + cuda::std::optional> unencoded_byte_array_data_bytes; }; /** @@ -331,10 +331,10 @@ struct ColumnIndex { std::vector> max_values; // upper bound for values in each page BoundaryOrder boundary_order = BoundaryOrder::UNORDERED; // Indicates if min and max values are ordered - thrust::optional> null_counts; // Optional count of null values per page + cuda::std::optional> null_counts; // Optional count of null values per page // Repetition/definition level histograms for the column chunk - thrust::optional> repetition_level_histogram; - thrust::optional> definition_level_histogram; + cuda::std::optional> repetition_level_histogram; + cuda::std::optional> definition_level_histogram; }; /** @@ -384,11 +384,11 @@ struct ColumnChunkMetaData { Statistics statistics; // Set of all encodings used for pages in this column chunk. This information can be used to // determine if all data pages are dictionary encoded for example. - thrust::optional> encoding_stats; + cuda::std::optional> encoding_stats; // Optional statistics to help estimate total memory when converted to in-memory representations. // The histograms contained in these statistics can also be useful in some cases for more // fine-grained nullability/list length filter pushdown. 
- thrust::optional size_statistics; + cuda::std::optional size_statistics; }; /** @@ -430,13 +430,13 @@ struct RowGroup { int64_t num_rows = 0; // If set, specifies a sort ordering of the rows in this RowGroup. // The sorting columns can be a subset of all the columns. - thrust::optional> sorting_columns; + cuda::std::optional> sorting_columns; // Byte offset from beginning of file to first page (data or dictionary) in this row group - thrust::optional file_offset; + cuda::std::optional file_offset; // Total byte size of all compressed (and potentially encrypted) column data in this row group - thrust::optional total_compressed_size; + cuda::std::optional total_compressed_size; // Row group ordinal in the file - thrust::optional ordinal; + cuda::std::optional ordinal; }; /** @@ -461,7 +461,7 @@ struct FileMetaData { std::vector row_groups; std::vector key_value_metadata; std::string created_by = ""; - thrust::optional> column_orders; + cuda::std::optional> column_orders; }; /** diff --git a/cpp/src/io/parquet/parquet_gpu.cuh b/cpp/src/io/parquet/parquet_gpu.cuh index e3c44c78898..7c09764da2d 100644 --- a/cpp/src/io/parquet/parquet_gpu.cuh +++ b/cpp/src/io/parquet/parquet_gpu.cuh @@ -18,25 +18,37 @@ #include "parquet_gpu.hpp" +#include #include #include -#include +#include +#include namespace cudf::io::parquet::detail { -auto constexpr KEY_SENTINEL = size_type{-1}; -auto constexpr VALUE_SENTINEL = size_type{-1}; +using key_type = size_type; +using mapped_type = size_type; +using slot_type = cuco::pair; -using map_type = cuco::legacy::static_map; +auto constexpr map_cg_size = + 1; ///< A CUDA Cooperative Group of 1 thread (set for best performance) to handle each subset. + ///< Note: Adjust insert and find loops to use `cg::tile` if increasing this. +auto constexpr window_size = + 1; ///< Number of concurrent slots (set for best performance) handled by each thread. +auto constexpr occupancy_factor = 1.43f; ///< cuCollections suggests using a hash map of size + ///< N * (1/0.7) = 1.43 to target a 70% occupancy factor. -/** - * @brief The alias of `map_type::pair_atomic_type` class. - * - * Declare this struct by trivial subclassing instead of type aliasing so we can have forward - * declaration of this struct somewhere else. 
- */ -struct slot_type : public map_type::pair_atomic_type {}; +auto constexpr KEY_SENTINEL = key_type{-1}; +auto constexpr VALUE_SENTINEL = mapped_type{-1}; +auto constexpr SCOPE = cuda::thread_scope_block; + +using storage_type = cuco::aow_storage, + cudf::detail::cuco_allocator>; +using storage_ref_type = typename storage_type::ref_type; +using window_type = typename storage_type::window_type; /** * @brief Return the byte length of parquet dtypes that are physically represented by INT32 @@ -81,4 +93,43 @@ inline size_type __device__ row_to_value_idx(size_type idx, return idx; } +/** + * @brief Insert chunk values into their respective hash maps + * + * @param map_storage Bulk hashmap storage + * @param frags Column fragments + * @param stream CUDA stream to use + */ +void populate_chunk_hash_maps(device_span const map_storage, + cudf::detail::device_2dspan frags, + rmm::cuda_stream_view stream); + +/** + * @brief Compact dictionary hash map entries into chunk.dict_data + * + * @param map_storage Bulk hashmap storage + * @param chunks Flat span of chunks to compact hash maps for + * @param stream CUDA stream to use + */ +void collect_map_entries(device_span const map_storage, + device_span chunks, + rmm::cuda_stream_view stream); + +/** + * @brief Get the Dictionary Indices for each row + * + * For each row of a chunk, gets the indices into chunk.dict_data which contains the value otherwise + * stored in input column [row]. Stores these indices into chunk.dict_index. + * + * Since dict_data itself contains indices into the original cudf column, this means that + * col[row] == col[dict_data[dict_index[row - chunk.start_row]]] + * + * @param map_storage Bulk hashmap storage + * @param frags Column fragments + * @param stream CUDA stream to use + */ +void get_dictionary_indices(device_span const map_storage, + cudf::detail::device_2dspan frags, + rmm::cuda_stream_view stream); + } // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index efc1f5ebab1..125d35f6499 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -394,7 +394,7 @@ struct ColumnChunkDesc { uint8_t def_level_bits_, uint8_t rep_level_bits_, Compression codec_, - thrust::optional logical_type_, + cuda::std::optional logical_type_, int32_t ts_clock_rate_, int32_t src_col_index_, int32_t src_col_schema_, @@ -438,12 +438,12 @@ struct ColumnChunkDesc { int32_t num_data_pages{}; // number of data pages int32_t num_dict_pages{}; // number of dictionary pages PageInfo const* dict_page{}; - string_index_pair* str_dict_index{}; // index for string dictionary - bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column - void** column_data_base{}; // base pointers of column data - void** column_string_base{}; // base pointers of column string data - Compression codec{}; // compressed codec enum - thrust::optional logical_type{}; // logical type + string_index_pair* str_dict_index{}; // index for string dictionary + bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column + void** column_data_base{}; // base pointers of column data + void** column_string_base{}; // base pointers of column string data + Compression codec{}; // compressed codec enum + cuda::std::optional logical_type{}; // logical type int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) int32_t src_col_index{}; // my input column index @@ -514,7 +514,6 @@ constexpr 
unsigned int kDictHashBits = 16; constexpr size_t kDictScratchSize = (1 << kDictHashBits) * sizeof(uint32_t); struct EncPage; -struct slot_type; // convert Encoding to a mask value constexpr uint32_t encoding_to_mask(Encoding encoding) @@ -560,7 +559,8 @@ struct EncColumnChunk { uint8_t is_compressed; //!< Nonzero if the chunk uses compression uint32_t dictionary_size; //!< Size of dictionary page including header uint32_t ck_stat_size; //!< Size of chunk-level statistics (included in 1st page header) - slot_type* dict_map_slots; //!< Hash map storage for calculating dict encoding for this chunk + uint32_t dict_map_offset; //!< Offset of the hash map storage for calculating dict encoding for + //!< this chunk size_type dict_map_size; //!< Size of dict_map_slots size_type num_dict_entries; //!< Total number of entries in dictionary size_type @@ -1001,46 +1001,6 @@ void InitFragmentStatistics(device_span groups, device_span fragments, rmm::cuda_stream_view stream); -/** - * @brief Initialize per-chunk hash maps used for dictionary with sentinel values - * - * @param chunks Flat span of chunks to initialize hash maps for - * @param stream CUDA stream to use - */ -void initialize_chunk_hash_maps(device_span chunks, rmm::cuda_stream_view stream); - -/** - * @brief Insert chunk values into their respective hash maps - * - * @param frags Column fragments - * @param stream CUDA stream to use - */ -void populate_chunk_hash_maps(cudf::detail::device_2dspan frags, - rmm::cuda_stream_view stream); - -/** - * @brief Compact dictionary hash map entries into chunk.dict_data - * - * @param chunks Flat span of chunks to compact hash maps for - * @param stream CUDA stream to use - */ -void collect_map_entries(device_span chunks, rmm::cuda_stream_view stream); - -/** - * @brief Get the Dictionary Indices for each row - * - * For each row of a chunk, gets the indices into chunk.dict_data which contains the value otherwise - * stored in input column [row]. Stores these indices into chunk.dict_index. 
- * - * Since dict_data itself contains indices into the original cudf column, this means that - * col[row] == col[dict_data[dict_index[row - chunk.start_row]]] - * - * @param frags Column fragments - * @param stream CUDA stream to use - */ -void get_dictionary_indices(cudf::detail::device_2dspan frags, - rmm::cuda_stream_view stream); - /** * @brief Launches kernel for initializing encoder data pages * diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp index 481c1e9fcdd..c8b8b7a1193 100644 --- a/cpp/src/io/parquet/predicate_pushdown.cpp +++ b/cpp/src/io/parquet/predicate_pushdown.cpp @@ -154,7 +154,7 @@ struct stats_caster { } void set_index(size_type index, - thrust::optional> const& binary_value, + cuda::std::optional> const& binary_value, Type const type) { if (binary_value.has_value()) { @@ -236,8 +236,8 @@ struct stats_caster { max.set_index(stats_idx, max_value, colchunk.meta_data.type); } else { // Marking it null, if column present in row group - min.set_index(stats_idx, thrust::nullopt, {}); - max.set_index(stats_idx, thrust::nullopt, {}); + min.set_index(stats_idx, cuda::std::nullopt, {}); + max.set_index(stats_idx, cuda::std::nullopt, {}); } stats_idx++; } @@ -468,7 +468,7 @@ std::optional>> aggregate_reader_metadata::fi auto validity_it = cudf::detail::make_counting_transform_iterator( 0, [bitmask = host_bitmask.data()](auto bit_index) { return bit_is_set(bitmask, bit_index); }); - auto is_row_group_required = cudf::detail::make_std_vector_sync( + auto const is_row_group_required = cudf::detail::make_host_vector_sync( device_span(predicate.data(), predicate.size()), stream); // Return only filtered row groups based on predicate diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 68ec61ead0a..9950e2f7d7d 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -39,7 +39,7 @@ namespace { // be treated as a string. Currently the only logical type that has special handling is DECIMAL. // Other valid types in the future would be UUID (still treated as string) and FLOAT16 (which // for now would also be treated as a string). -inline bool is_treat_fixed_length_as_string(thrust::optional const& logical_type) +inline bool is_treat_fixed_length_as_string(cuda::std::optional const& logical_type) { if (!logical_type.has_value()) { return true; } return logical_type->type != LogicalType::DECIMAL; @@ -470,8 +470,10 @@ reader::impl::impl(std::size_t chunk_read_limit, _input_pass_read_limit{pass_read_limit} { // Open and parse the source dataset metadata - _metadata = - std::make_unique(_sources, options.is_enabled_use_arrow_schema()); + _metadata = std::make_unique( + _sources, + options.is_enabled_use_arrow_schema(), + options.get_columns().has_value() and options.is_enabled_allow_mismatched_pq_schemas()); // Strings may be returned as either string or categorical columns _strings_to_categorical = options.is_enabled_convert_strings_to_categories(); @@ -769,11 +771,14 @@ parquet_column_schema walk_schema(aggregate_reader_metadata const* mt, int idx) parquet_metadata read_parquet_metadata(host_span const> sources) { - // do not use arrow schema when reading information from parquet metadata. + // Do not use arrow schema when reading information from parquet metadata. static constexpr auto use_arrow_schema = false; + // Do not select any columns when only reading the parquet metadata. 
+ static constexpr auto has_column_projection = false; + // Open and parse the source dataset metadata - auto metadata = aggregate_reader_metadata(sources, use_arrow_schema); + auto metadata = aggregate_reader_metadata(sources, use_arrow_schema, has_column_projection); return parquet_metadata{parquet_schema{walk_schema(&metadata, 0)}, metadata.get_num_rows(), diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index 794750ab6d2..00d62c45962 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -77,9 +77,9 @@ void print_cumulative_page_info(device_span d_pages, device_span d_c_info, rmm::cuda_stream_view stream) { - std::vector pages = cudf::detail::make_std_vector_sync(d_pages, stream); - std::vector chunks = cudf::detail::make_std_vector_sync(d_chunks, stream); - std::vector c_info = cudf::detail::make_std_vector_sync(d_c_info, stream); + auto const pages = cudf::detail::make_host_vector_sync(d_pages, stream); + auto const chunks = cudf::detail::make_host_vector_sync(d_chunks, stream); + auto const c_info = cudf::detail::make_host_vector_sync(d_c_info, stream); printf("------------\nCumulative sizes by page\n"); @@ -370,11 +370,11 @@ int64_t find_next_split(int64_t cur_pos, * * @return A tuple of Parquet clock rate and Parquet decimal type. */ -[[nodiscard]] std::tuple> conversion_info( +[[nodiscard]] std::tuple> conversion_info( type_id column_type_id, type_id timestamp_type_id, Type physical, - thrust::optional logical_type) + cuda::std::optional logical_type) { int32_t const clock_rate = is_chrono(data_type{column_type_id}) ? to_clockrate(timestamp_type_id) : 0; @@ -385,7 +385,7 @@ int64_t find_next_split(int64_t cur_pos, // if decimal but not outputting as float or decimal, then convert to no logical type if (column_type_id != type_id::FLOAT64 and not cudf::is_fixed_point(data_type{column_type_id})) { - return std::make_tuple(clock_rate, thrust::nullopt); + return std::make_tuple(clock_rate, cuda::std::nullopt); } } @@ -647,7 +647,7 @@ std::tuple, size_t, size_t> compute_next_subpass( auto [aggregated_info, page_keys_by_split] = adjust_cumulative_sizes(c_info, pages, stream); // bring back to the cpu - auto const h_aggregated_info = cudf::detail::make_std_vector_sync(aggregated_info, stream); + auto const h_aggregated_info = cudf::detail::make_host_vector_sync(aggregated_info, stream); // print_cumulative_row_info(h_aggregated_info, "adjusted"); // TODO: if the user has explicitly specified skip_rows/num_rows we could be more intelligent @@ -694,8 +694,7 @@ std::vector compute_page_splits_by_row(device_span h_aggregated_info = - cudf::detail::make_std_vector_sync(aggregated_info, stream); + auto const h_aggregated_info = cudf::detail::make_host_vector_sync(aggregated_info, stream); // print_cumulative_row_info(h_aggregated_info, "adjusted"); std::vector splits; @@ -1304,9 +1303,8 @@ void reader::impl::setup_next_pass(read_mode mode) printf("\tskip_rows: %'lu\n", pass.skip_rows); printf("\tnum_rows: %'lu\n", pass.num_rows); printf("\tbase mem usage: %'lu\n", pass.base_mem_size); - auto const num_columns = _input_columns.size(); - std::vector h_page_offsets = - cudf::detail::make_std_vector_sync(pass.page_offsets, _stream); + auto const num_columns = _input_columns.size(); + auto const h_page_offsets = cudf::detail::make_host_vector_sync(pass.page_offsets, _stream); for (size_t c_idx = 0; c_idx < num_columns; c_idx++) { printf("\t\tColumn %'lu: num_pages(%'d)\n", c_idx, @@ -1426,7 
+1424,7 @@ void reader::impl::setup_next_subpass(read_mode mode) subpass.pages = subpass.page_buf; } - std::vector h_spans = cudf::detail::make_std_vector_async(page_indices, _stream); + auto const h_spans = cudf::detail::make_host_vector_async(page_indices, _stream); subpass.pages.device_to_host_async(_stream); _stream.synchronize(); @@ -1464,7 +1462,7 @@ void reader::impl::setup_next_subpass(read_mode mode) printf("\t\tTotal expected usage: %'lu\n", total_expected_size == 0 ? subpass.decomp_page_data.size() + pass.base_mem_size : total_expected_size + pass.base_mem_size); - std::vector h_page_indices = cudf::detail::make_std_vector_sync(page_indices, _stream); + auto const h_page_indices = cudf::detail::make_host_vector_sync(page_indices, _stream); for (size_t c_idx = 0; c_idx < num_columns; c_idx++) { printf("\t\tColumn %'lu: pages(%'lu - %'lu)\n", c_idx, diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index 581c44d024b..8b5678f202b 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -38,7 +38,7 @@ namespace flatbuf = cudf::io::parquet::flatbuf; namespace { -thrust::optional converted_to_logical_type(SchemaElement const& schema) +cuda::std::optional converted_to_logical_type(SchemaElement const& schema) { if (schema.converted_type.has_value()) { switch (schema.converted_type.value()) { @@ -66,7 +66,7 @@ thrust::optional converted_to_logical_type(SchemaElement const& sch default: return LogicalType{LogicalType::UNDEFINED}; } } - return thrust::nullopt; + return cuda::std::nullopt; } } // namespace @@ -246,7 +246,7 @@ void metadata::sanitize_schema() struct_elem.repetition_type = REQUIRED; struct_elem.num_children = schema_elem.num_children; struct_elem.type = UNDEFINED_TYPE; - struct_elem.converted_type = thrust::nullopt; + struct_elem.converted_type = cuda::std::nullopt; // swap children struct_elem.children_idx = std::move(schema_elem.children_idx); @@ -380,6 +380,17 @@ aggregate_reader_metadata::collect_keyval_metadata() const return kv_maps; } +std::vector> aggregate_reader_metadata::init_schema_idx_maps( + bool const has_cols_from_mismatched_srcs) const +{ + // Only initialize if more than 1 data sources and has select columns from mismatched data sources + if (has_cols_from_mismatched_srcs and per_file_metadata.size() > 1) { + return std::vector>{per_file_metadata.size() - 1}; + } + + return {}; +} + int64_t aggregate_reader_metadata::calc_num_rows() const { return std::accumulate( @@ -539,13 +550,18 @@ void aggregate_reader_metadata::column_info_for_row_group(row_group_info& rg_inf } aggregate_reader_metadata::aggregate_reader_metadata( - host_span const> sources, bool use_arrow_schema) + host_span const> sources, + bool use_arrow_schema, + bool has_cols_from_mismatched_srcs) : per_file_metadata(metadatas_from_sources(sources)), keyval_maps(collect_keyval_metadata()), + schema_idx_maps(init_schema_idx_maps(has_cols_from_mismatched_srcs)), num_rows(calc_num_rows()), num_row_groups(calc_num_row_groups()) { - if (per_file_metadata.size() > 0) { + // Validate that all sources have the same schema unless we are reading select columns + // from mismatched sources, in which case, we will only check the projected columns later. + if (per_file_metadata.size() > 1 and not has_cols_from_mismatched_srcs) { auto const& first_meta = per_file_metadata.front(); auto const num_cols = first_meta.row_groups.size() > 0 ? 
first_meta.row_groups.front().columns.size() : 0; @@ -632,7 +648,7 @@ arrow_schema_data_types aggregate_reader_metadata::collect_arrow_schema() const if (field->type_type() == flatbuf::Type::Type_Duration) { auto type_data = field->type_as_Duration(); if (type_data != nullptr) { - auto name = (field->name()) ? field->name()->str() : ""; + auto name = field->name() ? field->name()->str() : ""; // set the schema_elem type to duration type schema_elem.type = duration_from_flatbuffer(type_data); arrow_type_col_seen |= (schema_elem.type.id() != type_id::EMPTY); @@ -868,12 +884,23 @@ ColumnChunkMetaData const& aggregate_reader_metadata::get_column_metadata(size_t size_type src_idx, int schema_idx) const { + // schema_idx_maps will only have > 0 size when we are reading matching column projection from + // mismatched Parquet sources. + if (src_idx and not schema_idx_maps.empty()) { + auto const& schema_idx_map = schema_idx_maps[src_idx - 1]; + CUDF_EXPECTS(schema_idx_map.find(schema_idx) != schema_idx_map.end(), + "Unmapped schema index encountered in the specified source tree", + std::range_error); + schema_idx = schema_idx_map.at(schema_idx); + } + auto col = std::find_if(per_file_metadata[src_idx].row_groups[row_group_index].columns.begin(), per_file_metadata[src_idx].row_groups[row_group_index].columns.end(), [schema_idx](ColumnChunk const& col) { return col.schema_idx == schema_idx; }); CUDF_EXPECTS(col != std::end(per_file_metadata[src_idx].row_groups[row_group_index].columns), - "Found no metadata for schema index"); + "Found no metadata for schema index", + std::range_error); return col->meta_data; } @@ -1041,18 +1068,19 @@ aggregate_reader_metadata::select_columns( std::optional> const& filter_columns_names, bool include_index, bool strings_to_categorical, - type_id timestamp_type_id) const + type_id timestamp_type_id) { - auto find_schema_child = [&](SchemaElement const& schema_elem, std::string const& name) { - auto const& col_schema_idx = - std::find_if(schema_elem.children_idx.cbegin(), - schema_elem.children_idx.cend(), - [&](size_t col_schema_idx) { return get_schema(col_schema_idx).name == name; }); - - return (col_schema_idx != schema_elem.children_idx.end()) - ? static_cast(*col_schema_idx) - : -1; - }; + auto const find_schema_child = + [&](SchemaElement const& schema_elem, std::string const& name, int const pfm_idx = 0) { + auto const& col_schema_idx = std::find_if( + schema_elem.children_idx.cbegin(), + schema_elem.children_idx.cend(), + [&](size_t col_schema_idx) { return get_schema(col_schema_idx, pfm_idx).name == name; }); + + return (col_schema_idx != schema_elem.children_idx.end()) + ? static_cast(*col_schema_idx) + : -1; + }; std::vector output_columns; std::vector input_columns; @@ -1074,7 +1102,7 @@ aggregate_reader_metadata::select_columns( if (schema_elem.is_stub()) { // is this legit? CUDF_EXPECTS(schema_elem.num_children == 1, "Unexpected number of children for stub"); - auto child_col_name_info = (col_name_info) ? &col_name_info->children[0] : nullptr; + auto const child_col_name_info = col_name_info ? 
&col_name_info->children[0] : nullptr; return build_column( child_col_name_info, schema_elem.children_idx[0], out_col_array, has_list_parent); } @@ -1154,6 +1182,97 @@ aggregate_reader_metadata::select_columns( return path_is_valid; }; + // Compares two schema elements for equality, ignoring their number of children + auto const equal_to_except_num_children = [](SchemaElement const& lhs, SchemaElement const& rhs) { + return lhs.type == rhs.type and lhs.converted_type == rhs.converted_type and + lhs.type_length == rhs.type_length and lhs.repetition_type == rhs.repetition_type and + lhs.name == rhs.name and lhs.decimal_scale == rhs.decimal_scale and + lhs.decimal_precision == rhs.decimal_precision and lhs.field_id == rhs.field_id; + }; + + // Maps a projected column's schema_idx in the zeroth per_file_metadata (source) to the + // corresponding schema_idx in pfm_idx'th per_file_metadata (destination). The projected + // column's path must match across sources, else an appropriate exception is thrown. + std::function map_column = + [&](column_name_info const* col_name_info, + int const src_schema_idx, + int const dst_schema_idx, + int const pfm_idx) { + auto const& src_schema_elem = get_schema(src_schema_idx); + auto const& dst_schema_elem = get_schema(dst_schema_idx, pfm_idx); + + // Check that the schema elements are equal except for their number of children, as we only + // care about the specific column paths in the schema trees. Raise an invalid_argument error + // if the schema elements don't match. + CUDF_EXPECTS(equal_to_except_num_children(src_schema_elem, dst_schema_elem), + "Encountered mismatching SchemaElement properties for a column in " + "the selected path", + std::invalid_argument); + + // If src_schema_elem is a stub, it does not exist in the column_name_info and column_buffer + // hierarchy. So continue on with mapping. + if (src_schema_elem.is_stub()) { + // Check that dst_schema_elem is also a stub, i.e. has num_children == 1, which we didn't + // previously check. Raise an invalid_argument error if dst_schema_elem is not a stub. + CUDF_EXPECTS(dst_schema_elem.is_stub(), + "Encountered mismatching schemas for stub.", + std::invalid_argument); + auto const child_col_name_info = col_name_info ? &col_name_info->children[0] : nullptr; + return map_column(child_col_name_info, + src_schema_elem.children_idx[0], + dst_schema_elem.children_idx[0], + pfm_idx); + } + + // The path ends here. If this is a list/struct col (has children), then map all its children + // which must be identical. + if (col_name_info == nullptr or col_name_info->children.empty()) { + // Check that the number of children is equal before mapping. An out_of_range error is + // thrown if the number of children isn't equal. + CUDF_EXPECTS(src_schema_elem.num_children == dst_schema_elem.num_children, + "Encountered mismatching number of children for a " + "column in the selected path", + std::out_of_range); + + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(src_schema_elem.num_children), + [&](auto const child_idx) { + map_column(nullptr, + src_schema_elem.children_idx[child_idx], + dst_schema_elem.children_idx[child_idx], + pfm_idx); + }); + } + // The path goes further down to specific child(ren) of this column so map only those + // children. + else { + std::for_each( + col_name_info->children.cbegin(), + col_name_info->children.cend(), + [&](auto const& child_col_name_info) { + // Ensure that each named child column exists in the destination schema tree for the + paths to line up.
An out_of_range error is thrown otherwise. + CUDF_EXPECTS( + find_schema_child(dst_schema_elem, child_col_name_info.name, pfm_idx) != -1, + "Encountered mismatching schema tree depths across data sources", + std::out_of_range); + map_column(&child_col_name_info, + find_schema_child(src_schema_elem, child_col_name_info.name), + find_schema_child(dst_schema_elem, child_col_name_info.name, pfm_idx), + pfm_idx); + }); + } + + // We're at a leaf and this is an input column (one with actual data stored) so map it. + if (src_schema_elem.num_children == 0) { + // Get the schema_idx_map for this data source (pfm) + auto& schema_idx_map = schema_idx_maps[pfm_idx - 1]; + + // Map the schema index from 0th tree (src) to the one in the current (dst) tree. + schema_idx_map[src_schema_idx] = dst_schema_idx; + } + }; + std::vector output_column_schemas; // @@ -1287,7 +1406,28 @@ aggregate_reader_metadata::select_columns( for (auto& col : selected_columns) { auto const& top_level_col_schema_idx = find_schema_child(root, col.name); bool valid_column = build_column(&col, top_level_col_schema_idx, output_columns, false); - if (valid_column) output_column_schemas.push_back(top_level_col_schema_idx); + if (valid_column) { + output_column_schemas.push_back(top_level_col_schema_idx); + + // Map the column's schema_idx across the rest of the data sources if required. + if (per_file_metadata.size() > 1 and not schema_idx_maps.empty()) { + std::for_each(thrust::make_counting_iterator(static_cast(1)), + thrust::make_counting_iterator(per_file_metadata.size()), + [&](auto const pfm_idx) { + auto const& dst_root = get_schema(0, pfm_idx); + // Ensure that each top level column exists in the destination schema + // tree. An out_of_range error is thrown otherwise. + CUDF_EXPECTS( + find_schema_child(dst_root, col.name, pfm_idx) != -1, + "Encountered mismatching schema tree depths across data sources", + std::out_of_range); + map_column(&col, + top_level_col_schema_idx, + find_schema_child(dst_root, col.name, pfm_idx), + pfm_idx); + }); + } + } } } diff --git a/cpp/src/io/parquet/reader_impl_helpers.hpp b/cpp/src/io/parquet/reader_impl_helpers.hpp index 309132a5347..6f2863136b2 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.hpp +++ b/cpp/src/io/parquet/reader_impl_helpers.hpp @@ -128,6 +128,7 @@ struct arrow_schema_data_types { class aggregate_reader_metadata { std::vector per_file_metadata; std::vector> keyval_maps; + std::vector> schema_idx_maps; int64_t num_rows; size_type num_row_groups; @@ -144,6 +145,19 @@ class aggregate_reader_metadata { [[nodiscard]] std::vector> collect_keyval_metadata() const; + /** + * @brief Initialize the vector of schema_idx maps. + * + * Initializes a vector of hash maps that will store the one-to-one mappings between the + * schema_idx'es of the selected columns in the zeroth per_file_metadata (source) and each + * kth per_file_metadata (destination) for k in range: [1, per_file_metadata.size()-1]. + * + * @param has_cols_from_mismatched_srcs True if we are reading selected columns from mismatched + * Parquet schemas.
+ */ + [[nodiscard]] std::vector> init_schema_idx_maps( + bool has_cols_from_mismatched_srcs) const; + /** * @brief Decodes and constructs the arrow schema from the ARROW_SCHEMA_KEY IPC message * in key value metadata section of Parquet file footer @@ -183,10 +197,28 @@ class aggregate_reader_metadata { public: aggregate_reader_metadata(host_span const> sources, - bool use_arrow_schema); + bool use_arrow_schema, + bool has_cols_from_mismatched_srcs); [[nodiscard]] RowGroup const& get_row_group(size_type row_group_index, size_type src_idx) const; + /** + * @brief Extracts the schema_idx'th column chunk metadata from row_group_index'th row group of + * the src_idx'th file. + * + * Extracts the schema_idx'th column chunk metadata from the specified row group index of the + * src_idx'th file. Note that the schema_idx is actually the index in the zeroth file, which may + * not be the same in all files, in which case the schema_idx is mapped to the corresponding + * index in the src_idx'th file and returned. A range_error is thrown if schema_idx + * doesn't exist or isn't mapped to the src_idx file. + * + * @param row_group_index The row group index in the file to extract column chunk metadata from. + * @param src_idx The per_file_metadata index to extract column chunk metadata from. + * @param schema_idx The schema_idx of the column chunk to be extracted + * + * @return The requested column chunk metadata, or a range_error if the schema index isn't + * valid. + */ [[nodiscard]] ColumnChunkMetaData const& get_column_metadata(size_type row_group_index, size_type src_idx, int schema_idx) const; @@ -202,9 +234,22 @@ class aggregate_reader_metadata { [[nodiscard]] auto get_num_row_groups() const { return num_row_groups; } - [[nodiscard]] auto const& get_schema(int schema_idx) const + /** + * @brief Extracts the schema_idx'th SchemaElement from the pfm_idx'th file + * + * @param schema_idx The index of the SchemaElement to be extracted. + * @param pfm_idx The index of the per_file_metadata to extract SchemaElement from, default = 0 if + * not specified. + * + * @return The requested SchemaElement, or an error if schema_idx or pfm_idx is invalid. + */ + [[nodiscard]] auto const& get_schema(int schema_idx, int pfm_idx = 0) const { - return per_file_metadata[0].schema[schema_idx]; + CUDF_EXPECTS( + schema_idx >= 0 and pfm_idx >= 0 and pfm_idx < static_cast(per_file_metadata.size()), + "Parquet reader encountered an invalid schema_idx or pfm_idx", + std::invalid_argument); + return per_file_metadata[pfm_idx].schema[schema_idx]; } [[nodiscard]] auto const& get_key_value_metadata() const& { return keyval_maps; } @@ -314,7 +359,7 @@ class aggregate_reader_metadata { std::optional> const& filter_columns_names, bool include_index, bool strings_to_categorical, - type_id timestamp_type_id) const; + type_id timestamp_type_id); }; /** diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index e006cc7d714..557b1a45c1f 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -1494,6 +1495,11 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num // buffers if they are not part of a list hierarchy. mark down // if we have any list columns that need further processing.
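// ----------------------------------------------------------------------------
// [illustrative sketch, not part of the changeset] The allocate_columns hunk
// that follows stops zeroing each column buffer on its own (one cudaMemsetAsync
// per buffer) and instead records a device span per buffer, deferring the fill
// to a single batched call at the end. Reduced to its essence the pattern looks
// like this; raw_span and batched_fill are hypothetical stand-ins, not the
// cudf::io::detail::batched_memset implementation:
#include <cstddef>

struct raw_span {
  std::byte* data;
  std::size_t size;
};

__global__ void batched_fill(raw_span const* spans, std::size_t num_spans, std::byte value)
{
  // One row of blocks per buffer; threads grid-stride within each buffer.
  for (std::size_t b = blockIdx.y; b < num_spans; b += gridDim.y) {
    auto const s = spans[b];
    for (std::size_t i = blockIdx.x * std::size_t{blockDim.x} + threadIdx.x; i < s.size;
         i += std::size_t{gridDim.x} * blockDim.x) {
      s.data[i] = value;
    }
  }
}
// A single launch over all recorded spans replaces N stream-ordered memsets,
// which pays off when a table has many small columns and null masks.
// ----------------------------------------------------------------------------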
bool has_lists = false; + // Casting to std::byte since data buffer pointer is void * + std::vector> memset_bufs; + // The validity buffer is a uint32_t pointer + std::vector> nullmask_bufs; + for (size_t idx = 0; idx < _input_columns.size(); idx++) { auto const& input_col = _input_columns[idx]; size_t const max_depth = input_col.nesting_depth(); @@ -1514,13 +1520,19 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num // we're going to start null mask as all valid and then turn bits off if necessary out_buf.create_with_mask( out_buf.type.id() == type_id::LIST && l_idx < max_depth ? num_rows + 1 : num_rows, - cudf::mask_state::ALL_VALID, + cudf::mask_state::UNINITIALIZED, + false, _stream, _mr); + memset_bufs.push_back(cudf::device_span(static_cast(out_buf.data()), + out_buf.data_size())); + nullmask_bufs.push_back(cudf::device_span( + out_buf.null_mask(), + cudf::util::round_up_safe(out_buf.null_mask_size(), sizeof(cudf::bitmask_type)) / + sizeof(cudf::bitmask_type))); } } } - // compute output column sizes by examining the pages of the -input- columns if (has_lists) { auto h_cols_info = @@ -1593,11 +1605,22 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num // allocate // we're going to start null mask as all valid and then turn bits off if necessary - out_buf.create_with_mask(size, cudf::mask_state::ALL_VALID, _stream, _mr); + out_buf.create_with_mask(size, cudf::mask_state::UNINITIALIZED, false, _stream, _mr); + memset_bufs.push_back(cudf::device_span( + static_cast(out_buf.data()), out_buf.data_size())); + nullmask_bufs.push_back(cudf::device_span( + out_buf.null_mask(), + cudf::util::round_up_safe(out_buf.null_mask_size(), sizeof(cudf::bitmask_type)) / + sizeof(cudf::bitmask_type))); } } } } + + cudf::io::detail::batched_memset(memset_bufs, static_cast(0), _stream); + // Need to set null mask bufs to all high bits + cudf::io::detail::batched_memset( + nullmask_bufs, std::numeric_limits::max(), _stream); } std::vector reader::impl::calculate_page_string_offsets() diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 36a1d8377bf..46c3151c731 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -185,7 +185,7 @@ struct aggregate_writer_metadata { std::vector> column_indexes; }; std::vector files; - thrust::optional> column_orders = thrust::nullopt; + cuda::std::optional> column_orders = cuda::std::nullopt; }; namespace { @@ -471,7 +471,7 @@ struct leaf_schema_fn { std::enable_if_t, void> operator()() { col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64; - col_schema.converted_type = thrust::nullopt; + col_schema.converted_type = cuda::std::nullopt; col_schema.stats_dtype = statistics_dtype::dtype_timestamp64; if (timestamp_is_int96) { col_schema.ts_scale = -1000; // negative value indicates division by absolute value @@ -749,7 +749,7 @@ std::vector construct_parquet_schema_tree( col_schema.type = Type::BYTE_ARRAY; } - col_schema.converted_type = thrust::nullopt; + col_schema.converted_type = cuda::std::nullopt; col_schema.stats_dtype = statistics_dtype::dtype_byte_array; col_schema.repetition_type = col_nullable ? OPTIONAL : REQUIRED; col_schema.name = (schema[parent_idx].name == "list") ?
"element" : col_meta.get_name(); @@ -1285,10 +1285,10 @@ build_chunk_dictionaries(hostdevice_2dvector& chunks, return std::pair(std::move(dict_data), std::move(dict_index)); } - // Allocate slots for each chunk - std::vector> hash_maps_storage; - hash_maps_storage.reserve(h_chunks.size()); - for (auto& chunk : h_chunks) { + // Variable to keep track of the current total map storage size + size_t total_map_storage_size = 0; + // Populate dict offsets and sizes for each chunk that needs to build a dictionary. + std::for_each(h_chunks.begin(), h_chunks.end(), [&](auto& chunk) { auto const& chunk_col_desc = col_desc[chunk.col_desc_id]; auto const is_requested_non_dict = chunk_col_desc.requested_encoding != column_encoding::USE_DEFAULT && @@ -1300,19 +1300,31 @@ build_chunk_dictionaries(hostdevice_2dvector& chunks, chunk.use_dictionary = false; } else { chunk.use_dictionary = true; - // cuCollections suggests using a hash map of size N * (1/0.7) = num_values * 1.43 - // https://github.com/NVIDIA/cuCollections/blob/3a49fc71/include/cuco/static_map.cuh#L190-L193 - auto& inserted_map = hash_maps_storage.emplace_back(chunk.num_values * 1.43, stream); - chunk.dict_map_slots = inserted_map.data(); - chunk.dict_map_size = inserted_map.size(); + chunk.dict_map_size = + static_cast(cuco::make_window_extent( + static_cast(occupancy_factor * chunk.num_values))); + chunk.dict_map_offset = total_map_storage_size; + total_map_storage_size += chunk.dict_map_size; } - } + }); - chunks.host_to_device_async(stream); + // No chunk needs to create a dictionary, exit early + if (total_map_storage_size == 0) { return {std::move(dict_data), std::move(dict_index)}; } - initialize_chunk_hash_maps(chunks.device_view().flat_view(), stream); - populate_chunk_hash_maps(frags, stream); + // Create a single bulk storage used by all sub-dictionaries + auto map_storage = storage_type{ + total_map_storage_size, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}}; + // Create a span of non-const map_storage as map_storage_ref takes in a non-const pointer.
+ device_span const map_storage_data{map_storage.data(), total_map_storage_size}; + // Copy chunk information to the device + chunks.host_to_device_async(stream); + // Initialize storage with the given sentinel values + map_storage.initialize_async({KEY_SENTINEL, VALUE_SENTINEL}, {stream.value()}); + // Populate the hash map for each chunk + populate_chunk_hash_maps(map_storage_data, frags, stream); + // Copy chunks back to the host; this synchronizes the stream chunks.device_to_host_sync(stream); // Make decision about which chunks have dictionary @@ -1372,8 +1384,8 @@ build_chunk_dictionaries(hostdevice_2dvector& chunks, chunk.dict_index = inserted_dict_index.data(); } chunks.host_to_device_async(stream); - collect_map_entries(chunks.device_view().flat_view(), stream); - get_dictionary_indices(frags, stream); + collect_map_entries(map_storage_data, chunks.device_view().flat_view(), stream); + get_dictionary_indices(map_storage_data, frags, stream); return std::pair(std::move(dict_data), std::move(dict_index)); } @@ -2230,20 +2242,20 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, bool need_sync{false}; // need to fetch the histogram data from the device - std::vector h_def_histogram; - std::vector h_rep_histogram; - if (stats_granularity == statistics_freq::STATISTICS_COLUMN) { - if (def_histogram_bfr_size > 0) { - h_def_histogram = - std::move(cudf::detail::make_std_vector_async(def_level_histogram, stream)); + auto const h_def_histogram = [&]() { + if (stats_granularity == statistics_freq::STATISTICS_COLUMN && def_histogram_bfr_size > 0) { need_sync = true; + return cudf::detail::make_host_vector_async(def_level_histogram, stream); } - if (rep_histogram_bfr_size > 0) { - h_rep_histogram = - std::move(cudf::detail::make_std_vector_async(rep_level_histogram, stream)); + return cudf::detail::make_host_vector(0, stream); + }(); + auto const h_rep_histogram = [&]() { + if (stats_granularity == statistics_freq::STATISTICS_COLUMN && rep_histogram_bfr_size > 0) { need_sync = true; + return cudf::detail::make_host_vector_async(rep_level_histogram, stream); } - } + return cudf::detail::make_host_vector(0, stream); + }(); for (int r = 0; r < num_rowgroups; r++) { int p = rg_to_part[r]; @@ -2265,7 +2277,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, update_chunk_encoding_stats(column_chunk_meta, ck, write_v2_headers); if (ck.ck_stat_size != 0) { - std::vector const stats_blob = cudf::detail::make_std_vector_sync( + auto const stats_blob = cudf::detail::make_host_vector_sync( device_span(dev_bfr, ck.ck_stat_size), stream); CompactProtocolReader cp(stats_blob.data(), stats_blob.size()); cp.read(&column_chunk_meta.statistics); @@ -2776,7 +2788,7 @@ std::unique_ptr> writer::merge_row_group_metadata( // See https://github.com/rapidsai/cudf/pull/14264#issuecomment-1778311615 for (auto& se : md.schema) { if (se.logical_type.has_value() && se.logical_type.value().type == LogicalType::UNKNOWN) { - se.logical_type = thrust::nullopt; + se.logical_type = cuda::std::nullopt; } } diff --git a/cpp/src/io/text/byte_range_info.cpp b/cpp/src/io/text/byte_range_info.cpp index 6a7836ed4e1..fe811739b97 100644 --- a/cpp/src/io/text/byte_range_info.cpp +++ b/cpp/src/io/text/byte_range_info.cpp @@ -16,6 +16,7 @@ #include #include +#include #include @@ -23,6 +24,12 @@ namespace cudf { namespace io { namespace text { +byte_range_info::byte_range_info(int64_t offset, int64_t size) : _offset(offset), _size(size) +{ + CUDF_EXPECTS(offset >= 0, "offset must be non-negative"); + CUDF_EXPECTS(size >= 0, "size must be non-negative"); +} +
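// ----------------------------------------------------------------------------
// [usage note, assumptions stated in-line] With the validating constructor
// added above, a malformed byte range fails fast at construction instead of
// surfacing later as an out-of-bounds read. CUDF_EXPECTS without an explicit
// exception type throws cudf::logic_error, so for example:
//
//   cudf::io::text::byte_range_info ok{0, 1024};   // offset 0, size 1 KiB
//   cudf::io::text::byte_range_info bad{-1, 10};   // throws cudf::logic_error
// ----------------------------------------------------------------------------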
byte_range_info create_byte_range_info_max() { return {0, std::numeric_limits::max()}; } std::vector create_byte_range_infos_consecutive(int64_t total_bytes, diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index be2e2b9a79c..e3435a24b18 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -310,7 +310,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source { CUDF_FUNC_RANGE(); - if (byte_range.empty()) { return make_empty_column(type_id::STRING); } + if (byte_range.is_empty()) { return make_empty_column(type_id::STRING); } auto device_delim = cudf::string_scalar(delimiter, true, stream, mr); @@ -567,20 +567,6 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source } // namespace detail -// deprecated in 24.08 -std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, - std::string const& delimiter, - std::optional byte_range, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return multibyte_split(source, - delimiter, - parse_options{byte_range.value_or(create_byte_range_info_max())}, - stream, - mr); -} - std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, std::string const& delimiter, parse_options options, diff --git a/cpp/src/io/utilities/arrow_io_source.cpp b/cpp/src/io/utilities/arrow_io_source.cpp deleted file mode 100644 index 157240b8b08..00000000000 --- a/cpp/src/io/utilities/arrow_io_source.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include -#include - -#include -#include -#include - -namespace cudf::io { - -/** - * @brief Implementation for an owning buffer where `arrow::Buffer` holds the data. - */ -class arrow_io_buffer : public datasource::buffer { - std::shared_ptr arrow_buffer; - - public: - explicit arrow_io_buffer(std::shared_ptr arrow_buffer) - : arrow_buffer(std::move(arrow_buffer)) - { - } - [[nodiscard]] size_t size() const override { return arrow_buffer->size(); } - [[nodiscard]] uint8_t const* data() const override { return arrow_buffer->data(); } -}; - -arrow_io_source::arrow_io_source(std::string const& arrow_uri) -{ - std::string const uri_start_delimiter = "//"; - std::string const uri_end_delimiter = "?"; - - auto const result = arrow::fs::FileSystemFromUri(arrow_uri); - CUDF_EXPECTS(result.ok(), "Failed to generate Arrow Filesystem instance from URI."); - filesystem = result.ValueOrDie(); - - // Parse the path from the URI - auto const start = [&]() { - auto const delim_start = arrow_uri.find(uri_start_delimiter); - return delim_start == std::string::npos ? 
0 : delim_start + uri_start_delimiter.size(); - }(); - auto const end = arrow_uri.find(uri_end_delimiter) - start; - auto const path = arrow_uri.substr(start, end); - - auto const in_stream = filesystem->OpenInputFile(path); - CUDF_EXPECTS(in_stream.ok(), "Failed to open Arrow RandomAccessFile"); - arrow_file = in_stream.ValueOrDie(); -} - -std::unique_ptr arrow_io_source::host_read(size_t offset, size_t size) -{ - auto const result = arrow_file->ReadAt(offset, size); - CUDF_EXPECTS(result.ok(), "Cannot read file data"); - return std::make_unique(result.ValueOrDie()); -} - -size_t arrow_io_source::host_read(size_t offset, size_t size, uint8_t* dst) -{ - auto const result = arrow_file->ReadAt(offset, size, dst); - CUDF_EXPECTS(result.ok(), "Cannot read file data"); - return result.ValueOrDie(); -} - -[[nodiscard]] size_t arrow_io_source::size() const -{ - auto const result = arrow_file->GetSize(); - CUDF_EXPECTS(result.ok(), "Cannot get file size"); - return result.ValueOrDie(); -} - -} // namespace cudf::io diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp index 2f4272b0367..8abfb000b94 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -33,7 +33,7 @@ namespace cudf::io::detail { -void gather_column_buffer::allocate_strings_data(rmm::cuda_stream_view stream) +void gather_column_buffer::allocate_strings_data(bool memset_data, rmm::cuda_stream_view stream) { CUDF_EXPECTS(type.id() == type_id::STRING, "allocate_strings_data called for non-string column"); // The contents of _strings will never be directly returned to the user. @@ -56,11 +56,12 @@ std::unique_ptr gather_column_buffer::make_string_column_impl(rmm::cuda_ return make_strings_column(*_strings, stream, _mr); } -void cudf::io::detail::inline_column_buffer::allocate_strings_data(rmm::cuda_stream_view stream) +void cudf::io::detail::inline_column_buffer::allocate_strings_data(bool memset_data, + rmm::cuda_stream_view stream) { CUDF_EXPECTS(type.id() == type_id::STRING, "allocate_strings_data called for non-string column"); // size + 1 for final offset. _string_data will be initialized later. - _data = create_data(data_type{type_id::INT32}, size + 1, stream, _mr); + _data = create_data(data_type{type_to_id()}, size + 1, memset_data, stream, _mr); } void cudf::io::detail::inline_column_buffer::create_string_data(size_t num_bytes, @@ -93,6 +94,7 @@ void copy_buffer_data(string_policy const& buff, string_policy& new_buff) template void column_buffer_base::create_with_mask(size_type _size, cudf::mask_state null_mask_state, + bool memset_data, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -100,16 +102,20 @@ void column_buffer_base::create_with_mask(size_type _size, _mr = mr; switch (type.id()) { - case type_id::STRING: static_cast(this)->allocate_strings_data(stream); break; + case type_id::STRING: + static_cast(this)->allocate_strings_data(memset_data, stream); + break; // list columns store a buffer of int32's as offsets to represent // their individual rows - case type_id::LIST: _data = create_data(data_type{type_id::INT32}, size, stream, _mr); break; + case type_id::LIST: + _data = create_data(data_type{type_to_id()}, size, memset_data, stream, _mr); + break; // struct columns store no data themselves. just validity and children. 
case type_id::STRUCT: break; - default: _data = create_data(type, size, stream, _mr); break; + default: _data = create_data(type, size, memset_data, stream, _mr); break; } if (is_nullable) { _null_mask = @@ -117,12 +123,21 @@ void column_buffer_base::create_with_mask(size_type _size, } } +template +void column_buffer_base::create(size_type _size, + bool memset_data, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + create_with_mask(_size, mask_state::ALL_NULL, memset_data, stream, mr); +} + template void column_buffer_base::create(size_type _size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - create_with_mask(_size, mask_state::ALL_NULL, stream, mr); + create_with_mask(_size, mask_state::ALL_NULL, true, stream, mr); } template diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp index ed6bb8bbdca..b2290965bb9 100644 --- a/cpp/src/io/utilities/column_buffer.hpp +++ b/cpp/src/io/utilities/column_buffer.hpp @@ -44,6 +44,7 @@ namespace detail { * * @param type The intended data type to populate * @param size The number of elements to be represented by the mask + * @param memset_data Defines whether data should be memset to 0 * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer * @@ -51,17 +52,25 @@ namespace detail { */ inline rmm::device_buffer create_data(data_type type, size_type size, + bool memset_data, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { std::size_t data_size = size_of(type) * size; rmm::device_buffer data(data_size, stream, mr); - CUDF_CUDA_TRY(cudaMemsetAsync(data.data(), 0, data_size, stream.value())); - + if (memset_data) { CUDF_CUDA_TRY(cudaMemsetAsync(data.data(), 0, data_size, stream.value())); } return data; } +inline rmm::device_buffer create_data(data_type type, + size_type size, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + return create_data(type, size, true, stream, mr); +} + using string_index_pair = thrust::pair; // forward declare friend functions @@ -113,12 +122,18 @@ class column_buffer_base { // instantiate a column of known type with a specified size. Allows deferred creation for // preprocessing steps such as in the Parquet reader + void create(size_type _size, + bool memset_data, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + void create(size_type _size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); // like create(), but also takes a `cudf::mask_state` to allow initializing the null mask as // something other than `ALL_NULL` void create_with_mask(size_type _size, cudf::mask_state null_mask_state, + bool memset_data, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); @@ -192,7 +207,7 @@ class gather_column_buffer : public column_buffer_base { create(_size, stream, mr); } - void allocate_strings_data(rmm::cuda_stream_view stream); + void allocate_strings_data(bool memset_data, rmm::cuda_stream_view stream); [[nodiscard]] void* data_impl() { return _strings ? _strings->data() : _data.data(); } [[nodiscard]] void const* data_impl() const { return _strings ? 
_strings->data() : _data.data(); } @@ -226,7 +241,7 @@ class inline_column_buffer : public column_buffer_base { create(_size, stream, mr); } - void allocate_strings_data(rmm::cuda_stream_view stream); + void allocate_strings_data(bool memset_data, rmm::cuda_stream_view stream); void* data_impl() { return _data.data(); } [[nodiscard]] void const* data_impl() const { return _data.data(); } diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 91be154e09d..e4313eba454 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -297,10 +297,10 @@ class device_buffer_source final : public datasource { { auto const count = std::min(size, this->size() - offset); auto const stream = cudf::get_default_stream(); - auto h_data = cudf::detail::make_std_vector_async( + auto h_data = cudf::detail::make_host_vector_async( cudf::device_span{_d_buffer.data() + offset, count}, stream); stream.synchronize(); - return std::make_unique>>(std::move(h_data)); + return std::make_unique>>(std::move(h_data)); } [[nodiscard]] bool supports_device_read() const override { return true; } diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index d4ef2747c9d..789702ce538 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -432,13 +432,13 @@ std::unique_ptr> conditional_left_semi_join( rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return std::move(detail::conditional_join_anti_semi(left, - right, - binary_predicate, - detail::join_kind::LEFT_SEMI_JOIN, - output_size, - cudf::get_default_stream(), - mr)); + return detail::conditional_join_anti_semi(left, + right, + binary_predicate, + detail::join_kind::LEFT_SEMI_JOIN, + output_size, + cudf::get_default_stream(), + mr); } std::unique_ptr> conditional_left_anti_join( @@ -449,13 +449,13 @@ std::unique_ptr> conditional_left_anti_join( rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return std::move(detail::conditional_join_anti_semi(left, - right, - binary_predicate, - detail::join_kind::LEFT_ANTI_JOIN, - output_size, - cudf::get_default_stream(), - mr)); + return detail::conditional_join_anti_semi(left, + right, + binary_predicate, + detail::join_kind::LEFT_ANTI_JOIN, + output_size, + cudf::get_default_stream(), + mr); } std::size_t conditional_inner_join_size(table_view const& left, @@ -484,12 +484,12 @@ std::size_t conditional_left_semi_join_size(table_view const& left, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return std::move(detail::compute_conditional_join_output_size(left, - right, - binary_predicate, - detail::join_kind::LEFT_SEMI_JOIN, - cudf::get_default_stream(), - mr)); + return detail::compute_conditional_join_output_size(left, + right, + binary_predicate, + detail::join_kind::LEFT_SEMI_JOIN, + cudf::get_default_stream(), + mr); } std::size_t conditional_left_anti_join_size(table_view const& left, @@ -498,12 +498,12 @@ std::size_t conditional_left_anti_join_size(table_view const& left, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return std::move(detail::compute_conditional_join_output_size(left, - right, - binary_predicate, - detail::join_kind::LEFT_ANTI_JOIN, - cudf::get_default_stream(), - mr)); + return detail::compute_conditional_join_output_size(left, + right, + binary_predicate, + detail::join_kind::LEFT_ANTI_JOIN, + cudf::get_default_stream(), + mr); } } // namespace cudf diff --git a/cpp/src/join/distinct_hash_join.cu b/cpp/src/join/distinct_hash_join.cu index 
daa1bf17c0d..3d95b0c5a5c 100644 --- a/cpp/src/join/distinct_hash_join.cu +++ b/cpp/src/join/distinct_hash_join.cu @@ -119,7 +119,7 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, {}, cuco::thread_scope_device, cuco_storage_type{}, - cudf::detail::cuco_allocator{stream}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, stream.value()} { CUDF_FUNC_RANGE(); diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index eb9b687630b..5d01482f44a 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -374,7 +374,7 @@ hash_join::hash_join(cudf::table_view const& build, cuco::empty_key{std::numeric_limits::max()}, cuco::empty_value{cudf::detail::JoinNoneValue}, stream.value(), - cudf::detail::cuco_allocator{stream}}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}}, _build{build}, _preprocessed_build{ cudf::experimental::row::equality::preprocessed_table::create(_build, stream)} diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 10ed59f71b0..573101cefd9 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -47,7 +47,7 @@ using mixed_multimap_type = cuco::static_multimap, cuco::legacy::double_hashing<1, hash_type, hash_type>>; using row_hash_legacy = diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index 90748e6f322..eb12065c6a9 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -16,7 +16,8 @@ #include "join_common_utils.cuh" #include "join_common_utils.hpp" -#include "mixed_join_kernels.cuh" +#include "mixed_join_kernel.hpp" +#include "mixed_join_size_kernel.hpp" #include #include @@ -126,11 +127,12 @@ mixed_join( auto build_view = table_device_view::create(build, stream); // Don't use multimap_type because we want a CG size of 1. - mixed_multimap_type hash_table{compute_hash_table_size(build.num_rows()), - cuco::empty_key{std::numeric_limits::max()}, - cuco::empty_value{cudf::detail::JoinNoneValue}, - stream.value(), - cudf::detail::cuco_allocator{stream}}; + mixed_multimap_type hash_table{ + compute_hash_table_size(build.num_rows()), + cuco::empty_key{std::numeric_limits::max()}, + cuco::empty_value{cudf::detail::JoinNoneValue}, + stream.value(), + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}}; // TODO: To add support for nested columns we will need to flatten in many // places. 
However, this probably isn't worth adding any time soon since we @@ -177,9 +179,6 @@ mixed_join( join_size = output_size_data->first; matches_per_row_span = output_size_data->second; } else { - // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); - matches_per_row = rmm::device_uvector{static_cast(outer_num_rows), stream, mr}; // Note that the view goes out of scope after this else statement, but the @@ -189,37 +188,38 @@ mixed_join( matches_per_row_span = cudf::device_span{ matches_per_row->begin(), static_cast(outer_num_rows)}; if (has_nulls) { - compute_mixed_join_output_size - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - kernel_join_type, - hash_table_view, - parser.device_expression_data, - swap_tables, - size.data(), - mutable_matches_per_row_span); + join_size = launch_compute_mixed_join_output_size(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + kernel_join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + mutable_matches_per_row_span, + config, + shmem_size_per_block, + stream, + mr); } else { - compute_mixed_join_output_size - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - kernel_join_type, - hash_table_view, - parser.device_expression_data, - swap_tables, - size.data(), - mutable_matches_per_row_span); + join_size = launch_compute_mixed_join_output_size(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + kernel_join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + mutable_matches_per_row_span, + config, + shmem_size_per_block, + stream, + mr); } - join_size = size.value(stream); } // The initial early exit clauses guarantee that we will not reach this point @@ -248,37 +248,39 @@ mixed_join( auto const& join_output_r = right_indices->data(); if (has_nulls) { - mixed_join - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - kernel_join_type, - hash_table_view, - join_output_l, - join_output_r, - parser.device_expression_data, - join_result_offsets.data(), - swap_tables); + launch_mixed_join(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + kernel_join_type, + hash_table_view, + join_output_l, + join_output_r, + parser.device_expression_data, + join_result_offsets.data(), + swap_tables, + config, + shmem_size_per_block, + stream); } else { - mixed_join - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - kernel_join_type, - hash_table_view, - join_output_l, - join_output_r, - parser.device_expression_data, - join_result_offsets.data(), - swap_tables); + launch_mixed_join(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + kernel_join_type, + hash_table_view, + join_output_l, + join_output_r, + parser.device_expression_data, + join_result_offsets.data(), + swap_tables, + config, + shmem_size_per_block, + stream); } auto join_indices = std::pair(std::move(left_indices), std::move(right_indices)); @@ -391,11 +393,12 @@ compute_mixed_join_output_size(table_view const& left_equality, auto build_view = 
table_device_view::create(build, stream); // Don't use multimap_type because we want a CG size of 1. - mixed_multimap_type hash_table{compute_hash_table_size(build.num_rows()), - cuco::empty_key{std::numeric_limits::max()}, - cuco::empty_value{cudf::detail::JoinNoneValue}, - stream.value(), - cudf::detail::cuco_allocator{stream}}; + mixed_multimap_type hash_table{ + compute_hash_table_size(build.num_rows()), + cuco::empty_key{std::numeric_limits::max()}, + cuco::empty_value{cudf::detail::JoinNoneValue}, + stream.value(), + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}}; // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we @@ -421,9 +424,6 @@ compute_mixed_join_output_size(table_view const& left_equality, detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE); auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; - // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); - auto const preprocessed_probe = experimental::row::equality::preprocessed_table::create(probe, stream); auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; @@ -434,39 +434,42 @@ compute_mixed_join_output_size(table_view const& left_equality, // Determine number of output rows without actually building the output to simply // find what the size of the output will be. + std::size_t size = 0; if (has_nulls) { - compute_mixed_join_output_size - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - join_type, - hash_table_view, - parser.device_expression_data, - swap_tables, - size.data(), - matches_per_row_span); + size = launch_compute_mixed_join_output_size(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + matches_per_row_span, + config, + shmem_size_per_block, + stream, + mr); } else { - compute_mixed_join_output_size - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - join_type, - hash_table_view, - parser.device_expression_data, - swap_tables, - size.data(), - matches_per_row_span); + size = launch_compute_mixed_join_output_size(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + matches_per_row_span, + config, + shmem_size_per_block, + stream, + mr); } - return {size.value(stream), std::move(matches_per_row)}; + return {size, std::move(matches_per_row)}; } } // namespace detail diff --git a/cpp/src/join/mixed_join_common_utils.cuh b/cpp/src/join/mixed_join_common_utils.cuh index 18322d0623d..f9a477bb9ac 100644 --- a/cpp/src/join/mixed_join_common_utils.cuh +++ b/cpp/src/join/mixed_join_common_utils.cuh @@ -164,37 +164,30 @@ struct pair_expression_equality : public expression_equality { /** * @brief Equality comparator that composes two row_equality comparators. 
*/ -class double_row_equality { - public: - double_row_equality(row_equality equality_comparator, row_equality conditional_comparator) - : _equality_comparator{equality_comparator}, _conditional_comparator{conditional_comparator} - { - } +struct double_row_equality_comparator { + row_equality const equality_comparator; + row_equality const conditional_comparator; __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept { using experimental::row::lhs_index_type; using experimental::row::rhs_index_type; - return _equality_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}) && - _conditional_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}); + return equality_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}) && + conditional_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}); } - - private: - row_equality _equality_comparator; - row_equality _conditional_comparator; }; -// The hash table type used by mixed_semi_join with the build_table. +// The hash set type used by mixed_semi_join with the build_table. using hash_set_type = cuco::static_set, cuda::thread_scope_device, - double_row_equality, + double_row_equality_comparator, cuco::linear_probing<4, row_hash>, - cudf::detail::cuco_allocator, + cudf::detail::cuco_allocator, cuco::storage<1>>; -// The device_ref_type used by mixed_semi_join kerenels for probing. +// The hash_set_ref_type used by mixed_semi_join kernels for probing. using hash_set_ref_type = hash_set_type::ref_type; } // namespace detail diff --git a/cpp/src/join/mixed_join_kernel.cu b/cpp/src/join/mixed_join_kernel.cu index 61cfa168b03..cd4016837cc 100644 --- a/cpp/src/join/mixed_join_kernel.cu +++ b/cpp/src/join/mixed_join_kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -15,11 +15,12 @@ */ #include "mixed_join_kernel.cuh" +#include "mixed_join_kernel.hpp" namespace cudf { namespace detail { -template __global__ void mixed_join( +template void launch_mixed_join( table_device_view left_table, table_device_view right_table, table_device_view probe, @@ -32,7 +33,10 @@ template __global__ void mixed_join( size_type* join_output_r, cudf::ast::detail::expression_device_view device_expression_data, cudf::size_type const* join_result_offsets, - bool const swap_tables); + bool const swap_tables, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream); } // namespace detail diff --git a/cpp/src/join/mixed_join_kernel.cuh b/cpp/src/join/mixed_join_kernel.cuh index ea59f23c77f..368b1fba870 100644 --- a/cpp/src/join/mixed_join_kernel.cuh +++ b/cpp/src/join/mixed_join_kernel.cuh @@ -19,6 +19,7 @@ #include "join_common_utils.cuh" #include "join_common_utils.hpp" #include "mixed_join_common_utils.cuh" +#include "mixed_join_kernel.hpp" #include #include @@ -39,20 +40,20 @@ namespace cg = cooperative_groups; #pragma GCC diagnostic ignored "-Wattributes" template -CUDF_HIDDEN __launch_bounds__(block_size) __global__ - void mixed_join(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - size_type* join_output_l, - size_type* join_output_r, - cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables) +CUDF_KERNEL void __launch_bounds__(block_size) + mixed_join(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables) { // Normally the casting of a shared memory array is used to create multiple // arrays of different types from the shared memory buffer, but here it is @@ -111,6 +112,41 @@ CUDF_HIDDEN __launch_bounds__(block_size) __global__ } } +template +void launch_mixed_join(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream) +{ + mixed_join + <<>>( + left_table, + right_table, + probe, + build, + hash_probe, + equality_probe, + join_type, + hash_table_view, + join_output_l, + join_output_r, + device_expression_data, + join_result_offsets, + swap_tables); +} + } // namespace detail } // namespace cudf diff --git a/cpp/src/join/mixed_join_kernel.hpp b/cpp/src/join/mixed_join_kernel.hpp new file mode 100644 index 00000000000..cc92e9d8ba4 --- /dev/null +++ b/cpp/src/join/mixed_join_kernel.hpp @@ -0,0 +1,80 @@ +/* + * 
Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "join/join_common_utils.hpp" +#include "join/mixed_join_common_utils.cuh" + +#include +#include +#include + +namespace CUDF_EXPORT cudf { +namespace detail { + +/** + * @brief Performs a join using the combination of a hash lookup to identify + * equal rows between one pair of tables and the evaluation of an expression + * containing an arbitrary expression. + * + * This method probes the hash table with each row in the probe table using a + * custom equality comparator that also checks that the conditional expression + * evaluates to true between the left/right tables when a match is found + * between probe and build rows. + * + * @tparam block_size The number of threads per block for this kernel + * @tparam has_nulls Whether or not the inputs may contain nulls. + * + * @param[in] left_table The left table + * @param[in] right_table The right table + * @param[in] probe The table with which to probe the hash table for matches. + * @param[in] build The table with which the hash table was built. + * @param[in] hash_probe The hasher used for the probe table. + * @param[in] equality_probe The equality comparator used when probing the hash table. + * @param[in] join_type The type of join to be performed + * @param[in] hash_table_view The hash table built from `build`. + * @param[out] join_output_l The left result of the join operation + * @param[out] join_output_r The right result of the join operation + * @param[in] device_expression_data Container of device data required to evaluate the desired + * expression. + * @param[in] join_result_offsets The starting indices in join_output[l|r] + * where the matches for each row begin. Equivalent to a prefix sum of + * matches_per_row. + * @param[in] swap_tables If true, the kernel was launched with one thread per right row and + * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. + */ +template +void launch_mixed_join(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream); + +} // namespace detail + +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/join/mixed_join_kernel_nulls.cu b/cpp/src/join/mixed_join_kernel_nulls.cu index 518f8ed8555..185aa133f2d 100644 --- a/cpp/src/join/mixed_join_kernel_nulls.cu +++ b/cpp/src/join/mixed_join_kernel_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,11 +15,12 @@ */ #include "mixed_join_kernel.cuh" +#include "mixed_join_kernel.hpp" namespace cudf { namespace detail { -template __global__ void mixed_join( +template void launch_mixed_join( table_device_view left_table, table_device_view right_table, table_device_view probe, @@ -32,7 +33,10 @@ template __global__ void mixed_join( size_type* join_output_r, cudf::ast::detail::expression_device_view device_expression_data, cudf::size_type const* join_result_offsets, - bool const swap_tables); + bool const swap_tables, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream); } // namespace detail diff --git a/cpp/src/join/mixed_join_kernels.cuh b/cpp/src/join/mixed_join_kernels.cuh deleted file mode 100644 index 037c02666d4..00000000000 --- a/cpp/src/join/mixed_join_kernels.cuh +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "join/join_common_utils.hpp" -#include "join/mixed_join_common_utils.cuh" - -#include -#include -#include - -namespace cudf { -namespace detail { - -/** - * @brief Computes the output size of joining the left table to the right table. - * - * This method probes the hash table with each row in the probe table using a - * custom equality comparator that also checks that the conditional expression - * evaluates to true between the left/right tables when a match is found - * between probe and build rows. - * - * @tparam block_size The number of threads per block for this kernel - * @tparam has_nulls Whether or not the inputs may contain nulls. - * - * @param[in] left_table The left table - * @param[in] right_table The right table - * @param[in] probe The table with which to probe the hash table for matches. - * @param[in] build The table with which the hash table was built. - * @param[in] hash_probe The hasher used for the probe table. - * @param[in] equality_probe The equality comparator used when probing the hash table. - * @param[in] join_type The type of join to be performed - * @param[in] hash_table_view The hash table built from `build`. - * @param[in] device_expression_data Container of device data required to evaluate the desired - * expression. - * @param[in] swap_tables If true, the kernel was launched with one thread per right row and - * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. - * @param[out] output_size The resulting output size - * @param[out] matches_per_row The number of matches in one pair of - * equality/conditional tables for each row in the other pair of tables. If - * swap_tables is true, matches_per_row corresponds to the right_table, - * otherwise it corresponds to the left_table. 
Note that corresponding swap of - * left/right tables to determine which is the build table and which is the - * probe table has already happened on the host. - */ - -template -__global__ void compute_mixed_join_output_size( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - std::size_t* output_size, - cudf::device_span matches_per_row); - -/** - * @brief Performs a join using the combination of a hash lookup to identify - * equal rows between one pair of tables and the evaluation of an expression - * containing an arbitrary expression. - * - * This method probes the hash table with each row in the probe table using a - * custom equality comparator that also checks that the conditional expression - * evaluates to true between the left/right tables when a match is found - * between probe and build rows. - * - * @tparam block_size The number of threads per block for this kernel - * @tparam has_nulls Whether or not the inputs may contain nulls. - * - * @param[in] left_table The left table - * @param[in] right_table The right table - * @param[in] probe The table with which to probe the hash table for matches. - * @param[in] build The table with which the hash table was built. - * @param[in] hash_probe The hasher used for the probe table. - * @param[in] equality_probe The equality comparator used when probing the hash table. - * @param[in] join_type The type of join to be performed - * @param[in] hash_table_view The hash table built from `build`. - * @param[out] join_output_l The left result of the join operation - * @param[out] join_output_r The right result of the join operation - * @param[in] device_expression_data Container of device data required to evaluate the desired - * expression. - * @param[in] join_result_offsets The starting indices in join_output[l|r] - * where the matches for each row begin. Equivalent to a prefix sum of - * matches_per_row. - * @param[in] swap_tables If true, the kernel was launched with one thread per right row and - * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. - */ -template -__global__ void mixed_join(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - size_type* join_output_l, - size_type* join_output_r, - cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables); - -} // namespace detail - -} // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu index 3e282f358c7..d2c76d11340 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cu +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -14,9 +14,7 @@ * limitations under the License. 
*/ -#include "join/join_common_utils.cuh" -#include "join/join_common_utils.hpp" -#include "join/mixed_join_common_utils.cuh" +#include "join/mixed_join_kernels_semi.cuh" #include #include @@ -35,15 +33,15 @@ namespace cg = cooperative_groups; #pragma GCC diagnostic ignored "-Wattributes" template -CUDF_HIDDEN __launch_bounds__(block_size) __global__ - void mixed_join_semi(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_equality const equality_probe, - hash_set_ref_type set_ref, - cudf::device_span left_table_keep_mask, - cudf::ast::detail::expression_device_view device_expression_data) +CUDF_KERNEL void __launch_bounds__(block_size) + mixed_join_semi(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + hash_set_ref_type set_ref, + cudf::device_span left_table_keep_mask, + cudf::ast::detail::expression_device_view device_expression_data) { auto constexpr cg_size = hash_set_ref_type::cg_size; @@ -60,8 +58,8 @@ CUDF_HIDDEN __launch_bounds__(block_size) __global__ auto thread_intermediate_storage = &intermediate_storage[(threadIdx.x / cg_size) * device_expression_data.num_intermediates]; - cudf::size_type const outer_num_rows = left_table.num_rows(); - cudf::size_type outer_row_index = (threadIdx.x + blockIdx.x * block_size) / cg_size; + cudf::size_type const outer_num_rows = left_table.num_rows(); + cudf::size_type const outer_row_index = (threadIdx.x + blockIdx.x * block_size) / cg_size; auto evaluator = cudf::ast::detail::expression_evaluator( left_table, right_table, device_expression_data); @@ -71,32 +69,49 @@ CUDF_HIDDEN __launch_bounds__(block_size) __global__ auto equality = single_expression_equality{ evaluator, thread_intermediate_storage, false, equality_probe}; - auto set_ref_equality = set_ref.with_key_eq(equality); - const auto result = set_ref_equality.contains(tile, outer_row_index); + auto const set_ref_equality = set_ref.with_key_eq(equality); + auto const result = set_ref_equality.contains(tile, outer_row_index); if (tile.thread_rank() == 0) left_table_keep_mask[outer_row_index] = result; } } -template __global__ void mixed_join_semi( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_equality const equality_probe, - hash_set_ref_type set_ref, - cudf::device_span left_table_keep_mask, - cudf::ast::detail::expression_device_view device_expression_data); - -template __global__ void mixed_join_semi( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_equality const equality_probe, - hash_set_ref_type set_ref, - cudf::device_span left_table_keep_mask, - cudf::ast::detail::expression_device_view device_expression_data); +void launch_mixed_join_semi(bool has_nulls, + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + hash_set_ref_type set_ref, + cudf::device_span left_table_keep_mask, + cudf::ast::detail::expression_device_view device_expression_data, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream) +{ + if (has_nulls) { + mixed_join_semi + <<>>( + left_table, + right_table, + probe, + build, + equality_probe, + set_ref, + left_table_keep_mask, + device_expression_data); + } else { + mixed_join_semi + <<>>( + 
left_table, + right_table, + probe, + build, + equality_probe, + set_ref, + left_table_keep_mask, + device_expression_data); + } +} } // namespace detail - } // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels_semi.cuh b/cpp/src/join/mixed_join_kernels_semi.cuh index 6c72c83cbdd..b08298e64e4 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cuh +++ b/cpp/src/join/mixed_join_kernels_semi.cuh @@ -16,8 +16,9 @@ #pragma once -#include "join/join_common_utils.hpp" -#include "join/mixed_join_common_utils.cuh" +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_common_utils.cuh" #include #include @@ -39,6 +40,7 @@ namespace detail { * @tparam block_size The number of threads per block for this kernel * @tparam has_nulls Whether or not the inputs may contain nulls. * + * @param[in] has_nulls If the input has nulls * @param[in] left_table The left table * @param[in] right_table The right table * @param[in] probe The table with which to probe the hash table for matches. @@ -50,15 +52,18 @@ namespace detail { * @param[in] device_expression_data Container of device data required to evaluate the desired * expression. */ -template -__global__ void mixed_join_semi(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_equality const equality_probe, - hash_set_ref_type set_ref, - cudf::device_span left_table_keep_mask, - cudf::ast::detail::expression_device_view device_expression_data); +void launch_mixed_join_semi(bool has_nulls, + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + hash_set_ref_type set_ref, + cudf::device_span left_table_keep_mask, + cudf::ast::detail::expression_device_view device_expression_data, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream); } // namespace detail diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index ec3f26eb148..ae22ab4f11e 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -116,13 +116,11 @@ std::unique_ptr> mixed_join_semi( auto left_conditional_view = table_device_view::create(left_conditional, stream); auto right_conditional_view = table_device_view::create(right_conditional, stream); - auto const preprocessed_build = - experimental::row::equality::preprocessed_table::create(build, stream); - auto const preprocessed_probe = - experimental::row::equality::preprocessed_table::create(probe, stream); - auto const row_comparator = - cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_probe}; - auto const equality_probe = row_comparator.equal_to(has_nulls, compare_nulls); + using namespace cudf::experimental::row::equality; + auto const preprocessed_build = preprocessed_table::create(build, stream); + auto const preprocessed_probe = preprocessed_table::create(probe, stream); + auto const row_comparator = two_table_comparator{preprocessed_build, preprocessed_probe}; + auto const equality_probe = row_comparator.equal_to(has_nulls, compare_nulls); // Create hash table containing all keys found in right table // TODO: To add support for nested columns we will need to flatten in many @@ -140,35 +138,34 @@ std::unique_ptr> mixed_join_semi( // the columns of the conditional table that are used by the expression, but // that requires additional plumbing through the AST machinery and is out of // scope for 
now. - auto const row_comparator_build = - cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_build}; + auto const row_comparator_build = two_table_comparator{preprocessed_build, preprocessed_build}; auto const equality_build_equality = row_comparator_build.equal_to(build_nulls, compare_nulls); - auto const preprocessed_build_condtional = - experimental::row::equality::preprocessed_table::create(right_conditional, stream); + auto const preprocessed_build_condtional = preprocessed_table::create(right_conditional, stream); auto const row_comparator_conditional_build = - cudf::experimental::row::equality::two_table_comparator{preprocessed_build_condtional, - preprocessed_build_condtional}; + two_table_comparator{preprocessed_build_condtional, preprocessed_build_condtional}; auto const equality_build_conditional = row_comparator_conditional_build.equal_to(build_nulls, compare_nulls); - double_row_equality equality_build{equality_build_equality, equality_build_conditional}; + double_row_equality_comparator equality_build{equality_build_equality, + equality_build_conditional}; auto const build_num_rows = compute_hash_table_size(build.num_rows()); - hash_set_type set{build_num_rows, - cuco::empty_key{JoinNoneValue}, - equality_build, - {row_hash_build.device_hasher(build_nulls)}, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + hash_set_type row_set{ + build_num_rows, + cuco::empty_key{JoinNoneValue}, + equality_build, + {row_hash_build.device_hasher(build_nulls)}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; auto iter = thrust::make_counting_iterator(0); // skip rows that are null here. if ((compare_nulls == null_equality::EQUAL) or (not nullable(build))) { - set.insert(iter, iter + right_num_rows, stream.value()); + row_set.insert(iter, iter + right_num_rows, stream.value()); } else { thrust::counting_iterator stencil(0); auto const [row_bitmask, _] = @@ -176,7 +173,7 @@ std::unique_ptr> mixed_join_semi( row_is_valid pred{static_cast(row_bitmask.data())}; // insert valid rows - set.insert_if(iter, iter + right_num_rows, stencil, pred, stream.value()); + row_set.insert_if(iter, iter + right_num_rows, stencil, pred, stream.value()); } auto constexpr cg_size = hash_set_type::cg_size; @@ -185,37 +182,26 @@ std::unique_ptr> mixed_join_semi( auto const shmem_size_per_block = (parser.shmem_per_thread / cg_size) * config.num_threads_per_block; - auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const hash_probe = row_hash.device_hasher(has_nulls); - hash_set_ref_type set_ref = - set.ref(cuco::contains).with_hash_function(row_hash.device_hasher(has_nulls)); + hash_set_ref_type const row_set_ref = row_set.ref(cuco::contains).with_hash_function(hash_probe); // Vector used to indicate indices from left/probe table which are present in output auto left_table_keep_mask = rmm::device_uvector(probe.num_rows(), stream); - if (has_nulls) { - mixed_join_semi - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - equality_probe, - set_ref, - cudf::device_span(left_table_keep_mask), - parser.device_expression_data); - } else { - mixed_join_semi - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - equality_probe, - set_ref, - cudf::device_span(left_table_keep_mask), - 
parser.device_expression_data); - } + launch_mixed_join_semi(has_nulls, + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + row_set_ref, + cudf::device_span(left_table_keep_mask), + parser.device_expression_data, + config, + shmem_size_per_block, + stream); auto gather_map = std::make_unique>(probe.num_rows(), stream, mr); diff --git a/cpp/src/join/mixed_join_size_kernel.cu b/cpp/src/join/mixed_join_size_kernel.cu index 4011acb65d6..4882c8769e6 100644 --- a/cpp/src/join/mixed_join_size_kernel.cu +++ b/cpp/src/join/mixed_join_size_kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,11 +15,12 @@ */ #include "mixed_join_size_kernel.cuh" +#include "mixed_join_size_kernel.hpp" namespace cudf { namespace detail { -template __global__ void compute_mixed_join_output_size( +template std::size_t launch_compute_mixed_join_output_size( table_device_view left_table, table_device_view right_table, table_device_view probe, @@ -30,8 +31,11 @@ template __global__ void compute_mixed_join_output_size matches_per_row); + cudf::device_span matches_per_row, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/src/join/mixed_join_size_kernel.cuh b/cpp/src/join/mixed_join_size_kernel.cuh index 00a90f8273f..84e9be45030 100644 --- a/cpp/src/join/mixed_join_size_kernel.cuh +++ b/cpp/src/join/mixed_join_size_kernel.cuh @@ -36,19 +36,19 @@ namespace cg = cooperative_groups; #pragma GCC diagnostic ignored "-Wattributes" template -CUDF_HIDDEN __launch_bounds__(block_size) __global__ void compute_mixed_join_output_size( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - std::size_t* output_size, - cudf::device_span matches_per_row) +CUDF_KERNEL void __launch_bounds__(block_size) + compute_mixed_join_output_size(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row) { // The (required) extern storage of the shared memory array leads to // conflicting declarations between different templates. 
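The "conflicting declarations between different templates" comment refers to a real CUDA constraint: an `extern __shared__ T arr[]` declaration must have a single type for a given symbol, so different instantiations of a templated kernel cannot each declare their own typed array. A short sketch of the usual workaround (declare raw bytes once, cast per instantiation; the kernel name here is illustrative, assuming a launch with `blockDim.x * sizeof(T)` dynamic shared bytes):

```cpp
template <typename T>
__global__ void uses_dynamic_shmem(T const* in, T* out, int n)
{
  // One untyped extern declaration avoids conflicting types across
  // instantiations; each instantiation reinterprets it as its own T.
  extern __shared__ char raw_storage[];
  T* storage = reinterpret_cast<T*>(raw_storage);

  auto const i = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
  if (i < n) { storage[threadIdx.x] = in[i]; }
  __syncthreads();
  if (i < n) { out[i] = storage[threadIdx.x]; }
}
```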
The easiest @@ -103,5 +103,43 @@ CUDF_HIDDEN __launch_bounds__(block_size) __global__ void compute_mixed_join_out } } +template <cudf::size_type block_size, bool has_nulls> +std::size_t launch_compute_mixed_join_output_size( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + cudf::device_span<size_type> matches_per_row, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + // Allocate storage for the counter used to get the size of the join output + rmm::device_scalar<std::size_t> size(0, stream, mr); + + compute_mixed_join_output_size<block_size, has_nulls> + <<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>( + left_table, + right_table, + probe, + build, + hash_probe, + equality_probe, + join_type, + hash_table_view, + device_expression_data, + swap_tables, + size.data(), + matches_per_row); + return size.value(stream); +} + } // namespace detail } // namespace cudf diff --git a/cpp/src/join/mixed_join_size_kernel.hpp b/cpp/src/join/mixed_join_size_kernel.hpp new file mode 100644 index 00000000000..b09805c14dc --- /dev/null +++ b/cpp/src/join/mixed_join_size_kernel.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_common_utils.cuh" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace CUDF_EXPORT cudf { +namespace detail { + +/** + * @brief Computes the output size of joining the left table to the right table. + * + * This method probes the hash table with each row in the probe table using a + * custom equality comparator that also checks that the conditional expression + * evaluates to true between the left/right tables when a match is found + * between probe and build rows. + * + * @tparam block_size The number of threads per block for this kernel + * @tparam has_nulls Whether or not the inputs may contain nulls. + * + * @param[in] left_table The left table + * @param[in] right_table The right table + * @param[in] probe The table with which to probe the hash table for matches. + * @param[in] build The table with which the hash table was built. + * @param[in] hash_probe The hasher used for the probe table. + * @param[in] equality_probe The equality comparator used when probing the hash table. + * @param[in] join_type The type of join to be performed + * @param[in] hash_table_view The hash table built from `build`. + * @param[in] device_expression_data Container of device data required to evaluate the desired + * expression. + * @param[in] swap_tables If true, the kernel was launched with one thread per right row and + * the kernel needs to internally loop over left rows. Otherwise, loop over right rows.
+ * @param[out] output_size The resulting output size + * @param[out] matches_per_row The number of matches in one pair of + * equality/conditional tables for each row in the other pair of tables. If + * swap_tables is true, matches_per_row corresponds to the right_table, + * otherwise it corresponds to the left_table. Note that corresponding swap of + * left/right tables to determine which is the build table and which is the + * probe table has already happened on the host. + */ + +template +std::size_t launch_compute_mixed_join_output_size( + cudf::table_device_view left_table, + cudf::table_device_view right_table, + cudf::table_device_view probe, + cudf::table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + cudf::device_span matches_per_row, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/join/mixed_join_size_kernel_nulls.cu b/cpp/src/join/mixed_join_size_kernel_nulls.cu index 2868113bf33..11f9103da4d 100644 --- a/cpp/src/join/mixed_join_size_kernel_nulls.cu +++ b/cpp/src/join/mixed_join_size_kernel_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ namespace cudf { namespace detail { -template __global__ void compute_mixed_join_output_size( +template std::size_t launch_compute_mixed_join_output_size( table_device_view left_table, table_device_view right_table, table_device_view probe, @@ -30,8 +30,10 @@ template __global__ void compute_mixed_join_output_size matches_per_row); - + cudf::device_span matches_per_row, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/src/json/json_path.cu b/cpp/src/json/json_path.cu index d1a1097de35..1bf4bf3b153 100644 --- a/cpp/src/json/json_path.cu +++ b/cpp/src/json/json_path.cu @@ -39,7 +39,7 @@ #include #include -#include +#include #include #include #include @@ -207,7 +207,7 @@ class parser { struct json_output { size_t output_max_len; char* output; - thrust::optional output_len; + cuda::std::optional output_len; __device__ void add_output(char const* str, size_t len) { @@ -656,7 +656,7 @@ class path_state : private parser { * @param stream Cuda stream to perform any gpu actions on * @returns A pair containing the command buffer, and maximum stack depth required. */ -std::pair>, int> build_command_buffer( +std::pair>, int> build_command_buffer( cudf::string_scalar const& json_path, rmm::cuda_stream_view stream) { std::string h_json_path = json_path.to_string(stream); @@ -690,8 +690,8 @@ std::pair>, int> build_comma } while (op.type != path_operator_type::END); auto const is_empty = h_operators.size() == 1 && h_operators[0].type == path_operator_type::END; - return is_empty ? std::pair(thrust::nullopt, 0) - : std::pair(thrust::make_optional(cudf::detail::make_device_uvector_sync( + return is_empty ? 
std::pair(cuda::std::nullopt, 0) + : std::pair(cuda::std::make_optional(cudf::detail::make_device_uvector_sync( h_operators, stream, rmm::mr::get_current_device_resource())), max_stack_depth); } @@ -920,9 +920,9 @@ __launch_bounds__(block_size) CUDF_KERNEL path_operator const* const commands, size_type* d_sizes, cudf::detail::input_offsetalator output_offsets, - thrust::optional out_buf, - thrust::optional out_validity, - thrust::optional out_valid_count, + cuda::std::optional out_buf, + cuda::std::optional out_validity, + cuda::std::optional out_valid_count, get_json_object_options options) { auto tid = cudf::detail::grid_1d::global_thread_id(); @@ -1012,9 +1012,9 @@ std::unique_ptr get_json_object(cudf::strings_column_view const& c std::get<0>(preprocess).value().data(), sizes.data(), d_offsets, - thrust::nullopt, - thrust::nullopt, - thrust::nullopt, + cuda::std::nullopt, + cuda::std::nullopt, + cuda::std::nullopt, options); // convert sizes to offsets diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 30c03a8cd68..11703527d26 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/src/lists/explode.cu b/cpp/src/lists/explode.cu index 46c4fc78a6f..74a0d842aad 100644 --- a/cpp/src/lists/explode.cu +++ b/cpp/src/lists/explode.cu @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -36,7 +37,6 @@ #include #include #include -#include #include #include @@ -57,8 +57,8 @@ std::unique_ptr
build_table( size_type const explode_column_idx, column_view const& sliced_child, cudf::device_span<size_type const> gather_map, - thrust::optional<rmm::device_uvector<size_type>> explode_col_gather_map, - thrust::optional<rmm::device_uvector<size_type>> position_array, + cuda::std::optional<rmm::device_uvector<size_type>> explode_col_gather_map, + cuda::std::optional<rmm::device_uvector<size_type>> position_array, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -143,8 +143,8 @@ std::unique_ptr<table>
explode(table_view const& input_table, explode_column_idx, sliced_child, gather_map, - thrust::nullopt, - thrust::nullopt, + cuda::std::nullopt, + cuda::std::nullopt, stream, mr); } @@ -193,7 +193,7 @@ std::unique_ptr<table>
explode_position(table_view const& input_table, explode_column_idx, sliced_child, gather_map, - thrust::nullopt, + cuda::std::nullopt, std::move(pos), stream, mr); @@ -292,7 +292,7 @@ std::unique_ptr<table>
explode_outer(table_view const& input_table, sliced_child, gather_map, explode_col_gather_map, - include_position ? std::move(pos) : thrust::optional>{}, + include_position ? std::move(pos) : cuda::std::optional>{}, stream, mr); } diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index bebb9d14923..d49c0c6f0d2 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -164,11 +164,13 @@ compute_row_frequencies(table_view const& input, "Nested types are not yet supported in histogram aggregation.", std::invalid_argument); - auto map = cudf::detail::hash_map_type{compute_hash_table_size(input.num_rows()), - cuco::empty_key{-1}, - cuco::empty_value{std::numeric_limits::min()}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto map = cudf::detail::hash_map_type{ + compute_hash_table_size(input.num_rows()), + cuco::empty_key{-1}, + cuco::empty_value{std::numeric_limits::min()}, + + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; auto const preprocessed_input = cudf::experimental::row::hash::preprocessed_table::create(input, stream); diff --git a/cpp/src/rolling/jit/operation.hpp b/cpp/src/rolling/jit/operation.hpp index f8a52c03d4e..3be739ec5bf 100644 --- a/cpp/src/rolling/jit/operation.hpp +++ b/cpp/src/rolling/jit/operation.hpp @@ -14,12 +14,12 @@ * limitations under the License. */ +#pragma once + #include "rolling/jit/operation-udf.hpp" #include -#pragma once - struct rolling_udf_ptx { template static OutType operate(InType const* in_col, cudf::size_type start, cudf::size_type count) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 81227cb9a2d..66cefd0aa2f 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -229,14 +229,15 @@ rmm::device_uvector contains(table_view const& haystack, [&](auto const& d_self_equal, auto const& d_two_table_equal, auto const& probing_scheme) { auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; - auto set = cuco::static_set{cuco::extent{compute_hash_table_size(haystack.num_rows())}, - cuco::empty_key{rhs_index_type{-1}}, - d_equal, - probing_scheme, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto set = cuco::static_set{ + cuco::extent{compute_hash_table_size(haystack.num_rows())}, + cuco::empty_key{rhs_index_type{-1}}, + d_equal, + probing_scheme, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh index 6d472925b30..281fdfa6b8f 100644 --- a/cpp/src/sort/segmented_sort_impl.cuh +++ b/cpp/src/sort/segmented_sort_impl.cuh @@ -79,6 +79,8 @@ struct column_fast_sort_fn { stream, rmm::mr::get_current_device_resource()); mutable_column_view output_view = temp_col->mutable_view(); + auto temp_indices = cudf::column( + cudf::column_view(indices.type(), indices.size(), indices.head(), nullptr, 0), stream); // DeviceSegmentedSort is faster than DeviceSegmentedRadixSort at this time auto fast_sort_impl = [stream](bool ascending, [[maybe_unused]] auto&&... 
args) { @@ -118,7 +120,7 @@ struct column_fast_sort_fn { fast_sort_impl(ascending, input.begin<T>(), output_view.begin<T>(), - indices.begin<size_type>(), + temp_indices.view().begin<size_type>(), indices.begin<size_type>(), input.size(), segment_offsets.size() - 1, diff --git a/cpp/src/stream_compaction/apply_boolean_mask.cu b/cpp/src/stream_compaction/apply_boolean_mask.cu index cdca9517d94..9812f4ffbd7 100644 --- a/cpp/src/stream_compaction/apply_boolean_mask.cu +++ b/cpp/src/stream_compaction/apply_boolean_mask.cu @@ -91,9 +91,10 @@ std::unique_ptr<table>
apply_boolean_mask(table_view const& input, */ std::unique_ptr<table>
apply_boolean_mask(table_view const& input, column_view const& boolean_mask, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::apply_boolean_mask(input, boolean_mask, cudf::get_default_stream(), mr); + return detail::apply_boolean_mask(input, boolean_mask, stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index e5cf29f3ebf..24e2692cb6f 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -51,7 +51,7 @@ namespace { * @param func The input functor to invoke */ template -rmm::device_uvector dipatch_row_equal( +rmm::device_uvector dispatch_row_equal( null_equality compare_nulls, nan_equality compare_nans, bool has_nulls, @@ -97,22 +97,23 @@ rmm::device_uvector distinct_indices(table_view const& input, auto const helper_func = [&](auto const& d_equal) { using RowHasher = std::decay_t; - auto set = hash_set_type{num_rows, - 0.5, // desired load factor - cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, - d_equal, - {row_hash.device_hasher(has_nulls)}, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto set = hash_set_type{ + num_rows, + 0.5, // desired load factor + cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, + d_equal, + {row_hash.device_hasher(has_nulls)}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; return detail::reduce_by_row(set, num_rows, keep, stream, mr); }; if (cudf::detail::has_nested_columns(input)) { - return dipatch_row_equal(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); + return dispatch_row_equal(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); } else { - return dipatch_row_equal(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); + return dispatch_row_equal(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); } } @@ -149,11 +150,11 @@ std::unique_ptr
distinct(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::distinct( - input, keys, keep, nulls_equal, nans_equal, cudf::get_default_stream(), mr); + return detail::distinct(input, keys, keep, nulls_equal, nans_equal, stream, mr); } std::unique_ptr distinct_indices(table_view const& input, diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 9843bb889f4..78eb0fa5212 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -141,14 +141,15 @@ cudf::size_type distinct_count(table_view const& keys, auto const comparator_helper = [&](auto const row_equal) { using hasher_type = decltype(hash_key); - auto key_set = cuco::static_set{cuco::extent{compute_hash_table_size(num_rows)}, - cuco::empty_key{-1}, - row_equal, - cuco::linear_probing<1, hasher_type>{hash_key}, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto key_set = cuco::static_set{ + cuco::extent{compute_hash_table_size(num_rows)}, + cuco::empty_key{-1}, + row_equal, + cuco::linear_probing<1, hasher_type>{hash_key}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; auto const iter = thrust::counting_iterator(0); // when nulls are equal, we skip hashing any row that has a null @@ -217,15 +218,18 @@ cudf::size_type distinct_count(column_view const& input, cudf::size_type distinct_count(column_view const& input, null_policy null_handling, - nan_policy nan_handling) + nan_policy nan_handling, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - return detail::distinct_count(input, null_handling, nan_handling, cudf::get_default_stream()); + return detail::distinct_count(input, null_handling, nan_handling, stream); } -cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal) +cudf::size_type distinct_count(table_view const& input, + null_equality nulls_equal, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - return detail::distinct_count(input, nulls_equal, cudf::get_default_stream()); + return detail::distinct_count(input, nulls_equal, stream); } } // namespace cudf diff --git a/cpp/src/stream_compaction/distinct_helpers.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp index fca67c98873..bea02e3dbe8 100644 --- a/cpp/src/stream_compaction/distinct_helpers.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -57,7 +57,7 @@ using hash_set_type = cudf::experimental::row::hash::device_row_hasher< cudf::hashing::detail::default_hash, cudf::nullate::DYNAMIC>>, - cudf::detail::cuco_allocator, + cudf::detail::cuco_allocator, cuco::storage<1>>; /** diff --git a/cpp/src/stream_compaction/drop_nans.cu b/cpp/src/stream_compaction/drop_nans.cu index b46381c8ff6..b98ebbc2ecc 100644 --- a/cpp/src/stream_compaction/drop_nans.cu +++ b/cpp/src/stream_compaction/drop_nans.cu @@ -117,20 +117,22 @@ std::unique_ptr
drop_nans(table_view const& input, std::unique_ptr<table>
drop_nans(table_view const& input, std::vector<size_type> const& keys, cudf::size_type keep_threshold, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nans(input, keys, keep_threshold, cudf::get_default_stream(), mr); + return detail::drop_nans(input, keys, keep_threshold, stream, mr); } /* * Filters a table to remove nan elements. */ std::unique_ptr<table>
drop_nans(table_view const& input, std::vector<size_type> const& keys, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nans(input, keys, keys.size(), cudf::get_default_stream(), mr); + return detail::drop_nans(input, keys, keys.size(), stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/drop_nulls.cu b/cpp/src/stream_compaction/drop_nulls.cu index cb7cd61bf02..2497e4e5065 100644 --- a/cpp/src/stream_compaction/drop_nulls.cu +++ b/cpp/src/stream_compaction/drop_nulls.cu @@ -90,20 +90,22 @@ std::unique_ptr<table>
drop_nulls(table_view const& input, std::unique_ptr<table>
drop_nulls(table_view const& input, std::vector<size_type> const& keys, cudf::size_type keep_threshold, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nulls(input, keys, keep_threshold, cudf::get_default_stream(), mr); + return detail::drop_nulls(input, keys, keep_threshold, stream, mr); } /* * Filters a table to remove null elements. */ std::unique_ptr<table>
drop_nulls(table_view const& input, std::vector<size_type> const& keys, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nulls(input, keys, keys.size(), cudf::get_default_stream(), mr); + return detail::drop_nulls(input, keys, keys.size(), stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index edb47984d13..93de0e60b6d 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -119,10 +119,11 @@ std::unique_ptr<table>
unique(table_view const& input, std::vector const& keys, duplicate_keep_option const keep, null_equality nulls_equal, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::unique(input, keys, keep, nulls_equal, cudf::get_default_stream(), mr); + return detail::unique(input, keys, keep, nulls_equal, stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/unique_count.cu b/cpp/src/stream_compaction/unique_count.cu index 19607fe8105..d842f63cd7b 100644 --- a/cpp/src/stream_compaction/unique_count.cu +++ b/cpp/src/stream_compaction/unique_count.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,10 +67,12 @@ cudf::size_type unique_count(table_view const& keys, } // namespace detail -cudf::size_type unique_count(table_view const& input, null_equality nulls_equal) +cudf::size_type unique_count(table_view const& input, + null_equality nulls_equal, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - return detail::unique_count(input, nulls_equal, cudf::get_default_stream()); + return detail::unique_count(input, nulls_equal, stream); } } // namespace cudf diff --git a/cpp/src/stream_compaction/unique_count_column.cu b/cpp/src/stream_compaction/unique_count_column.cu index 16758b6e3a7..89ce2391a7b 100644 --- a/cpp/src/stream_compaction/unique_count_column.cu +++ b/cpp/src/stream_compaction/unique_count_column.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -101,10 +101,11 @@ cudf::size_type unique_count(column_view const& input, cudf::size_type unique_count(column_view const& input, null_policy null_handling, - nan_policy nan_handling) + nan_policy nan_handling, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - return detail::unique_count(input, null_handling, nan_handling, cudf::get_default_stream()); + return detail::unique_count(input, null_handling, nan_handling, stream); } } // namespace cudf diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu index 718ac41e36c..79d241205df 100644 --- a/cpp/src/strings/contains.cu +++ b/cpp/src/strings/contains.cu @@ -112,7 +112,7 @@ std::unique_ptr count_re(strings_column_view const& input, auto const d_strings = column_device_view::create(input.parent(), stream); - auto result = count_matches(*d_strings, *d_prog, input.size(), stream, mr); + auto result = count_matches(*d_strings, *d_prog, stream, mr); if (input.has_nulls()) { result->set_null_mask(cudf::detail::copy_bitmask(input.parent(), stream, mr), input.null_count()); diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index 64a2107e17a..99c40f00b00 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -36,11 +36,11 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -519,7 +519,7 @@ struct check_datetime_format { * The checking here is a little more strict than the actual * parser used for conversion. 
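Alongside the stream-parameter and `count_matches` signature changes, this stretch of the diff migrates device-side optionals from `thrust::optional` to `cuda::std::optional` (libcu++), which is usable in both host and device code. A minimal illustration of the replacement idiom; `first_digit_value` is a hypothetical device function, not part of this diff:

```cpp
#include <cuda/std/optional>

__device__ cuda::std::optional<int> first_digit_value(char const* s, int len)
{
  for (int i = 0; i < len; ++i) {
    if (s[i] >= '0' && s[i] <= '9') { return s[i] - '0'; }
  }
  return cuda::std::nullopt;  // drop-in for the old thrust::nullopt
}
```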
*/ - __device__ thrust::optional check_string(string_view const& d_string) + __device__ cuda::std::optional check_string(string_view const& d_string) { timestamp_components dateparts = {1970, 1, 1, 0}; // init to epoch time @@ -529,7 +529,7 @@ struct check_datetime_format { // eliminate static character values first if (item.item_type == format_char_type::literal) { // check static character matches - if (*ptr != item.value) return thrust::nullopt; + if (*ptr != item.value) return cuda::std::nullopt; ptr += item.length; length -= item.length; continue; @@ -645,7 +645,7 @@ struct check_datetime_format { case 'Z': result = true; // skip default: break; } - if (!result) return thrust::nullopt; + if (!result) return cuda::std::nullopt; ptr += bytes_read; length -= bytes_read; } @@ -821,7 +821,7 @@ struct datetime_formatter_fn { // We only dissect the timestamp into components if needed // by a specifier. And then we only do it once and reuse it. // This can improve performance when not using uncommon specifiers. - thrust::optional days; + cuda::std::optional days; auto days_from_timestamp = [tstamp]() { auto const count = tstamp.time_since_epoch().count(); diff --git a/cpp/src/strings/convert/convert_ipv4.cu b/cpp/src/strings/convert/convert_ipv4.cu index 68a24e000ae..13d6e9bc3ba 100644 --- a/cpp/src/strings/convert/convert_ipv4.cu +++ b/cpp/src/strings/convert/convert_ipv4.cu @@ -46,7 +46,7 @@ namespace { struct ipv4_to_integers_fn { column_device_view const d_strings; - __device__ int64_t operator()(size_type idx) + __device__ uint32_t operator()(size_type idx) { if (d_strings.is_null(idx)) return 0; string_view d_str = d_strings.element(idx); @@ -66,7 +66,7 @@ struct ipv4_to_integers_fn { } } uint32_t result = (ipvals[0] << 24) + (ipvals[1] << 16) + (ipvals[2] << 8) + ipvals[3]; - return static_cast(result); + return result; } }; @@ -79,18 +79,18 @@ std::unique_ptr ipv4_to_integers(strings_column_view const& input, { size_type strings_count = input.size(); if (strings_count == 0) { - return make_numeric_column(data_type{type_id::INT64}, 0, mask_state::UNALLOCATED, stream); + return make_numeric_column(data_type{type_id::UINT32}, 0, mask_state::UNALLOCATED, stream); } auto strings_column = column_device_view::create(input.parent(), stream); // create output column copying the strings' null-mask - auto results = make_numeric_column(data_type{type_id::INT64}, + auto results = make_numeric_column(data_type{type_id::UINT32}, strings_count, cudf::detail::copy_bitmask(input.parent(), stream, mr), input.null_count(), stream, mr); - auto d_results = results->mutable_view().data(); + auto d_results = results->mutable_view().data(); // fill output column with ipv4 integers thrust::transform(rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -135,7 +135,7 @@ struct integers_to_ipv4_fn { return; } - auto const ip_number = d_column.element(idx); + auto const ip_number = d_column.element(idx); char* out_ptr = d_chars ? 
d_chars + d_offsets[idx] : nullptr; int shift_bits = 24; @@ -165,7 +165,7 @@ std::unique_ptr integers_to_ipv4(column_view const& integers, { if (integers.is_empty()) return make_empty_column(type_id::STRING); - CUDF_EXPECTS(integers.type().id() == type_id::INT64, "Input column must be type_id::INT64 type"); + CUDF_EXPECTS(integers.type().id() == type_id::UINT32, "Input column must be UINT32 type"); auto d_column = column_device_view::create(integers, stream); auto [offsets_column, chars] = diff --git a/cpp/src/strings/copying/copy_range.cu b/cpp/src/strings/copying/copy_range.cu index 9f8c47602f8..2434de1795e 100644 --- a/cpp/src/strings/copying/copy_range.cu +++ b/cpp/src/strings/copying/copy_range.cu @@ -40,20 +40,14 @@ struct compute_element_size { size_type source_begin; size_type target_begin; size_type target_end; - bool source_has_nulls; - bool target_has_nulls; __device__ cudf::size_type operator()(cudf::size_type idx) { if (idx >= target_begin && idx < target_end) { auto const str_idx = source_begin + (idx - target_begin); - return source_has_nulls && d_source.is_null_nocheck(str_idx) - ? 0 - : d_source.element(str_idx).size_bytes(); + return d_source.is_null(str_idx) ? 0 : d_source.element(str_idx).size_bytes(); } else { - return target_has_nulls && d_target.is_null_nocheck(idx) - ? 0 - : d_target.element(idx).size_bytes(); + return d_target.is_null(idx) ? 0 : d_target.element(idx).size_bytes(); } } }; @@ -97,20 +91,9 @@ std::unique_ptr copy_range(strings_column_view const& source, mr); }(); - auto [check_source, check_target] = [target, null_count = null_count] { - // check validities for both source & target - if (target.has_nulls()) { return std::make_pair(true, true); } - // check validities for source only - if (null_count > 0) { return std::make_pair(true, false); } - // no need to check validities - return std::make_pair(false, false); - }(); - // create offsets auto sizes_begin = cudf::detail::make_counting_transform_iterator( - 0, - compute_element_size{ - d_source, d_target, source_begin, target_begin, target_end, check_source, check_target}); + 0, compute_element_size{d_source, d_target, source_begin, target_begin, target_end}); auto [offsets_column, chars_bytes] = cudf::strings::detail::make_offsets_child_column( sizes_begin, sizes_begin + target.size(), stream, mr); auto d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view()); diff --git a/cpp/src/strings/count_matches.cu b/cpp/src/strings/count_matches.cu index e8672ea5335..4ad3a75baf7 100644 --- a/cpp/src/strings/count_matches.cu +++ b/cpp/src/strings/count_matches.cu @@ -60,18 +60,15 @@ struct count_fn { std::unique_ptr count_matches(column_device_view const& d_strings, reprog_device& d_prog, - size_type output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - assert(output_size >= d_strings.size() and "Unexpected output size"); - auto results = make_numeric_column( - data_type{type_to_id()}, output_size, mask_state::UNALLOCATED, stream, mr); + data_type{type_to_id()}, d_strings.size(), mask_state::UNALLOCATED, stream, mr); - if (d_strings.size() == 0) return results; + if (d_strings.size() == 0) { return results; } - auto d_results = results->mutable_view().data(); + auto d_results = results->mutable_view().data(); launch_transform_kernel(count_fn{d_strings}, d_prog, d_results, d_strings.size(), stream); diff --git a/cpp/src/strings/count_matches.hpp b/cpp/src/strings/count_matches.hpp index 4a5efac37fd..eab9863b975 100644 --- 
a/cpp/src/strings/count_matches.hpp +++ b/cpp/src/strings/count_matches.hpp @@ -37,14 +37,12 @@ class reprog_device; * * @param d_strings Device view of the input strings column. * @param d_prog Regex instance to evaluate on each string. - * @param output_size Number of rows for the output column. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return Integer column of match counts */ std::unique_ptr count_matches(column_device_view const& d_strings, reprog_device& d_prog, - size_type output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu index 27691068d5a..897eba58833 100644 --- a/cpp/src/strings/extract/extract_all.cu +++ b/cpp/src/strings/extract/extract_all.cu @@ -119,7 +119,7 @@ std::unique_ptr extract_all_record(strings_column_view const& input, // Get the match counts for each string. // This column will become the output lists child offsets column. - auto counts = count_matches(*d_strings, *d_prog, strings_count, stream, mr); + auto counts = count_matches(*d_strings, *d_prog, stream, mr); auto d_counts = counts->mutable_view().data(); // Compute null output rows diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh index e6134296e45..2df404048f7 100644 --- a/cpp/src/strings/regex/regex.cuh +++ b/cpp/src/strings/regex/regex.cuh @@ -23,8 +23,8 @@ #include +#include #include -#include #include #include @@ -36,7 +36,7 @@ namespace detail { struct relist; using match_pair = thrust::pair; -using match_result = thrust::optional; +using match_result = cuda::std::optional; constexpr int32_t MAX_SHARED_MEM = 2048; ///< Memory size for storing prog instruction data constexpr std::size_t MAX_WORKING_MEM = 0x01'FFFF'FFFF; ///< Memory size for state data diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index 23e1944cda4..3b899e4edc1 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -260,12 +260,12 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const switch (jnk.starttype) { case BOL: if (pos == 0) break; - if (jnk.startchar != '^') { return thrust::nullopt; } + if (jnk.startchar != '^') { return cuda::std::nullopt; } --itr; startchar = static_cast('\n'); case CHAR: { auto const find_itr = find_char(startchar, dstr, itr); - if (find_itr.byte_offset() >= dstr.size_bytes()) { return thrust::nullopt; } + if (find_itr.byte_offset() >= dstr.size_bytes()) { return cuda::std::nullopt; } itr = find_itr + (jnk.starttype == BOL); pos = itr.position(); break; @@ -396,7 +396,7 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const checkstart = jnk.list1->get_size() == 0; } while (!last_character && (!checkstart || !match)); - return match ? match_result({begin, end}) : thrust::nullopt; + return match ? 
match_result({begin, end}) : cuda::std::nullopt; } __device__ __forceinline__ match_result reprog_device::find(int32_t const thread_idx, diff --git a/cpp/src/strings/replace/multi.cu b/cpp/src/strings/replace/multi.cu index 2ca22f0e017..b5248700d53 100644 --- a/cpp/src/strings/replace/multi.cu +++ b/cpp/src/strings/replace/multi.cu @@ -533,16 +533,5 @@ std::unique_ptr replace_multiple(strings_column_view const& strings, return detail::replace_multiple(strings, targets, repls, stream, mr); } -// deprecated in 24.08 -std::unique_ptr replace(strings_column_view const& strings, - strings_column_view const& targets, - strings_column_view const& repls, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::replace_multiple(strings, targets, repls, stream, mr); -} - } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index 31234ea42ec..0ad3ab2305c 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -92,7 +92,7 @@ struct replace_multi_regex_fn { } reprog_device prog = progs[ptn_idx]; - auto const result = !prog.is_empty() ? prog.find(idx, d_str, itr) : thrust::nullopt; + auto const result = !prog.is_empty() ? prog.find(idx, d_str, itr) : cuda::std::nullopt; d_ranges[ptn_idx] = result ? found_range{result->first, result->second} : found_range{nchars, nchars}; } diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu index 0d0962258cf..2f7e7352458 100644 --- a/cpp/src/strings/search/findall.cu +++ b/cpp/src/strings/search/findall.cu @@ -104,7 +104,7 @@ std::unique_ptr findall(strings_column_view const& input, auto d_prog = regex_device_builder::create_prog_device(prog, stream); // Create lists offsets column - auto const sizes = count_matches(*d_strings, *d_prog, strings_count, stream, mr); + auto const sizes = count_matches(*d_strings, *d_prog, stream, mr); auto [offsets, total_matches] = cudf::detail::make_offsets_child_column( sizes->view().begin(), sizes->view().end(), stream, mr); auto const d_offsets = offsets->view().data(); diff --git a/cpp/src/strings/slice.cu b/cpp/src/strings/slice.cu index cf82a837c51..d8324a9b08e 100644 --- a/cpp/src/strings/slice.cu +++ b/cpp/src/strings/slice.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,8 @@ #include #include +#include +#include #include #include #include @@ -40,6 +43,9 @@ namespace cudf { namespace strings { namespace detail { namespace { + +constexpr size_type AVG_CHAR_BYTES_THRESHOLD = 128; + /** * @brief Function logic for compute_substrings_from_fn API * @@ -51,17 +57,19 @@ struct substring_from_fn { IndexIterator const starts; IndexIterator const stops; - __device__ string_view operator()(size_type idx) const + __device__ string_index_pair operator()(size_type idx) const { - if (d_column.is_null(idx)) { return string_view{nullptr, 0}; } + if (d_column.is_null(idx)) { return string_index_pair{nullptr, 0}; } auto const d_str = d_column.template element(idx); auto const length = d_str.length(); auto const start = std::max(starts[idx], 0); - if (start >= length) { return string_view{}; } + if (start >= length) { return string_index_pair{"", 0}; } - auto const stop = stops[idx]; - auto const end = (((stop < 0) || (stop > length)) ? length : stop); - return start < end ? 
d_str.substr(start, end - start) : string_view{}; + auto const stop = stops[idx]; + auto const end = (((stop < 0) || (stop > length)) ? length : stop); + auto const sub_str = start < end ? d_str.substr(start, end - start) : string_view{}; + return sub_str.empty() ? string_index_pair{"", 0} + : string_index_pair{sub_str.data(), sub_str.size_bytes()}; } substring_from_fn(column_device_view const& d_column, IndexIterator starts, IndexIterator stops) @@ -70,6 +78,82 @@ struct substring_from_fn { } }; +template +CUDF_KERNEL void substring_from_kernel(column_device_view const d_strings, + IndexIterator starts, + IndexIterator stops, + string_index_pair* d_output) +{ + auto const idx = cudf::detail::grid_1d::global_thread_id(); + auto const str_idx = idx / cudf::detail::warp_size; + if (str_idx >= d_strings.size()) { return; } + + namespace cg = cooperative_groups; + auto const warp = cg::tiled_partition(cg::this_thread_block()); + + if (d_strings.is_null(str_idx)) { + if (warp.thread_rank() == 0) { d_output[str_idx] = string_index_pair{nullptr, 0}; } + return; + } + auto const d_str = d_strings.element(str_idx); + if (d_str.empty()) { + if (warp.thread_rank() == 0) { d_output[str_idx] = string_index_pair{"", 0}; } + return; + } + + auto const start = max(starts[str_idx], 0); + auto stop = [stop = stops[str_idx]] { + return (stop < 0) ? std::numeric_limits::max() : stop; + }(); + auto const end = d_str.data() + d_str.size_bytes(); + + auto start_counts = thrust::make_pair(0, 0); + auto stop_counts = thrust::make_pair(0, 0); + + auto itr = d_str.data() + warp.thread_rank(); + + size_type char_count = 0; + size_type byte_count = 0; + while (byte_count < d_str.size_bytes()) { + if (char_count <= start) { start_counts = {char_count, byte_count}; } + if (char_count <= stop) { + stop_counts = {char_count, byte_count}; + } else { + break; + } + size_type const cc = (itr < end) && is_begin_utf8_char(*itr); + size_type const bc = (itr < end); + char_count += cg::reduce(warp, cc, cg::plus()); + byte_count += cg::reduce(warp, bc, cg::plus()); + itr += cudf::detail::warp_size; + } + + if (warp.thread_rank() == 0) { + if (start >= char_count) { + d_output[str_idx] = string_index_pair{"", 0}; + return; + } + + // we are just below start/stop and must now increment up to it from here + auto first_byte = start_counts.second; + if (start_counts.first < start) { + auto const sub_str = string_view(d_str.data() + first_byte, d_str.size_bytes() - first_byte); + first_byte += std::get<0>(bytes_to_character_position(sub_str, start - start_counts.first)); + } + + stop = max(stop, char_count); + auto last_byte = stop_counts.second; + if (stop_counts.first < stop) { + auto const sub_str = string_view(d_str.data() + last_byte, d_str.size_bytes() - last_byte); + last_byte += std::get<0>(bytes_to_character_position(sub_str, stop - stop_counts.first)); + } + + d_output[str_idx] = (first_byte < last_byte) + ? string_index_pair{d_str.data() + first_byte, last_byte - first_byte} + : string_index_pair{"", 0}; + } +} + /** * @brief Function logic for the substring API. 
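The `substring_from_kernel` above assigns one warp per string and counts UTF-8 characters cooperatively: each lane classifies one byte per step and `cg::reduce` sums the lane results so a long string is scanned 32 bytes at a time. A condensed sketch of just that counting idiom (illustrative, not the libcudf implementation):

```cpp
#include <cooperative_groups.h>
#include <cooperative_groups/reduce.h>
namespace cg = cooperative_groups;

__device__ int warp_count_utf8_chars(char const* data, int size_bytes)
{
  auto const warp = cg::tiled_partition<32>(cg::this_thread_block());
  int count = 0;
  for (int base = 0; base < size_bytes; base += 32) {
    auto const pos = base + static_cast<int>(warp.thread_rank());
    // UTF-8 continuation bytes match 0b10xxxxxx; every other byte begins a character
    int const begins_char =
      (pos < size_bytes) && ((static_cast<unsigned char>(data[pos]) & 0xC0) != 0x80);
    count += cg::reduce(warp, begins_char, cg::plus<int>());
  }
  return count;  // all 32 lanes hold the same total
}
```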
* @@ -149,54 +233,67 @@ struct substring_fn { * * @tparam IndexIterator Iterator type for character position values * - * @param d_column Input strings column to substring + * @param input Input strings column to substring * @param starts Start positions index iterator * @param stops Stop positions index iterator * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory */ template -std::unique_ptr compute_substrings_from_fn(column_device_view const& d_column, +std::unique_ptr compute_substrings_from_fn(strings_column_view const& input, IndexIterator starts, IndexIterator stops, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto results = rmm::device_uvector(d_column.size(), stream); - thrust::transform(rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(d_column.size()), - results.begin(), - substring_from_fn{d_column, starts, stops}); - return make_strings_column(results, string_view{nullptr, 0}, stream, mr); + auto results = rmm::device_uvector(input.size(), stream); + + auto const d_column = column_device_view::create(input.parent(), stream); + + if ((input.chars_size(stream) / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD) { + thrust::transform(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + results.begin(), + substring_from_fn{*d_column, starts, stops}); + } else { + constexpr thread_index_type block_size = 512; + auto const threads = + static_cast(input.size()) * cudf::detail::warp_size; + auto const num_blocks = util::div_rounding_up_safe(threads, block_size); + substring_from_kernel + <<>>(*d_column, starts, stops, results.data()); + } + return make_strings_column(results.begin(), results.end(), stream, mr); } } // namespace // -std::unique_ptr slice_strings(strings_column_view const& strings, +std::unique_ptr slice_strings(strings_column_view const& input, numeric_scalar const& start, numeric_scalar const& stop, numeric_scalar const& step, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - if (strings.is_empty()) return make_empty_column(type_id::STRING); + if (input.size() == input.null_count()) { + return std::make_unique(input.parent(), stream, mr); + } auto const step_valid = step.is_valid(stream); - auto const step_value = step_valid ? step.value(stream) : 0; + auto const step_value = step_valid ? step.value(stream) : 1; if (step_valid) { CUDF_EXPECTS(step_value != 0, "Step parameter must not be 0"); } - auto const d_column = column_device_view::create(strings.parent(), stream); - // optimization for (step==1 and start < stop) -- expect this to be most common - if (step_value == 1 and start.is_valid(stream) and stop.is_valid(stream)) { - auto const start_value = start.value(stream); - auto const stop_value = stop.value(stream); + if (step_value == 1) { + auto const start_value = start.is_valid(stream) ? start.value(stream) : 0; + auto const stop_value = + stop.is_valid(stream) ? 
 //
-std::unique_ptr<column> slice_strings(strings_column_view const& strings,
+std::unique_ptr<column> slice_strings(strings_column_view const& input,
                                       numeric_scalar<size_type> const& start,
                                       numeric_scalar<size_type> const& stop,
                                       numeric_scalar<size_type> const& step,
                                       rmm::cuda_stream_view stream,
                                       rmm::device_async_resource_ref mr)
 {
-  if (strings.is_empty()) return make_empty_column(type_id::STRING);
+  if (input.size() == input.null_count()) {
+    return std::make_unique<column>(input.parent(), stream, mr);
+  }
 
   auto const step_valid = step.is_valid(stream);
-  auto const step_value = step_valid ? step.value(stream) : 0;
+  auto const step_value = step_valid ? step.value(stream) : 1;
   if (step_valid) { CUDF_EXPECTS(step_value != 0, "Step parameter must not be 0"); }
 
-  auto const d_column = column_device_view::create(strings.parent(), stream);
-
-  // optimization for (step==1 and start < stop) -- expect this to be most common
-  if (step_value == 1 and start.is_valid(stream) and stop.is_valid(stream)) {
-    auto const start_value = start.value(stream);
-    auto const stop_value  = stop.value(stream);
+  if (step_value == 1) {
+    auto const start_value = start.is_valid(stream) ? start.value(stream) : 0;
+    auto const stop_value =
+      stop.is_valid(stream) ? stop.value(stream) : std::numeric_limits<size_type>::max();
     // note that any negative values here must use the alternate function below
     if ((start_value >= 0) && (start_value < stop_value)) {
       // this is about 2x faster on long strings for this common case
-      return compute_substrings_from_fn(*d_column,
+      return compute_substrings_from_fn(input,
                                         thrust::constant_iterator<size_type>(start_value),
                                         thrust::constant_iterator<size_type>(stop_value),
                                         stream,
@@ -204,31 +301,35 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
     }
   }
 
+  auto const d_column = column_device_view::create(input.parent(), stream);
+
   auto const d_start = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(start));
   auto const d_stop  = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(stop));
   auto const d_step  = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(step));
 
   auto [offsets, chars] = make_strings_children(
-    substring_fn{*d_column, d_start, d_stop, d_step}, strings.size(), stream, mr);
+    substring_fn{*d_column, d_start, d_stop, d_step}, input.size(), stream, mr);
 
-  return make_strings_column(strings.size(),
+  return make_strings_column(input.size(),
                              std::move(offsets),
                              chars.release(),
-                             strings.null_count(),
-                             cudf::detail::copy_bitmask(strings.parent(), stream, mr));
+                             input.null_count(),
+                             cudf::detail::copy_bitmask(input.parent(), stream, mr));
 }
 
-std::unique_ptr<column> slice_strings(strings_column_view const& strings,
+std::unique_ptr<column> slice_strings(strings_column_view const& input,
                                       column_view const& starts_column,
                                       column_view const& stops_column,
                                       rmm::cuda_stream_view stream,
                                       rmm::device_async_resource_ref mr)
 {
-  size_type strings_count = strings.size();
-  if (strings_count == 0) return make_empty_column(type_id::STRING);
-  CUDF_EXPECTS(starts_column.size() == strings_count,
+  if (input.size() == input.null_count()) {
+    return std::make_unique<column>(input.parent(), stream, mr);
+  }
+
+  CUDF_EXPECTS(starts_column.size() == input.size(),
                "Parameter starts must have the same number of rows as strings.");
-  CUDF_EXPECTS(stops_column.size() == strings_count,
+  CUDF_EXPECTS(stops_column.size() == input.size(),
                "Parameter stops must have the same number of rows as strings.");
   CUDF_EXPECTS(cudf::have_same_types(starts_column, stops_column),
                "Parameters starts and stops must be of the same type.",
@@ -242,17 +343,16 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
                "Positions values must be fixed width type.",
                cudf::data_type_error);
 
-  auto strings_column = column_device_view::create(strings.parent(), stream);
-  auto starts_iter    = cudf::detail::indexalator_factory::make_input_iterator(starts_column);
-  auto stops_iter     = cudf::detail::indexalator_factory::make_input_iterator(stops_column);
-  return compute_substrings_from_fn(*strings_column, starts_iter, stops_iter, stream, mr);
+  auto starts_iter = cudf::detail::indexalator_factory::make_input_iterator(starts_column);
+  auto stops_iter  = cudf::detail::indexalator_factory::make_input_iterator(stops_column);
+  return compute_substrings_from_fn(input, starts_iter, stops_iter, stream, mr);
 }
 
 }  // namespace detail
 
 // external API
 
-std::unique_ptr<column> slice_strings(strings_column_view const& strings,
+std::unique_ptr<column> slice_strings(strings_column_view const& input,
                                       numeric_scalar<size_type> const& start,
                                       numeric_scalar<size_type> const& stop,
                                       numeric_scalar<size_type> const& step,
@@ -260,17 +360,17 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
                                       rmm::device_async_resource_ref mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::slice_strings(strings, start, stop, step, stream, mr);
+  return detail::slice_strings(input, start, stop, step, stream, mr);
 }
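A hypothetical caller exercising the optimized step==1 path through the public API shown above (helper name invented; values illustrative):

```cpp
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/slice.hpp>
#include <cudf/strings/strings_column_view.hpp>

std::unique_ptr<cudf::column> first_five_chars(cudf::strings_column_view const& input,
                                               rmm::cuda_stream_view stream)
{
  auto const start = cudf::numeric_scalar<cudf::size_type>(0, true, stream);
  auto const stop  = cudf::numeric_scalar<cudf::size_type>(5, true, stream);
  auto const step  = cudf::numeric_scalar<cudf::size_type>(1, true, stream);
  // step==1 with non-negative start < stop takes the fast path above
  return cudf::strings::slice_strings(input, start, stop, step, stream);
}
```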
-std::unique_ptr slice_strings(strings_column_view const& strings, +std::unique_ptr slice_strings(strings_column_view const& input, column_view const& starts_column, column_view const& stops_column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::slice_strings(strings, starts_column, stops_column, stream, mr); + return detail::slice_strings(input, starts_column, stops_column, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/split/split.cuh b/cpp/src/strings/split/split.cuh index 4d7096c02ca..af70367678e 100644 --- a/cpp/src/strings/split/split.cuh +++ b/cpp/src/strings/split/split.cuh @@ -142,7 +142,7 @@ struct base_split_tokenizer { // max_tokens already included in token counts if (d_tokens.size() == 1) { - d_tokens[0] = string_index_pair{d_str.data(), d_str.size_bytes()}; + d_tokens[0] = string_index_pair{(d_str.empty() ? "" : d_str.data()), d_str.size_bytes()}; return; } @@ -357,24 +357,20 @@ std::pair, rmm::device_uvector> split auto const chars_bytes = get_offset_value(input.offsets(), input.offset() + strings_count, stream) - get_offset_value(input.offsets(), input.offset(), stream); - if (chars_bytes == 0) { - auto offsets = cudf::make_column_from_scalar( - numeric_scalar(0, true, stream), strings_count + 1, stream, mr); - auto tokens = rmm::device_uvector(0, stream); - return std::pair{std::move(offsets), std::move(tokens)}; - } auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset()); // count the number of delimiters in the entire column rmm::device_scalar d_count(0, stream); - constexpr int64_t block_size = 512; - constexpr size_type bytes_per_thread = 4; - auto const num_blocks = util::div_rounding_up_safe( - util::div_rounding_up_safe(chars_bytes, static_cast(bytes_per_thread)), block_size); - count_delimiters_kernel - <<>>( - tokenizer, d_offsets, chars_bytes, d_count.data()); + if (chars_bytes > 0) { + constexpr int64_t block_size = 512; + constexpr size_type bytes_per_thread = 4; + auto const num_blocks = util::div_rounding_up_safe( + util::div_rounding_up_safe(chars_bytes, static_cast(bytes_per_thread)), block_size); + count_delimiters_kernel + <<>>( + tokenizer, d_offsets, chars_bytes, d_count.data()); + } // Create a vector of every delimiter position in the chars column. // These may include overlapping or otherwise out-of-bounds delimiters which diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu index d72ec1085b5..d273c93ec12 100644 --- a/cpp/src/strings/split/split_re.cu +++ b/cpp/src/strings/split/split_re.cu @@ -71,6 +71,10 @@ struct token_reader_fn { auto const token_offset = d_token_offsets[idx]; auto const token_count = d_token_offsets[idx + 1] - token_offset; auto const d_result = d_tokens + token_offset; // store tokens here + if (nchars == 0) { + d_result[0] = string_index_pair{"", 0}; + return; + } int64_t token_idx = 0; auto itr = d_str.begin(); @@ -206,8 +210,8 @@ std::unique_ptr
split_re(strings_column_view const& input, auto d_strings = column_device_view::create(input.parent(), stream); // count the number of delimiters matched in each string - auto const counts = count_matches( - *d_strings, *d_prog, strings_count, stream, rmm::mr::get_current_device_resource()); + auto const counts = + count_matches(*d_strings, *d_prog, stream, rmm::mr::get_current_device_resource()); // get the split tokens from the input column; this also converts the counts into offsets auto [tokens, offsets] = @@ -271,7 +275,7 @@ std::unique_ptr split_record_re(strings_column_view const& input, auto d_strings = column_device_view::create(input.parent(), stream); // count the number of delimiters matched in each string - auto counts = count_matches(*d_strings, *d_prog, strings_count, stream, mr); + auto counts = count_matches(*d_strings, *d_prog, stream, mr); // get the split tokens from the input column; this also converts the counts into offsets auto [tokens, offsets] = diff --git a/cpp/src/text/bpe/byte_pair_encoding.cuh b/cpp/src/text/bpe/byte_pair_encoding.cuh index a2e441c3284..69c77224eb7 100644 --- a/cpp/src/text/bpe/byte_pair_encoding.cuh +++ b/cpp/src/text/bpe/byte_pair_encoding.cuh @@ -106,7 +106,7 @@ using merge_pairs_map_type = cuco::static_map, cuco_storage>; /** @@ -164,7 +164,7 @@ using mp_table_map_type = cuco::static_map, cuco_storage>; } // namespace detail diff --git a/cpp/src/text/bpe/load_merge_pairs.cu b/cpp/src/text/bpe/load_merge_pairs.cu index f34c5c4f7f6..9fb86aecce3 100644 --- a/cpp/src/text/bpe/load_merge_pairs.cu +++ b/cpp/src/text/bpe/load_merge_pairs.cu @@ -43,16 +43,16 @@ namespace { std::unique_ptr initialize_merge_pairs_map( cudf::column_device_view const& input, rmm::cuda_stream_view stream) { - auto merge_pairs_map = - std::make_unique(static_cast(input.size()), - cuco::empty_key{-1}, - cuco::empty_value{-1}, - bpe_equal{input}, - bpe_probe_scheme{bpe_hasher{input}}, - cuco::thread_scope_device, - cuco_storage{}, - cudf::detail::cuco_allocator{stream}, - stream.value()); + auto merge_pairs_map = std::make_unique( + static_cast(input.size()), + cuco::empty_key{-1}, + cuco::empty_value{-1}, + bpe_equal{input}, + bpe_probe_scheme{bpe_hasher{input}}, + cuco::thread_scope_device, + cuco_storage{}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()); auto iter = cudf::detail::make_counting_transform_iterator( 0, @@ -67,15 +67,16 @@ std::unique_ptr initialize_merge_pairs_map( std::unique_ptr initialize_mp_table_map( cudf::column_device_view const& input, rmm::cuda_stream_view stream) { - auto mp_table_map = std::make_unique(static_cast(input.size()), - cuco::empty_key{-1}, - cuco::empty_value{-1}, - mp_equal{input}, - mp_probe_scheme{mp_hasher{input}}, - cuco::thread_scope_device, - cuco_storage{}, - cudf::detail::cuco_allocator{stream}, - stream.value()); + auto mp_table_map = std::make_unique( + static_cast(input.size()), + cuco::empty_key{-1}, + cuco::empty_value{-1}, + mp_equal{input}, + mp_probe_scheme{mp_hasher{input}}, + cuco::thread_scope_device, + cuco_storage{}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()); auto iter = cudf::detail::make_counting_transform_iterator( 0, diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 724f3603f29..6f700f84ec4 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -36,10 +36,12 @@ #include #include +#include +#include #include +#include #include #include -#include 
 #include
 
@@ -165,6 +167,47 @@ std::unique_ptr<cudf::column> generate_ngrams(cudf::strings_column_view const& strings,
 namespace detail {
 namespace {
 
+constexpr cudf::thread_index_type block_size       = 256;
+constexpr cudf::thread_index_type bytes_per_thread = 4;
+
+/**
+ * @brief Counts the number of ngrams in each row of the given strings column
+ *
+ * Each warp processes a single string.
+ * Formula is `count = max(0,str.length() - ngrams + 1)`
+ * If a string has less than ngrams characters, its count is 0.
+ */
+CUDF_KERNEL void count_char_ngrams_kernel(cudf::column_device_view const d_strings,
+                                          cudf::size_type ngrams,
+                                          cudf::size_type* d_counts)
+{
+  auto const idx = cudf::detail::grid_1d::global_thread_id();
+
+  auto const str_idx = idx / cudf::detail::warp_size;
+  if (str_idx >= d_strings.size()) { return; }
+  if (d_strings.is_null(str_idx)) {
+    d_counts[str_idx] = 0;
+    return;
+  }
+
+  namespace cg    = cooperative_groups;
+  auto const warp = cg::tiled_partition<cudf::detail::warp_size>(cg::this_thread_block());
+
+  auto const d_str = d_strings.element<cudf::string_view>(str_idx);
+  auto const end   = d_str.data() + d_str.size_bytes();
+
+  auto const lane_idx   = warp.thread_rank();
+  cudf::size_type count = 0;
+  for (auto itr = d_str.data() + (lane_idx * bytes_per_thread); itr < end;
+       itr += cudf::detail::warp_size * bytes_per_thread) {
+    for (auto s = itr; (s < (itr + bytes_per_thread)) && (s < end); ++s) {
+      count += static_cast<cudf::size_type>(cudf::strings::detail::is_begin_utf8_char(*s));
+    }
+  }
+  auto const char_count = cg::reduce(warp, count, cg::plus<cudf::size_type>());
+  if (lane_idx == 0) { d_counts[str_idx] = cuda::std::max(0, char_count - ngrams + 1); }
+}
+
 /**
  * @brief Generate character ngrams for each string
  *
@@ -220,17 +263,16 @@ std::unique_ptr<cudf::column> generate_character_ngrams(cudf::strings_column_view const& input,
 
   auto const d_strings = cudf::column_device_view::create(input.parent(), stream);
 
-  auto sizes_itr = cudf::detail::make_counting_transform_iterator(
-    0,
-    cuda::proclaim_return_type<cudf::size_type>(
-      [d_strings = *d_strings, ngrams] __device__(auto idx) {
-        if (d_strings.is_null(idx)) { return 0; }
-        auto const length = d_strings.element<cudf::string_view>(idx).length();
-        return std::max(0, static_cast<cudf::size_type>(length + 1 - ngrams));
-      }));
-  auto [offsets, total_ngrams] =
-    cudf::detail::make_offsets_child_column(sizes_itr, sizes_itr + input.size(), stream, mr);
+  auto [offsets, total_ngrams] = [&] {
+    auto counts           = rmm::device_uvector<cudf::size_type>(input.size(), stream);
+    auto const num_blocks = cudf::util::div_rounding_up_safe(
+      static_cast<cudf::thread_index_type>(input.size()) * cudf::detail::warp_size, block_size);
+    count_char_ngrams_kernel<<<num_blocks, block_size, 0, stream.value()>>>(
+      *d_strings, ngrams, counts.data());
+    return cudf::detail::make_offsets_child_column(counts.begin(), counts.end(), stream, mr);
+  }();
   auto d_offsets = offsets->view().data<cudf::size_type>();
 
   CUDF_EXPECTS(total_ngrams > 0,
               "Insufficient number of characters in each string to generate ngrams");
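The counting formula from the kernel's documentation, spelled out as standalone, compile-time-checkable code:

```cpp
// A string of n characters yields max(0, n - ngrams + 1) character ngrams.
constexpr int ngram_count(int char_count, int ngrams)
{
  return char_count >= ngrams ? char_count - ngrams + 1 : 0;
}
static_assert(ngram_count(5, 2) == 4, "\"abcde\" -> ab bc cd de");
static_assert(ngram_count(1, 2) == 0, "too short for any bigram");
```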
@@ -246,36 +288,64 @@ std::unique_ptr<cudf::column> generate_character_ngrams(cudf::strings_column_view const& input,
 }
 
 namespace {
+
 /**
  * @brief Computes the hash of each character ngram
  *
- * Each thread processes a single string. Substrings are resolved for every character
+ * Each warp processes a single string. Substrings are resolved for every character
  * of the string and hashed.
  */
-struct character_ngram_hash_fn {
-  cudf::column_device_view const d_strings;
-  cudf::size_type ngrams;
-  cudf::size_type const* d_ngram_offsets;
-  cudf::hash_value_type* d_results;
+CUDF_KERNEL void character_ngram_hash_kernel(cudf::column_device_view const d_strings,
+                                             cudf::size_type ngrams,
+                                             cudf::size_type const* d_ngram_offsets,
+                                             cudf::hash_value_type* d_results)
+{
+  auto const idx = cudf::detail::grid_1d::global_thread_id();
+  if (idx >= (static_cast<cudf::thread_index_type>(d_strings.size()) * cudf::detail::warp_size)) {
+    return;
+  }
 
-  __device__ void operator()(cudf::size_type idx) const
-  {
-    if (d_strings.is_null(idx)) return;
-    auto const d_str = d_strings.element<cudf::string_view>(idx);
-    if (d_str.empty()) return;
-    auto itr                = d_str.begin();
-    auto const ngram_offset = d_ngram_offsets[idx];
-    auto const ngram_count  = d_ngram_offsets[idx + 1] - ngram_offset;
-    auto const hasher       = cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{0};
-    auto d_hashes           = d_results + ngram_offset;
-    for (cudf::size_type n = 0; n < ngram_count; ++n, ++itr) {
-      auto const begin = itr.byte_offset();
-      auto const end   = (itr + ngrams).byte_offset();
-      auto const ngram = cudf::string_view(d_str.data() + begin, end - begin);
-      *d_hashes++      = hasher(ngram);
+  auto const str_idx = idx / cudf::detail::warp_size;
+
+  if (d_strings.is_null(str_idx)) { return; }
+  auto const d_str = d_strings.element<cudf::string_view>(str_idx);
+  if (d_str.empty()) { return; }
+
+  __shared__ cudf::hash_value_type hvs[block_size];  // temp store for hash values
+
+  auto const ngram_offset = d_ngram_offsets[str_idx];
+  auto const hasher       = cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{0};
+
+  auto const end        = d_str.data() + d_str.size_bytes();
+  auto const warp_count = (d_str.size_bytes() / cudf::detail::warp_size) + 1;
+  auto const lane_idx   = idx % cudf::detail::warp_size;
+
+  auto d_hashes = d_results + ngram_offset;
+  auto itr      = d_str.data() + lane_idx;
+  for (auto i = 0; i < warp_count; ++i) {
+    cudf::hash_value_type hash = 0;
+    if (itr < end && cudf::strings::detail::is_begin_utf8_char(*itr)) {
+      // resolve ngram substring
+      auto const sub_str =
+        cudf::string_view(itr, static_cast<cudf::size_type>(thrust::distance(itr, end)));
+      auto const [bytes, left] =
+        cudf::strings::detail::bytes_to_character_position(sub_str, ngrams);
+      if (left == 0) { hash = hasher(cudf::string_view(itr, bytes)); }
     }
+    hvs[threadIdx.x] = hash;  // store hash into shared memory
+    __syncwarp();
+    if (lane_idx == 0) {
+      // copy valid hash values into d_hashes
+      auto const hashes = &hvs[threadIdx.x];
+      d_hashes          = thrust::copy_if(
+        thrust::seq, hashes, hashes + cudf::detail::warp_size, d_hashes, [](auto h) {
+          return h != 0;
+        });
+    }
+    __syncwarp();
+    itr += cudf::detail::warp_size;
   }
-};
+}
 
 }  // namespace
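A standalone sketch of the per-warp compaction idiom used above: lanes stage candidate values in shared memory and lane 0 copies the valid ones out in order. The names, the zero-as-invalid convention, and the fixed per-warp output windows are inventions of this sketch; the real kernel writes to exact precomputed offsets:

```cpp
// Assumes blockDim.x == 256 (a multiple of the 32-thread warp size).
__global__ void compact_per_warp(int const* in, int* out, int n)
{
  __shared__ int staged[256];           // one slot per thread in the block
  auto const tid  = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
  auto const lane = threadIdx.x % 32;

  int const value     = (tid < n) ? in[tid] : 0;  // 0 marks "invalid" here
  staged[threadIdx.x] = value;
  __syncwarp();

  if (lane == 0) {
    int* base = &staged[threadIdx.x];   // this warp's 32 slots
    int* dst  = out + (tid / 32) * 32;  // per-warp output window
    for (int i = 0; i < 32; ++i) {
      if (base[i] != 0) { *dst++ = base[i]; }  // keep results in lane order
    }
  }
}
```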
 
 std::unique_ptr<cudf::column> hash_character_ngrams(cudf::strings_column_view const& input,
@@ -291,18 +361,16 @@ std::unique_ptr<cudf::column> hash_character_ngrams(cudf::strings_column_view const& input,
 
   if (input.is_empty()) { return cudf::make_empty_column(output_type); }
 
   auto const d_strings = cudf::column_device_view::create(input.parent(), stream);
+  auto const grid      = cudf::detail::grid_1d(
+    static_cast<cudf::thread_index_type>(input.size()) * cudf::detail::warp_size, block_size);
 
   // build offsets column by computing the number of ngrams per string
-  auto sizes_itr = cudf::detail::make_counting_transform_iterator(
-    0,
-    cuda::proclaim_return_type<cudf::size_type>(
-      [d_strings = *d_strings, ngrams] __device__(auto idx) {
-        if (d_strings.is_null(idx)) { return 0; }
-        auto const length = d_strings.element<cudf::string_view>(idx).length();
-        return std::max(0, static_cast<cudf::size_type>(length + 1 - ngrams));
-      }));
-  auto [offsets, total_ngrams] =
-    cudf::detail::make_offsets_child_column(sizes_itr, sizes_itr + input.size(), stream, mr);
+  auto [offsets, total_ngrams] = [&] {
+    auto counts = rmm::device_uvector<cudf::size_type>(input.size(), stream);
+    count_char_ngrams_kernel<<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
+      *d_strings, ngrams, counts.data());
+    return cudf::detail::make_offsets_child_column(counts.begin(), counts.end(), stream, mr);
+  }();
   auto d_offsets = offsets->view().data<cudf::size_type>();
 
   CUDF_EXPECTS(total_ngrams > 0,
@@ -313,11 +381,8 @@ std::unique_ptr<cudf::column> hash_character_ngrams(cudf::strings_column_view const& input,
     cudf::make_numeric_column(output_type, total_ngrams, cudf::mask_state::UNALLOCATED, stream, mr);
   auto d_hashes = hashes->mutable_view().data<cudf::hash_value_type>();
 
-  character_ngram_hash_fn generator{*d_strings, ngrams, d_offsets, d_hashes};
-  thrust::for_each_n(rmm::exec_policy(stream),
-                     thrust::counting_iterator<cudf::size_type>(0),
-                     input.size(),
-                     generator);
+  character_ngram_hash_kernel<<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
+    *d_strings, ngrams, d_offsets, d_hashes);
 
   return make_lists_column(
     input.size(), std::move(offsets), std::move(hashes), 0, rmm::device_buffer{}, stream, mr);
diff --git a/cpp/src/text/jaccard.cu b/cpp/src/text/jaccard.cu
index e465fb79c89..e856b89b836 100644
--- a/cpp/src/text/jaccard.cu
+++ b/cpp/src/text/jaccard.cu
@@ -376,7 +376,7 @@ std::pair, rmm::device_uvector> hash_subs
                      sub_offsets.begin(),
                      sub_offsets.end(),
                      indices.begin());
-    return cudf::detail::make_std_vector_sync(indices, stream);
+    return cudf::detail::make_host_vector_sync(indices, stream);
   }();
 
   // Call segmented sort with the sort sections
diff --git a/cpp/src/text/vocabulary_tokenize.cu b/cpp/src/text/vocabulary_tokenize.cu
index 97abb1487d8..5945921ed9d 100644
--- a/cpp/src/text/vocabulary_tokenize.cu
+++ b/cpp/src/text/vocabulary_tokenize.cu
@@ -100,7 +100,7 @@ using vocabulary_map_type = cuco::static_map<cudf::size_type,
                                              cuco::thread_scope_device,
                                              vocab_equal,
                                              probe_scheme,
-                                             cudf::detail::cuco_allocator,
+                                             cudf::detail::cuco_allocator<char>,
                                              cuco_storage>;
 }  // namespace
 }  // namespace detail
@@ -152,7 +152,7 @@ tokenize_vocabulary::tokenize_vocabulary(cudf::strings_column_view const& input,
                          detail::probe_scheme{detail::vocab_hasher{*d_vocabulary}},
                          cuco::thread_scope_device,
                          detail::cuco_storage{},
-                         cudf::detail::cuco_allocator{stream},
+                         cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator<char>{}, stream},
                          stream.value());
 
   // the row index is the token id (value for each key in the map)
diff --git a/cpp/src/transform/bools_to_mask.cu b/cpp/src/transform/bools_to_mask.cu
index c12f65deb46..452aebf4428 100644
--- a/cpp/src/transform/bools_to_mask.cu
+++ b/cpp/src/transform/bools_to_mask.cu
@@ -59,10 +59,10 @@ std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
 }  // namespace detail
 
 std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
-  column_view const& input, rmm::device_async_resource_ref mr)
+  column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::bools_to_mask(input, cudf::get_default_stream(), mr);
+  return detail::bools_to_mask(input, stream, mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/transform/compute_column.cu b/cpp/src/transform/compute_column.cu
index 7960731f3a1..c4fc8d58552 100644
--- a/cpp/src/transform/compute_column.cu
+++ b/cpp/src/transform/compute_column.cu
@@ -138,10 +138,11 @@ std::unique_ptr<column> compute_column(table_view const& table,
 
 std::unique_ptr<column> compute_column(table_view const& table,
                                        ast::expression const& expr,
+                                       rmm::cuda_stream_view stream,
                                        rmm::device_async_resource_ref mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::compute_column(table, expr, cudf::get_default_stream(), mr);
+  return detail::compute_column(table, expr, stream, mr);
 }
 
 }  // namespace cudf
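A usage sketch for the stream parameter this change adds to cudf::compute_column; the AST computes col0 + col0 on a caller-provided stream (helper name invented):

```cpp
#include <cudf/ast/expressions.hpp>
#include <cudf/transform.hpp>

std::unique_ptr<cudf::column> double_first_column(cudf::table_view const& table,
                                                  rmm::cuda_stream_view stream)
{
  auto col_ref = cudf::ast::column_reference(0);
  auto expr    = cudf::ast::operation(cudf::ast::ast_operator::ADD, col_ref, col_ref);
  // mr defaults to the current device resource; the stream is now explicit
  return cudf::compute_column(table, expr, stream);
}
```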
diff --git a/cpp/src/transform/encode.cu b/cpp/src/transform/encode.cu
index 7a044b9f6f7..1c9d52bce1b 100644
--- a/cpp/src/transform/encode.cu
+++ b/cpp/src/transform/encode.cu
@@ -72,10 +72,10 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<column>> encode(table_view const& input,
 }  // namespace detail
 
 std::pair<std::unique_ptr<table>, std::unique_ptr<column>> encode(
-  cudf::table_view const& input, rmm::device_async_resource_ref mr)
+  cudf::table_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::encode(input, cudf::get_default_stream(), mr);
+  return detail::encode(input, stream, mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/transform/mask_to_bools.cu b/cpp/src/transform/mask_to_bools.cu
index adf5db02d9c..be0b80a2633 100644
--- a/cpp/src/transform/mask_to_bools.cu
+++ b/cpp/src/transform/mask_to_bools.cu
@@ -62,9 +62,10 @@ std::unique_ptr<column> mask_to_bools(bitmask_type const* bitmask,
 std::unique_ptr<column> mask_to_bools(bitmask_type const* bitmask,
                                       size_type begin_bit,
                                       size_type end_bit,
+                                      rmm::cuda_stream_view stream,
                                       rmm::device_async_resource_ref mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::mask_to_bools(bitmask, begin_bit, end_bit, cudf::get_default_stream(), mr);
+  return detail::mask_to_bools(bitmask, begin_bit, end_bit, stream, mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/transform/nans_to_nulls.cu b/cpp/src/transform/nans_to_nulls.cu
index fd4f33c594c..a24ba304004 100644
--- a/cpp/src/transform/nans_to_nulls.cu
+++ b/cpp/src/transform/nans_to_nulls.cu
@@ -93,10 +93,10 @@ std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> nans_to_nulls(
 }  // namespace detail
 
 std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> nans_to_nulls(
-  column_view const& input, rmm::device_async_resource_ref mr)
+  column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::nans_to_nulls(input, cudf::get_default_stream(), mr);
+  return detail::nans_to_nulls(input, stream, mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu
index 808f2d1b284..46e6e55b0b7 100644
--- a/cpp/src/transform/one_hot_encode.cu
+++ b/cpp/src/transform/one_hot_encode.cu
@@ -115,9 +115,10 @@ std::pair<std::unique_ptr<column>, table_view> one_hot_encode(column_view const& input,
 
 std::pair<std::unique_ptr<column>, table_view> one_hot_encode(column_view const& input,
                                                               column_view const& categories,
+                                                              rmm::cuda_stream_view stream,
                                                               rmm::device_async_resource_ref mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::one_hot_encode(input, categories, cudf::get_default_stream(), mr);
+  return detail::one_hot_encode(input, categories, stream, mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu
index 12a15eb7e34..6a965d10184 100644
--- a/cpp/src/transform/row_bit_count.cu
+++ b/cpp/src/transform/row_bit_count.cu
@@ -35,8 +35,8 @@
 #include
 #include
+#include <cuda/std/optional>
 #include
-#include <thrust/optional.h>
 #include
 
 namespace cudf {
@@ -159,9 +159,9 @@ void flatten_hierarchy(ColIter begin,
                        std::vector<column_info>& info,
                        hierarchy_info& h_info,
                        rmm::cuda_stream_view stream,
-                       size_type cur_depth        = 0,
-                       size_type cur_branch_depth = 0,
-                       thrust::optional<int> parent_index = {});
+                       size_type cur_depth                   = 0,
+                       size_type cur_branch_depth            = 0,
+                       cuda::std::optional<int> parent_index = {});
 
 /**
  * @brief Type-dispatched functor called by flatten_hierarchy.
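The substituted type in isolation: cuda::std::optional from libcu++ is usable in host and device code alike, which is what lets it replace thrust::optional here (a sketch, not libcudf code):

```cpp
#include <cuda/std/optional>

// Callable from both host and device translation units compiled with nvcc.
__host__ __device__ int parent_or_self(int index, cuda::std::optional<int> parent_index)
{
  return parent_index.has_value() ? *parent_index : index;
}
```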
@@ -177,7 +177,7 @@ struct flatten_functor { rmm::cuda_stream_view, size_type cur_depth, size_type cur_branch_depth, - thrust::optional) + cuda::std::optional) { out.push_back(col); info.push_back({cur_depth, cur_branch_depth, cur_branch_depth}); @@ -194,7 +194,7 @@ struct flatten_functor { rmm::cuda_stream_view, size_type cur_depth, size_type cur_branch_depth, - thrust::optional) + cuda::std::optional) { out.push_back(col); info.push_back({cur_depth, cur_branch_depth, cur_branch_depth}); @@ -210,7 +210,7 @@ struct flatten_functor { rmm::cuda_stream_view stream, size_type cur_depth, size_type cur_branch_depth, - thrust::optional parent_index) + cuda::std::optional parent_index) { // track branch depth as we reach this list and after we pass it auto const branch_depth_start = cur_branch_depth; @@ -243,7 +243,7 @@ struct flatten_functor { rmm::cuda_stream_view stream, size_type cur_depth, size_type cur_branch_depth, - thrust::optional) + cuda::std::optional) { out.push_back(col); info.push_back({cur_depth, cur_branch_depth, cur_branch_depth}); @@ -284,7 +284,7 @@ void flatten_hierarchy(ColIter begin, rmm::cuda_stream_view stream, size_type cur_depth, size_type cur_branch_depth, - thrust::optional parent_index) + cuda::std::optional parent_index) { std::for_each(begin, end, [&](column_view const& col) { cudf::type_dispatcher(col.type(), @@ -561,23 +561,26 @@ std::unique_ptr row_bit_count(table_view const& t, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return segmented_row_bit_count(t, 1, stream, mr); + return detail::segmented_row_bit_count(t, 1, stream, mr); } } // namespace detail std::unique_ptr segmented_row_bit_count(table_view const& t, size_type segment_length, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::segmented_row_bit_count(t, segment_length, cudf::get_default_stream(), mr); + return detail::segmented_row_bit_count(t, segment_length, stream, mr); } -std::unique_ptr row_bit_count(table_view const& t, rmm::device_async_resource_ref mr) +std::unique_ptr row_bit_count(table_view const& t, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::row_bit_count(t, cudf::get_default_stream(), mr); + return detail::row_bit_count(t, stream, mr); } } // namespace cudf diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 98ec44758b9..f5e9048fa0a 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -97,10 +97,11 @@ std::unique_ptr transform(column_view const& input, std::string const& unary_udf, data_type output_type, bool is_ptx, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::transform(input, unary_udf, output_type, is_ptx, cudf::get_default_stream(), mr); + return detail::transform(input, unary_udf, output_type, is_ptx, stream, mr); } } // namespace cudf diff --git a/cpp/src/interop/detail/arrow_allocator.hpp b/cpp/src/utilities/cuda.cpp similarity index 53% rename from cpp/src/interop/detail/arrow_allocator.hpp rename to cpp/src/utilities/cuda.cpp index 75c1baa0dca..53ca0608170 100644 --- a/cpp/src/interop/detail/arrow_allocator.hpp +++ b/cpp/src/utilities/cuda.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,18 +14,21 @@ * limitations under the License. */ -#pragma once +#include +#include +#include -#include +#include -namespace cudf { -namespace detail { +namespace cudf::detail { -// unique_ptr because that is what AllocateBuffer returns -std::unique_ptr allocate_arrow_buffer(int64_t const size, arrow::MemoryPool* ar_mr); +cudf::size_type num_multiprocessors() +{ + int device = 0; + CUDF_CUDA_TRY(cudaGetDevice(&device)); + int num_sms = 0; + CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device)); + return num_sms; +} -// shared_ptr because that is what AllocateBitmap returns -std::shared_ptr allocate_arrow_bitmap(int64_t const size, arrow::MemoryPool* ar_mr); - -} // namespace detail -} // namespace cudf +} // namespace cudf::detail diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index dac981fb532..3095b342748 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -139,11 +139,6 @@ bool have_same_types(column_view const& lhs, column_view const& rhs) return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); } -bool column_types_equal(column_view const& lhs, column_view const& rhs) -{ - return have_same_types(lhs, rhs); -} - bool have_same_types(column_view const& lhs, scalar const& rhs) { return type_dispatcher(lhs.type(), column_scalar_equal_fn{}, lhs, rhs); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 4dffcb41ba2..1bedb344a01 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -24,8 +24,8 @@ rapids_test_init() # properties and linking to build the test function(ConfigureTest CMAKE_TEST_NAME) set(options) - set(one_value GPUS PERCENT STREAM_MODE EXTRA_LIB) - set(multi_value) + set(one_value GPUS PERCENT STREAM_MODE) + set(multi_value EXTRA_LIBS) cmake_parse_arguments(_CUDF_TEST "${options}" "${one_value}" "${multi_value}" ${ARGN}) if(NOT DEFINED _CUDF_TEST_GPUS AND NOT DEFINED _CUDF_TEST_PERCENT) set(_CUDF_TEST_GPUS 1) @@ -57,7 +57,7 @@ function(ConfigureTest CMAKE_TEST_NAME) target_link_libraries( ${CMAKE_TEST_NAME} PRIVATE cudftestutil GTest::gmock GTest::gmock_main GTest::gtest GTest::gtest_main - nvtx3::nvtx3-cpp $ "${_CUDF_TEST_EXTRA_LIB}" + nvtx3::nvtx3-cpp $ "${_CUDF_TEST_EXTRA_LIBS}" ) rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME}) rapids_test_add( @@ -78,6 +78,14 @@ function(ConfigureTest CMAKE_TEST_NAME) endif() endfunction() +# ################################################################################################## +# dependencies ################################################################################### +# ################################################################################################## + +# No need to install Arrow libs when only the final test executables are shipped. +set(CUDF_EXCLUDE_ARROW_FROM_ALL ON) +include(../cmake/thirdparty/get_arrow.cmake) + # ################################################################################################## # test sources ################################################################################## # ################################################################################################## @@ -102,10 +110,6 @@ ConfigureTest(SCALAR_TEST scalar/scalar_test.cpp scalar/scalar_device_view_test. 
# * timestamps tests ------------------------------------------------------------------------------ ConfigureTest(TIMESTAMPS_TEST wrappers/timestamps_test.cu) -# ################################################################################################## -# * cudf tests ------------------------------------------------------------------------------------ -ConfigureTest(ERROR_TEST error/error_handling_test.cu) - # ################################################################################################## # * groupby tests --------------------------------------------------------------------------------- ConfigureTest( @@ -197,7 +201,7 @@ ConfigureTest( QUANTILES_TEST quantiles/percentile_approx_test.cpp quantiles/quantile_test.cpp quantiles/quantiles_test.cpp GPUS 1 - PERCENT 70 + PERCENT 70 EXTRA_LIBS ${ARROW_LIBRARIES} ) # ################################################################################################## @@ -276,8 +280,9 @@ ConfigureTest( interop/from_arrow_host_test.cpp interop/from_arrow_stream_test.cpp interop/dlpack_test.cpp - EXTRA_LIB + EXTRA_LIBS nanoarrow + ${ARROW_LIBRARIES} ) # ################################################################################################## @@ -288,7 +293,7 @@ ConfigureTest(ROW_SELECTION_TEST io/row_selection_test.cpp) ConfigureTest( CSV_TEST io/csv_test.cpp GPUS 1 - PERCENT 30 + PERCENT 30 EXTRA_LIBS ${ARROW_LIBRARIES} ) ConfigureTest( FILE_IO_TEST io/file_io_test.cpp @@ -316,12 +321,11 @@ ConfigureTest( ConfigureTest( JSON_TEST io/json/json_test.cpp io/json/json_chunked_reader.cu GPUS 1 - PERCENT 30 + PERCENT 30 EXTRA_LIBS ${ARROW_LIBRARIES} ) ConfigureTest(JSON_WRITER_TEST io/json/json_writer.cpp) ConfigureTest(JSON_TYPE_CAST_TEST io/json/json_type_cast_test.cu) ConfigureTest(NESTED_JSON_TEST io/json/nested_json_test.cpp io/json/json_tree.cpp) -ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) ConfigureTest(JSON_QUOTE_NORMALIZATION io/json/json_quote_normalization_test.cpp) ConfigureTest(JSON_WHITESPACE_NORMALIZATION io/json/json_whitespace_normalization_test.cu) @@ -334,9 +338,6 @@ target_link_libraries(DATA_CHUNK_SOURCE_TEST PRIVATE ZLIB::ZLIB) ConfigureTest(LOGICAL_STACK_TEST io/fst/logical_stack_test.cu) ConfigureTest(FST_TEST io/fst/fst_test.cu) ConfigureTest(TYPE_INFERENCE_TEST io/type_inference_test.cu) -if(CUDF_ENABLE_ARROW_S3) - target_compile_definitions(ARROW_IO_SOURCE_TEST PRIVATE "S3_ENABLED") -endif() # ################################################################################################## # * sort tests ------------------------------------------------------------------------------------ @@ -393,6 +394,7 @@ ConfigureTest( utilities_tests/pinned_memory_tests.cpp utilities_tests/type_check_tests.cpp utilities_tests/type_list_tests.cpp + utilities_tests/batched_memset_tests.cu ) # ################################################################################################## @@ -689,10 +691,6 @@ ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE tes ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) -# Deprecation from 16297 and fixes in 16379 caused this test to be empty This will be reenabled once -# the deprecated APIs have been replaced in 24.10. 
-# -# ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LABELING_BINS_TEST streams/labeling_bins_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) @@ -735,6 +733,7 @@ ConfigureTest( STREAM_MODE testing ) +ConfigureTest(STREAM_TRANSFORM_TEST streams/transform_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_UNARY_TEST streams/unary_test.cpp STREAM_MODE testing) # ################################################################################################## diff --git a/cpp/tests/copying/copy_range_tests.cpp b/cpp/tests/copying/copy_range_tests.cpp index 223946ddcee..25d93da277b 100644 --- a/cpp/tests/copying/copy_range_tests.cpp +++ b/cpp/tests/copying/copy_range_tests.cpp @@ -232,6 +232,16 @@ TEST_F(CopyRangeTestFixture, CopyWithNullsString) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*p_ret, expected); } +TEST_F(CopyRangeTestFixture, CopyWithTargetNullsString) +{ + auto target = + cudf::test::strings_column_wrapper({"a", "b", "", "d", "", "é"}, {1, 1, 0, 1, 1, 1}); + auto source = cudf::test::strings_column_wrapper({"A", "B", "C", "D", "E", "F"}); + auto result = cudf::copy_range(source, target, 1, 5, 1); + auto expected = cudf::test::strings_column_wrapper({"a", "B", "C", "D", "E", "é"}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected); +} + TEST_F(CopyRangeTestFixture, CopyNoNullsString) { cudf::size_type size{100}; diff --git a/cpp/tests/copying/gather_tests.cpp b/cpp/tests/copying/gather_tests.cpp index 284b6c4c50c..07ce672b14d 100644 --- a/cpp/tests/copying/gather_tests.cpp +++ b/cpp/tests/copying/gather_tests.cpp @@ -43,7 +43,7 @@ TYPED_TEST(GatherTest, IdentityTest) cudf::table_view source_table({source_column}); - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); for (auto i = 0; i < source_table.num_columns(); ++i) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i)); @@ -66,7 +66,7 @@ TYPED_TEST(GatherTest, ReverseIdentityTest) cudf::table_view source_table({source_column}); - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); cudf::test::fixed_width_column_wrapper expect_column(reversed_data, reversed_data + source_size); @@ -94,7 +94,7 @@ TYPED_TEST(GatherTest, EveryOtherNullOdds) cudf::table_view source_table({source_column}); - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); auto expect_data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return 0; }); auto expect_valid = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return 0; }); @@ -126,7 +126,7 @@ TYPED_TEST(GatherTest, EveryOtherNullEvens) cudf::table_view source_table({source_column}); - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); auto expect_data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2 + 1; }); @@ -160,7 +160,7 @@ TYPED_TEST(GatherTest, AllNull) cudf::table_view source_table({source_column}); - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); // Check that the 
result is also all invalid CUDF_TEST_EXPECT_TABLES_EQUAL(source_table, result->view()); @@ -190,7 +190,7 @@ TYPED_TEST(GatherTest, MultiColReverseIdentityTest) cudf::table_view source_table{source_columns}; - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); cudf::test::fixed_width_column_wrapper expect_column(reversed_data, reversed_data + source_size); @@ -228,7 +228,7 @@ TYPED_TEST(GatherTest, MultiColNulls) cudf::table_view source_table{source_columns}; - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); // Expected data auto expect_data = diff --git a/cpp/tests/copying/pack_tests.cpp b/cpp/tests/copying/pack_tests.cpp index ea4408efa6a..8a50e071cb9 100644 --- a/cpp/tests/copying/pack_tests.cpp +++ b/cpp/tests/copying/pack_tests.cpp @@ -573,6 +573,8 @@ TEST_F(PackUnpackTest, SlicedEmpty) cudf::table_view t({a, b, c, d}); - auto sliced = cudf::split(t, {0}); - this->run_test(sliced[0]); + auto sliced = cudf::split(t, {0}); + auto packed = cudf::pack(t); + auto unpacked = cudf::unpack(packed); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t, unpacked); } diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu deleted file mode 100644 index 1dfe45556c4..00000000000 --- a/cpp/tests/error/error_handling_test.cu +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -#include -#include -#include - -#include - -TEST(ExpectsTest, FalseCondition) -{ - EXPECT_THROW(CUDF_EXPECTS(false, "condition is false"), cudf::logic_error); -} - -TEST(ExpectsTest, TrueCondition) { EXPECT_NO_THROW(CUDF_EXPECTS(true, "condition is true")); } - -TEST(CudaTryTest, Error) { EXPECT_THROW(CUDF_CUDA_TRY(cudaErrorLaunchFailure), cudf::cuda_error); } - -TEST(CudaTryTest, Success) { EXPECT_NO_THROW(CUDF_CUDA_TRY(cudaSuccess)); } - -TEST(StreamCheck, success) { EXPECT_NO_THROW(CUDF_CHECK_CUDA(0)); } - -namespace { -// Some silly kernel that will cause an error -CUDF_KERNEL void test_kernel(int* data) { data[threadIdx.x] = threadIdx.x; } -} // namespace - -// In a release build and without explicit synchronization, CUDF_CHECK_CUDA may -// or may not fail on erroneous asynchronous CUDA calls. Invoke -// cudaStreamSynchronize to guarantee failure on error. In a non-release build, -// CUDF_CHECK_CUDA deterministically fails on erroneous asynchronous CUDA -// calls. 
-TEST(StreamCheck, FailedKernel) -{ - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { GTEST_SKIP(); } - - rmm::cuda_stream stream; - int a; - test_kernel<<<0, 0, 0, stream.value()>>>(&a); -#ifdef NDEBUG - stream.synchronize(); -#endif - EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error); -} - -TEST(StreamCheck, CatchFailedKernel) -{ - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { GTEST_SKIP(); } - - rmm::cuda_stream stream; - int a; - test_kernel<<<0, 0, 0, stream.value()>>>(&a); -#ifndef NDEBUG - stream.synchronize(); -#endif - EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error); -} - -CUDF_KERNEL void kernel() { asm("trap;"); } - -TEST(DeathTest, CudaFatalError) -{ - testing::FLAGS_gtest_death_test_style = "threadsafe"; - auto call_kernel = []() { - kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(); - try { - CUDF_CUDA_TRY(cudaDeviceSynchronize()); - } catch (const cudf::fatal_cuda_error& fe) { - std::abort(); - } - }; - ASSERT_DEATH(call_kernel(), ""); -} - -#ifndef NDEBUG - -CUDF_KERNEL void assert_false_kernel() { cudf_assert(false && "this kernel should die"); } - -CUDF_KERNEL void assert_true_kernel() { cudf_assert(true && "this kernel should live"); } - -TEST(DebugAssertDeathTest, cudf_assert_false) -{ - testing::FLAGS_gtest_death_test_style = "threadsafe"; - - auto call_kernel = []() { - auto const stream = cudf::get_default_stream().value(); - assert_false_kernel<<<1, 1, 0, stream>>>(); - - // Kernel should fail with `cudaErrorAssert` - // This error invalidates the current device context, so we need to kill - // the current process. Running with EXPECT_DEATH spawns a new process for - // each attempted kernel launch - if (cudaErrorAssert == cudaDeviceSynchronize()) { std::abort(); } - - // If we reach this point, the cudf_assert didn't work so we exit normally, which will cause - // EXPECT_DEATH to fail. - }; - - EXPECT_DEATH(call_kernel(), "this kernel should die"); -} - -TEST(DebugAssert, cudf_assert_true) -{ - auto const stream = cudf::get_default_stream().value(); - assert_true_kernel<<<1, 1, 0, stream>>>(); - ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize()); -} - -#endif - -// These tests don't use CUDF_TEST_PROGRAM_MAIN because : -// 1.) They don't need the RMM Pool -// 2.) The RMM Pool interferes with the death test -int main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - auto const cmd_opts = parse_cudf_test_opts(argc, argv); - auto adaptor = make_stream_mode_adaptor(cmd_opts); - return RUN_ALL_TESTS(); -} diff --git a/cpp/tests/interop/arrow_utils.hpp b/cpp/tests/interop/arrow_utils.hpp index 1fdf02e02f1..70a9fe64d70 100644 --- a/cpp/tests/interop/arrow_utils.hpp +++ b/cpp/tests/interop/arrow_utils.hpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#pragma once + #include #include #include @@ -30,9 +32,65 @@ #include #include +#include #include -#pragma once +// Creating arrow as per given type_id and buffer arguments +template +std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... 
args) +{ + switch (id) { + case cudf::type_id::BOOL8: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT8: return std::make_shared(std::forward(args)...); + case cudf::type_id::INT16: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT8: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT16: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::FLOAT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::FLOAT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::TIMESTAMP_DAYS: + return std::make_shared(std::make_shared(), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_SECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::SECOND), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_MILLISECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::MILLI), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_MICROSECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::MICRO), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_NANOSECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::NANO), + std::forward(args)...); + case cudf::type_id::DURATION_SECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::SECOND), + std::forward(args)...); + case cudf::type_id::DURATION_MILLISECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::MILLI), + std::forward(args)...); + case cudf::type_id::DURATION_MICROSECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::MICRO), + std::forward(args)...); + case cudf::type_id::DURATION_NANOSECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::NANO), + std::forward(args)...); + default: CUDF_FAIL("Unsupported type_id conversion to arrow"); + } +} template std::enable_if_t() and !std::is_same_v, @@ -48,7 +106,7 @@ get_arrow_array(std::vector const& data, std::vector const& mask = { std::shared_ptr mask_buffer = mask.empty() ? nullptr : arrow::internal::BytesToBits(mask).ValueOrDie(); - return cudf::detail::to_arrow_array(cudf::type_to_id(), data.size(), data_buffer, mask_buffer); + return to_arrow_array(cudf::type_to_id(), data.size(), data_buffer, mask_buffer); } template @@ -154,8 +212,9 @@ std::shared_ptr get_arrow_list_array(std::vector data, "Failed to append values to buffer builder"); CUDF_EXPECTS(buff_builder.Finish(&offset_buffer).ok(), "Failed to allocate buffer"); + auto nullable = std::accumulate(list_validity.begin(), list_validity.end(), 0) > 0; return std::make_shared( - arrow::list(data_array->type()), + arrow::list(arrow::field("", data_array->type(), nullable)), offsets.size() - 1, offset_buffer, data_array, diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 733e5814425..81c406c0faf 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -14,13 +14,6 @@ * limitations under the License. */ -// These interop functions are deprecated. We keep the code in this -// test and will migrate the tests to export the arrow C data -// interface which we consume with from_arrow_host. 
For now, the tests -// are commented out. - -#if 0 - #include #include @@ -43,6 +36,10 @@ #include +#include +#include +#include + std::unique_ptr get_cudf_table() { std::vector> columns; @@ -93,6 +90,45 @@ struct FromArrowTest : public cudf::test::BaseFixture {}; template struct FromArrowTestDurationsTest : public cudf::test::BaseFixture {}; +std::optional> export_table(std::shared_ptr arrow_table) +{ + ArrowSchema schema; + if (!arrow::ExportSchema(*arrow_table->schema(), &schema).ok()) { return std::nullopt; } + auto batch = arrow_table->CombineChunksToBatch().ValueOrDie(); + ArrowArray arr; + if (!arrow::ExportRecordBatch(*batch, &arr).ok()) { return std::nullopt; } + auto ret = cudf::from_arrow(&schema, &arr); + arr.release(&arr); + schema.release(&schema); + return {std::move(ret)}; +} + +std::optional> export_scalar(arrow::Scalar const& arrow_scalar) +{ + auto maybe_array = arrow::MakeArrayFromScalar(arrow_scalar, 1); + if (!maybe_array.ok()) { return std::nullopt; } + auto array = *maybe_array; + + ArrowSchema schema; + if (!arrow::ExportType(*array->type(), &schema).ok()) { return std::nullopt; } + + ArrowArray arr; + if (!arrow::ExportArray(*array, &arr).ok()) { return std::nullopt; } + + auto col = cudf::from_arrow_column(&schema, &arr); + auto ret = cudf::get_element(col->view(), 0); + + arr.release(&arr); + schema.release(&schema); + return {std::move(ret)}; +} + +std::optional> export_scalar( + std::shared_ptr const arrow_scalar) +{ + return export_scalar(*arrow_scalar); +} + TYPED_TEST_SUITE(FromArrowTestDurationsTest, cudf::test::DurationTypes); TEST_F(FromArrowTest, EmptyTable) @@ -102,9 +138,10 @@ TEST_F(FromArrowTest, EmptyTable) auto expected_cudf_table = tables.first->view(); auto arrow_table = tables.second; - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table.value()->view()); } TEST_F(FromArrowTest, DateTimeTable) @@ -127,9 +164,10 @@ TEST_F(FromArrowTest, DateTimeTable) auto arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view()); } TYPED_TEST(FromArrowTestDurationsTest, DurationTable) @@ -160,9 +198,10 @@ TYPED_TEST(FromArrowTestDurationsTest, DurationTable) auto arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view()); } TEST_F(FromArrowTest, NestedList) @@ -188,8 +227,9 @@ TEST_F(FromArrowTest, NestedList) auto arrow_table = arrow::Table::Make(schema, {nested_list_arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view()); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view()); } TEST_F(FromArrowTest, StructColumn) @@ 
-274,9 +314,10 @@ TEST_F(FromArrowTest, StructColumn) auto schema = std::make_shared(schema_vector); auto input = arrow::Table::Make(schema, {struct_array}); - auto got_cudf_table = cudf::from_arrow(*input); + auto got_cudf_table = export_table(input); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table.value()->view()); } TEST_F(FromArrowTest, DictionaryIndicesType) @@ -304,9 +345,10 @@ TEST_F(FromArrowTest, DictionaryIndicesType) cudf::table expected_table(std::move(columns)); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table.view(), got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table.view(), got_cudf_table.value()->view()); } TEST_F(FromArrowTest, ChunkedArray) @@ -369,9 +411,10 @@ TEST_F(FromArrowTest, ChunkedArray) auto expected_cudf_table = get_cudf_table(); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table->view(), got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table->view(), got_cudf_table.value()->view()); } struct FromArrowTestSlice @@ -388,13 +431,14 @@ TEST_P(FromArrowTestSlice, SliceTest) auto sliced_cudf_table = cudf::slice(cudf_table_view, {start, end})[0]; auto expected_cudf_table = cudf::table{sliced_cudf_table}; auto sliced_arrow_table = arrow_table->Slice(start, end - start); - auto got_cudf_table = cudf::from_arrow(*sliced_arrow_table); + auto got_cudf_table = export_table(sliced_arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); // This has been added to take-care of empty string column issue with no children - if (got_cudf_table->num_rows() == 0 and expected_cudf_table.num_rows() == 0) { - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table->view()); + if (got_cudf_table.value()->num_rows() == 0 and expected_cudf_table.num_rows() == 0) { + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table.value()->view()); } else { - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table.view(), got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table.view(), got_cudf_table.value()->view()); } } @@ -417,9 +461,10 @@ TEST_F(FromArrowTest, FixedPoint128Table) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -441,9 +486,10 @@ TEST_F(FromArrowTest, FixedPoint128TableLarge) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -466,9 +512,10 @@ TEST_F(FromArrowTest, FixedPoint128TableNulls) auto const schema = 
std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -493,9 +540,10 @@ TEST_F(FromArrowTest, FixedPoint128TableNullsLarge) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -519,9 +567,12 @@ TYPED_TEST(FromArrowNumericScalarTest, Basic) { TypeParam const value{42}; auto const arrow_scalar = arrow::MakeScalar(value); - auto const cudf_scalar = cudf::from_arrow(*arrow_scalar); + + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); + auto const cudf_numeric_scalar = - dynamic_cast*>(cudf_scalar.get()); + dynamic_cast*>(cudf_scalar.value().get()); if (cudf_numeric_scalar == nullptr) { CUDF_FAIL("Attempted to test with a non-numeric type."); } EXPECT_EQ(cudf_numeric_scalar->type(), cudf::data_type(cudf::type_to_id())); EXPECT_EQ(cudf_numeric_scalar->value(), value); @@ -535,12 +586,13 @@ TEST_F(FromArrowDecimalScalarTest, Basic) auto const value{42}; auto const precision{8}; auto const scale{4}; - auto arrow_scalar = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale)); - auto cudf_scalar = cudf::from_arrow(arrow_scalar); + auto arrow_scalar = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale)); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); // Arrow offers a minimum of 128 bits for the Decimal type. 
auto const cudf_decimal_scalar = - dynamic_cast*>(cudf_scalar.get()); + dynamic_cast*>(cudf_scalar.value().get()); EXPECT_EQ(cudf_decimal_scalar->type(), cudf::data_type(cudf::type_to_id(), scale)); EXPECT_EQ(cudf_decimal_scalar->value(), value); @@ -552,9 +604,10 @@ TEST_F(FromArrowStringScalarTest, Basic) { auto const value = std::string("hello world"); auto const arrow_scalar = arrow::StringScalar(value); - auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); - auto const cudf_string_scalar = dynamic_cast(cudf_scalar.get()); + auto const cudf_string_scalar = dynamic_cast(cudf_scalar.value().get()); EXPECT_EQ(cudf_string_scalar->type(), cudf::data_type(cudf::type_id::STRING)); EXPECT_EQ(cudf_string_scalar->to_string(), value); } @@ -572,9 +625,10 @@ TEST_F(FromArrowListScalarTest, Basic) auto const array = *maybe_array; auto const arrow_scalar = arrow::ListScalar(array); - auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); - auto const cudf_list_scalar = dynamic_cast(cudf_scalar.get()); + auto const cudf_list_scalar = dynamic_cast(cudf_scalar.value().get()); EXPECT_EQ(cudf_list_scalar->type(), cudf::data_type(cudf::type_id::LIST)); cudf::test::fixed_width_column_wrapper const lhs( @@ -592,9 +646,10 @@ TEST_F(FromArrowStructScalarTest, Basic) auto const field = arrow::field("", underlying_arrow_scalar->type); auto const arrow_type = arrow::struct_({field}); auto const arrow_scalar = arrow::StructScalar({underlying_arrow_scalar}, arrow_type); - auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); - auto const cudf_struct_scalar = dynamic_cast(cudf_scalar.get()); + auto const cudf_struct_scalar = dynamic_cast(cudf_scalar.value().get()); EXPECT_EQ(cudf_struct_scalar->type(), cudf::data_type(cudf::type_id::STRUCT)); cudf::test::fixed_width_column_wrapper const col({value}); @@ -602,5 +657,3 @@ TEST_F(FromArrowStructScalarTest, Basic) CUDF_TEST_EXPECT_TABLES_EQUAL(lhs, cudf_struct_scalar->view()); } - -#endif diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index 328ba210a3f..90ae12cdd90 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -14,13 +14,6 @@ * limitations under the License. */ -// These interop functions are deprecated. We keep the code in this -// test and will migrate the tests to export via the arrow C data -// interface with to_arrow_host which arrow can consume. For now, the -// test is commented out. 
- -#if 0 - #include #include @@ -38,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +39,8 @@ #include +#include + using vector_of_columns = std::vector>; std::pair, std::shared_ptr> get_tables( @@ -130,7 +126,7 @@ std::pair, std::shared_ptr> get_table auto keys = cudf::test::to_host(view.keys()).first; auto indices = cudf::test::to_host(view.indices()).first; auto dict_array = get_arrow_dict_array(std::vector(keys.begin(), keys.end()), - std::vector(indices.begin(), indices.end()), + std::vector(indices.begin(), indices.end()), validity); auto boolarray = get_arrow_array(bool_data, bool_validity); auto list_array = get_arrow_list_array( @@ -168,6 +164,21 @@ struct ToArrowTest : public cudf::test::BaseFixture {}; template struct ToArrowTestDurationsTest : public cudf::test::BaseFixture {}; +auto is_equal(cudf::table_view const& table, + cudf::host_span metadata, + std::shared_ptr expected_arrow_table) +{ + auto got_arrow_schema = cudf::to_arrow_schema(table, metadata); + auto got_arrow_table = cudf::to_arrow_host(table); + + for (auto i = 0; i < got_arrow_schema->n_children; ++i) { + auto arr = arrow::ImportArray(got_arrow_table->array.children[i], got_arrow_schema->children[i]) + .ValueOrDie(); + if (!expected_arrow_table->column(i)->Equals(arrow::ChunkedArray(arr))) { return false; } + } + return true; +} + TYPED_TEST_SUITE(ToArrowTestDurationsTest, cudf::test::DurationTypes); TEST_F(ToArrowTest, EmptyTable) @@ -179,10 +190,9 @@ TEST_F(ToArrowTest, EmptyTable) auto struct_meta = cudf::column_metadata{"f"}; struct_meta.children_meta = {{"integral"}, {"string"}}; - auto got_arrow_table = - cudf::to_arrow(cudf_table_view, {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}); - - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = { + {"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}; + ASSERT_TRUE(is_equal(cudf_table_view, metadata, expected_arrow_table)); } TEST_F(ToArrowTest, DateTimeTable) @@ -203,12 +213,10 @@ TEST_F(ToArrowTest, DateTimeTable) std::vector> schema_vector({arrow::field("a", arr->type())}); auto schema = std::make_shared(schema_vector); - auto expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input_view, {{"a"}}); - - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input_view, metadata, expected_arrow_table)); } TYPED_TEST(ToArrowTestDurationsTest, DurationTable) @@ -239,9 +247,8 @@ TYPED_TEST(ToArrowTestDurationsTest, DurationTable) auto expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input_view, {{"a"}}); - - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input_view, metadata, expected_arrow_table)); } TEST_F(ToArrowTest, NestedList) @@ -255,20 +262,20 @@ TEST_F(ToArrowTest, NestedList) auto list_arr = get_arrow_list_array({6, 7, 8, 9}, {0, 1, 4}, {1, 0, 1, 1}); std::vector offset{0, 0, 2}; auto mask_buffer = arrow::internal::BytesToBits({0, 1}).ValueOrDie(); - auto nested_list_arr = std::make_shared(arrow::list(list(arrow::int64())), - offset.size() - 1, - arrow::Buffer::Wrap(offset), - list_arr, - mask_buffer); + auto nested_list_arr = std::make_shared( + arrow::list(arrow::field("a", arrow::list(arrow::int64()), false)), + offset.size() - 1, + arrow::Buffer::Wrap(offset), + list_arr, + mask_buffer); std::vector> 
schema_vector( {arrow::field("a", nested_list_arr->type())}); auto schema = std::make_shared(schema_vector); - auto expected_arrow_table = arrow::Table::Make(schema, {nested_list_arr}); - auto got_arrow_table = cudf::to_arrow(input_view, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + auto expected_arrow_table = arrow::Table::Make(schema, {nested_list_arr}); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input_view, metadata, expected_arrow_table)); } TEST_F(ToArrowTest, StructColumn) @@ -324,7 +331,10 @@ TEST_F(ToArrowTest, StructColumn) auto list_arr = get_arrow_list_array({1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 2, 4, 5, 6, 7, 9}); std::vector offset{0, 3, 4, 6}; auto nested_list_arr = std::make_shared( - arrow::list(list(arrow::int64())), offset.size() - 1, arrow::Buffer::Wrap(offset), list_arr); + arrow::list(arrow::field("a", arrow::list(arrow::field("a", arrow::int64(), false)), false)), + offset.size() - 1, + arrow::Buffer::Wrap(offset), + list_arr); std::vector> child_arrays2({str2_array, int2_array}); auto fields2 = std::vector>{ @@ -356,9 +366,8 @@ TEST_F(ToArrowTest, StructColumn) auto expected_arrow_table = arrow::Table::Make(schema, {struct_array}); - auto got_arrow_table = cudf::to_arrow(input_view, {metadata}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const meta = {metadata}; + ASSERT_TRUE(is_equal(input_view, meta, expected_arrow_table)); } template @@ -380,9 +389,8 @@ TEST_F(ToArrowTest, FixedPoint64Table) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -402,9 +410,8 @@ TEST_F(ToArrowTest, FixedPoint128Table) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -431,9 +438,8 @@ TEST_F(ToArrowTest, FixedPoint64TableLarge) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -455,9 +461,8 @@ TEST_F(ToArrowTest, FixedPoint128TableLarge) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -479,9 +484,8 @@ TEST_F(ToArrowTest, FixedPoint64TableNullsSimple) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, arrow_table)); } 
} @@ -503,9 +507,8 @@ TEST_F(ToArrowTest, FixedPoint128TableNullsSimple) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, arrow_table)); } } @@ -529,9 +532,8 @@ TEST_F(ToArrowTest, FixedPoint64TableNulls) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -554,9 +556,8 @@ TEST_F(ToArrowTest, FixedPoint128TableNulls) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto const got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -575,10 +576,10 @@ TEST_P(ToArrowTestSlice, SliceTest) auto expected_arrow_table = arrow_table->Slice(start, end - start); auto struct_meta = cudf::column_metadata{"f"}; struct_meta.children_meta = {{"integral"}, {"string"}}; - auto got_arrow_table = - cudf::to_arrow(sliced_cudf_table, {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}); - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = { + {"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}; + ASSERT_TRUE(is_equal(sliced_cudf_table, metadata, expected_arrow_table)); } INSTANTIATE_TEST_CASE_P(ToArrowTest, @@ -595,13 +596,58 @@ using NumericTypesNotBool = cudf::test::Concat; TYPED_TEST_SUITE(ToArrowNumericScalarTest, NumericTypesNotBool); +auto col_to_arrow_type(cudf::column_view const& col) +{ + switch (col.type().id()) { + case cudf::type_id::BOOL8: return arrow::boolean(); + case cudf::type_id::INT8: return arrow::int8(); + case cudf::type_id::INT16: return arrow::int16(); + case cudf::type_id::INT32: return arrow::int32(); + case cudf::type_id::INT64: return arrow::int64(); + case cudf::type_id::UINT8: return arrow::uint8(); + case cudf::type_id::UINT16: return arrow::uint16(); + case cudf::type_id::UINT32: return arrow::uint32(); + case cudf::type_id::UINT64: return arrow::uint64(); + case cudf::type_id::FLOAT32: return arrow::float32(); + case cudf::type_id::FLOAT64: return arrow::float64(); + case cudf::type_id::TIMESTAMP_DAYS: return arrow::date32(); + case cudf::type_id::STRING: return arrow::utf8(); + case cudf::type_id::LIST: + return arrow::list(col_to_arrow_type(col.child(cudf::lists_column_view::child_column_index))); + case cudf::type_id::DECIMAL128: return arrow::decimal(38, -col.type().scale()); + default: CUDF_FAIL("Unsupported type_id conversion to arrow type", cudf::data_type_error); + } +} + +std::optional> cudf_scalar_to_arrow( + cudf::scalar const& scalar, std::optional metadata = std::nullopt) +{ + auto const cudf_column = cudf::make_column_from_scalar(scalar, 1); + auto const c_arrow_array = cudf::to_arrow_host(*cudf_column); + auto const arrow_array = [&]() { + if (metadata.has_value()) { + auto const table = cudf::table_view({cudf_column->view()}); + std::vector const table_metadata = {metadata.value()}; + auto const arrow_schema = 
cudf::to_arrow_schema(table, table_metadata); + return arrow::ImportArray(&c_arrow_array->array, arrow_schema->children[0]).ValueOrDie(); + } else { + auto const arrow_type = col_to_arrow_type(cudf_column->view()); + return arrow::ImportArray(&c_arrow_array->array, arrow_type).ValueOrDie(); + } + }(); + auto const maybe_scalar = arrow_array->GetScalar(0); + if (!maybe_scalar.ok()) { return std::nullopt; } + return maybe_scalar.ValueOrDie(); +} + TYPED_TEST(ToArrowNumericScalarTest, Basic) { TypeParam const value{42}; auto const cudf_scalar = cudf::make_fixed_width_scalar(value); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; auto const ref_arrow_scalar = arrow::MakeScalar(value); EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar)); @@ -621,8 +667,9 @@ TEST_F(ToArrowDecimalScalarTest, Basic) auto const cudf_scalar = cudf::make_fixed_point_scalar(value, numeric::scale_type{scale}); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; auto const maybe_ref_arrow_scalar = arrow::MakeScalar(arrow::decimal128(precision, -scale), value); @@ -636,9 +683,10 @@ struct ToArrowStringScalarTest : public cudf::test::BaseFixture {}; TEST_F(ToArrowStringScalarTest, Basic) { std::string const value{"hello world"}; - auto const cudf_scalar = cudf::make_string_scalar(value); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const cudf_scalar = cudf::make_string_scalar(value); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; auto const ref_arrow_scalar = arrow::MakeScalar(value); EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar)); @@ -656,8 +704,9 @@ TEST_F(ToArrowListScalarTest, Basic) auto const cudf_scalar = cudf::make_list_scalar(col); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; arrow::Int64Builder builder; auto const status = builder.AppendValues(host_values, host_validity); @@ -682,7 +731,10 @@ TEST_F(ToArrowStructScalarTest, Basic) cudf::column_metadata metadata{""}; metadata.children_meta.emplace_back(field_name); - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar, metadata); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; auto const underlying_arrow_scalar = arrow::MakeScalar(value); auto const field = arrow::field(field_name, underlying_arrow_scalar->type, false); @@ -693,5 +745,3 @@ TEST_F(ToArrowStructScalarTest, Basic) } CUDF_TEST_PROGRAM_MAIN() - -#endif diff --git a/cpp/tests/io/arrow_io_source_test.cpp b/cpp/tests/io/arrow_io_source_test.cpp deleted file mode 100644 index ffdf2c7e00f..00000000000 --- a/cpp/tests/io/arrow_io_source_test.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -// Global environment for temporary files -auto const temp_env = static_cast( - ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); - -// Base test fixture for tests -struct ArrowIOTest : public cudf::test::BaseFixture {}; - -TEST_F(ArrowIOTest, URIFileSystem) -{ - const std::string file_name = temp_env->get_temp_dir() + "JsonLinesFileTest.json"; - std::ofstream outfile(file_name, std::ofstream::out); - outfile << "{\"a\":11, \"b\":1.1}\n{\"a\":22, \"b\":2.2}"; - outfile.close(); - - std::string file_uri = "file://" + file_name; - auto datasource = std::make_unique(file_uri); - - // Populate the JSON Reader Options - cudf::io::json_reader_options options = - cudf::io::json_reader_options::builder(cudf::io::source_info(datasource.get())).lines(true); - - // Read the JSON file from the LocalFileSystem - cudf::io::table_with_metadata tbl = cudf::io::read_json(options); - - ASSERT_EQ(2, tbl.tbl->num_columns()); - ASSERT_EQ(2, tbl.tbl->num_rows()); -} - -TEST_F(ArrowIOTest, S3FileSystem) -{ - std::string s3_uri = "s3://rapidsai-data/cudf/test/tips.parquet?region=us-east-2"; - - // Check to see if Arrow was built with support for S3. If not, ensure this - // test throws. If so, validate the S3 file contents. - auto const s3_unsupported = arrow::fs::FileSystemFromUri(s3_uri).status().IsNotImplemented(); - if (s3_unsupported) { - EXPECT_THROW(std::make_unique(s3_uri), cudf::logic_error); - } else { - auto datasource = std::make_unique(s3_uri); - - // Populate the Parquet Reader Options - cudf::io::source_info src(datasource.get()); - std::vector single_column; - single_column.insert(single_column.begin(), "total_bill"); - cudf::io::parquet_reader_options_builder builder(src); - cudf::io::parquet_reader_options options = builder.columns(single_column).build(); - - // Read the Parquet file from S3 - cudf::io::table_with_metadata tbl = cudf::io::read_parquet(options); - - ASSERT_EQ(1, tbl.tbl->num_columns()); // Only single column specified in reader_options - ASSERT_EQ(244, tbl.tbl->num_rows()); // known number of rows from the S3 file - } - -#ifdef ARROW_S3 - if (!s3_unsupported) { - // Verify that we are using Arrow with S3, and call finalize - // https://github.com/apache/arrow/issues/36974 - // This needs to be in a separate conditional to ensure we call - // finalize after all arrow_io_source instances have been deleted. 
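With arrow_io_source gone, local-file coverage like the URI test in this deleted file flows through cudf's built-in datasource instead; only the Arrow-specific paths (notably S3) lose coverage here. A rough sketch of the equivalent read using a plain file path (helper name illustrative):

  #include <cudf/io/json.hpp>

  #include <string>

  cudf::io::table_with_metadata read_json_lines(std::string const& file_name)
  {
    // cudf::io::source_info accepts a filesystem path directly, so no
    // Arrow filesystem layer is needed for local files.
    auto const options =
      cudf::io::json_reader_options::builder(cudf::io::source_info{file_name}).lines(true).build();
    return cudf::io::read_json(options);
  }

Reading through a path also keeps the test independent of whether Arrow was built with S3 support, which is what the conditional below had to work around.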
- [[maybe_unused]] auto _ = arrow::fs::EnsureS3Finalized(); - } -#endif -} - -CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index ff433264446..dc14824d834 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -25,8 +25,8 @@ #include #include -#include #include +#include #include #include #include @@ -1197,30 +1197,6 @@ TEST_F(CsvReaderTest, HeaderOnlyFile) EXPECT_EQ(3, view.num_columns()); } -TEST_F(CsvReaderTest, ArrowFileSource) -{ - auto filepath = temp_env->get_temp_dir() + "ArrowFileSource.csv"; - { - std::ofstream outfile(filepath, std::ofstream::out); - outfile << "A\n9\n8\n7\n6\n5\n4\n3\n2\n"; - } - - std::shared_ptr infile; - ASSERT_TRUE(arrow::io::ReadableFile::Open(filepath).Value(&infile).ok()); - - auto arrow_source = cudf::io::arrow_io_source{infile}; - cudf::io::csv_reader_options in_opts = - cudf::io::csv_reader_options::builder(cudf::io::source_info{&arrow_source}) - .dtypes({dtype()}); - auto result = cudf::io::read_csv(in_opts); - - auto const view = result.tbl->view(); - EXPECT_EQ(1, view.num_columns()); - ASSERT_EQ(type_id::INT8, view.column(0).type().id()); - - expect_column_data_equal(std::vector{9, 8, 7, 6, 5, 4, 3, 2}, view.column(0)); -} - TEST_F(CsvReaderTest, InvalidFloatingPoint) { auto const filepath = temp_env->get_temp_dir() + "InvalidFloatingPoint.csv"; diff --git a/cpp/tests/io/json/json_quote_normalization_test.cpp b/cpp/tests/io/json/json_quote_normalization_test.cpp index 55ad0afe499..3a9ba8d9f3b 100644 --- a/cpp/tests/io/json/json_quote_normalization_test.cpp +++ b/cpp/tests/io/json/json_quote_normalization_test.cpp @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include @@ -42,12 +42,11 @@ void run_test(std::string const& host_input, std::string const& expected_host_ou std::make_shared(); auto stream_view = cudf::test::get_default_stream(); - auto device_input = cudf::detail::make_device_uvector_async( - host_input, stream_view, rmm::mr::get_current_device_resource()); + auto device_input = rmm::device_buffer( + host_input.c_str(), host_input.size(), stream_view, rmm::mr::get_current_device_resource()); // Preprocessing FST - cudf::io::datasource::owning_buffer> device_data( - std::move(device_input)); + cudf::io::datasource::owning_buffer device_data(std::move(device_input)); cudf::io::json::detail::normalize_single_quotes(device_data, stream_view, rsc.get()); std::string preprocessed_host_output(device_data.size(), 0); diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp index 993ab82f423..c26e5ca3edb 100644 --- a/cpp/tests/io/json/json_test.cpp +++ b/cpp/tests/io/json/json_test.cpp @@ -26,7 +26,6 @@ #include #include -#include #include #include #include @@ -681,6 +680,53 @@ TEST_F(JsonReaderTest, JsonLinesByteRange) CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), int64_wrapper{{3000, 4000, 5000}}); } +TEST_F(JsonReaderTest, JsonLinesByteRangeWithRealloc) +{ + std::string long_string = "haha"; + std::size_t log_repetitions = 12; + long_string.reserve(long_string.size() * (1UL << log_repetitions)); + for (std::size_t i = 0; i < log_repetitions; i++) { + long_string += long_string; + } + + auto json_string = [&long_string]() { + std::string json_string = R"( + { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } + { "a": { "y" : 6}, "b" : [4, 5 ], "c": 12 } + { "a": { "y" : 6}, "b" : [6 ], "c": 13 } + { "a": { "y" : 6}, "b" : [7 ], "c": 14 })"; + std::string replace_chars = "c"; + std::size_t pos = json_string.find(replace_chars); + while 
(pos != std::string::npos) { + // Replace the substring with the specified string + json_string.replace(pos, replace_chars.size(), long_string); + + // Find the next occurrence of the substring + pos = json_string.find(replace_chars, pos + long_string.size()); + } + return json_string; + }(); + + // Initialize parsing options (reading json lines). Set byte range offset and size so as to read + // the second row of input + cudf::io::json_reader_options json_lines_options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{cudf::host_span( + reinterpret_cast(json_string.data()), json_string.size())}) + .lines(true) + .compression(cudf::io::compression_type::NONE) + .recovery_mode(cudf::io::json_recovery_mode_t::FAIL) + .byte_range_offset(16430) + .byte_range_size(30); + + // Read full test data via existing, nested JSON lines reader + cudf::io::table_with_metadata result = cudf::io::read_json(json_lines_options); + + EXPECT_EQ(result.tbl->num_columns(), 3); + EXPECT_EQ(result.tbl->num_rows(), 1); + EXPECT_EQ(result.metadata.schema_info[2].name, long_string); +} + TEST_F(JsonReaderTest, JsonLinesMultipleFilesByteRange_AcrossFiles) { const std::string file1 = temp_env->get_temp_dir() + "JsonLinesMultipleFilesByteRangeTest1.json"; @@ -958,31 +1004,6 @@ TEST_F(JsonReaderTest, NoDataFileValues) EXPECT_EQ(0, view.num_columns()); } -TEST_F(JsonReaderTest, ArrowFileSource) -{ - const std::string fname = temp_env->get_temp_dir() + "ArrowFileSource.csv"; - - std::ofstream outfile(fname, std::ofstream::out); - outfile << "[9]\n[8]\n[7]\n[6]\n[5]\n[4]\n[3]\n[2]\n"; - outfile.close(); - - std::shared_ptr infile; - ASSERT_TRUE(arrow::io::ReadableFile::Open(fname).Value(&infile).ok()); - - auto arrow_source = cudf::io::arrow_io_source{infile}; - cudf::io::json_reader_options in_options = - cudf::io::json_reader_options::builder(cudf::io::source_info{&arrow_source}) - .dtypes({dtype()}) - .lines(true); - - cudf::io::table_with_metadata result = cudf::io::read_json(in_options); - - EXPECT_EQ(result.tbl->num_columns(), 1); - EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT8); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), int8_wrapper{{9, 8, 7, 6, 5, 4, 3, 2}}); -} - TEST_P(JsonReaderParamTest, InvalidFloatingPoint) { auto const test_opt = GetParam(); @@ -2351,7 +2372,7 @@ TEST_F(JsonReaderTest, MapTypes) // Testing function for mixed types in JSON (for spark json reader) auto test_fn = [](std::string_view json_string, bool lines, std::vector types) { std::map dtype_schema{ - {"foo1", {data_type{type_id::STRING}}}, // list won't be a string + {"foo1", {data_type{type_id::STRING}}}, // list forced as a string {"foo2", {data_type{type_id::STRING}}}, // struct forced as a string {"1", {data_type{type_id::STRING}}}, {"2", {data_type{type_id::STRING}}}, @@ -2378,17 +2399,17 @@ TEST_F(JsonReaderTest, MapTypes) test_fn(R"([{ "foo1": [1,2,3], "bar": 123 }, { "foo2": { "a": 1 }, "bar": 456 }])", false, - {type_id::LIST, type_id::INT32, type_id::STRING}); + {type_id::STRING, type_id::INT32, type_id::STRING}); // jsonl test_fn(R"( { "foo1": [1,2,3], "bar": 123 } { "foo2": { "a": 1 }, "bar": 456 })", true, - {type_id::LIST, type_id::INT32, type_id::STRING}); + {type_id::STRING, type_id::INT32, type_id::STRING}); // jsonl-array test_fn(R"([123, [1,2,3]] [456, null, { "a": 1 }])", true, - {type_id::INT64, type_id::LIST, type_id::STRING}); + {type_id::INT64, type_id::STRING, type_id::STRING}); // json-array test_fn(R"([[[1,2,3], null, 123], [null, { "a": 1 }, 456 ]])", @@ 
-2678,38 +2699,81 @@ TEST_F(JsonReaderTest, JsonNestedDtypeFilter) TEST_F(JsonReaderTest, JSONMixedTypeChildren) { - std::string const json_str = R"( -{ "Root": { "Key": [ { "EE": "A" } ] } } -{ "Root": { "Key": { } } } -{ "Root": { "Key": [{ "YY": 1}] } } -)"; - // Column "EE" is created and destroyed - // Column "YY" should not be created - - cudf::io::json_reader_options options = - cudf::io::json_reader_options::builder(cudf::io::source_info{json_str.c_str(), json_str.size()}) - .lines(true) - .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) - .normalize_single_quotes(true) - .normalize_whitespace(false) - .mixed_types_as_string(true) - .keep_quotes(true); - - auto result = cudf::io::read_json(options); + // struct mixed. + { + std::string const json_str = R"( + { "Root": { "Key": [ { "EE": "A" } ] } } + { "Root": { "Key": { } } } + { "Root": { "Key": [{ "YY": 1}] } } + )"; + // Column "EE" is created and destroyed + // Column "YY" should not be created + + cudf::io::json_reader_options options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{json_str.c_str(), json_str.size()}) + .lines(true) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .normalize_single_quotes(true) + .normalize_whitespace(false) + .mixed_types_as_string(true) + .keep_quotes(true); + + auto result = cudf::io::read_json(options); + + ASSERT_EQ(result.tbl->num_columns(), 1); + ASSERT_EQ(result.metadata.schema_info.size(), 1); + EXPECT_EQ(result.metadata.schema_info[0].name, "Root"); + ASSERT_EQ(result.metadata.schema_info[0].children.size(), 1); + EXPECT_EQ(result.metadata.schema_info[0].children[0].name, "Key"); + ASSERT_EQ(result.metadata.schema_info[0].children[0].children.size(), 2); + EXPECT_EQ(result.metadata.schema_info[0].children[0].children[0].name, "offsets"); + // types + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRUCT); + EXPECT_EQ(result.tbl->get_column(0).child(0).type().id(), cudf::type_id::STRING); + cudf::test::strings_column_wrapper expected( + {R"([ { "EE": "A" } ])", "{ }", R"([{ "YY": 1}])"}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result.tbl->get_column(0).child(0)); + } - ASSERT_EQ(result.tbl->num_columns(), 1); - ASSERT_EQ(result.metadata.schema_info.size(), 1); - EXPECT_EQ(result.metadata.schema_info[0].name, "Root"); - ASSERT_EQ(result.metadata.schema_info[0].children.size(), 1); - EXPECT_EQ(result.metadata.schema_info[0].children[0].name, "Key"); - ASSERT_EQ(result.metadata.schema_info[0].children[0].children.size(), 2); - EXPECT_EQ(result.metadata.schema_info[0].children[0].children[0].name, "offsets"); - // types - EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRUCT); - EXPECT_EQ(result.tbl->get_column(0).child(0).type().id(), cudf::type_id::STRING); - cudf::test::strings_column_wrapper expected({R"([ { "EE": "A" } ])", "{ }", R"([{ "YY": 1}])"}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result.tbl->get_column(0).child(0)); + // list mixed. 
+ { + std::string const json_str = R"( + { "Root": { "Key": [ { "EE": "A" } ] } } + { "Root": { "Key": "abc" } } + { "Root": { "Key": [{ "YY": 1}] } } + )"; + // Column "EE" is created and destroyed + // Column "YY" should not be created + + cudf::io::json_reader_options options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{json_str.c_str(), json_str.size()}) + .lines(true) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .normalize_single_quotes(true) + .normalize_whitespace(false) + .mixed_types_as_string(true) + .keep_quotes(true); + + auto result = cudf::io::read_json(options); + + ASSERT_EQ(result.tbl->num_columns(), 1); + ASSERT_EQ(result.metadata.schema_info.size(), 1); + EXPECT_EQ(result.metadata.schema_info[0].name, "Root"); + ASSERT_EQ(result.metadata.schema_info[0].children.size(), 1); + EXPECT_EQ(result.metadata.schema_info[0].children[0].name, "Key"); + ASSERT_EQ(result.metadata.schema_info[0].children[0].children.size(), 2); + EXPECT_EQ(result.metadata.schema_info[0].children[0].children[0].name, "offsets"); + // types + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRUCT); + EXPECT_EQ(result.tbl->get_column(0).child(0).type().id(), cudf::type_id::STRING); + cudf::test::strings_column_wrapper expected( + {R"([ { "EE": "A" } ])", "\"abc\"", R"([{ "YY": 1}])"}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result.tbl->get_column(0).child(0)); + } } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/json/json_whitespace_normalization_test.cu b/cpp/tests/io/json/json_whitespace_normalization_test.cu index 8ed5fa81b12..01dd17fab98 100644 --- a/cpp/tests/io/json/json_whitespace_normalization_test.cu +++ b/cpp/tests/io/json/json_whitespace_normalization_test.cu @@ -38,12 +38,11 @@ void run_test(std::string const& host_input, std::string const& expected_host_ou // Prepare cuda stream for data transfers & kernels auto stream_view = cudf::test::get_default_stream(); - auto device_input = cudf::detail::make_device_uvector_async( - host_input, stream_view, rmm::mr::get_current_device_resource()); + auto device_input = rmm::device_buffer( + host_input.c_str(), host_input.size(), stream_view, rmm::mr::get_current_device_resource()); // Preprocessing FST - cudf::io::datasource::owning_buffer> device_data( - std::move(device_input)); + cudf::io::datasource::owning_buffer device_data(std::move(device_input)); cudf::io::json::detail::normalize_whitespace( device_data, stream_view, rmm::mr::get_current_device_resource()); diff --git a/cpp/tests/io/parquet_common.cpp b/cpp/tests/io/parquet_common.cpp index c1211869bcc..3dd5ad145ea 100644 --- a/cpp/tests/io/parquet_common.cpp +++ b/cpp/tests/io/parquet_common.cpp @@ -744,7 +744,7 @@ int32_t compare(T& v1, T& v2) int32_t compare_binary(std::vector const& v1, std::vector const& v2, cudf::io::parquet::detail::Type ptype, - thrust::optional const& ctype) + cuda::std::optional const& ctype) { auto ctype_val = ctype.value_or(cudf::io::parquet::detail::UNKNOWN); switch (ptype) { diff --git a/cpp/tests/io/parquet_common.hpp b/cpp/tests/io/parquet_common.hpp index 59ee85444f2..bc6145d77da 100644 --- a/cpp/tests/io/parquet_common.hpp +++ b/cpp/tests/io/parquet_common.hpp @@ -172,7 +172,7 @@ std::pair create_parquet_typed_with_stats(std::string int32_t compare_binary(std::vector const& v1, std::vector const& v2, cudf::io::parquet::detail::Type ptype, - thrust::optional const& ctype); + cuda::std::optional const& ctype); void expect_compression_stats_empty(std::shared_ptr stats); diff --git 
a/cpp/tests/iterator/indexalator_test.cu b/cpp/tests/iterator/indexalator_test.cu index 0c10853ec02..dac2356dcb0 100644 --- a/cpp/tests/iterator/indexalator_test.cu +++ b/cpp/tests/iterator/indexalator_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,10 +20,10 @@ #include +#include #include #include #include -#include #include #include #include @@ -84,15 +84,16 @@ TYPED_TEST(IndexalatorTest, optional_iterator) auto d_col = cudf::test::fixed_width_column_wrapper( host_values.begin(), host_values.end(), validity.begin()); - auto expected_values = thrust::host_vector>(host_values.size()); + auto expected_values = + thrust::host_vector>(host_values.size()); std::transform(host_values.begin(), host_values.end(), validity.begin(), expected_values.begin(), [](T v, bool b) { - return (b) ? thrust::make_optional(static_cast(v)) - : thrust::nullopt; + return (b) ? cuda::std::make_optional(static_cast(v)) + : cuda::std::nullopt; }); auto it_dev = cudf::detail::indexalator_factory::make_input_optional_iterator(d_col); diff --git a/cpp/tests/iterator/offsetalator_test.cu b/cpp/tests/iterator/offsetalator_test.cu index e569e58f42a..b206ff947bb 100644 --- a/cpp/tests/iterator/offsetalator_test.cu +++ b/cpp/tests/iterator/offsetalator_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/tests/iterator/optional_iterator_test.cuh b/cpp/tests/iterator/optional_iterator_test.cuh index 6a264cee9a8..04f5410a44f 100644 --- a/cpp/tests/iterator/optional_iterator_test.cuh +++ b/cpp/tests/iterator/optional_iterator_test.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,8 @@ #include +#include #include -#include template void nonull_optional_iterator(IteratorTest& testFixture) @@ -32,9 +32,9 @@ void nonull_optional_iterator(IteratorTest& testFixture) auto d_col = cudf::column_device_view::create(w_col); // calculate the expected value by CPU. - thrust::host_vector> replaced_array(host_values.size()); + thrust::host_vector> replaced_array(host_values.size()); std::transform(host_values.begin(), host_values.end(), replaced_array.begin(), [](auto s) { - return thrust::optional{s}; + return cuda::std::optional{s}; }); // GPU test @@ -61,19 +61,20 @@ void null_optional_iterator(IteratorTest& testFixture) auto d_col = cudf::column_device_view::create(w_col); // calculate the expected value by CPU. - thrust::host_vector> optional_values(host_values.size()); - std::transform(host_values.begin(), - host_values.end(), - host_bools.begin(), - optional_values.begin(), - [](auto s, bool b) { return b ? thrust::optional{s} : thrust::optional{}; }); + thrust::host_vector> optional_values(host_values.size()); + std::transform( + host_values.begin(), + host_values.end(), + host_bools.begin(), + optional_values.begin(), + [](auto s, bool b) { return b ? 
cuda::std::optional{s} : cuda::std::optional{}; }); - thrust::host_vector> value_all_valid(host_values.size()); + thrust::host_vector> value_all_valid(host_values.size()); std::transform(host_values.begin(), host_values.end(), host_bools.begin(), value_all_valid.begin(), - [](auto s, bool b) { return thrust::optional{s}; }); + [](auto s, bool b) { return cuda::std::optional{s}; }); // GPU test for correct null mapping testFixture.iterator_test_thrust( diff --git a/cpp/tests/iterator/optional_iterator_test_numeric.cu b/cpp/tests/iterator/optional_iterator_test_numeric.cu index 98befb0a3ee..257c0979017 100644 --- a/cpp/tests/iterator/optional_iterator_test_numeric.cu +++ b/cpp/tests/iterator/optional_iterator_test_numeric.cu @@ -18,9 +18,9 @@ #include +#include #include #include -#include #include #include @@ -49,21 +49,21 @@ TYPED_TEST(NumericOptionalIteratorTest, null_optional_iterator) { null_optional_ // Transformers and Operators for optional_iterator test template struct transformer_optional_meanvar { - using ResultType = thrust::optional>; + using ResultType = cuda::std::optional>; - CUDF_HOST_DEVICE inline ResultType operator()(thrust::optional const& optional) + CUDF_HOST_DEVICE inline ResultType operator()(cuda::std::optional const& optional) { if (optional.has_value()) { auto v = *optional; return cudf::meanvar{v, static_cast(v * v), 1}; } - return thrust::nullopt; + return cuda::std::nullopt; } }; template struct optional_to_meanvar { - CUDF_HOST_DEVICE inline T operator()(thrust::optional const& v) { return v.value_or(T{0}); } + CUDF_HOST_DEVICE inline T operator()(cuda::std::optional const& v) { return v.value_or(T{0}); } }; // TODO: enable this test also at __CUDACC_DEBUG__ diff --git a/cpp/tests/iterator/value_iterator_test_numeric.cu b/cpp/tests/iterator/value_iterator_test_numeric.cu index d3d1c12bdc7..39e05ff6832 100644 --- a/cpp/tests/iterator/value_iterator_test_numeric.cu +++ b/cpp/tests/iterator/value_iterator_test_numeric.cu @@ -23,17 +23,5 @@ template struct NumericValueIteratorTest : public IteratorTest {}; TYPED_TEST_SUITE(NumericValueIteratorTest, TestingTypes); -TYPED_TEST(NumericValueIteratorTest, non_null_iterator) -{ - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - non_null_iterator(*this); -} -TYPED_TEST(NumericValueIteratorTest, null_iterator) -{ - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - null_iterator(*this); -} +TYPED_TEST(NumericValueIteratorTest, non_null_iterator) { non_null_iterator(*this); } +TYPED_TEST(NumericValueIteratorTest, null_iterator) { null_iterator(*this); } diff --git a/cpp/tests/large_strings/json_tests.cu b/cpp/tests/large_strings/json_tests.cu index 49abf7b484d..e34ab991c11 100644 --- a/cpp/tests/large_strings/json_tests.cu +++ b/cpp/tests/large_strings/json_tests.cu @@ -28,13 +28,17 @@ struct JsonLargeReaderTest : public cudf::test::StringsLargeTest {}; TEST_F(JsonLargeReaderTest, MultiBatch) { - std::string json_string = R"( + std::string json_string = R"( { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } { "a": { "y" : 6}, "b" : [4, 5 ], "c": 12 } { "a": { "y" : 6}, "b" : [6 ], "c": 13 } { "a": { "y" : 6}, "b" : [7 ], "c": 14 })"; - constexpr size_t batch_size_ub = std::numeric_limits::max(); - constexpr size_t expected_file_size = 1.5 * static_cast(batch_size_ub); + + std::size_t const batch_size_upper_bound = std::numeric_limits::max() / 16; + // set smaller batch_size to reduce file size and execution 
time + setenv("LIBCUDF_JSON_BATCH_SIZE", std::to_string(batch_size_upper_bound).c_str(), 1); + + constexpr std::size_t expected_file_size = 1.5 * static_cast(batch_size_upper_bound); std::size_t const log_repetitions = static_cast(std::ceil(std::log2(expected_file_size / json_string.size()))); @@ -66,8 +70,11 @@ TEST_F(JsonLargeReaderTest, MultiBatch) datasources.emplace_back(cudf::io::datasource::create(hb)); } // Test for different chunk sizes - std::vector chunk_sizes{ - batch_size_ub / 4, batch_size_ub / 2, batch_size_ub, static_cast(batch_size_ub * 2)}; + std::vector chunk_sizes{batch_size_upper_bound / 4, + batch_size_upper_bound / 2, + batch_size_upper_bound, + static_cast(batch_size_upper_bound * 2)}; + for (auto chunk_size : chunk_sizes) { auto const tables = split_byte_range_reading(datasources, @@ -86,4 +93,7 @@ TEST_F(JsonLargeReaderTest, MultiBatch) // cannot use EQUAL due to concatenate removing null mask CUDF_TEST_EXPECT_TABLES_EQUIVALENT(current_reader_table.tbl->view(), result->view()); } + + // go back to normal batch_size + unsetenv("LIBCUDF_LARGE_STRINGS_THRESHOLD"); } diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 0ec4cfa34c4..949ffcc26a6 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -300,9 +300,6 @@ TYPED_TEST_SUITE(ReductionTest, cudf::test::NumericTypes); TYPED_TEST(ReductionTest, Product) { using T = TypeParam; - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } std::vector int_values({5, -1, 1, 0, 3, 2, 4}); std::vector host_bools({true, true, false, false, true, true, true}); diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 37efc116d2a..668690639a6 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -87,10 +87,6 @@ TYPED_TEST(SegmentedReductionTest, SumExcludeNulls) TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 3, 5], [null, 3, 5], [1], [null], [null, null], [] // values: {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} @@ -141,10 +137,6 @@ TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls) TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] // values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} @@ -193,10 +185,6 @@ TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls) TYPED_TEST(SegmentedReductionTest, MinExcludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] // values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} @@ -388,10 +376,6 @@ TYPED_TEST(SegmentedReductionTest, SumIncludeNulls) TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 3, 5], [null, 3, 5], [1], [null], [null, null], [] // values: {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} @@ -445,10 +429,6 @@ 
TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls) TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] // values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} @@ -500,10 +480,6 @@ TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls) TYPED_TEST(SegmentedReductionTest, MinIncludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] // values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp index 9603ea44a76..fcee27305f2 100644 --- a/cpp/tests/replace/replace_nulls_tests.cpp +++ b/cpp/tests/replace/replace_nulls_tests.cpp @@ -674,7 +674,7 @@ TEST_F(ReplaceDictionaryTest, ReplaceNullsEmpty) cudf::test::fixed_width_column_wrapper input_empty_w({}); auto input_empty = cudf::dictionary::encode(input_empty_w); auto result = cudf::replace_nulls(input_empty->view(), input_empty->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), input_empty->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), input_empty->view()); } TEST_F(ReplaceDictionaryTest, ReplaceNullsNoNulls) diff --git a/cpp/tests/reshape/byte_cast_tests.cpp b/cpp/tests/reshape/byte_cast_tests.cpp index cd280302677..b3d9b2e2f5f 100644 --- a/cpp/tests/reshape/byte_cast_tests.cpp +++ b/cpp/tests/reshape/byte_cast_tests.cpp @@ -61,8 +61,8 @@ TEST_F(ByteCastTest, int16ValuesWithNulls) auto [null_mask, null_count] = cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5); auto int16_expected = cudf::make_lists_column( 5, - std::move(cudf::test::fixed_width_column_wrapper{0, 0, 2, 2, 4, 4}.release()), - std::move(int16_data.release()), + cudf::test::fixed_width_column_wrapper{0, 0, 2, 2, 4, 4}.release(), + int16_data.release(), null_count, std::move(null_mask)); @@ -109,8 +109,8 @@ TEST_F(ByteCastTest, int32ValuesWithNulls) auto int32_expected = cudf::make_lists_column( 5, - std::move(cudf::test::fixed_width_column_wrapper{0, 4, 4, 8, 8, 12}.release()), - std::move(int32_data.release()), + cudf::test::fixed_width_column_wrapper{0, 4, 4, 8, 8, 12}.release(), + int32_data.release(), null_count, std::move(null_mask)); @@ -163,9 +163,8 @@ TEST_F(ByteCastTest, int64ValuesWithNulls) auto [null_mask, null_count] = cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5); auto int64_expected = cudf::make_lists_column( 5, - std::move( - cudf::test::fixed_width_column_wrapper{0, 0, 8, 8, 16, 16}.release()), - std::move(int64_data.release()), + cudf::test::fixed_width_column_wrapper{0, 0, 8, 8, 16, 16}.release(), + int64_data.release(), null_count, std::move(null_mask)); @@ -226,8 +225,8 @@ TEST_F(ByteCastTest, fp32ValuesWithNulls) cudf::test::detail::make_null_mask(even_validity, even_validity + 5); auto fp32_expected = cudf::make_lists_column( 5, - std::move(cudf::test::fixed_width_column_wrapper{0, 4, 4, 8, 8, 12}.release()), - std::move(fp32_data.release()), + cudf::test::fixed_width_column_wrapper{0, 4, 4, 8, 8, 12}.release(), + fp32_data.release(), null_count, std::move(null_mask)); @@ -297,9 +296,8 @@ TEST_F(ByteCastTest, fp64ValuesWithNulls) auto [null_mask, null_count] = cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5); auto fp64_expected = 
cudf::make_lists_column( 5, - std::move( - cudf::test::fixed_width_column_wrapper{0, 0, 8, 8, 16, 16}.release()), - std::move(fp64_data.release()), + cudf::test::fixed_width_column_wrapper{0, 0, 8, 8, 16, 16}.release(), + fp64_data.release(), null_count, std::move(null_mask)); diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index da9666cbc74..f4fe2c5956a 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,9 @@ #include #include +#include #include +#include #include #include @@ -338,3 +340,25 @@ TEST_F(SegmentedSortInt, Bool) result = cudf::stable_segmented_sorted_order(cudf::table_view({test_col}), segments); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected); } + +// Specific test for fix in https://github.com/rapidsai/cudf/pull/16463 +TEST_F(SegmentedSortInt, UnbalancedOffsets) +{ + auto h_input = std::vector(3535); + std::iota(h_input.begin(), h_input.end(), 1); + std::sort(h_input.begin(), h_input.end(), std::greater{}); + std::fill_n(h_input.begin(), 4, 0); + std::fill(h_input.begin() + 3533, h_input.end(), 10000); + auto d_input = cudf::detail::make_device_uvector_sync( + h_input, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto input = cudf::column_view(cudf::device_span(d_input)); + auto segments = cudf::test::fixed_width_column_wrapper({0, 4, 3533, 3535}); + // full sort should match handcrafted input data here + auto expected = cudf::sort(cudf::table_view({input})); + + auto input_view = cudf::table_view({input}); + auto result = cudf::segmented_sort_by_key(input_view, input_view, segments); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), expected->view().column(0)); + result = cudf::stable_segmented_sort_by_key(input_view, input_view, segments); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), expected->view().column(0)); +} diff --git a/cpp/tests/streams/interop_test.cpp b/cpp/tests/streams/interop_test.cpp deleted file mode 100644 index 9ba862585d0..00000000000 --- a/cpp/tests/streams/interop_test.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// These interop functions are deprecated. We keep the code in this -// test and will migrate the tests to export via the arrow C data -// interface with to_arrow_host which arrow can consume. For now, the -// test is commented out. 
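As with the interop tests earlier in this diff, the stream coverage removed here would migrate to the C Data Interface entry points. A hedged sketch, assuming to_arrow_host takes an explicit rmm::cuda_stream_view the way the deleted to_arrow overloads did:

  #include <cudf/interop.hpp>
  #include <cudf/table/table_view.hpp>
  #include <cudf_test/column_wrapper.hpp>
  #include <cudf_test/default_stream.hpp>

  void export_on_test_stream()
  {
    auto col = cudf::test::fixed_width_column_wrapper<int32_t>{{42}};
    cudf::table_view const tbl{{col}};
    // Export device data to host Arrow memory, ordered on the test's default stream.
    auto host_array = cudf::to_arrow_host(tbl, cudf::test::get_default_stream());
  }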
- -#if 0 - -#include -#include -#include - -#include -#include -#include -#include - -struct ArrowTest : public cudf::test::BaseFixture {}; - -TEST_F(ArrowTest, ToArrow) -{ - int32_t const value{42}; - auto col = cudf::test::fixed_width_column_wrapper{{value}}; - cudf::table_view tbl{{col}}; - - std::vector metadata{{""}}; - cudf::to_arrow(tbl, metadata, cudf::test::get_default_stream()); -} - -TEST_F(ArrowTest, FromArrow) -{ - std::vector host_values = {1, 2, 3, 5, 6, 7, 8}; - std::vector host_validity = {true, true, true, false, true, true, true}; - - arrow::Int64Builder builder; - auto status = builder.AppendValues(host_values, host_validity); - auto maybe_array = builder.Finish(); - auto array = *maybe_array; - - auto field = arrow::field("", arrow::int32()); - auto schema = arrow::schema({field}); - auto table = arrow::Table::Make(schema, {array}); - cudf::from_arrow(*table, cudf::test::get_default_stream()); -} - -TEST_F(ArrowTest, ToArrowScalar) -{ - int32_t const value{42}; - auto cudf_scalar = - cudf::make_fixed_width_scalar(value, cudf::test::get_default_stream()); - - cudf::column_metadata metadata{""}; - cudf::to_arrow(*cudf_scalar, metadata, cudf::test::get_default_stream()); -} - -TEST_F(ArrowTest, FromArrowScalar) -{ - int32_t const value{42}; - auto arrow_scalar = arrow::MakeScalar(value); - cudf::from_arrow(*arrow_scalar, cudf::test::get_default_stream()); -} - -#endif diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp index 56443870602..443f4548b2c 100644 --- a/cpp/tests/streams/stream_compaction_test.cpp +++ b/cpp/tests/streams/stream_compaction_test.cpp @@ -41,6 +41,7 @@ auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL; auto constexpr NAN_EQUAL = cudf::nan_equality::ALL_EQUAL; auto constexpr NAN_UNEQUAL = cudf::nan_equality::UNEQUAL; +using int16s_col = cudf::test::fixed_width_column_wrapper; using int32s_col = cudf::test::fixed_width_column_wrapper; using floats_col = cudf::test::fixed_width_column_wrapper; @@ -51,50 +52,9 @@ using cudf::test::iterators::no_nulls; using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; -struct StableDistinctKeepAny : public cudf::test::BaseFixture {}; +struct StreamCompactionTest : public cudf::test::BaseFixture {}; -struct StableDistinctKeepFirstLastNone : public cudf::test::BaseFixture {}; - -TEST_F(StableDistinctKeepAny, NoNullsTableWithNaNs) -{ - // Column(s) used to test KEEP_ANY needs to have same rows in contiguous - // groups for equivalent keys because KEEP_ANY is nondeterministic. - auto const col1 = int32s_col{6, 6, 6, 1, 1, 1, 3, 5, 8, 5}; - auto const col2 = floats_col{6, 6, 6, 1, 1, 1, 3, 4, 9, 4}; - auto const keys1 = int32s_col{20, 20, 20, 15, 15, 15, 20, 19, 21, 9}; - auto const keys2 = floats_col{19., 19., 19., NaN, NaN, NaN, 20., 20., 9., 21.}; - - auto const input = cudf::table_view{{col1, col2, keys1, keys2}}; - auto const key_idx = std::vector{2, 3}; - - // NaNs are unequal. 
- { - auto const exp_col1 = int32s_col{6, 1, 1, 1, 3, 5, 8, 5}; - auto const exp_col2 = floats_col{6, 1, 1, 1, 3, 4, 9, 4}; - auto const exp_keys1 = int32s_col{20, 15, 15, 15, 20, 19, 21, 9}; - auto const exp_keys2 = floats_col{19., NaN, NaN, NaN, 20., 20., 9., 21.}; - auto const expected = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}}; - - auto const result = cudf::stable_distinct( - input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); - } - - // NaNs are equal. - { - auto const exp_col1 = int32s_col{6, 1, 3, 5, 8, 5}; - auto const exp_col2 = floats_col{6, 1, 3, 4, 9, 4}; - auto const exp_keys1 = int32s_col{20, 15, 20, 19, 21, 9}; - auto const exp_keys2 = floats_col{19., NaN, 20., 20., 9., 21.}; - auto const expected = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}}; - - auto const result = cudf::stable_distinct( - input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); - } -} - -TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs) +TEST_F(StreamCompactionTest, StableDistinctKeepAny) { auto constexpr null{0.0}; // shadow the global `null` variable of type int @@ -150,7 +110,7 @@ TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs) } } -TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual) +TEST_F(StreamCompactionTest, StableDistinctKeepFirstLastNone) { // Column(s) used to test needs to have different rows for the same keys. auto const col = int32s_col{0, 1, 2, 3, 4, 5, 6}; @@ -192,44 +152,313 @@ TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual) } } -TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsUnequal) +TEST_F(StreamCompactionTest, DropNaNs) { - // Column(s) used to test needs to have different rows for the same keys. 
- auto const col = int32s_col{0, 1, 2, 3, 4, 5, 6, 7}; - auto const keys = floats_col{20., NaN, NaN, 19., 21., 19., 22., 20.}; - auto const input = cudf::table_view{{col, keys}}; - auto const key_idx = std::vector{1}; + auto const col1 = floats_col{{1., 2., NaN, NaN, 5., 6.}, nulls_at({2, 5})}; + auto const col2 = int32s_col{{10, 40, 70, 5, 2, 10}, nulls_at({2, 5})}; + auto const col3 = floats_col{{NaN, 40., 70., NaN, 2., 10.}, nulls_at({2, 5})}; + cudf::table_view input{{col1, col2, col3}}; + + std::vector keys{0, 2}; - // KEEP_FIRST { - auto const exp_col = int32s_col{0, 1, 2, 3, 4, 6}; - auto const exp_keys = floats_col{20., NaN, NaN, 19., 21., 22.}; - auto const expected = cudf::table_view{{exp_col, exp_keys}}; + // With keep_threshold + auto const col1_expected = floats_col{{1., 2., 3., 5., 6.}, nulls_at({2, 4})}; + auto const col2_expected = int32s_col{{10, 40, 70, 2, 10}, nulls_at({2, 4})}; + auto const col3_expected = floats_col{{NaN, 40., 70., 2., 10.}, nulls_at({2, 4})}; + cudf::table_view expected{{col1_expected, col2_expected, col3_expected}}; + + auto result = cudf::drop_nans(input, keys, keys.size() - 1, cudf::test::get_default_stream()); - auto const result = cudf::stable_distinct( - input, key_idx, KEEP_FIRST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); } - // KEEP_LAST { - auto const exp_col = int32s_col{1, 2, 4, 5, 6, 7}; - auto const exp_keys = floats_col{NaN, NaN, 21., 19., 22., 20.}; - auto const expected = cudf::table_view{{exp_col, exp_keys}}; + // Without keep_threshold + auto const col1_expected = floats_col{{2., 3., 5., 6.}, nulls_at({1, 3})}; + auto const col2_expected = int32s_col{{40, 70, 2, 10}, nulls_at({1, 3})}; + auto const col3_expected = floats_col{{40., 70., 2., 10.}, nulls_at({1, 3})}; + cudf::table_view expected{{col1_expected, col2_expected, col3_expected}}; + + auto result = cudf::drop_nans(input, keys, cudf::test::get_default_stream()); - auto const result = cudf::stable_distinct( - input, key_idx, KEEP_LAST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); } +} + +TEST_F(StreamCompactionTest, DropNulls) +{ + auto const col1 = int16s_col{{1, 0, 1, 0, 1, 0}, nulls_at({2, 5})}; + auto const col2 = int32s_col{{10, 40, 70, 5, 2, 10}, nulls_at({2})}; + auto const col3 = floats_col{{10., 40., 70., 5., 2., 10.}, no_nulls()}; + cudf::table_view input{{col1, col2, col3}}; + std::vector keys{0, 1, 2}; - // KEEP_NONE { - auto const exp_col = int32s_col{1, 2, 4, 6}; - auto const exp_keys = floats_col{NaN, NaN, 21., 22.}; - auto const expected = cudf::table_view{{exp_col, exp_keys}}; + // With keep_threshold + auto const col1_expected = int16s_col{{1, 0, 0, 1, 0}, null_at(4)}; + auto const col2_expected = int32s_col{{10, 40, 5, 2, 10}, no_nulls()}; + auto const col3_expected = floats_col{{10., 40., 5., 2., 10.}, no_nulls()}; + cudf::table_view expected{{col1_expected, col2_expected, col3_expected}}; + + auto result = cudf::drop_nulls(input, keys, keys.size() - 1, cudf::test::get_default_stream()); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + { + // Without keep_threshold + auto const col1_expected = int16s_col{{1, 0, 0, 1}, no_nulls()}; + auto const col2_expected = int32s_col{{10, 40, 5, 2}, no_nulls()}; + auto const col3_expected = floats_col{{10., 40., 5., 2.}, no_nulls()}; + cudf::table_view expected{{col1_expected, col2_expected, col3_expected}}; + + auto result = cudf::drop_nulls(input, keys, 
cudf::test::get_default_stream()); - auto const result = cudf::stable_distinct( - input, key_idx, KEEP_NONE, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); } } + +TEST_F(StreamCompactionTest, Unique) +{ + auto const col1 = int32s_col{5, 4, 3, 5, 8, 5}; + auto const col2 = floats_col{4., 5., 3., 4., 9., 4.}; + auto const col1_key = int32s_col{20, 20, 20, 19, 21, 9}; + auto const col2_key = int32s_col{19, 19, 20, 20, 9, 21}; + + cudf::table_view input{{col1, col2, col1_key, col2_key}}; + std::vector keys = {2, 3}; + + { + // KEEP_FIRST + auto const exp_col1_first = int32s_col{5, 3, 5, 8, 5}; + auto const exp_col2_first = floats_col{4., 3., 4., 9., 4.}; + auto const exp_col1_key_first = int32s_col{20, 20, 19, 21, 9}; + auto const exp_col2_key_first = int32s_col{19, 20, 20, 9, 21}; + cudf::table_view expected_first{ + {exp_col1_first, exp_col2_first, exp_col1_key_first, exp_col2_key_first}}; + + auto const result = cudf::unique(input, + keys, + cudf::duplicate_keep_option::KEEP_FIRST, + cudf::null_equality::EQUAL, + cudf::test::get_default_stream()); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_first, *result); + } + + { + // KEEP_LAST + auto const exp_col1_last = int32s_col{4, 3, 5, 8, 5}; + auto const exp_col2_last = floats_col{5., 3., 4., 9., 4.}; + auto const exp_col1_key_last = int32s_col{20, 20, 19, 21, 9}; + auto const exp_col2_key_last = int32s_col{19, 20, 20, 9, 21}; + cudf::table_view expected_last{ + {exp_col1_last, exp_col2_last, exp_col1_key_last, exp_col2_key_last}}; + + auto const result = cudf::unique(input, + keys, + cudf::duplicate_keep_option::KEEP_LAST, + cudf::null_equality::EQUAL, + cudf::test::get_default_stream()); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_last, *result); + } + + { + // KEEP_NONE + auto const exp_col1_unique = int32s_col{3, 5, 8, 5}; + auto const exp_col2_unique = floats_col{3., 4., 9., 4.}; + auto const exp_col1_key_unique = int32s_col{20, 19, 21, 9}; + auto const exp_col2_key_unique = int32s_col{20, 20, 9, 21}; + cudf::table_view expected_unique{ + {exp_col1_unique, exp_col2_unique, exp_col1_key_unique, exp_col2_key_unique}}; + + auto const result = cudf::unique(input, + keys, + cudf::duplicate_keep_option::KEEP_NONE, + cudf::null_equality::EQUAL, + cudf::test::get_default_stream()); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_unique, *result); + } +} + +TEST_F(StreamCompactionTest, Distinct) +{ + // Column(s) used to test needs to have different rows for the same keys. 
+  auto const col1 = int32s_col{0, 1, 2, 3, 4, 5, 6};
+  auto const col2 = floats_col{10, 11, 12, 13, 14, 15, 16};
+  auto const keys1 = int32s_col{20, 20, 20, 20, 19, 21, 9};
+  auto const keys2 = int32s_col{19, 19, 19, 20, 20, 9, 21};
+
+  auto const input = cudf::table_view{{col1, col2, keys1, keys2}};
+  auto const key_idx = std::vector<cudf::size_type>{2, 3};
+
+  // KEEP_FIRST
+  {
+    auto const exp_col1_sort = int32s_col{6, 4, 0, 3, 5};
+    auto const exp_col2_sort = floats_col{16, 14, 10, 13, 15};
+    auto const exp_keys1_sort = int32s_col{9, 19, 20, 20, 21};
+    auto const exp_keys2_sort = int32s_col{21, 20, 19, 20, 9};
+    auto const expected_sort =
+      cudf::table_view{{exp_col1_sort, exp_col2_sort, exp_keys1_sort, exp_keys2_sort}};
+
+    auto const result = cudf::distinct(input,
+                                       key_idx,
+                                       cudf::duplicate_keep_option::KEEP_FIRST,
+                                       cudf::null_equality::EQUAL,
+                                       cudf::nan_equality::ALL_EQUAL,
+                                       cudf::test::get_default_stream());
+    auto const result_sort =
+      cudf::sort_by_key(*result, result->select(key_idx), {}, {}, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort, *result_sort);
+  }
+
+  // KEEP_LAST
+  {
+    auto const exp_col1_sort = int32s_col{6, 4, 2, 3, 5};
+    auto const exp_col2_sort = floats_col{16, 14, 12, 13, 15};
+    auto const exp_keys1_sort = int32s_col{9, 19, 20, 20, 21};
+    auto const exp_keys2_sort = int32s_col{21, 20, 19, 20, 9};
+    auto const expected_sort =
+      cudf::table_view{{exp_col1_sort, exp_col2_sort, exp_keys1_sort, exp_keys2_sort}};
+
+    auto const result = cudf::distinct(input,
+                                       key_idx,
+                                       cudf::duplicate_keep_option::KEEP_LAST,
+                                       cudf::null_equality::EQUAL,
+                                       cudf::nan_equality::ALL_EQUAL,
+                                       cudf::test::get_default_stream());
+    auto const result_sort =
+      cudf::sort_by_key(*result, result->select(key_idx), {}, {}, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort, *result_sort);
+  }
+
+  // KEEP_NONE
+  {
+    auto const exp_col1_sort = int32s_col{6, 4, 3, 5};
+    auto const exp_col2_sort = floats_col{16, 14, 13, 15};
+    auto const exp_keys1_sort = int32s_col{9, 19, 20, 21};
+    auto const exp_keys2_sort = int32s_col{21, 20, 20, 9};
+    auto const expected_sort =
+      cudf::table_view{{exp_col1_sort, exp_col2_sort, exp_keys1_sort, exp_keys2_sort}};
+
+    auto const result = cudf::distinct(input,
+                                       key_idx,
+                                       cudf::duplicate_keep_option::KEEP_NONE,
+                                       cudf::null_equality::EQUAL,
+                                       cudf::nan_equality::ALL_EQUAL,
+                                       cudf::test::get_default_stream());
+    auto const result_sort =
+      cudf::sort_by_key(*result, result->select(key_idx), {}, {}, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort, *result_sort);
+  }
+}
+
+TEST_F(StreamCompactionTest, ApplyBooleanMask)
+{
+  auto const col = int32s_col{
+    9668, 9590, 9526, 9205, 9434, 9347, 9160, 9569, 9143, 9807, 9606, 9446, 9279, 9822, 9691};
+  cudf::test::fixed_width_column_wrapper<bool> mask({false,
+                                                     false,
+                                                     true,
+                                                     false,
+                                                     false,
+                                                     true,
+                                                     false,
+                                                     true,
+                                                     false,
+                                                     true,
+                                                     false,
+                                                     false,
+                                                     true,
+                                                     false,
+                                                     true});
+  cudf::table_view input({col});
+  auto const col_expected = int32s_col{9526, 9347, 9569, 9807, 9279, 9691};
+  cudf::table_view expected({col_expected});
+  auto const result = cudf::apply_boolean_mask(input, mask, cudf::test::get_default_stream());
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+}
+
+TEST_F(StreamCompactionTest, UniqueCountColumn)
+{
+  std::vector<int32_t> const input = {1, 3, 3, 4, 31, 1, 8, 2, 0, 4, 1,
+                                      4, 10, 40, 31, 42, 0, 42, 8, 5, 4};
+
+  cudf::test::fixed_width_column_wrapper<int32_t> input_col(input.begin(), input.end());
+  std::vector<int32_t> input_data(input.begin(), input.end());
+
+  auto const new_end = std::unique(input_data.begin(), input_data.end());
+  auto const expected = std::distance(input_data.begin(), new_end);
+  EXPECT_EQ(
+    expected,
+    cudf::unique_count(
+      input_col, null_policy::INCLUDE, nan_policy::NAN_IS_VALID, cudf::test::get_default_stream()));
+}
+
+TEST_F(StreamCompactionTest, UniqueCountTable)
+{
+  std::vector<int32_t> const input1 = {1, 3, 3, 3, 4, 31, 1, 8, 2, 0, 4,
+                                       1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4};
+  std::vector<int32_t> const input2 = {3, 3, 3, 4, 31, 1, 8, 5, 0, 4, 1,
+                                       4, 10, 40, 31, 42, 0, 42, 8, 5, 4, 1};
+
+  std::vector<std::pair<int32_t, int32_t>> pair_input;
+  std::transform(input1.begin(),
+                 input1.end(),
+                 input2.begin(),
+                 std::back_inserter(pair_input),
+                 [](int32_t a, int32_t b) { return std::pair(a, b); });
+
+  cudf::test::fixed_width_column_wrapper<int32_t> input_col1(input1.begin(), input1.end());
+  cudf::test::fixed_width_column_wrapper<int32_t> input_col2(input2.begin(), input2.end());
+  cudf::table_view input_table({input_col1, input_col2});
+
+  auto const new_end = std::unique(pair_input.begin(), pair_input.end());
+  auto const result = std::distance(pair_input.begin(), new_end);
+  EXPECT_EQ(
+    result,
+    cudf::unique_count(input_table, null_equality::EQUAL, cudf::test::get_default_stream()));
+}
+
+TEST_F(StreamCompactionTest, DistinctCountColumn)
+{
+  std::vector<int32_t> const input = {1, 3, 3, 4, 31, 1, 8, 2, 0, 4, 1,
+                                      4, 10, 40, 31, 42, 0, 42, 8, 5, 4};
+
+  cudf::test::fixed_width_column_wrapper<int32_t> input_col(input.begin(), input.end());
+
+  auto const expected =
+    static_cast<cudf::size_type>(std::set<int32_t>(input.begin(), input.end()).size());
+  EXPECT_EQ(
+    expected,
+    cudf::distinct_count(
+      input_col, null_policy::INCLUDE, nan_policy::NAN_IS_VALID, cudf::test::get_default_stream()));
+}
+
+TEST_F(StreamCompactionTest, DistinctCountTable)
+{
+  std::vector<int32_t> const input1 = {1, 3, 3, 3, 4, 31, 1, 8, 2, 0, 4,
+                                       1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4};
+  std::vector<int32_t> const input2 = {3, 3, 3, 4, 31, 1, 8, 5, 0, 4, 1,
+                                       4, 10, 40, 31, 42, 0, 42, 8, 5, 4, 1};
+
+  std::vector<std::pair<int32_t, int32_t>> pair_input;
+  std::transform(input1.begin(),
+                 input1.end(),
+                 input2.begin(),
+                 std::back_inserter(pair_input),
+                 [](int32_t a, int32_t b) { return std::pair(a, b); });
+
+  cudf::test::fixed_width_column_wrapper<int32_t> input_col1(input1.begin(), input1.end());
+  cudf::test::fixed_width_column_wrapper<int32_t> input_col2(input2.begin(), input2.end());
+  cudf::table_view input_table({input_col1, input_col2});
+
+  auto const expected = static_cast<cudf::size_type>(
+    std::set<std::pair<int32_t, int32_t>>(pair_input.begin(), pair_input.end()).size());
+  EXPECT_EQ(
+    expected,
+    cudf::distinct_count(input_table, null_equality::EQUAL, cudf::test::get_default_stream()));
+}
diff --git a/cpp/tests/streams/transform_test.cpp b/cpp/tests/streams/transform_test.cpp
new file mode 100644
index 00000000000..9187672221c
--- /dev/null
+++ b/cpp/tests/streams/transform_test.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+class TransformTest : public cudf::test::BaseFixture {};
+
+template <class dtype, class Data>
+void test_udf(char const udf[], Data data_init, cudf::size_type size, bool is_ptx)
+{
+  auto all_valid = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
+  auto data_iter = cudf::detail::make_counting_transform_iterator(0, data_init);
+  cudf::test::fixed_width_column_wrapper<dtype, typename decltype(data_iter)::value_type> in(
+    data_iter, data_iter + size, all_valid);
+  cudf::transform(
+    in, udf, cudf::data_type(cudf::type_to_id<dtype>()), is_ptx, cudf::test::get_default_stream());
+}
+
+TEST_F(TransformTest, Transform)
+{
+  char const* cuda =
+    R"***(
+__device__ inline void fdsf (
+  float* C,
+  float a
+)
+{
+  *C = a*a*a*a;
+}
+)***";
+
+  char const* ptx =
+    R"***(
+//
+// Generated by NVIDIA NVVM Compiler
+//
+// Compiler Build ID: CL-24817639
+// Cuda compilation tools, release 10.0, V10.0.130
+// Based on LLVM 3.4svn
+//
+
+.version 6.3
+.target sm_70
+.address_size 64
+
+  // .globl _ZN8__main__7add$241Ef
+.common .global .align 8 .u64 _ZN08NumbaEnv8__main__7add$241Ef;
+.common .global .align 8 .u64 _ZN08NumbaEnv5numba7targets7numbers14int_power_impl12$3clocals$3e13int_power$242Efx;
+
+.visible .func (.param .b32 func_retval0) _ZN8__main__7add$241Ef(
+  .param .b64 _ZN8__main__7add$241Ef_param_0,
+  .param .b32 _ZN8__main__7add$241Ef_param_1
+)
+{
+  .reg .f32 %f<4>;
+  .reg .b32 %r<2>;
+  .reg .b64 %rd<2>;
+
+
+  ld.param.u64 %rd1, [_ZN8__main__7add$241Ef_param_0];
+  ld.param.f32 %f1, [_ZN8__main__7add$241Ef_param_1];
+  mul.f32 %f2, %f1, %f1;
+  mul.f32 %f3, %f2, %f2;
+  st.f32 [%rd1], %f3;
+  mov.u32 %r1, 0;
+  st.param.b32 [func_retval0+0], %r1;
+  ret;
+}
+)***";
+
+  auto data_init = [](cudf::size_type row) { return row % 3; };
+  test_udf<float>(cuda, data_init, 500, false);
+  test_udf<float>(ptx, data_init, 500, true);
+}
+
+TEST_F(TransformTest, ComputeColumn)
+{
+  auto c_0 = cudf::test::fixed_width_column_wrapper<int32_t>{3, 20, 1, 50};
+  auto c_1 = cudf::test::fixed_width_column_wrapper<int32_t>{10, 7, 20, 0};
+  auto table = cudf::table_view{{c_0, c_1}};
+  auto col_ref_0 = cudf::ast::column_reference(0);
+  auto col_ref_1 = cudf::ast::column_reference(1);
+  auto expression = cudf::ast::operation(cudf::ast::ast_operator::ADD, col_ref_0, col_ref_1);
+  cudf::compute_column(table, expression, cudf::test::get_default_stream());
+}
+
+TEST_F(TransformTest, BoolsToMask)
+{
+  std::vector<bool> input({1, 0, 1, 0, 1, 0, 1, 0});
+  cudf::test::fixed_width_column_wrapper<bool> input_column(input.begin(), input.end());
+  cudf::bools_to_mask(input_column, cudf::test::get_default_stream());
+}
+
+TEST_F(TransformTest, MaskToBools)
+{
+  cudf::mask_to_bools(nullptr, 0, 0, cudf::test::get_default_stream());
+}
+
+TEST_F(TransformTest, Encode)
+{
+  cudf::test::fixed_width_column_wrapper<int32_t> input{{1, 2, 3, 2, 3, 2, 1}};
+  cudf::encode(cudf::table_view({input}), cudf::test::get_default_stream());
+}
+
+TEST_F(TransformTest, OneHotEncode)
+{
+  auto input = cudf::test::fixed_width_column_wrapper<int32_t>{8, 8, 8, 9, 9};
+  auto category = cudf::test::fixed_width_column_wrapper<int32_t>{8, 9};
+  cudf::one_hot_encode(input, category, cudf::test::get_default_stream());
+}
+
+TEST_F(TransformTest, NaNsToNulls)
+{
+  std::vector<float> input = {1, 2, 3, 4, 5};
+  std::vector<bool> mask = {true, true, true, true, false, false};
+  auto input_column =
+    cudf::test::fixed_width_column_wrapper<float>(input.begin(), input.end(), mask.begin());
+  cudf::nans_to_nulls(input_column, cudf::test::get_default_stream());
+}
+
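+// Note: per the libcudf docs, row_bit_count returns an INT32 column with one
+// entry per row giving that row's size in bits, including the overhead of
+// offsets and validity for variable-width and nested types.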
+TEST_F(TransformTest, RowBitCount)
+{
+  std::vector<std::string> strings{"abc", "ï", "", "z", "bananas", "warp", "", "zing"};
+  cudf::test::strings_column_wrapper col(strings.begin(), strings.end());
+  cudf::row_bit_count(cudf::table_view({col}), cudf::test::get_default_stream());
+}
+
+TEST_F(TransformTest, SegmentedRowBitCount)
+{
+  // clang-format off
+  std::vector<std::string> const strings { "daïs", "def", "", "z", "bananas", "warp", "", "zing" };
+  std::vector<bool> const valids  { 1, 0, 0, 1, 0, 1, 1, 1 };
+  // clang-format on
+  cudf::test::strings_column_wrapper const col(strings.begin(), strings.end(), valids.begin());
+  auto const input = cudf::table_view({col});
+  auto constexpr segment_length = 2;
+  cudf::segmented_row_bit_count(input, segment_length, cudf::test::get_default_stream());
+}
diff --git a/cpp/tests/strings/ipv4_tests.cpp b/cpp/tests/strings/ipv4_tests.cpp
index 3bfe0f9727e..ea3ac439e62 100644
--- a/cpp/tests/strings/ipv4_tests.cpp
+++ b/cpp/tests/strings/ipv4_tests.cpp
@@ -40,8 +40,8 @@ TEST_F(StringsConvertTest, IPv4ToIntegers)
   auto strings_view = cudf::strings_column_view(strings);
   auto results = cudf::strings::ipv4_to_integers(strings_view);
 
-  std::vector<int64_t> h_expected{0, 0, 0, 698875905, 2130706433, 700776449, 3232235521};
-  cudf::test::fixed_width_column_wrapper<int64_t> expected(
+  std::vector<uint32_t> h_expected{0, 0, 0, 698875905, 2130706433, 700776449, 3232235521};
+  cudf::test::fixed_width_column_wrapper<uint32_t> expected(
     h_expected.cbegin(),
     h_expected.cend(),
     thrust::make_transform_iterator(h_strings.begin(),
@@ -59,8 +59,8 @@ TEST_F(StringsConvertTest, IntegersToIPv4)
     thrust::make_transform_iterator(h_strings.begin(),
                                     [](auto const str) { return str != nullptr; }));
 
-  std::vector<int64_t> h_column{3232235521, 167772161, 0, 0, 700055553, 700776449};
-  cudf::test::fixed_width_column_wrapper<int64_t> column(
+  std::vector<uint32_t> h_column{3232235521, 167772161, 0, 0, 700055553, 700776449};
+  cudf::test::fixed_width_column_wrapper<uint32_t> column(
     h_column.cbegin(),
     h_column.cend(),
     thrust::make_transform_iterator(h_strings.begin(),
diff --git a/cpp/tests/strings/split_tests.cpp b/cpp/tests/strings/split_tests.cpp
index 4c020cb4c29..7ece08b19f2 100644
--- a/cpp/tests/strings/split_tests.cpp
+++ b/cpp/tests/strings/split_tests.cpp
@@ -307,24 +307,46 @@ TEST_F(StringsSplitTest, SplitRecordWhitespaceWithMaxSplit)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected);
 }
 
-TEST_F(StringsSplitTest, SplitRecordAllEmpty)
+TEST_F(StringsSplitTest, SplitAllEmpty)
 {
   auto input = cudf::test::strings_column_wrapper({"", "", "", ""});
   auto sv = cudf::strings_column_view(input);
+  auto empty = cudf::string_scalar("");
   auto delimiter = cudf::string_scalar("s");
+
+  auto result = cudf::strings::split(sv, delimiter);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), input);
+  result = cudf::strings::rsplit(sv, delimiter);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), input);
+
+  // the whitespace delimiter hits a special case: when nothing matches, an
+  // all-null column is returned
+  auto expected = cudf::test::strings_column_wrapper({"", "", "", ""}, {0, 0, 0, 0});
+  result = cudf::strings::split(sv, empty);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), expected);
+  result = cudf::strings::rsplit(sv, empty);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), expected);
+}
+
+TEST_F(StringsSplitTest, SplitRecordAllEmpty)
+{
+  auto input = cudf::test::strings_column_wrapper({"", "", "", ""});
+  auto sv = cudf::strings_column_view(input);
   auto empty = cudf::string_scalar("");
+  auto delimiter = cudf::string_scalar("s");
 
   using LCW = cudf::test::lists_column_wrapper<cudf::string_view>;
-  LCW expected({LCW{}, LCW{}, LCW{}, LCW{}});
+  LCW expected({LCW{""}, LCW{""}, LCW{""}, LCW{""}});
+  LCW expected_empty({LCW{}, LCW{}, LCW{}, LCW{}});
+
   auto result = cudf::strings::split_record(sv, delimiter);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected);
   result = cudf::strings::split_record(sv, empty);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected_empty);
   result = cudf::strings::rsplit_record(sv, delimiter);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected);
   result = cudf::strings::rsplit_record(sv, empty);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected_empty);
 }
 
 TEST_F(StringsSplitTest, MultiByteDelimiters)
@@ -575,6 +597,23 @@ TEST_F(StringsSplitTest, SplitRegexWordBoundary)
   }
 }
 
+TEST_F(StringsSplitTest, SplitRegexAllEmpty)
+{
+  auto input = cudf::test::strings_column_wrapper({"", "", "", ""});
+  auto sv = cudf::strings_column_view(input);
+  auto prog = cudf::strings::regex_program::create("[ _]");
+
+  auto result = cudf::strings::split_re(sv, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), input);
+  result = cudf::strings::rsplit_re(sv, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), input);
+
+  auto rec_result = cudf::strings::split_record_re(sv, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), input);
+  rec_result = cudf::strings::rsplit_record_re(sv, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), input);
+}
+
 TEST_F(StringsSplitTest, RSplitRecord)
 {
   std::vector<char const*> h_strings{
diff --git a/cpp/tests/structs/structs_column_tests.cpp b/cpp/tests/structs/structs_column_tests.cpp
index df005dfa1dc..f0010fc1ed9 100644
--- a/cpp/tests/structs/structs_column_tests.cpp
+++ b/cpp/tests/structs/structs_column_tests.cpp
@@ -448,12 +448,12 @@ TYPED_TEST(TypedStructColumnWrapperTest, ListOfStructOfList)
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; });
   auto [null_mask, null_count] =
     detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5);
-  auto list_of_struct_of_list = cudf::make_lists_column(
-    5,
-    std::move(fixed_width_column_wrapper<size_type>{0, 2, 4, 6, 8, 10}.release()),
-    std::move(struct_of_lists_col),
-    null_count,
-    std::move(null_mask));
+  auto list_of_struct_of_list =
+    cudf::make_lists_column(5,
+                            fixed_width_column_wrapper<size_type>{0, 2, 4, 6, 8, 10}.release(),
+                            std::move(struct_of_lists_col),
+                            null_count,
+                            std::move(null_mask));
 
   // Compare with expected values.
@@ -468,12 +468,12 @@ TYPED_TEST(TypedStructColumnWrapperTest, ListOfStructOfList)
   std::tie(null_mask, null_count) =
     detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5);
-  auto expected_level3_list = cudf::make_lists_column(
-    5,
-    std::move(fixed_width_column_wrapper<size_type>{0, 0, 2, 4, 4, 6}.release()),
-    std::move(expected_level2_struct),
-    null_count,
-    std::move(null_mask));
+  auto expected_level3_list =
+    cudf::make_lists_column(5,
+                            fixed_width_column_wrapper<size_type>{0, 0, 2, 4, 4, 6}.release(),
+                            std::move(expected_level2_struct),
+                            null_count,
+                            std::move(null_mask));
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*list_of_struct_of_list, *expected_level3_list);
 }
@@ -498,12 +498,12 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfListOfStruct)
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; });
   auto [null_mask, null_count] = detail::make_null_mask(list_validity, list_validity + 5);
-  auto lists_col = cudf::make_lists_column(
-    5,
-    std::move(fixed_width_column_wrapper<size_type>{0, 2, 4, 6, 8, 10}.release()),
-    std::move(structs_col),
-    null_count,
-    std::move(null_mask));
+  auto lists_col =
+    cudf::make_lists_column(5,
+                            fixed_width_column_wrapper<size_type>{0, 2, 4, 6, 8, 10}.release(),
+                            std::move(structs_col),
+                            null_count,
+                            std::move(null_mask));
 
   std::vector<std::unique_ptr<cudf::column>> cols;
   cols.push_back(std::move(lists_col));
@@ -519,12 +519,12 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfListOfStruct)
   std::tie(null_mask, null_count) = detail::make_null_mask(list_validity, list_validity + 5);
-  auto expected_lists_col = cudf::make_lists_column(
-    5,
-    std::move(fixed_width_column_wrapper<size_type>{0, 2, 4, 6, 8, 10}.release()),
-    std::move(expected_structs_col),
-    null_count,
-    std::move(null_mask));
+  auto expected_lists_col =
+    cudf::make_lists_column(5,
+                            fixed_width_column_wrapper<size_type>{0, 2, 4, 6, 8, 10}.release(),
+                            std::move(expected_structs_col),
+                            null_count,
+                            std::move(null_mask));
 
   // Test that the lists child column is as expected.
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_lists_col, struct_of_list_of_struct->child(0));
diff --git a/cpp/tests/utilities_tests/batched_memset_tests.cu b/cpp/tests/utilities_tests/batched_memset_tests.cu
new file mode 100644
index 00000000000..9fc5baeec97
--- /dev/null
+++ b/cpp/tests/utilities_tests/batched_memset_tests.cu
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+
+template <typename T>
+struct MultiBufferTestIntegral : public cudf::test::BaseFixture {};
+
+TEST(MultiBufferTestIntegral, BasicTest1)
+{
+  std::vector<size_t> const BUF_SIZES{
+    50000, 4, 1000, 0, 250000, 1, 100, 8000, 0, 1, 100, 1000, 10000, 100000, 0, 1, 100000};
+
+  // Device init
+  auto stream = cudf::get_default_stream();
+  auto mr = rmm::mr::get_current_device_resource();
+
+  // Creating base vector for data and setting it to all 0xFF
+  std::vector<std::vector<uint64_t>> expected;
+  std::transform(BUF_SIZES.begin(), BUF_SIZES.end(), std::back_inserter(expected), [](auto size) {
+    return std::vector<uint64_t>(size + 2000, std::numeric_limits<uint64_t>::max());
+  });
+
+  // set buffer region to other value
+  std::for_each(thrust::make_zip_iterator(thrust::make_tuple(expected.begin(), BUF_SIZES.begin())),
+                thrust::make_zip_iterator(thrust::make_tuple(expected.end(), BUF_SIZES.end())),
+                [](auto elem) {
+                  std::fill_n(
+                    thrust::get<0>(elem).begin() + 1000, thrust::get<1>(elem), 0xEEEEEEEEEEEEEEEE);
+                });
+
+  // Copy host vector data to device
+  std::vector<rmm::device_uvector<uint64_t>> device_bufs;
+  std::transform(expected.begin(),
+                 expected.end(),
+                 std::back_inserter(device_bufs),
+                 [stream, mr](auto const& vec) {
+                   return cudf::detail::make_device_uvector_async(vec, stream, mr);
+                 });
+
+  // Initialize device buffers for memset
+  std::vector<cudf::device_span<uint64_t>> memset_bufs;
+  std::transform(
+    thrust::make_zip_iterator(thrust::make_tuple(device_bufs.begin(), BUF_SIZES.begin())),
+    thrust::make_zip_iterator(thrust::make_tuple(device_bufs.end(), BUF_SIZES.end())),
+    std::back_inserter(memset_bufs),
+    [](auto const& elem) {
+      return cudf::device_span<uint64_t>(thrust::get<0>(elem).data() + 1000, thrust::get<1>(elem));
+    });
+
+  // Function Call
+  cudf::io::detail::batched_memset(memset_bufs, uint64_t{0}, stream);
+
+  // Set all buffer regions to 0 for expected comparison
+  std::for_each(
+    thrust::make_zip_iterator(thrust::make_tuple(expected.begin(), BUF_SIZES.begin())),
+    thrust::make_zip_iterator(thrust::make_tuple(expected.end(), BUF_SIZES.end())),
+    [](auto elem) { std::fill_n(thrust::get<0>(elem).begin() + 1000, thrust::get<1>(elem), 0UL); });
+
+  // Compare to see that only given buffers are zeroed out
+  std::for_each(
+    thrust::make_zip_iterator(thrust::make_tuple(device_bufs.begin(), expected.begin())),
+    thrust::make_zip_iterator(thrust::make_tuple(device_bufs.end(), expected.end())),
+    [stream](auto const& elem) {
+      auto after_memset = cudf::detail::make_std_vector_async(thrust::get<0>(elem), stream);
+      EXPECT_TRUE(
+        std::equal(thrust::get<1>(elem).begin(), thrust::get<1>(elem).end(), after_memset.begin()));
+    });
+}
diff --git a/dependencies.yaml b/dependencies.yaml
index 67b532d5ef6..f10b1013f71 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -10,25 +10,29 @@ files:
       - build_all
       - build_cpp
       - build_python_common
-      - build_python_cudf
       - cuda
       - cuda_version
+      - depends_on_cupy
+      - depends_on_librmm
+      - depends_on_rmm
       - develop
       - docs
-      - libarrow_build
       - notebooks
       - py_version
+      - pyarrow_run
       - rapids_build_skbuild
       - rapids_build_setuptools
       - run_common
       - run_cudf
+      - run_pylibcudf
       - run_dask_cudf
       - run_custreamz
       - test_cpp
      - test_python_common
       - test_python_cudf
       - test_python_dask_cudf
-      - depends_on_cupy
+      - test_python_pylibcudf
+      - test_python_cudf_pandas
   test_static_build:
     output: none
     includes:
       - build_cpp
   test_cpp:
     output: none
     includes:
       - cuda_version
-      - libarrow_run
       - test_cpp
   test_python:
     output: none
    includes:
       - cuda_version
       - 
py_version - - pyarrow_run - test_python_common - test_python_cudf - test_python_dask_cudf + - test_python_cudf_pandas test_java: output: none includes: @@ -55,7 +58,6 @@ files: - build_all - cuda - cuda_version - - libarrow_run - test_java test_notebooks: output: none @@ -74,16 +76,15 @@ files: - cuda - cuda_version - docs - - libarrow_run - py_version - py_rapids_build_cudf: + py_build_cudf: output: pyproject pyproject_dir: python/cudf extras: table: build-system includes: - rapids_build_skbuild - py_build_cudf: + py_rapids_build_cudf: output: pyproject pyproject_dir: python/cudf extras: @@ -92,7 +93,10 @@ files: includes: - build_base - build_python_common - - build_python_cudf + - depends_on_pylibcudf + - depends_on_libcudf + - depends_on_librmm + - depends_on_rmm py_run_cudf: output: pyproject pyproject_dir: python/cudf @@ -103,6 +107,9 @@ files: - run_cudf - pyarrow_run - depends_on_cupy + - depends_on_libcudf + - depends_on_pylibcudf + - depends_on_rmm py_test_cudf: output: pyproject pyproject_dir: python/cudf @@ -112,6 +119,61 @@ files: includes: - test_python_common - test_python_cudf + py_build_libcudf: + output: pyproject + pyproject_dir: python/libcudf + extras: + table: build-system + includes: + - rapids_build_skbuild + py_rapids_build_libcudf: + output: pyproject + pyproject_dir: python/libcudf + extras: + table: tool.rapids-build-backend + key: requires + includes: + - build_base + - build_cpp + - depends_on_librmm + py_build_pylibcudf: + output: pyproject + pyproject_dir: python/pylibcudf + extras: + table: build-system + includes: + - rapids_build_skbuild + py_rapids_build_pylibcudf: + output: pyproject + pyproject_dir: python/pylibcudf + extras: + table: tool.rapids-build-backend + key: requires + includes: + - build_base + - build_python_common + - depends_on_libcudf + - depends_on_librmm + - depends_on_rmm + py_run_pylibcudf: + output: pyproject + pyproject_dir: python/pylibcudf + extras: + table: project + includes: + - depends_on_libcudf + - depends_on_rmm + - pyarrow_run + - run_pylibcudf + py_test_pylibcudf: + output: pyproject + pyproject_dir: python/pylibcudf + extras: + table: project.optional-dependencies + key: test + includes: + - test_python_common + - test_python_pylibcudf py_test_pandas_cudf: output: pyproject pyproject_dir: python/cudf @@ -142,7 +204,7 @@ files: table: project includes: - run_cudf_polars - - depends_on_cudf + - depends_on_pylibcudf py_test_cudf_polars: output: pyproject pyproject_dir: python/cudf_polars @@ -177,14 +239,14 @@ files: includes: - test_python_common - test_python_dask_cudf - py_rapids_build_cudf_kafka: + py_build_cudf_kafka: output: pyproject pyproject_dir: python/cudf_kafka extras: table: build-system includes: - rapids_build_skbuild - py_build_cudf_kafka: + py_rapids_build_cudf_kafka: output: pyproject pyproject_dir: python/cudf_kafka extras: @@ -301,10 +363,10 @@ dependencies: - &rapids_build_backend rapids-build-backend>=0.3.0,<0.4.0.dev0 - output_types: conda packages: - - scikit-build-core>=0.7.0 + - scikit-build-core>=0.10.0 - output_types: [requirements, pyproject] packages: - - scikit-build-core[pyproject]>=0.7.0 + - scikit-build-core[pyproject]>=0.10.0 rapids_build_setuptools: common: - output_types: [requirements, pyproject] @@ -317,69 +379,11 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - cython>=3.0.3 - # Hard pin the patch version used during the build. This must be kept - # in sync with the version pinned in get_arrow.cmake. 
- - pyarrow==16.1.0.* - - output_types: pyproject - packages: - # Hard pin the patch version used during the build. - # Sync with conda build constraint & wheel run constraint. - # TODO: Change to `2.0.*` for NumPy 2 - - numpy==1.23.* - build_python_cudf: - common: - - output_types: conda - packages: - - &rmm_unsuffixed rmm==24.10.*,>=0.0.0a0 - - pip - - pip: - - git+https://github.com/python-streamz/streamz.git@master - - output_types: requirements - packages: - # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for rmm-cu{11,12}. - - --extra-index-url=https://pypi.nvidia.com - - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple - - git+https://github.com/python-streamz/streamz.git@master - specific: - - output_types: [requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - cuda_suffixed: "true" - packages: - - rmm-cu12==24.10.*,>=0.0.0a0 - - matrix: - cuda: "11.*" - cuda_suffixed: "true" - packages: - - rmm-cu11==24.10.*,>=0.0.0a0 - - {matrix: null, packages: [*rmm_unsuffixed]} - libarrow_build: - common: - - output_types: conda - packages: - # Hard pin the Arrow patch version used during the build. This must - # be kept in sync with the version pinned in get_arrow.cmake. - - libarrow-acero==16.1.0.* - - libarrow-dataset==16.1.0.* - - libarrow==16.1.0.* - - libparquet==16.1.0.* - libarrow_run: - common: - - output_types: conda - packages: - # Allow runtime version to float up to patch version - - libarrow-acero>=16.1.0,<16.2.0a0 - - libarrow-dataset>=16.1.0,<16.2.0a0 - - libarrow>=16.1.0,<16.2.0a0 - - libparquet>=16.1.0,<16.2.0a0 pyarrow_run: common: - output_types: [conda, requirements, pyproject] packages: - # Allow runtime version to float up to patch version - - pyarrow>=16.1.0,<16.2.0a0 + - pyarrow>=14.0.0,<18.0.0a0 cuda_version: specific: - output_types: conda @@ -537,10 +541,6 @@ dependencies: specific: - output_types: conda matrices: - - matrix: - py: "3.9" - packages: - - python=3.9 - matrix: py: "3.10" packages: @@ -551,15 +551,37 @@ dependencies: - python=3.11 - matrix: packages: - - python>=3.9,<3.12 + - python>=3.10,<3.12 run_common: common: - output_types: [conda, requirements, pyproject] packages: - fsspec>=0.6.0 - # TODO: Update `numpy` in `build_python_common` when dropping `<2.0a0` - - numpy>=1.23,<2.0a0 + - &numpy numpy>=1.23,<3.0a0 - pandas>=2.0,<2.2.3dev0 + run_pylibcudf: + common: + - output_types: [conda, requirements, pyproject] + packages: + - nvtx>=0.2.1 + - packaging + - typing_extensions>=4.0.0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for rmm. 
+ - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [conda, requirements, pyproject] + matrices: + - matrix: {cuda: "12.*"} + packages: + - cuda-python>=12.0,<13.0a0 + - matrix: {cuda: "11.*"} + packages: &run_pylibcudf_packages_all_cu11 + - cuda-python>=11.7.1,<12.0a0 + - {matrix: null, packages: *run_pylibcudf_packages_all_cu11} run_cudf: common: - output_types: [conda, requirements, pyproject] @@ -570,9 +592,6 @@ dependencies: - packaging - rich - typing_extensions>=4.0.0 - - output_types: conda - packages: - - *rmm_unsuffixed - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -604,19 +623,16 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - rmm-cu12==24.10.*,>=0.0.0a0 - pynvjitlink-cu12>=0.0.0a0 - matrix: cuda: "12.*" cuda_suffixed: "false" packages: - - *rmm_unsuffixed - *pynvjitlink_unsuffixed - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - rmm-cu11==24.10.*,>=0.0.0a0 - cubinlinker-cu11 - ptxcompiler-cu11 - matrix: @@ -625,7 +641,6 @@ dependencies: packages: &run_cudf_cu11_unsuffixed - *cubinlinker_unsuffixed - *ptxcompiler_unsuffixed - - *rmm_unsuffixed - {matrix: null, packages: *run_cudf_cu11_unsuffixed} run_cudf_polars: common: @@ -673,6 +688,7 @@ dependencies: - *cmake_ver - maven - openjdk=8.* + - boost test_python_common: common: - output_types: [conda, requirements, pyproject] @@ -680,6 +696,36 @@ dependencies: - pytest<8 - pytest-cov - pytest-xdist + specific: + # Define additional constraints for testing with oldest dependencies. + - output_types: [conda, requirements] + matrices: + - matrix: {dependencies: "oldest"} + packages: + - numba==0.57.* + - numpy==1.23.* + - pandas==2.0.* + - pyarrow==14.0.0 + - cupy==12.0.0 # ignored as pip constraint + - matrix: + packages: + - output_types: requirements + # Using --constraints for pip install, so we list cupy multiple times + matrices: + - matrix: {dependencies: "oldest"} + packages: + - cupy-cuda11x==12.0.0 + - cupy-cuda12x==12.0.0 + - matrix: + packages: + test_python_pylibcudf: + common: + - output_types: [conda, requirements, pyproject] + packages: + - fastavro>=0.22.9 + - hypothesis + - *numpy + - pandas test_python_cudf: common: - output_types: [conda, requirements, pyproject] @@ -724,6 +770,56 @@ dependencies: packages: - dask-cuda==24.10.*,>=0.0.0a0 - *numba + depends_on_libcudf: + common: + - output_types: conda + packages: + - &libcudf_unsuffixed libcudf==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for libcudf-cu{11,12}. + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libcudf-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - libcudf-cu11==24.10.*,>=0.0.0a0 + - {matrix: null, packages: [*libcudf_unsuffixed]} + depends_on_pylibcudf: + common: + - output_types: conda + packages: + - &pylibcudf_unsuffixed pylibcudf==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for rmm, cubinlinker, ptxcompiler. 
+ - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - pylibcudf-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - pylibcudf-cu11==24.10.*,>=0.0.0a0 + - {matrix: null, packages: [*pylibcudf_unsuffixed]} depends_on_cudf: common: - output_types: conda @@ -789,6 +885,60 @@ dependencies: packages: &cupy_packages_cu11 - cupy-cuda11x>=12.0.0 - {matrix: null, packages: *cupy_packages_cu11} + depends_on_librmm: + common: + - output_types: conda + packages: + - &librmm_unsuffixed librmm==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for librmm-cu{11,12}. + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - librmm-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - librmm-cu11==24.10.*,>=0.0.0a0 + - matrix: + packages: + - *librmm_unsuffixed + depends_on_rmm: + common: + - output_types: conda + packages: + - &rmm_unsuffixed rmm==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for rmm-cu{11,12}. + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - rmm-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - rmm-cu11==24.10.*,>=0.0.0a0 + - matrix: + packages: + - *rmm_unsuffixed test_python_pandas_cudf: common: - output_types: [requirements, pyproject] @@ -800,9 +950,13 @@ dependencies: # installation issues with `psycopg2`. 
- pandas[test, pyarrow, performance, computation, fss, excel, parquet, feather, hdf5, spss, html, xml, plot, output-formatting, clipboard, compression] - pytest-reportlog + - ipython test_python_cudf_pandas: common: - - output_types: [requirements, pyproject] + - output_types: [conda, requirements, pyproject] packages: - ipython + - jupyter_client + - nbconvert + - nbformat - openpyxl diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 43e2d6031bc..95813907bf4 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -342,6 +342,7 @@ def clean_all_xml_files(path): "cudf.Series": ("cudf.core.series.Series", "cudf.Series"), "cudf.Index": ("cudf.core.index.Index", "cudf.Index"), "cupy.core.core.ndarray": ("cupy.ndarray", "cupy.ndarray"), + "DeviceBuffer": ("rmm._lib.device_buffer.DeviceBuffer", "rmm.DeviceBuffer"), } @@ -383,6 +384,7 @@ def _generate_namespaces(namespaces): # Cython types that don't alias cleanly because of # https://github.com/cython/cython/issues/5609 "size_type", + "size_t", "type_id", # Unknown base types "int32_t", @@ -564,6 +566,8 @@ def on_missing_reference(app, env, node, contnode): ("py:obj", "cudf.Index.to_flat_index"), ("py:obj", "cudf.MultiIndex.to_flat_index"), ("py:meth", "pyarrow.Table.to_pandas"), + ("py:class", "pd.DataFrame"), + ("py:class", "pandas.core.indexes.frozen.FrozenList"), ("py:class", "pa.Array"), ("py:class", "ScalarLike"), ("py:class", "ParentType"), diff --git a/docs/cudf/source/developer_guide/pylibcudf.md b/docs/cudf/source/developer_guide/pylibcudf.md index 0b881b2b057..4e10459fe2b 100644 --- a/docs/cudf/source/developer_guide/pylibcudf.md +++ b/docs/cudf/source/developer_guide/pylibcudf.md @@ -13,10 +13,8 @@ To satisfy the goals of pylibcudf, we impose the following set of design princip - Every public function or method should be `cpdef`ed. This allows it to be used in both Cython and Python code. This incurs some slight overhead over `cdef` functions, but we assume that this is acceptable because 1) the vast majority of users will be using pure Python rather than Cython, and 2) the overhead of a `cpdef` function over a `cdef` function is on the order of a nanosecond, while CUDA kernel launch overhead is on the order of a microsecond, so these function overheads should be washed out by typical usage of pylibcudf. - Every variable used should be strongly typed and either be a primitive type (int, float, etc) or a cdef class. Any enums in C++ should be mirrored using `cpdef enum`, which will create both a C-style enum in Cython and a PEP 435-style Python enum that will automatically be used in Python. - All typing in code should be written using Cython syntax, not PEP 484 Python typing syntax. Not only does this ensure compatibility with Cython < 3, but even with Cython 3 PEP 484 support remains incomplete as of this writing. -- All cudf code should interact only with pylibcudf, never with libcudf directly. -- All imports should be relative so that pylibcudf can be easily extracted from cudf later - - Exception: All imports of libcudf API bindings in `cudf._lib.cpp` should use absolute imports of `cudf._lib.cpp as libcudf`. We should convert the `cpp` directory into a proper package so that it can be imported as `libcudf` in that fashion. When moving pylibcudf into a separate package, it will be renamed to `libcudf` and only the imports will need to change. -- Ideally, pylibcudf should depend on nothing other than rmm and pyarrow. 
This will allow it to be extracted into a a largely standalone library and used in environments where the larger dependency tree of cudf may be cumbersome. +- All cudf code should interact only with pylibcudf, never with libcudf directly. This is not currently the case, but is the direction that the library is moving towards. +- Ideally, pylibcudf should depend on no RAPIDS component other than rmm, and should in general have minimal runtime dependencies. ## Relationship to libcudf @@ -112,6 +110,9 @@ Then, a corresponding pylibcudf fixture may be created using a simple `from_arro This approach ensures consistent global coverage across types for various tests. In general, pylibcudf tests should prefer validating against a corresponding pyarrow implementation rather than hardcoding data. +If there is no pyarrow implementation, another alternative is to write a pure Python implementation that loops over the values +of the Table/Column, if a scalar Python equivalent of the pylibcudf implementation exists (this is especially relevant for string methods). + This approach is more resilient to changes to input data, particularly given the fixture strategy outlined above. Standard tools for comparing between pylibcudf and pyarrow types are provided in the utils module. @@ -149,7 +150,7 @@ Some guidelines on what should be tested: - Exception: In special cases where constructing suitable large tests is difficult in C++ (such as creating suitable input data for I/O testing), tests may be added to pylibcudf instead. - Nullable data should always be tested. - Expected exceptions should be tested. Tests should be written from the user's perspective in mind, and if the API is not currently throwing the appropriate exception it should be updated. - - Important note: If the exception should be produced by libcudf, the underlying libcudf API should be updated to throw the desired exception in C++. Such changes may require consultation with libcudf devs in nontrivial cases. [This issue](https://github.com/rapidsai/cudf/issues/12885) provides an overview and an indication of acceptable exception types that should cover most use cases. In rare cases a new C++ exception may need to be introduced in [`error.hpp`](https://github.com/rapidsai/cudf/blob/branch-24.04/cpp/include/cudf/utilities/error.hpp). If so, this exception will also need to be mapped to a suitable Python exception in [`exception_handler.pxd`](https://github.com/rapidsai/cudf/blob/branch-24.04/python/cudf/cudf/_lib/exception_handler.pxd). + - Important note: If the exception should be produced by libcudf, the underlying libcudf API should be updated to throw the desired exception in C++. Such changes may require consultation with libcudf devs in nontrivial cases. [This issue](https://github.com/rapidsai/cudf/issues/12885) provides an overview and an indication of acceptable exception types that should cover most use cases. In rare cases a new C++ exception may need to be introduced in [`error.hpp`](https://github.com/rapidsai/cudf/blob/branch-24.04/cpp/include/cudf/utilities/error.hpp). If so, this exception will also need to be mapped to a suitable Python exception in `exception_handler.pxd`. Some guidelines on how best to use pytests. - By default, fixtures producing device data containers should be of module scope and treated as immutable by tests. Allocating data on the GPU is expensive and slows tests. 
Almost all pylibcudf operations are out of place operations, so module-scoped fixtures should not typically be problematic to work with. Session-scoped fixtures would also work, but they are harder to reason about since they live in a different module, and if they need to change for any reason they could affect an arbitrarily large number of tests. Module scope is a good balance.
@@ -242,3 +243,8 @@ cpdef ColumnOrTable empty_like(ColumnOrTable input)
 [Cython supports specializing the contents of fused-type functions based on the argument types](https://cython.readthedocs.io/en/latest/src/userguide/fusedtypes.html#type-checking-specializations), so any type-specific logic may be encoded using the appropriate conditionals.
 
 See the pylibcudf source for examples of how to implement such functions.
+
+In the event that libcudf provides multiple overloads for the same function with differing numbers of arguments, specify the maximum number of arguments in the Cython definition,
+and set arguments not shared between overloads to `None`. If a user tries to pass in an unsupported argument for a specific overload type, you should raise `ValueError`.
+
+Finally, consider filing a libcudf issue if you think this inconsistency can be addressed on the libcudf side.
diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb
index c3da2558db8..2eaa75b3189 100644
--- a/docs/cudf/source/user_guide/10min.ipynb
+++ b/docs/cudf/source/user_guide/10min.ipynb
@@ -15,7 +15,11 @@
    "\n",
    "[Dask](https://dask.org/) is a flexible library for parallel computing in Python that makes scaling out your workflow smooth and simple. On the CPU, Dask uses Pandas to execute operations in parallel on DataFrame partitions.\n",
    "\n",
-   "[Dask-cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed using cuDF GPU DataFrames instead of Pandas DataFrames. For instance, when you call `dask_cudf.read_csv(...)`, your cluster's GPUs do the work of parsing the CSV file(s) by calling [`cudf.read_csv()`](https://docs.rapids.ai/api/cudf/stable/api_docs/api/cudf.read_csv.html).\n",
+   "[Dask cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed using cuDF GPU DataFrames instead of Pandas DataFrames. For instance, when you call `dask_cudf.read_csv(...)`, your cluster's GPUs do the work of parsing the CSV file(s) by calling [`cudf.read_csv()`](https://docs.rapids.ai/api/cudf/stable/api_docs/api/cudf.read_csv.html).\n",
+   "\n",
+   "\n",
+   "> [!NOTE] \n",
+   "> This notebook uses the explicit Dask cuDF API (`dask_cudf`) for clarity. However, we strongly recommend that you use Dask's [configuration infrastructure](https://docs.dask.org/en/latest/configuration.html) to set the `\"dataframe.backend\"` to `\"cudf\"`, and work with the `dask.dataframe` API directly. Please see the [Dask cuDF documentation](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) for more information.\n",
    "\n",
    "\n",
    "## When to use cuDF and Dask-cuDF\n",
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/aggregation.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/aggregation.rst
index 739305af5d4..4b2b213b6c3 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/aggregation.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/aggregation.rst
@@ -2,5 +2,5 @@ aggregation
 ===========
 
-.. 
automodule:: cudf._lib.pylibcudf.aggregation +.. automodule:: pylibcudf.aggregation :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/binaryop.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/binaryop.rst index e5bc6aa7cda..8bbbfbf88c1 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/binaryop.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/binaryop.rst @@ -2,5 +2,5 @@ binaryop ======== -.. automodule:: cudf._lib.pylibcudf.binaryop +.. automodule:: pylibcudf.binaryop :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/column.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/column.rst index d1105d356b4..d26c8737cf4 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/column.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/column.rst @@ -2,5 +2,5 @@ Column ====== -.. automodule:: cudf._lib.pylibcudf.column +.. automodule:: pylibcudf.column :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/column_factories.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/column_factories.rst index c858135b6ce..8dfaa4bae03 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/column_factories.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/column_factories.rst @@ -2,5 +2,5 @@ column_factories ================ -.. automodule:: cudf._lib.pylibcudf.column_factories +.. automodule:: pylibcudf.column_factories :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/concatenate.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/concatenate.rst index e83739056f4..7912cb83767 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/concatenate.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/concatenate.rst @@ -2,5 +2,5 @@ concatenate =========== -.. automodule:: cudf._lib.pylibcudf.concatenate +.. automodule:: pylibcudf.concatenate :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/copying.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/copying.rst index fddd3ea440f..25e3ef50e6a 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/copying.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/copying.rst @@ -2,5 +2,5 @@ copying ======= -.. automodule:: cudf._lib.pylibcudf.copying +.. automodule:: pylibcudf.copying :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst index 558268ea495..71f7874cfbe 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst @@ -2,5 +2,5 @@ datetime ======== -.. automodule:: cudf._lib.pylibcudf.datetime +.. automodule:: pylibcudf.datetime :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/expressions.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/expressions.rst index 03f769ee861..5493d4662a9 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/expressions.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/expressions.rst @@ -2,5 +2,5 @@ expressions =========== -.. automodule:: cudf._lib.pylibcudf.expressions +.. automodule:: pylibcudf.expressions :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst index 542a5e12bc4..0d328a0b0e9 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst @@ -2,5 +2,5 @@ filling ======== -.. 
automodule:: cudf._lib.pylibcudf.filling +.. automodule:: pylibcudf.filling :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/gpumemoryview.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/gpumemoryview.rst index dffc7c24e02..5515a74adcc 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/gpumemoryview.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/gpumemoryview.rst @@ -2,5 +2,5 @@ gpumemoryview ============= -.. automodule:: cudf._lib.pylibcudf.gpumemoryview +.. automodule:: pylibcudf.gpumemoryview :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/groupby.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/groupby.rst index d6e994f7dbc..27cda383818 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/groupby.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/groupby.rst @@ -2,5 +2,5 @@ groupby ======= -.. automodule:: cudf._lib.pylibcudf.groupby +.. automodule:: pylibcudf.groupby :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst index 505765bba0f..6a2b66e8ea0 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst @@ -23,6 +23,7 @@ This page provides API documentation for pylibcudf. join lists merge + null_mask quantiles reduce replace diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst index 881ab8d7be4..0d2cb55212e 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst @@ -2,5 +2,5 @@ interop ======= -.. automodule:: cudf._lib.pylibcudf.interop +.. automodule:: pylibcudf.interop :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/avro.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/avro.rst index 495bd505fdc..1c57a6157f5 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/avro.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/avro.rst @@ -2,5 +2,5 @@ Avro ==== -.. automodule:: cudf._lib.pylibcudf.io.avro +.. automodule:: pylibcudf.io.avro :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/csv.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/csv.rst index 5a2276f8b2d..59f7d8fe54c 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/csv.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/csv.rst @@ -2,5 +2,5 @@ CSV === -.. automodule:: cudf._lib.pylibcudf.io.csv +.. automodule:: pylibcudf.io.csv :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst index e2d342ffe47..c8933981736 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst @@ -5,7 +5,7 @@ I/O I/O Utility Classes =================== -.. automodule:: cudf._lib.pylibcudf.io.types +.. automodule:: pylibcudf.io.types :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/json.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/json.rst index 6aeae1f322a..a4626f43cc3 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/json.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/json.rst @@ -2,5 +2,5 @@ JSON ==== -.. automodule:: cudf._lib.pylibcudf.io.json +.. 
automodule:: pylibcudf.io.json :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst index 9dfbadfa216..07c2503ab28 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst @@ -2,5 +2,5 @@ Parquet ======= -.. automodule:: cudf._lib.pylibcudf.io.parquet +.. automodule:: pylibcudf.io.parquet :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/join.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/join.rst index 05b9709d116..de065e4fc40 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/join.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/join.rst @@ -2,5 +2,5 @@ join ==== -.. automodule:: cudf._lib.pylibcudf.join +.. automodule:: pylibcudf.join :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/lists.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/lists.rst index a127dd6006a..0fe1a876073 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/lists.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/lists.rst @@ -2,5 +2,5 @@ lists ===== -.. automodule:: cudf._lib.pylibcudf.lists +.. automodule:: pylibcudf.lists :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/merge.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/merge.rst index ef1189a064a..3f634ffcfd7 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/merge.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/merge.rst @@ -2,5 +2,5 @@ merge ===== -.. automodule:: cudf._lib.pylibcudf.merge +.. automodule:: pylibcudf.merge :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/null_mask.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/null_mask.rst new file mode 100644 index 00000000000..4799c62eace --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/null_mask.rst @@ -0,0 +1,6 @@ +========= +null_mask +========= + +.. automodule:: pylibcudf.null_mask + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/quantiles.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/quantiles.rst index 3417c1ff59d..0f0f701b5dc 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/quantiles.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/quantiles.rst @@ -2,5 +2,5 @@ quantiles ========= -.. automodule:: cudf._lib.pylibcudf.quantiles +.. automodule:: pylibcudf.quantiles :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/reduce.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/reduce.rst index e6f1b02331d..047f217c276 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/reduce.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/reduce.rst @@ -2,5 +2,5 @@ reduce ====== -.. automodule:: cudf._lib.pylibcudf.reduce +.. automodule:: pylibcudf.reduce :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/replace.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/replace.rst index 7f846872fca..7410b20e1b0 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/replace.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/replace.rst @@ -2,5 +2,5 @@ replace ======= -.. automodule:: cudf._lib.pylibcudf.replace +.. 
automodule:: pylibcudf.replace :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst index 964cef04923..09ec0501bb9 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst @@ -2,5 +2,5 @@ reshape ======= -.. automodule:: cudf._lib.pylibcudf.reshape +.. automodule:: pylibcudf.reshape :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/rolling.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/rolling.rst index 0817d117a94..1f8da467e84 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/rolling.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/rolling.rst @@ -2,5 +2,5 @@ rolling ======= -.. automodule:: cudf._lib.pylibcudf.rolling +.. automodule:: pylibcudf.rolling :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/round.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/round.rst index c97fda12301..e064357cbd1 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/round.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/round.rst @@ -2,5 +2,5 @@ round ===== -.. automodule:: cudf._lib.pylibcudf.round +.. automodule:: pylibcudf.round :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/scalar.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/scalar.rst index b12f47618fb..a9100c6bb2d 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/scalar.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/scalar.rst @@ -2,5 +2,5 @@ Scalar ====== -.. automodule:: cudf._lib.pylibcudf.scalar +.. automodule:: pylibcudf.scalar :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/search.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/search.rst index aa57bcd9d92..02307037994 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/search.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/search.rst @@ -2,5 +2,5 @@ search ====== -.. automodule:: cudf._lib.pylibcudf.search +.. automodule:: pylibcudf.search :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/sorting.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/sorting.rst index e9441366eeb..b8fd8fda9bd 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/sorting.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/sorting.rst @@ -2,5 +2,5 @@ sorting ======= -.. automodule:: cudf._lib.pylibcudf.sorting +.. automodule:: pylibcudf.sorting :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/stream_compaction.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/stream_compaction.rst index 00b479446d8..0252d0684d9 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/stream_compaction.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/stream_compaction.rst @@ -2,5 +2,5 @@ stream_compaction ================= -.. automodule:: cudf._lib.pylibcudf.stream_compaction +.. automodule:: pylibcudf.stream_compaction :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/capitalize.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/capitalize.rst new file mode 100644 index 00000000000..6b9ed8d47e7 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/capitalize.rst @@ -0,0 +1,6 @@ +========== +capitalize +========== + +.. 
automodule:: pylibcudf.strings.capitalize + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/char_types.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/char_types.rst new file mode 100644 index 00000000000..896fa6086db --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/char_types.rst @@ -0,0 +1,6 @@ +========== +char_types +========== + +.. automodule:: pylibcudf.strings.char_types + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/contains.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/contains.rst index e5745331bc7..d2d164be638 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/contains.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/contains.rst @@ -2,5 +2,5 @@ contains ======== -.. automodule:: cudf._lib.pylibcudf.strings.contains +.. automodule:: pylibcudf.strings.contains :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/find.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/find.rst new file mode 100644 index 00000000000..7c540e99929 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/find.rst @@ -0,0 +1,6 @@ +==== +find +==== + +.. automodule:: pylibcudf.strings.find + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst index cecf1ccc9bb..462a756a092 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst @@ -4,6 +4,11 @@ strings .. toctree:: :maxdepth: 1 + capitalize + char_types contains + find + regex_flags + regex_program replace slice diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_flags.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_flags.rst new file mode 100644 index 00000000000..53fd712d864 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_flags.rst @@ -0,0 +1,6 @@ +=========== +regex_flags +=========== + +.. automodule:: pylibcudf.strings.regex_flags + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_program.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_program.rst new file mode 100644 index 00000000000..6f3d2f6681c --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_program.rst @@ -0,0 +1,6 @@ +============= +regex_program +============= + +.. automodule:: pylibcudf.strings.regex_program + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace.rst index 9575ec226a7..d5417adac43 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace.rst @@ -2,5 +2,5 @@ replace ======= -.. automodule:: cudf._lib.pylibcudf.strings.replace +.. automodule:: pylibcudf.strings.replace :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst index 0ee5af71c03..e9908904512 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst @@ -2,5 +2,5 @@ slice ===== -.. automodule:: cudf._lib.pylibcudf.strings.slice +.. 
automodule:: pylibcudf.strings.slice :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst index d8337b6596d..e39ca18a12b 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst @@ -2,5 +2,5 @@ Table ===== -.. automodule:: cudf._lib.pylibcudf.table +.. automodule:: pylibcudf.table :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst index 294ca8dc78c..2cce7b9d7d7 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst @@ -2,5 +2,5 @@ traits ====== -.. automodule:: cudf._lib.pylibcudf.traits +.. automodule:: pylibcudf.traits :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst index ef04bbad7e6..839163f83fc 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst @@ -2,5 +2,5 @@ transform ========= -.. automodule:: cudf._lib.pylibcudf.transform +.. automodule:: pylibcudf.transform :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/types.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/types.rst index 8d5409bbd97..75521ac2f4d 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/types.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/types.rst @@ -2,5 +2,5 @@ types ===== -.. automodule:: cudf._lib.pylibcudf.types +.. automodule:: pylibcudf.types :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/unary.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/unary.rst index add4baa0a54..34077242b90 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/unary.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/unary.rst @@ -2,5 +2,5 @@ unary ===== -.. automodule:: cudf._lib.pylibcudf.unary +.. automodule:: pylibcudf.unary :members: diff --git a/java/ci/Dockerfile.rocky b/java/ci/Dockerfile.rocky index 6b87f3ed34e..152af22f7e4 100644 --- a/java/ci/Dockerfile.rocky +++ b/java/ci/Dockerfile.rocky @@ -28,7 +28,7 @@ ARG TARGETPLATFORM=linux/amd64 FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE ARG TOOLSET_VERSION=11 ### Install basic requirements -RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSION} git zlib-devel maven tar wget patch ninja-build +RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSION} git zlib-devel maven tar wget patch ninja-build boost-devel ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins RUN mkdir /usr/local/rapids /rapids && chmod 777 /usr/local/rapids /rapids diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 8ff2f0f0a73..6bd4e06c47e 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -913,25 +913,6 @@ public final ColumnVector mergeAndSetValidity(BinaryOp mergeOp, ColumnView... co return new ColumnVector(bitwiseMergeAndSetValidity(getNativeView(), columnViews, mergeOp.nativeId)); } - /** - * Creates a deep copy of a column while replacing the validity mask. The validity mask is the - * device_vector equivalent of the boolean column given as argument. 
- * - * The boolColumn must have the same number of rows as the current column. - * The result column will have the same number of rows as the current column. - * For all indices `i` where the boolColumn is `true`, the result column will have a valid value at index i. - * For all other values (i.e. `false` or `null`), the result column will have nulls. - * - * If the current column has a null at a given index `i`, and the new validity mask is `true` at index `i`, - * then the row value is undefined. - * - * @param boolColumn bool column whose value is to be used as the validity mask. - * @return Deep copy of the column with replaced validity mask. - */ - public final ColumnVector copyWithBooleanColumnAsValidity(ColumnView boolColumn) { - return new ColumnVector(copyWithBooleanColumnAsValidity(getNativeView(), boolColumn.getNativeView())); - } - ///////////////////////////////////////////////////////////////////////////// // DATE/TIME ///////////////////////////////////////////////////////////////////////////// @@ -4767,25 +4748,6 @@ private static native long clamper(long nativeView, long loScalarHandle, long lo private static native long bitwiseMergeAndSetValidity(long baseHandle, long[] viewHandles, int nullConfig) throws CudfException; - /** - * Native method to deep copy a column while replacing the null mask. The null mask is the - * device_vector equivalent of the boolean column given as argument. - * - * The boolColumn must have the same number of rows as the exemplar column. - * The result column will have the same number of rows as the exemplar. - * For all indices `i` where the boolean column is `true`, the result column will have a valid value at index i. - * For all other values (i.e. `false` or `null`), the result column will have nulls. - * - * If the exemplar column has a null at a given index `i`, and the new validity mask is `true` at index `i`, - * then the resultant row value is undefined. - * - * @param exemplarViewHandle column view of the column that is deep copied. - * @param boolColumnViewHandle bool column whose value is to be used as the null mask. - * @return Deep copy of the column with replaced null mask. - */ - private static native long copyWithBooleanColumnAsValidity(long exemplarViewHandle, - long boolColumnViewHandle) throws CudfException; - //////// // Native cudf::column_view life cycle and metadata access methods. Life cycle methods // should typically only be called from the OffHeap inner class. diff --git a/java/src/main/java/ai/rapids/cudf/Schema.java b/java/src/main/java/ai/rapids/cudf/Schema.java index 43603386649..76b2799aad6 100644 --- a/java/src/main/java/ai/rapids/cudf/Schema.java +++ b/java/src/main/java/ai/rapids/cudf/Schema.java @@ -120,7 +120,7 @@ private void flattenIfNeeded() { private int flattenedLength(int startingLength) { if (childSchemas != null) { - for (Schema child: childSchemas) { + for (Schema child : childSchemas) { startingLength++; startingLength = child.flattenedLength(startingLength); } @@ -150,11 +150,19 @@ public static Builder builder() { return new Builder(DType.STRUCT); } + /** + * Get names of the columns flattened from all levels in schema by depth-first traversal. + * @return An array containing names of all columns in schema. + */ public String[] getFlattenedColumnNames() { flattenIfNeeded(); return flattenedNames; } + /** + * Get names of the top level child columns in schema. + * @return An array containing names of top level child columns. 
+ */ public String[] getColumnNames() { if (childNames == null) { return null; @@ -162,6 +170,10 @@ public String[] getColumnNames() { return childNames.toArray(new String[childNames.size()]); } + /** + * Check if the schema is nested (i.e., top level type is LIST or STRUCT). + * @return true if the schema is nested, false otherwise. + */ public boolean isNested() { return childSchemas != null && childSchemas.size() > 0; } @@ -173,7 +185,7 @@ public boolean isNested() { */ public boolean hasNestedChildren() { if (childSchemas != null) { - for (Schema child: childSchemas) { + for (Schema child : childSchemas) { if (child.isNested()) { return true; } @@ -182,7 +194,11 @@ public boolean hasNestedChildren() { return false; } - int[] getFlattenedTypeIds() { + /** + * Get type ids of the columns flattened from all levels in schema by depth-first traversal. + * @return An array containing type ids of all columns in schema. + */ + public int[] getFlattenedTypeIds() { flattenIfNeeded(); if (flattenedTypes == null) { return null; @@ -194,7 +210,11 @@ int[] getFlattenedTypeIds() { return ret; } - int[] getFlattenedTypeScales() { + /** + * Get scales of the columns' types flattened from all levels in schema by depth-first traversal. + * @return An array containing type scales of all columns in schema. + */ + public int[] getFlattenedTypeScales() { flattenIfNeeded(); if (flattenedTypes == null) { return null; @@ -206,11 +226,19 @@ int[] getFlattenedTypeScales() { return ret; } - DType[] getFlattenedTypes() { + /** + * Get the types of the columns in schema flattened from all levels by depth-first traversal. + * @return An array containing types of all columns in schema. + */ + public DType[] getFlattenedTypes() { flattenIfNeeded(); return flattenedTypes; } + /** + * Get types of the top level child columns in schema. + * @return An array containing types of top level child columns. + */ public DType[] getChildTypes() { if (childSchemas == null) { return null; @@ -222,6 +250,10 @@ public DType[] getChildTypes() { return ret; } + /** + * Get number of top level child columns in schema. + * @return Number of child columns. + */ public int getNumChildren() { if (childSchemas == null) { return 0; @@ -229,7 +261,11 @@ public int getNumChildren() { return childSchemas.size(); } - int[] getFlattenedNumChildren() { + /** + * Get numbers of child columns for each level in schema. + * @return Numbers of child columns for all levels flattened by depth-first traversal. 
+ */ + public int[] getFlattenedNumChildren() { flattenIfNeeded(); return flattenedCounts; } @@ -253,7 +289,7 @@ public boolean isStructOrHasStructDescendant() { public HostColumnVector.DataType asHostDataType() { if (topLevelType == DType.LIST) { - assert(childSchemas != null && childSchemas.size() == 1); + assert (childSchemas != null && childSchemas.size() == 1); HostColumnVector.DataType element = childSchemas.get(0).asHostDataType(); return new HostColumnVector.ListType(true, element); } else if (topLevelType == DType.STRUCT) { @@ -261,7 +297,7 @@ public HostColumnVector.DataType asHostDataType() { return new HostColumnVector.StructType(true); } else { List childTypes = - childSchemas.stream().map(Schema::asHostDataType).collect(Collectors.toList()); + childSchemas.stream().map(Schema::asHostDataType).collect(Collectors.toList()); return new HostColumnVector.StructType(true, childTypes); } } else { @@ -269,7 +305,7 @@ public HostColumnVector.DataType asHostDataType() { } } - public static class Builder { + public static class Builder { private final DType topLevelType; private final List names; private final List types; @@ -326,7 +362,7 @@ public Schema build() { List children = null; if (types != null) { children = new ArrayList<>(types.size()); - for (Builder b: types) { + for (Builder b : types) { children.add(b.build()); } } diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 4e737451ed6..36e342cae13 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -1084,7 +1084,12 @@ private static DidViewChange gatherJSONColumns(Schema schema, TableWithMeta.Nest // The types don't match so just return the input unchanged... return DidViewChange.no(); } else { - String[] foundNames = children.getNames(); + String[] foundNames; + if (children == null) { + foundNames = new String[0]; + } else { + foundNames = children.getNames(); + } HashMap indices = new HashMap<>(); for (int i = 0; i < foundNames.length; i++) { indices.put(foundNames[i], i); @@ -1101,8 +1106,9 @@ private static DidViewChange gatherJSONColumns(Schema schema, TableWithMeta.Nest for (int i = 0; i < columns.length; i++) { String neededColumnName = neededNames[i]; Integer index = indices.get(neededColumnName); + Schema childSchema = schema.getChild(i); if (index != null) { - if (schema.getChild(i).isStructOrHasStructDescendant()) { + if (childSchema.isStructOrHasStructDescendant()) { ColumnView child = cv.getChildColumnView(index); boolean shouldCloseChild = true; try { @@ -1131,8 +1137,23 @@ private static DidViewChange gatherJSONColumns(Schema schema, TableWithMeta.Nest } } else { somethingChanged = true; - try (Scalar s = Scalar.fromNull(types[i])) { - columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount()); + if (types[i] == DType.LIST) { + try (Scalar s = Scalar.listFromNull(childSchema.getChild(0).asHostDataType())) { + columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount()); + } + } else if (types[i] == DType.STRUCT) { + int numStructChildren = childSchema.getNumChildren(); + HostColumnVector.DataType[] structChildren = new HostColumnVector.DataType[numStructChildren]; + for (int structChildIndex = 0; structChildIndex < numStructChildren; structChildIndex++) { + structChildren[structChildIndex] = childSchema.getChild(structChildIndex).asHostDataType(); + } + try (Scalar s = Scalar.structFromNull(structChildren)) { + columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount()); + } + } else 
{ + try (Scalar s = Scalar.fromNull(types[i])) { + columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount()); + } } } } diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 22059c5bc7f..c18a90140b6 100644 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -212,6 +212,10 @@ target_compile_definitions( ) target_link_options(cudfjni PRIVATE "-Wl,--no-undefined") +set(CUDF_ENABLE_ARROW_PARQUET ON) +include(../../../../cpp/cmake/thirdparty/get_arrow.cmake) +target_link_libraries(cudfjni PRIVATE ${ARROW_LIBRARIES}) + if(USE_GDS) add_library(cufilejni src/CuFileJni.cpp) set_target_properties( diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp index cdc5aa41abe..9b718b2ed83 100644 --- a/java/src/main/native/src/ColumnVectorJni.cpp +++ b/java/src/main/native/src/ColumnVectorJni.cpp @@ -38,12 +38,70 @@ #include #include +#include #include using cudf::jni::ptr_as_jlong; using cudf::jni::release_as_jlong; +// Creating arrow as per given type_id and buffer arguments +template +std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... args) +{ + switch (id) { + case cudf::type_id::BOOL8: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT8: return std::make_shared(std::forward(args)...); + case cudf::type_id::INT16: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT8: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT16: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::FLOAT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::FLOAT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::TIMESTAMP_DAYS: + return std::make_shared(std::make_shared(), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_SECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::SECOND), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_MILLISECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::MILLI), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_MICROSECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::MICRO), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_NANOSECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::NANO), + std::forward(args)...); + case cudf::type_id::DURATION_SECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::SECOND), + std::forward(args)...); + case cudf::type_id::DURATION_MILLISECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::MILLI), + std::forward(args)...); + case cudf::type_id::DURATION_MICROSECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::MICRO), + std::forward(args)...); + case cudf::type_id::DURATION_NANOSECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::NANO), + std::forward(args)...); + default: CUDF_FAIL("Unsupported type_id conversion to arrow"); + } +} + extern "C" { JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_sequence( @@ -141,15 +199,27 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_fromArrow(JNIEnv* env, break; default: // 
this handles the primitive types - arrow_array = cudf::detail::to_arrow_array( - n_type, j_col_length, data_buffer, null_buffer, j_null_count); + arrow_array = to_arrow_array(n_type, j_col_length, data_buffer, null_buffer, j_null_count); } auto name_and_type = arrow::field("col", arrow_array->type()); std::vector> fields = {name_and_type}; std::shared_ptr schema = std::make_shared(fields); auto arrow_table = arrow::Table::Make(schema, std::vector>{arrow_array}); - auto retCols = cudf::from_arrow(*(arrow_table))->release(); + + ArrowSchema sch; + if (!arrow::ExportSchema(*arrow_table->schema(), &sch).ok()) { + JNI_THROW_NEW(env, "java/lang/RuntimeException", "Unable to produce an ArrowSchema", 0) + } + auto batch = arrow_table->CombineChunksToBatch().ValueOrDie(); + ArrowArray arr; + if (!arrow::ExportRecordBatch(*batch, &arr).ok()) { + JNI_THROW_NEW(env, "java/lang/RuntimeException", "Unable to produce an ArrowArray", 0) + } + auto retCols = cudf::from_arrow(&sch, &arr)->release(); + arr.release(&arr); + sch.release(&sch); + if (retCols.size() != 1) { JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Must result in one column", 0); } diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 4551325ebb1..72f0ad19912 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -2090,21 +2090,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_copyWithBooleanColumnAsValidity( - JNIEnv* env, jobject j_object, jlong exemplar_handle, jlong validity_column_handle) -{ - JNI_NULL_CHECK(env, exemplar_handle, "ColumnView handle is null", 0); - JNI_NULL_CHECK(env, validity_column_handle, "Validity column handle is null", 0); - try { - cudf::jni::auto_set_device(env); - auto const exemplar = *reinterpret_cast(exemplar_handle); - auto const validity = *reinterpret_cast(validity_column_handle); - return release_as_jlong( - cudf::jni::new_column_with_boolean_column_as_validity(exemplar, validity)); - } - CATCH_STD(env, 0); -} - //////// // Native cudf::column_view life cycle and metadata access methods. Life cycle methods // should typically only be called from the CudfColumn inner class. 
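The JNI rewrite above is the heart of this change: instead of calling cudf's old arrow::Table-based detail::to_arrow_array/from_arrow entry points, the code now exports the freshly built Arrow data to an ArrowSchema/ArrowArray pair via the Arrow C Data Interface, hands the raw structs to cudf::from_arrow(&sch, &arr), and invokes the release callbacks afterwards. The same export/import handshake can be sketched from Python with pyarrow's cffi helpers; this is illustrative only and not part of this PR, and the _export_to_c/_import_from_c hooks are private (though long-stable) pyarrow API:

    import pyarrow as pa
    from pyarrow.cffi import ffi  # cffi instance shipped with pyarrow

    # Allocate the two C structs that the C Data Interface passes around.
    c_schema = ffi.new("struct ArrowSchema*")
    c_array = ffi.new("struct ArrowArray*")
    schema_ptr = int(ffi.cast("uintptr_t", c_schema))
    array_ptr = int(ffi.cast("uintptr_t", c_array))

    batch = pa.RecordBatch.from_pydict(
        {"col": pa.array([1, 2, None], type=pa.int32())}
    )

    # Export fills the structs and installs release callbacks -- the moral
    # equivalent of arrow::ExportSchema / arrow::ExportRecordBatch above.
    batch.schema._export_to_c(schema_ptr)
    batch._export_to_c(array_ptr)

    # Import consumes the structs; the importer takes ownership and calls
    # the release callbacks, just as the JNI code releases arr and sch.
    roundtripped = pa.RecordBatch._import_from_c(array_ptr, schema_ptr)
    assert roundtripped.equals(batch)

Decoupling the handoff to the plain C structs is what lets libcudf drop its hard link-time dependency on the arrow C++ libraries while the JNI layer keeps its own Arrow build.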
diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu index 2dbff923544..46261b087ae 100644 --- a/java/src/main/native/src/ColumnViewJni.cu +++ b/java/src/main/native/src/ColumnViewJni.cu @@ -43,37 +43,6 @@ namespace cudf::jni { -std::unique_ptr new_column_with_boolean_column_as_validity( - cudf::column_view const& exemplar, cudf::column_view const& validity_column) -{ - CUDF_EXPECTS(validity_column.type().id() == type_id::BOOL8, - "Validity column must be of type bool"); - CUDF_EXPECTS(validity_column.size() == exemplar.size(), - "Exemplar and validity columns must have the same size"); - - auto validity_device_view = cudf::column_device_view::create(validity_column); - auto validity_begin = cudf::detail::make_optional_iterator( - *validity_device_view, cudf::nullate::DYNAMIC{validity_column.has_nulls()}); - auto validity_end = validity_begin + validity_device_view->size(); - auto [null_mask, null_count] = cudf::detail::valid_if( - validity_begin, - validity_end, - [] __device__(auto optional_bool) { return optional_bool.value_or(false); }, - cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); - auto const exemplar_without_null_mask = - cudf::column_view{exemplar.type(), - exemplar.size(), - exemplar.head(), - nullptr, - 0, - exemplar.offset(), - std::vector{exemplar.child_begin(), exemplar.child_end()}}; - auto deep_copy = std::make_unique(exemplar_without_null_mask); - deep_copy->set_null_mask(std::move(null_mask), null_count); - return deep_copy; -} - std::unique_ptr generate_list_offsets(cudf::column_view const& list_length, rmm::cuda_stream_view stream) { diff --git a/java/src/main/native/src/ColumnViewJni.hpp b/java/src/main/native/src/ColumnViewJni.hpp index c9eef0139ea..c8c441e8fae 100644 --- a/java/src/main/native/src/ColumnViewJni.hpp +++ b/java/src/main/native/src/ColumnViewJni.hpp @@ -22,22 +22,6 @@ namespace cudf::jni { -/** - * @brief Creates a deep copy of the exemplar column, with its validity set to the equivalent - * of the boolean `validity` column's value. - * - * The bool_column must have the same number of rows as the exemplar column. - * The result column will have the same number of rows as the exemplar. - * For all indices `i` where the boolean column is `true`, the result column will have a valid value - * at index i. For all other values (i.e. `false` or `null`), the result column will have nulls. - * - * @param exemplar The column to be deep copied. - * @param bool_column bool column whose value is to be used as the validity. - * @return Deep copy of the exemplar, with the replaced validity. - */ -std::unique_ptr new_column_with_boolean_column_as_validity( - cudf::column_view const& exemplar, cudf::column_view const& bool_column); - /** * @brief Generates list offsets with lengths of each list. * diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index a9ace1398e4..c749c8c84bf 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -54,6 +54,8 @@ #include +#include +#include #include #include @@ -1037,9 +1039,9 @@ cudf::io::schema_element read_schema_element(int& index, // go to the next entry, so recursion can parse it. 
index++; for (int i = 0; i < num_children; i++) { + auto const name = std::string{names.get(index).get()}; child_elems.insert( - std::pair{names.get(index).get(), - cudf::jni::read_schema_element(index, children, names, types, scales)}); + std::pair{name, cudf::jni::read_schema_element(index, children, names, types, scales)}); } return cudf::io::schema_element{d_type, std::move(child_elems)}; } else { @@ -1069,6 +1071,15 @@ void append_flattened_child_names(cudf::io::column_name_info const& info, } } +// Recursively make schema and its children nullable +void set_nullable(ArrowSchema* schema) +{ + schema->flags |= ARROW_FLAG_NULLABLE; + for (int i = 0; i < schema->n_children; ++i) { + set_nullable(schema->children[i]); + } +} + } // namespace } // namespace jni @@ -1830,9 +1841,9 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env, std::map data_types; int at = 0; while (at < n_types.size()) { + auto const name = std::string{n_col_names.get(at).get()}; data_types.insert(std::pair{ - n_col_names.get(at).get(), - cudf::jni::read_schema_element(at, n_children, n_col_names, n_types, n_scales)}); + name, cudf::jni::read_schema_element(at, n_children, n_col_names, n_types, n_scales)}); } opts.dtypes(data_types); } else { @@ -1929,9 +1940,9 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env, std::map data_types; int at = 0; while (at < n_types.size()) { + auto const name = std::string{n_col_names.get(at).get()}; data_types.insert(std::pair{ - n_col_names.get(at).get(), - cudf::jni::read_schema_element(at, n_children, n_col_names, n_types, n_scales)}); + name, cudf::jni::read_schema_element(at, n_children, n_col_names, n_types, n_scales)}); } opts.dtypes(data_types); } else { @@ -2635,7 +2646,13 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_convertCudfToArrowTable(JNIEnv // The pointer to the shared_ptr<> is returned as a jlong. using result_t = std::shared_ptr; - auto result = cudf::to_arrow(*tview, state->get_column_metadata(*tview)); + auto got_arrow_schema = cudf::to_arrow_schema(*tview, state->get_column_metadata(*tview)); + cudf::jni::set_nullable(got_arrow_schema.get()); + auto got_arrow_array = cudf::to_arrow_host(*tview); + auto batch = + arrow::ImportRecordBatch(&got_arrow_array->array, got_arrow_schema.get()).ValueOrDie(); + auto result = arrow::Table::FromRecordBatches({batch}).ValueOrDie(); + return ptr_as_jlong(new result_t{result}); } CATCH_STD(env, 0) @@ -2746,7 +2763,21 @@ Java_ai_rapids_cudf_Table_convertArrowTableToCudf(JNIEnv* env, jclass, jlong arr try { cudf::jni::auto_set_device(env); - return convert_table_for_return(env, cudf::from_arrow(*(handle->get()))); + + ArrowSchema sch; + if (!arrow::ExportSchema(*handle->get()->schema(), &sch).ok()) { + JNI_THROW_NEW(env, "java/lang/RuntimeException", "Unable to produce an ArrowSchema", 0) + } + auto batch = handle->get()->CombineChunksToBatch().ValueOrDie(); + ArrowArray arr; + if (!arrow::ExportRecordBatch(*batch, &arr).ok()) { + JNI_THROW_NEW(env, "java/lang/RuntimeException", "Unable to produce an ArrowArray", 0) + } + auto ret = cudf::from_arrow(&sch, &arr); + arr.release(&arr); + sch.release(&sch); + + return convert_table_for_return(env, ret); } CATCH_STD(env, 0) } @@ -3919,6 +3950,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_dropDuplicates( keep_option, nulls_equal ? 
cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL, cudf::nan_equality::ALL_EQUAL, + cudf::get_default_stream(), rmm::mr::get_current_device_resource()); return convert_table_for_return(env, result); } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 7136b162c13..708744569df 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -6395,46 +6395,6 @@ void testReplaceSameIndexColumnInStruct() { assertTrue(e.getMessage().contains("Duplicate mapping found for replacing child index")); } - @Test - void testCopyWithBooleanColumnAsValidity() { - final Boolean T = true; - final Boolean F = false; - final Integer X = null; - - // Straight-line: Invalidate every other row. - try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - ColumnVector validity = ColumnVector.fromBoxedBooleans(F, T, F, T, F, T, F, T, F, T); - ColumnVector expected = ColumnVector.fromBoxedInts(X, 2, X, 4, X, 6, X, 8, X, 10); - ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { - assertColumnsAreEqual(expected, result); - } - - // Straight-line: Invalidate all Rows. - try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - ColumnVector validity = ColumnVector.fromBoxedBooleans(F, F, F, F, F, F, F, F, F, F); - ColumnVector expected = ColumnVector.fromBoxedInts(X, X, X, X, X, X, X, X, X, X); - ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { - assertColumnsAreEqual(expected, result); - } - - // Nulls in the validity column are treated as invalid. - try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - ColumnVector validity = ColumnVector.fromBoxedBooleans(F, T, F, T, F, T, F, null, F, null); - ColumnVector expected = ColumnVector.fromBoxedInts(X, 2, X, 4, X, 6, X, X, X, X); - ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { - assertColumnsAreEqual(expected, result); - } - - // Negative case: Mismatch in row count. 
- Exception x = assertThrows(CudfException.class, () -> { - try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - ColumnVector validity = ColumnVector.fromBoxedBooleans(F, T, F, T); - ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { - } - }); - assertTrue(x.getMessage().contains("Exemplar and validity columns must have the same size")); - } - @Test void testSegmentedGather() { HostColumnVector.DataType dt = new ListType(true, new BasicType(true, DType.STRING)); diff --git a/pyproject.toml b/pyproject.toml index e15cb7b3cdd..8f9aa165e5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,7 +87,9 @@ select = [ # non-pep585-annotation "UP006", # non-pep604-annotation - "UP007" + "UP007", + # Import from `collections.abc` instead: `Callable` + "UP035", ] ignore = [ # whitespace before : diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index ecadbf5cbbc..7193ada5b93 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -24,75 +24,17 @@ project( LANGUAGES CXX CUDA ) -option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" - OFF -) -option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF) -mark_as_advanced(USE_LIBARROW_FROM_PYARROW) - -# Find Python early so that later commands can use it -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) - -# If the user requested it we attempt to find CUDF. -if(FIND_CUDF_CPP) - include(rapids-cpm) - include(rapids-export) - include(rapids-find) - rapids_cpm_init() +find_package(cudf "${RAPIDS_VERSION}" REQUIRED) - if(USE_LIBARROW_FROM_PYARROW) - # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow - # libraries. These variables have no effect because we are always searching for arrow via - # pyarrow, but they must be set as they are required arguments to the function in - # get_arrow.cmake. - set(CUDF_USE_ARROW_STATIC OFF) - set(CUDF_ENABLE_ARROW_S3 OFF) - set(CUDF_ENABLE_ARROW_ORC OFF) - set(CUDF_ENABLE_ARROW_PYTHON OFF) - set(CUDF_ENABLE_ARROW_PARQUET OFF) - include(../../cpp/cmake/thirdparty/get_arrow.cmake) - endif() - - find_package(cudf "${RAPIDS_VERSION}" REQUIRED) - - # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack - # for the interop.pyx - include(../../cpp/cmake/thirdparty/get_dlpack.cmake) -else() - set(cudf_FOUND OFF) -endif() +# an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack +# for the interop.pyx +include(rapids-cpm) +rapids_cpm_init() +include(../../cpp/cmake/thirdparty/get_dlpack.cmake) include(rapids-cython-core) - -if(NOT cudf_FOUND) - set(BUILD_TESTS OFF) - set(BUILD_BENCHMARKS OFF) - set(CUDF_BUILD_TESTUTIL OFF) - set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) - set(CUDA_STATIC_RUNTIME ON) - - add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL) - - # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous - # components like headers from libcudacxx, but we do need the libraries. However, we want to - # control where they are installed to. Since there are multiple subpackages of cudf._lib that - # require access to libcudf, we place the library and all its dependent artifacts in the cudf - # directory as a single source of truth and modify the other rpaths appropriately. - set(cython_lib_dir cudf) - include(cmake/Modules/WheelHelpers.cmake) - # TODO: This install is currently overzealous. 
We should only install the libraries that are - # downloaded by CPM during the build, not libraries that were found on the system. However, in - # practice right this would only be a problem is if libcudf was not found but some of the - # dependencies were, and we have no real use cases where that happens. - install_aliased_imported_targets( - TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp - DESTINATION ${cython_lib_dir} - ) -endif() - rapids_cython_init() -include(cmake/Modules/LinkPyarrowHeaders.cmake) add_subdirectory(cudf/_lib) add_subdirectory(udf_cpp) diff --git a/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake b/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake deleted file mode 100644 index d432f9fe1f5..00000000000 --- a/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake +++ /dev/null @@ -1,40 +0,0 @@ -# ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= -include_guard(GLOBAL) - -find_package(Python REQUIRED COMPONENTS Development NumPy) - -execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())" - OUTPUT_VARIABLE PYARROW_INCLUDE_DIR - ERROR_VARIABLE PYARROW_ERROR - RESULT_VARIABLE PYARROW_RESULT - OUTPUT_STRIP_TRAILING_WHITESPACE -) - -if(${PYARROW_RESULT}) - message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}") -endif() - -# Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts of -# cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the -# requirement for arrow headers infects all of cudf. These requirements will go away once all -# scalar-related Cython code is removed from cudf. -function(link_to_pyarrow_headers targets) - foreach(target IN LISTS targets) - # PyArrow headers require numpy headers. - target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") - target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") - endforeach() -endfunction() diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index e14815a1b0d..d7da42a1708 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -1,5 +1,15 @@ # Copyright (c) 2018-2024, NVIDIA CORPORATION. +# If libcudf was installed as a wheel, we must request it to load the library symbols. +# Otherwise, we assume that the library was installed in a system path that ld can find. +try: + import libcudf +except ModuleNotFoundError: + pass +else: + libcudf.load_library() + del libcudf + # _setup_numba _must be called before numba.cuda is imported, because # it sets the numba config variable responsible for enabling # Minor Version Compatibility. Setting it after importing numba.cuda has no effect. 
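The try/except guard added above is the wheel-versus-system dance in miniature: if the libcudf wheel's Python shim is importable, ask it to dlopen the library it bundles; otherwise assume the dynamic linker can resolve the symbols from a system install. A small, hedged generalization of that pattern (the helper name is made up for illustration; only the libcudf.load_library() call is taken from this diff):

    import importlib

    def load_native_symbols(module_name: str) -> bool:
        """Load a wheel-bundled native library if its Python shim is installed.

        Returns True if the shim was found and load_library() was called,
        False if we are relying on the system loader instead.
        """
        try:
            shim = importlib.import_module(module_name)
        except ModuleNotFoundError:
            return False  # assume ld can already find the library
        shim.load_library()
        return True

    load_native_symbols("libcudf")

Running the guard at the top of cudf/__init__.py, before any extension module is imported, ensures the symbols are resolvable by the time the first Cython module is loaded.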
@@ -24,7 +34,7 @@ register_series_accessor, ) from cudf.api.types import dtype -from cudf.core.algorithms import factorize +from cudf.core.algorithms import factorize, unique from cudf.core.cut import cut from cudf.core.dataframe import DataFrame, from_dataframe, from_pandas, merge from cudf.core.dtypes import ( @@ -97,6 +107,7 @@ "DatetimeIndex", "Decimal32Dtype", "Decimal64Dtype", + "Decimal128Dtype", "Grouper", "Index", "IntervalDtype", @@ -126,6 +137,7 @@ "isclose", "melt", "merge", + "option_context", "pivot", "pivot_table", "read_avro", diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 38b7e9ebe04..5d4b5421f16 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -63,11 +63,12 @@ rapids_cython_create_modules( ) target_link_libraries(strings_udf PUBLIC cudf_strings_udf) +target_include_directories(interop PUBLIC "$") -set(targets_using_arrow_headers interop avro csv orc json parquet) -link_to_pyarrow_headers("${targets_using_arrow_headers}") +include(${rapids-cmake-dir}/export/find_package_root.cmake) +include(../../../../cpp/cmake/thirdparty/get_nanoarrow.cmake) +target_link_libraries(interop PUBLIC nanoarrow) add_subdirectory(io) add_subdirectory(nvtext) -add_subdirectory(pylibcudf) add_subdirectory(strings) diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index 34c0e29d0b1..918edb6d3f1 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -21,7 +21,6 @@ orc, parquet, partitioning, - pylibcudf, quantiles, reduce, replace, diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx index 1616c24eec2..7c91533cf93 100644 --- a/python/cudf/cudf/_lib/aggregation.pyx +++ b/python/cudf/cudf/_lib/aggregation.pyx @@ -3,8 +3,9 @@ import pandas as pd from numba.np import numpy_support +import pylibcudf + import cudf -from cudf._lib import pylibcudf from cudf._lib.types import SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES from cudf.utils import cudautils diff --git a/python/cudf/cudf/_lib/avro.pyx b/python/cudf/cudf/_lib/avro.pyx index 3c132b22880..b1759635a36 100644 --- a/python/cudf/cudf/_lib/avro.pyx +++ b/python/cudf/cudf/_lib/avro.pyx @@ -2,8 +2,8 @@ from cudf._lib.utils cimport data_from_pylibcudf_io -import cudf._lib.pylibcudf as plc -from cudf._lib.pylibcudf.io.types import SourceInfo +import pylibcudf as plc +from pylibcudf.io.types import SourceInfo cpdef read_avro(datasource, columns=None, skip_rows=0, num_rows=-1): diff --git a/python/cudf/cudf/_lib/binaryop.pyx b/python/cudf/cudf/_lib/binaryop.pyx index 2e352dd7904..e2547476849 100644 --- a/python/cudf/cudf/_lib/binaryop.pyx +++ b/python/cudf/cudf/_lib/binaryop.pyx @@ -4,7 +4,8 @@ from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar from cudf._lib.types cimport dtype_to_pylibcudf_type -from cudf._lib import pylibcudf +import pylibcudf + from cudf._lib.scalar import as_device_scalar from cudf.core.buffer import acquire_spill_lock diff --git a/python/cudf/cudf/_lib/column.pxd b/python/cudf/cudf/_lib/column.pxd index 437f44af9f0..8ceea4920e2 100644 --- a/python/cudf/cudf/_lib/column.pxd +++ b/python/cudf/cudf/_lib/column.pxd @@ -5,14 +5,13 @@ from typing import Literal from libcpp cimport bool from libcpp.memory cimport unique_ptr -from rmm._lib.device_buffer cimport device_buffer - -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport ( +from 
pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport ( column_view, mutable_column_view, ) -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.types cimport size_type +from rmm._lib.device_buffer cimport device_buffer cdef class Column: diff --git a/python/cudf/cudf/_lib/column.pyi b/python/cudf/cudf/_lib/column.pyi index bcab009c102..bb38488eefb 100644 --- a/python/cudf/cudf/_lib/column.pyi +++ b/python/cudf/cudf/_lib/column.pyi @@ -54,7 +54,7 @@ class Column: @property def mask_ptr(self) -> int: ... def set_base_mask(self, value: Buffer | None) -> None: ... - def set_mask(self, value: Buffer | None) -> Self: ... + def set_mask(self, value: ColumnBase | Buffer | None) -> Self: ... @property def null_count(self) -> int: ... @property diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index e030147fdd3..e27c595edda 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -7,11 +7,11 @@ import cupy as cp import numpy as np import pandas as pd +import pylibcudf import rmm import cudf import cudf._lib as libcudf -from cudf._lib import pylibcudf from cudf.core.buffer import ( Buffer, ExposureTrackedBuffer, @@ -39,18 +39,18 @@ from cudf._lib.types cimport ( from cudf._lib.null_mask import bitmask_allocation_size_bytes from cudf._lib.types import dtype_from_pylibcudf_column - -cimport cudf._lib.pylibcudf.libcudf.copying as cpp_copying -cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types -cimport cudf._lib.pylibcudf.libcudf.unary as libcudf_unary -from cudf._lib.pylibcudf.libcudf.column.column cimport column, column_contents -from cudf._lib.pylibcudf.libcudf.column.column_factories cimport ( +cimport pylibcudf.libcudf.copying as cpp_copying +cimport pylibcudf.libcudf.types as libcudf_types +cimport pylibcudf.libcudf.unary as libcudf_unary +from pylibcudf.libcudf.column.column cimport column, column_contents +from pylibcudf.libcudf.column.column_factories cimport ( make_column_from_scalar as cpp_make_column_from_scalar, make_numeric_column, ) -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count +from pylibcudf.libcudf.scalar.scalar cimport scalar + from cudf._lib.scalar cimport DeviceScalar @@ -86,8 +86,10 @@ cdef class Column: object mask=None, int offset=0, object null_count=None, - object children=() + tuple children=() ): + if size < 0: + raise ValueError("size must be >=0") self._size = size self._distinct_count = {} self._dtype = dtype @@ -295,11 +297,11 @@ cdef class Column: dtypes = [ base_child.dtype for base_child in self.base_children ] - self._children = [ + self._children = tuple( child._with_type_metadata(dtype) for child, dtype in zip( children, dtypes ) - ] + ) return self._children def set_base_children(self, value): diff --git a/python/cudf/cudf/_lib/concat.pyx b/python/cudf/cudf/_lib/concat.pyx index 89ddcfee99e..e661059faa3 100644 --- a/python/cudf/cudf/_lib/concat.pyx +++ b/python/cudf/cudf/_lib/concat.pyx @@ -5,7 +5,8 @@ from libcpp cimport bool from cudf._lib.column cimport Column from cudf._lib.utils cimport data_from_pylibcudf_table -from cudf._lib import pylibcudf +import pylibcudf + from cudf.core.buffer import acquire_spill_lock diff 
--git a/python/cudf/cudf/_lib/copying.pxd b/python/cudf/cudf/_lib/copying.pxd index 8fc7f4e1da0..14c7d2066d8 100644 --- a/python/cudf/cudf/_lib/copying.pxd +++ b/python/cudf/cudf/_lib/copying.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. -from cudf._lib.pylibcudf.libcudf.contiguous_split cimport packed_columns +from pylibcudf.libcudf.contiguous_split cimport packed_columns cdef class _CPackedColumns: diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 796c70e615c..16182e31c08 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -10,8 +10,9 @@ from libcpp.vector cimport vector from rmm._lib.device_buffer cimport DeviceBuffer +import pylibcudf + import cudf -from cudf._lib import pylibcudf from cudf.core.buffer import Buffer, acquire_spill_lock, as_buffer from cudf._lib.column cimport Column @@ -26,17 +27,16 @@ from cudf.core.abc import Serializable from libcpp.memory cimport make_unique -cimport cudf._lib.pylibcudf.libcudf.contiguous_split as cpp_contiguous_split -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.lists.gather cimport ( +cimport pylibcudf.libcudf.contiguous_split as cpp_contiguous_split +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.lists.gather cimport ( segmented_gather as cpp_segmented_gather, ) -from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport ( - lists_column_view, -) -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view +from pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf.types cimport size_type + from cudf._lib.utils cimport columns_from_pylibcudf_table, data_from_table_view # workaround for https://github.com/cython/cython/issues/3885 diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx index 099b61d62ae..e0f57df1368 100644 --- a/python/cudf/cudf/_lib/csv.pyx +++ b/python/cudf/cudf/_lib/csv.pyx @@ -6,8 +6,8 @@ from libcpp.string cimport string from libcpp.utility cimport move from libcpp.vector cimport vector -cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types -from cudf._lib.pylibcudf.io.datasource cimport Datasource, NativeFileDatasource +cimport pylibcudf.libcudf.types as libcudf_types + from cudf._lib.types cimport dtype_to_pylibcudf_type import errno @@ -23,22 +23,22 @@ from cudf.core.buffer import acquire_spill_lock from libcpp cimport bool -from cudf._lib.io.utils cimport make_sink_info -from cudf._lib.pylibcudf.libcudf.io.csv cimport ( +from pylibcudf.libcudf.io.csv cimport ( csv_writer_options, write_csv as cpp_write_csv, ) -from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink -from cudf._lib.pylibcudf.libcudf.io.types cimport compression_type, sink_info -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.io.data_sink cimport data_sink +from pylibcudf.libcudf.io.types cimport compression_type, sink_info +from pylibcudf.libcudf.table.table_view cimport table_view + +from cudf._lib.io.utils cimport make_sink_info from cudf._lib.utils cimport data_from_pylibcudf_io, table_view_from_table -from pyarrow.lib import NativeFile +import pylibcudf as plc -import cudf._lib.pylibcudf as plc from 
cudf.api.types import is_hashable -from cudf._lib.pylibcudf.types cimport DataType +from pylibcudf.types cimport DataType CSV_HEX_TYPE_MAP = { "hex": np.dtype("int64"), @@ -124,9 +124,7 @@ def read_csv( cudf.read_csv """ - if not isinstance(datasource, (BytesIO, StringIO, bytes, - Datasource, - NativeFile)): + if not isinstance(datasource, (BytesIO, StringIO, bytes)): if not os.path.isfile(datasource): raise FileNotFoundError( errno.ENOENT, os.strerror(errno.ENOENT), datasource @@ -136,8 +134,6 @@ def read_csv( datasource = datasource.read().encode() elif isinstance(datasource, str) and not os.path.isfile(datasource): datasource = datasource.encode() - elif isinstance(datasource, NativeFile): - datasource = NativeFileDatasource(datasource) validate_args(delimiter, sep, delim_whitespace, decimal, thousands, nrows, skipfooter, byte_range, skiprows) @@ -286,7 +282,7 @@ def read_csv( # Set index if the index_col parameter is passed if index_col is not None and index_col is not False: if isinstance(index_col, int): - index_col_name = df._data.select_by_index(index_col).names[0] + index_col_name = df._data.get_labels_by_index(index_col)[0] df = df.set_index(index_col_name) if isinstance(index_col_name, str) and \ names is None and orig_header == "infer": diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index b30ef875a7b..483250dd36f 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -7,13 +7,14 @@ from cudf.core.buffer import acquire_spill_lock from libcpp.memory cimport unique_ptr from libcpp.utility cimport move -cimport cudf._lib.pylibcudf.libcudf.datetime as libcudf_datetime +cimport pylibcudf.libcudf.datetime as libcudf_datetime +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.filling cimport calendrical_month_sequence +from pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf.types cimport size_type + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.filling cimport calendrical_month_sequence -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar -from cudf._lib.pylibcudf.libcudf.types cimport size_type from cudf._lib.scalar cimport DeviceScalar diff --git a/python/cudf/cudf/_lib/filling.pyx b/python/cudf/cudf/_lib/filling.pyx index b7302f3d07a..b2f4c620144 100644 --- a/python/cudf/cudf/_lib/filling.pyx +++ b/python/cudf/cudf/_lib/filling.pyx @@ -2,12 +2,12 @@ from cudf.core.buffer import acquire_spill_lock - from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar from cudf._lib.utils cimport columns_from_pylibcudf_table -from cudf._lib import pylibcudf +import pylibcudf + from cudf._lib.scalar import as_device_scalar diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 9d18e023fe8..c199ed96d4f 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -18,10 +18,11 @@ from cudf._lib.utils cimport columns_from_pylibcudf_table from cudf._lib.scalar import as_device_scalar -from cudf._lib.pylibcudf.libcudf.replace cimport replace_policy -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf.replace cimport replace_policy +from pylibcudf.libcudf.scalar.scalar cimport scalar + +import pylibcudf -from cudf._lib import pylibcudf 
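# All of the Cython churn in these hunks is one mechanical rename: the
# package that used to be vendored as cudf._lib.pylibcudf is now the
# standalone top-level pylibcudf distribution, so imports and cimports
# simply drop the prefix. A minimal sketch of the new spelling; each
# name below appears in hunks elsewhere in this diff:
import pylibcudf as plc                # was: import cudf._lib.pylibcudf as plc
from pylibcudf.types import DataType   # was: from cudf._lib.pylibcudf.types import DataType
# and on the Cython side:
#     from pylibcudf cimport Scalar    # was: from cudf._lib.pylibcudf cimport Scalar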
from cudf._lib.aggregation import make_aggregation # The sets below define the possible aggregations that can be performed on diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx index b8331d5a226..48f75b12a73 100644 --- a/python/cudf/cudf/_lib/hash.pyx +++ b/python/cudf/cudf/_lib/hash.pyx @@ -7,10 +7,9 @@ from libcpp.pair cimport pair from libcpp.utility cimport move from libcpp.vector cimport vector -cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.hash cimport ( +cimport pylibcudf.libcudf.types as libcudf_types +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.hash cimport ( md5, murmurhash3_x86_32, sha1, @@ -20,11 +19,13 @@ from cudf._lib.pylibcudf.libcudf.hash cimport ( sha512, xxhash_64, ) -from cudf._lib.pylibcudf.libcudf.partitioning cimport ( +from pylibcudf.libcudf.partitioning cimport ( hash_partition as cpp_hash_partition, ) -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.table.table_view cimport table_view + +from cudf._lib.column cimport Column from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 37595b65e65..1dc586bb257 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -4,15 +4,16 @@ from cpython cimport pycapsule from libcpp.memory cimport unique_ptr from libcpp.utility cimport move -from cudf._lib import pylibcudf +import pylibcudf -from cudf._lib.pylibcudf.libcudf.interop cimport ( +from pylibcudf.libcudf.interop cimport ( DLManagedTensor, from_dlpack as cpp_from_dlpack, to_dlpack as cpp_to_dlpack, ) -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.table.table_view cimport table_view + from cudf._lib.utils cimport ( columns_from_pylibcudf_table, columns_from_unique_ptr, diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index 620229a1275..e7408cf2852 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -19,5 +19,3 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX io_ ASSOCIATED_TARGETS cudf ) - -link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") diff --git a/python/cudf/cudf/_lib/io/utils.pxd b/python/cudf/cudf/_lib/io/utils.pxd index 680a87c789e..1938f00c179 100644 --- a/python/cudf/cudf/_lib/io/utils.pxd +++ b/python/cudf/cudf/_lib/io/utils.pxd @@ -3,14 +3,15 @@ from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink -from cudf._lib.pylibcudf.libcudf.io.types cimport ( +from pylibcudf.libcudf.io.data_sink cimport data_sink +from pylibcudf.libcudf.io.types cimport ( column_name_info, sink_info, source_info, ) +from cudf._lib.column cimport Column + cdef source_info make_source_info(list src) except* cdef sink_info make_sinks_info( diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx index 
58956b9e9b7..b1900138d94 100644 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ b/python/cudf/cudf/_lib/io/utils.pyx @@ -7,17 +7,18 @@ from libcpp.string cimport string from libcpp.utility cimport move from libcpp.vector cimport vector -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.io.datasource cimport Datasource -from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink -from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource -from cudf._lib.pylibcudf.libcudf.io.types cimport ( +from pylibcudf.io.datasource cimport Datasource +from pylibcudf.libcudf.io.data_sink cimport data_sink +from pylibcudf.libcudf.io.datasource cimport datasource +from pylibcudf.libcudf.io.types cimport ( column_name_info, host_buffer, sink_info, source_info, ) +from cudf._lib.column cimport Column + import codecs import errno import io diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx index 0a54f0d67a0..2559358c21f 100644 --- a/python/cudf/cudf/_lib/join.pyx +++ b/python/cudf/cudf/_lib/join.pyx @@ -4,7 +4,7 @@ from cudf.core.buffer import acquire_spill_lock from cudf._lib.column cimport Column -from cudf._lib import pylibcudf +import pylibcudf # The functions below return the *gathermaps* that represent # the join result when joining on the keys `lhs` and `rhs`. diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index 03bf9ed8b75..9bbbcf60dcf 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -9,18 +9,19 @@ from cudf.core.buffer import acquire_spill_lock from libcpp cimport bool -cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types +cimport pylibcudf.libcudf.io.types as cudf_io_types +from pylibcudf.io.types cimport compression_type +from pylibcudf.libcudf.io.json cimport json_recovery_mode_t +from pylibcudf.libcudf.io.types cimport compression_type +from pylibcudf.libcudf.types cimport data_type, type_id +from pylibcudf.types cimport DataType + from cudf._lib.column cimport Column from cudf._lib.io.utils cimport add_df_col_struct_names -from cudf._lib.pylibcudf.io.types cimport compression_type -from cudf._lib.pylibcudf.libcudf.io.json cimport json_recovery_mode_t -from cudf._lib.pylibcudf.libcudf.io.types cimport compression_type -from cudf._lib.pylibcudf.libcudf.types cimport data_type, type_id -from cudf._lib.pylibcudf.types cimport DataType from cudf._lib.types cimport dtype_to_data_type from cudf._lib.utils cimport _data_from_columns, data_from_pylibcudf_io -import cudf._lib.pylibcudf as plc +import pylibcudf as plc cdef json_recovery_mode_t _get_json_recovery_mode(object on_bad_lines): diff --git a/python/cudf/cudf/_lib/labeling.pyx b/python/cudf/cudf/_lib/labeling.pyx index 439a727a9ca..2e1959a348d 100644 --- a/python/cudf/cudf/_lib/labeling.pyx +++ b/python/cudf/cudf/_lib/labeling.pyx @@ -6,13 +6,11 @@ from libcpp cimport bool as cbool from libcpp.memory cimport unique_ptr from libcpp.utility cimport move +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.labeling cimport inclusive, label_bins as cpp_label_bins + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.labeling cimport ( - inclusive, - label_bins as cpp_label_bins, -) # Note that the parameter input shadows a Python built-in in the local scope, diff --git 
diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx
index f6d9c8c404c..7e8710bedb6 100644
--- a/python/cudf/cudf/_lib/lists.pyx
+++ b/python/cudf/cudf/_lib/lists.pyx
@@ -4,13 +4,14 @@ from cudf.core.buffer import acquire_spill_lock
 
 from libcpp cimport bool
 
+from pylibcudf.libcudf.types cimport null_order, size_type
+
 from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.types cimport null_order, size_type
 from cudf._lib.utils cimport columns_from_pylibcudf_table
 
-from cudf._lib import pylibcudf
+import pylibcudf
 
-from cudf._lib.pylibcudf cimport Scalar
+from pylibcudf cimport Scalar
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/merge.pyx b/python/cudf/cudf/_lib/merge.pyx
index fe7f7ad2918..9372acdab44 100644
--- a/python/cudf/cudf/_lib/merge.pyx
+++ b/python/cudf/cudf/_lib/merge.pyx
@@ -4,7 +4,7 @@ from libcpp cimport bool
 
 from cudf._lib.utils cimport columns_from_pylibcudf_table
 
-from cudf._lib import pylibcudf
+import pylibcudf
 
 
 def merge_sorted(
diff --git a/python/cudf/cudf/_lib/null_mask.pyx b/python/cudf/cudf/_lib/null_mask.pyx
index b00deae2270..d54e8e66281 100644
--- a/python/cudf/cudf/_lib/null_mask.pyx
+++ b/python/cudf/cudf/_lib/null_mask.pyx
@@ -1,38 +1,11 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
-from enum import Enum
-
-from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
+import pylibcudf
+from pylibcudf.null_mask import MaskState
 
 from cudf.core.buffer import acquire_spill_lock, as_buffer
 
-from libcpp.memory cimport make_unique, unique_ptr
-from libcpp.pair cimport pair
-from libcpp.utility cimport move
-
 from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.null_mask cimport (
-    bitmask_allocation_size_bytes as cpp_bitmask_allocation_size_bytes,
-    bitmask_and as cpp_bitmask_and,
-    bitmask_or as cpp_bitmask_or,
-    copy_bitmask as cpp_copy_bitmask,
-    create_null_mask as cpp_create_null_mask,
-    underlying_type_t_mask_state,
-)
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport mask_state, size_type
-from cudf._lib.utils cimport table_view_from_columns
-
-
-class MaskState(Enum):
-    """
-    Enum for null mask creation state
-    """
-    UNALLOCATED = mask_state.UNALLOCATED
-    UNINITIALIZED = mask_state.UNINITIALIZED
-    ALL_VALID = mask_state.ALL_VALID
-    ALL_NULL = mask_state.ALL_NULL
 
 
 @acquire_spill_lock()
@@ -44,33 +17,20 @@ def copy_bitmask(Column col):
     if col.base_mask is None:
         return None
 
-    cdef column_view col_view = col.view()
-    cdef device_buffer db
-    cdef unique_ptr[device_buffer] up_db
-
-    with nogil:
-        db = move(cpp_copy_bitmask(col_view))
-        up_db = move(make_unique[device_buffer](move(db)))
-
-    rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
+    rmm_db = pylibcudf.null_mask.copy_bitmask(col.to_pylibcudf(mode="read"))
     buf = as_buffer(rmm_db)
     return buf
 
 
-def bitmask_allocation_size_bytes(size_type num_bits):
+def bitmask_allocation_size_bytes(num_bits):
     """
     Given a size, calculates the number of bytes that should be allocated for
     a column validity mask
     """
-    cdef size_t output_size
-
-    with nogil:
-        output_size = cpp_bitmask_allocation_size_bytes(num_bits)
+    return pylibcudf.null_mask.bitmask_allocation_size_bytes(num_bits)
 
-    return output_size
-
-def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
+def create_null_mask(size, state=MaskState.UNINITIALIZED):
     """
     Given a size and a mask state, allocate a mask that can properly represent
     the given size with the given mask state
@@ -82,48 +42,24 @@ def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
 
     state : ``MaskState``, default ``MaskState.UNINITIALIZED``
         State the null mask should be created in
     """
-    if not isinstance(state, MaskState):
-        raise TypeError(
-            "`state` is required to be of type `MaskState`, got "
-            + (type(state).__name__)
-        )
-
-    cdef device_buffer db
-    cdef unique_ptr[device_buffer] up_db
-    cdef mask_state c_mask_state = (
-        <mask_state>(<underlying_type_t_mask_state>(state.value))
-    )
-
-    with nogil:
-        db = move(cpp_create_null_mask(size, c_mask_state))
-        up_db = move(make_unique[device_buffer](move(db)))
-
-    rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
+    rmm_db = pylibcudf.null_mask.create_null_mask(size, state)
     buf = as_buffer(rmm_db)
     return buf
 
 
 @acquire_spill_lock()
-def bitmask_and(columns: list):
-    cdef table_view c_view = table_view_from_columns(columns)
-    cdef pair[device_buffer, size_type] c_result
-    cdef unique_ptr[device_buffer] up_db
-    with nogil:
-        c_result = move(cpp_bitmask_and(c_view))
-        up_db = move(make_unique[device_buffer](move(c_result.first)))
-    dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
-    buf = as_buffer(dbuf)
-    return buf, c_result.second
+def bitmask_and(list columns):
+    rmm_db, other = pylibcudf.null_mask.bitmask_and(
+        [col.to_pylibcudf(mode="read") for col in columns]
+    )
+    buf = as_buffer(rmm_db)
+    return buf, other
 
 
 @acquire_spill_lock()
-def bitmask_or(columns: list):
-    cdef table_view c_view = table_view_from_columns(columns)
-    cdef pair[device_buffer, size_type] c_result
-    cdef unique_ptr[device_buffer] up_db
-    with nogil:
-        c_result = move(cpp_bitmask_or(c_view))
-        up_db = move(make_unique[device_buffer](move(c_result.first)))
-    dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
-    buf = as_buffer(dbuf)
-    return buf, c_result.second
+def bitmask_or(list columns):
+    rmm_db, other = pylibcudf.null_mask.bitmask_or(
+        [col.to_pylibcudf(mode="read") for col in columns]
+    )
+    buf = as_buffer(rmm_db)
+    return buf, other
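With the hand-written libcudf plumbing deleted above, the null-mask helpers reduce to thin wrappers over `pylibcudf.null_mask`. A rough sketch of the calls these wrappers now make, assuming a pylibcudf build that exposes the functions named in the diff (the sizes are illustrative):

    import pylibcudf as plc
    from pylibcudf.null_mask import MaskState

    # Bytes needed for a validity bitmask covering 1000 rows (padded allocation).
    nbytes = plc.null_mask.bitmask_allocation_size_bytes(1000)

    # Allocate a device buffer for the mask, initialized to "all valid".
    mask = plc.null_mask.create_null_mask(1000, MaskState.ALL_VALID)
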
diff --git a/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx b/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx
index d60162d0656..0d768e24f39 100644
--- a/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx
+++ b/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx
@@ -6,15 +6,16 @@ from cudf.core.buffer import acquire_spill_lock
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.nvtext.byte_pair_encode cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.byte_pair_encode cimport (
     bpe_merge_pairs as cpp_bpe_merge_pairs,
     byte_pair_encoding as cpp_byte_pair_encoding,
     load_merge_pairs as cpp_load_merge_pairs,
 )
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+
+from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
diff --git a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx b/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
index 514b6610575..e3c2273345a 100644
--- a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
+++ b/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
@@ -5,14 +5,15 @@ from cudf.core.buffer import acquire_spill_lock
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.nvtext.edit_distance cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.edit_distance cimport (
     edit_distance as cpp_edit_distance,
     edit_distance_matrix as cpp_edit_distance_matrix,
 )
 
+from cudf._lib.column cimport Column
+
 
 @acquire_spill_lock()
 def edit_distance(Column strings, Column targets):
diff --git a/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx b/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx
index a6b9a1e4f7a..6591b527eec 100644
--- a/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx
+++ b/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx
@@ -5,16 +5,17 @@ from cudf.core.buffer import acquire_spill_lock
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.nvtext.generate_ngrams cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.generate_ngrams cimport (
     generate_character_ngrams as cpp_generate_character_ngrams,
     generate_ngrams as cpp_generate_ngrams,
     hash_character_ngrams as cpp_hash_character_ngrams,
 )
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport size_type
+
+from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
diff --git a/python/cudf/cudf/_lib/nvtext/jaccard.pyx b/python/cudf/cudf/_lib/nvtext/jaccard.pyx
index 42fe15d6869..0ebf7c281e3 100644
--- a/python/cudf/cudf/_lib/nvtext/jaccard.pyx
+++ b/python/cudf/cudf/_lib/nvtext/jaccard.pyx
@@ -5,13 +5,14 @@ from cudf.core.buffer import acquire_spill_lock
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.nvtext.jaccard cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.jaccard cimport (
     jaccard_index as cpp_jaccard_index,
 )
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.types cimport size_type
+
+from cudf._lib.column cimport Column
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/nvtext/minhash.pyx b/python/cudf/cudf/_lib/nvtext/minhash.pyx
index 4c92999e190..5ee15d0e409 100644
--- a/python/cudf/cudf/_lib/nvtext/minhash.pyx
+++ b/python/cudf/cudf/_lib/nvtext/minhash.pyx
@@ -5,14 +5,15 @@ from cudf.core.buffer import acquire_spill_lock
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.nvtext.minhash cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.minhash cimport (
     minhash as cpp_minhash,
     minhash64 as cpp_minhash64,
 )
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.types cimport size_type
+
+from cudf._lib.column cimport Column
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx
index ccd8de8c96f..dec4f037d98 100644
--- a/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx
+++ b/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx
@@ -5,14 +5,15 @@ from cudf.core.buffer import acquire_spill_lock
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.nvtext.ngrams_tokenize cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.ngrams_tokenize cimport (
     ngrams_tokenize as cpp_ngrams_tokenize,
 )
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport size_type
+
+from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
diff --git a/python/cudf/cudf/_lib/nvtext/normalize.pyx b/python/cudf/cudf/_lib/nvtext/normalize.pyx
index 9f81f865bb7..5e86a9ce959 100644
--- a/python/cudf/cudf/_lib/nvtext/normalize.pyx
+++ b/python/cudf/cudf/_lib/nvtext/normalize.pyx
@@ -6,14 +6,15 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.nvtext.normalize cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.normalize cimport (
     normalize_characters as cpp_normalize_characters,
     normalize_spaces as cpp_normalize_spaces,
 )
 
+from cudf._lib.column cimport Column
+
 
 @acquire_spill_lock()
 def normalize_spaces(Column strings):
diff --git a/python/cudf/cudf/_lib/nvtext/replace.pyx b/python/cudf/cudf/_lib/nvtext/replace.pyx
index ce2edc58d19..61ae3da5782 100644
--- a/python/cudf/cudf/_lib/nvtext/replace.pyx
+++ b/python/cudf/cudf/_lib/nvtext/replace.pyx
@@ -5,15 +5,16 @@ from cudf.core.buffer import acquire_spill_lock
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.nvtext.replace cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.replace cimport (
     filter_tokens as cpp_filter_tokens,
     replace_tokens as cpp_replace_tokens,
 )
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport size_type
+
+from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
diff --git a/python/cudf/cudf/_lib/nvtext/stemmer.pyx b/python/cudf/cudf/_lib/nvtext/stemmer.pyx
index 8f75953ae99..5bf25562fed 100644
--- a/python/cudf/cudf/_lib/nvtext/stemmer.pyx
+++ b/python/cudf/cudf/_lib/nvtext/stemmer.pyx
@@ -7,16 +7,17 @@ from libcpp.utility cimport move
 
 from enum import IntEnum
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.nvtext.stemmer cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.stemmer cimport (
     is_letter as cpp_is_letter,
     letter_type,
     porter_stemmer_measure as cpp_porter_stemmer_measure,
     underlying_type_t_letter_type,
 )
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.types cimport size_type
+
+from cudf._lib.column cimport Column
 
 
 class LetterType(IntEnum):
diff --git a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx
index 1112667a087..ee442ece5c6 100644
--- a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx
+++ b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx
@@ -9,9 +9,8 @@ from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.nvtext.subword_tokenize cimport (
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.subword_tokenize cimport (
     hashed_vocabulary as cpp_hashed_vocabulary,
     load_vocabulary_file as cpp_load_vocabulary_file,
     move as tr_move,
@@ -19,6 +18,8 @@ from cudf._lib.pylibcudf.libcudf.nvtext.subword_tokenize cimport (
     tokenizer_result as cpp_tokenizer_result,
 )
 
+from cudf._lib.column cimport Column
+
 
 cdef class Hashed_Vocabulary:
     cdef unique_ptr[cpp_hashed_vocabulary] c_obj
diff --git a/python/cudf/cudf/_lib/nvtext/tokenize.pyx b/python/cudf/cudf/_lib/nvtext/tokenize.pyx
index 98afd94ab1c..a7e63f1e9ae 100644
--- a/python/cudf/cudf/_lib/nvtext/tokenize.pyx
+++ b/python/cudf/cudf/_lib/nvtext/tokenize.pyx
@@ -5,10 +5,9 @@ from cudf.core.buffer import acquire_spill_lock
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.nvtext.tokenize cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.tokenize cimport (
     character_tokenize as cpp_character_tokenize,
     count_tokens as cpp_count_tokens,
     detokenize as cpp_detokenize,
@@ -17,8 +16,10 @@ from cudf._lib.pylibcudf.libcudf.nvtext.tokenize cimport (
     tokenize_vocabulary as cpp_tokenize_vocabulary,
     tokenize_with_vocabulary as cpp_tokenize_with_vocabulary,
 )
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport size_type
+
+from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx
index 9609e3131b4..adeba6fffb1 100644
--- a/python/cudf/cudf/_lib/orc.pyx
+++ b/python/cudf/cudf/_lib/orc.pyx
@@ -14,23 +14,16 @@ from libcpp.vector cimport vector
 import datetime
 from collections import OrderedDict
 
-cimport cudf._lib.pylibcudf.libcudf.lists.lists_column_view as cpp_lists_column_view
+cimport pylibcudf.libcudf.lists.lists_column_view as cpp_lists_column_view
 
 try:
     import ujson as json
except ImportError:
     import json
 
-cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
-from cudf._lib.column cimport Column
-from cudf._lib.io.utils cimport (
-    make_sink_info,
-    make_source_info,
-    update_column_struct_field_names,
-)
-from cudf._lib.pylibcudf.io.datasource cimport NativeFileDatasource
-from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink
-from cudf._lib.pylibcudf.libcudf.io.orc cimport (
+cimport pylibcudf.libcudf.io.types as cudf_io_types
+from pylibcudf.libcudf.io.data_sink cimport data_sink
+from pylibcudf.libcudf.io.orc cimport (
     chunked_orc_writer_options,
     orc_chunked_writer,
     orc_reader_options,
@@ -38,7 +31,7 @@ from cudf._lib.pylibcudf.libcudf.io.orc cimport (
     read_orc as libcudf_read_orc,
     write_orc as libcudf_write_orc,
 )
-from cudf._lib.pylibcudf.libcudf.io.orc_metadata cimport (
+from pylibcudf.libcudf.io.orc_metadata cimport (
     binary_statistics,
     bucket_statistics,
     column_statistics,
@@ -53,7 +46,7 @@ from cudf._lib.pylibcudf.libcudf.io.orc_metadata cimport (
     string_statistics,
     timestamp_statistics,
 )
-from cudf._lib.pylibcudf.libcudf.io.types cimport (
+from pylibcudf.libcudf.io.types cimport (
     column_in_metadata,
     compression_type,
     sink_info,
@@ -61,17 +54,22 @@ from cudf._lib.pylibcudf.libcudf.io.types cimport (
     table_input_metadata,
     table_with_metadata,
 )
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type, type_id
-from cudf._lib.variant cimport get_if as std_get_if, holds_alternative
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport data_type, size_type, type_id
+from pylibcudf.variant cimport get_if as std_get_if, holds_alternative
+
+from cudf._lib.column cimport Column
+from cudf._lib.io.utils cimport (
+    make_sink_info,
+    make_source_info,
+    update_column_struct_field_names,
+)
 
 from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
 
 from cudf._lib.types cimport underlying_type_t_type_id
 from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
 
-from pyarrow.lib import NativeFile
-
 from cudf._lib.utils import _index_level_name, generate_pandas_metadata
 
@@ -203,10 +201,6 @@ cpdef read_parsed_orc_statistics(filepath_or_buffer):
     cudf.io.orc.read_orc_statistics
     """
 
-    # Handle NativeFile input
-    if isinstance(filepath_or_buffer, NativeFile):
-        filepath_or_buffer = NativeFileDatasource(filepath_or_buffer)
-
     cdef parsed_orc_statistics parsed = (
         libcudf_read_parsed_orc_statistics(make_source_info([filepath_or_buffer]))
     )
@@ -489,9 +483,6 @@ cdef orc_reader_options make_orc_reader_options(
     bool use_index
 ) except*:
 
-    for i, datasource in enumerate(filepaths_or_buffers):
-        if isinstance(datasource, NativeFile):
-            filepaths_or_buffers[i] = NativeFileDatasource(datasource)
     cdef vector[vector[size_type]] strps = stripes
     cdef orc_reader_options opts
     cdef source_info src = make_source_info(filepaths_or_buffers)
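The deleted branches above (and the matching ones in parquet.pyx below) drop the pyarrow `NativeFile` conversion path entirely; sources now go straight to `make_source_info`. In user terms, the readers expect plain paths, bytes, or Python file objects. A minimal sketch (the file name is hypothetical):

    import cudf

    # NativeFile inputs are no longer converted via NativeFileDatasource;
    # pass a path or file-like object directly.
    df = cudf.read_orc("example.orc")
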
diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index a2eed94bb3c..a0155671a26 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -22,7 +22,7 @@ from cudf._lib.utils cimport _data_from_columns, data_from_pylibcudf_io
 
 from cudf._lib.utils import _index_level_name, generate_pandas_metadata
 
-from libc.stdint cimport uint8_t
+from libc.stdint cimport int64_t, uint8_t
 from libcpp cimport bool
 from libcpp.map cimport map
 from libcpp.memory cimport make_unique, unique_ptr
@@ -31,40 +31,40 @@ from libcpp.unordered_map cimport unordered_map
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
-cimport cudf._lib.pylibcudf.libcudf.io.data_sink as cudf_io_data_sink
-cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
-from cudf._lib.column cimport Column
-from cudf._lib.io.utils cimport (
-    add_df_col_struct_names,
-    make_sinks_info,
-    make_source_info,
-)
-from cudf._lib.pylibcudf.expressions cimport Expression
-from cudf._lib.pylibcudf.io.datasource cimport NativeFileDatasource
-from cudf._lib.pylibcudf.io.parquet cimport ChunkedParquetReader
-from cudf._lib.pylibcudf.libcudf.io.parquet cimport (
+cimport pylibcudf.libcudf.io.data_sink as cudf_io_data_sink
+cimport pylibcudf.libcudf.io.types as cudf_io_types
+from pylibcudf.expressions cimport Expression
+from pylibcudf.io.parquet cimport ChunkedParquetReader
+from pylibcudf.libcudf.io.parquet cimport (
     chunked_parquet_writer_options,
     merge_row_group_metadata as parquet_merge_metadata,
     parquet_chunked_writer as cpp_parquet_chunked_writer,
     parquet_writer_options,
     write_parquet as parquet_writer,
 )
-from cudf._lib.pylibcudf.libcudf.io.parquet_metadata cimport (
+from pylibcudf.libcudf.io.parquet_metadata cimport (
     parquet_metadata,
     read_parquet_metadata as parquet_metadata_reader,
 )
-from cudf._lib.pylibcudf.libcudf.io.types cimport (
+from pylibcudf.libcudf.io.types cimport (
     column_in_metadata,
     table_input_metadata,
 )
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport size_type
+
+from cudf._lib.column cimport Column
+from cudf._lib.io.utils cimport (
+    add_df_col_struct_names,
+    make_sinks_info,
+    make_source_info,
+)
 from cudf._lib.utils cimport table_view_from_table
 
-from pyarrow.lib import NativeFile
+import pylibcudf as plc
+
+from pylibcudf cimport Table
 
-import cudf._lib.pylibcudf as plc
-from cudf._lib.pylibcudf cimport Table
 from cudf.utils.ioutils import _ROW_GROUP_SIZE_BYTES_DEFAULT
 
@@ -130,9 +130,11 @@ cdef object _process_metadata(object df,
                               list per_file_user_data,
                               object row_groups,
                               object filepaths_or_buffers,
-                              list pa_buffers,
                               bool allow_range_index,
-                              bool use_pandas_metadata):
+                              bool use_pandas_metadata,
+                              size_type nrows=-1,
+                              int64_t skip_rows=0,
+                              ):
     add_df_col_struct_names(df, child_names)
     index_col = None
@@ -193,9 +195,7 @@ cdef object _process_metadata(object df,
             pa.parquet.read_metadata(
                 # Pyarrow cannot read directly from bytes
                 io.BytesIO(s) if isinstance(s, bytes) else s
-            ) for s in (
-                pa_buffers or filepaths_or_buffers
-            )
+            ) for s in filepaths_or_buffers
         ]
 
         filtered_idx = []
@@ -219,11 +219,15 @@ cdef object _process_metadata(object df,
                 if len(filtered_idx) > 0:
                     idx = cudf.concat(filtered_idx)
                 else:
-                    idx = cudf.Index(cudf.core.column.column_empty(0))
+                    idx = cudf.Index._from_column(cudf.core.column.column_empty(0))
             else:
+                start = range_index_meta["start"] + skip_rows
+                stop = range_index_meta["stop"]
+                if nrows != -1:
+                    stop = start + nrows
                 idx = cudf.RangeIndex(
-                    start=range_index_meta['start'],
-                    stop=range_index_meta['stop'],
+                    start=start,
+                    stop=stop,
                     step=range_index_meta['step'],
                     name=range_index_meta['name']
                 )
@@ -233,7 +237,7 @@ cdef object _process_metadata(object df,
             index_data = df[index_col]
             actual_index_names = list(index_col_names.values())
             if len(index_data._data) == 1:
-                idx = cudf.Index(
+                idx = cudf.Index._from_column(
                     index_data._data.columns[0],
                     name=actual_index_names[0]
                 )
@@ -260,34 +264,26 @@ def read_parquet_chunked(
     row_groups=None,
     use_pandas_metadata=True,
     size_t chunk_read_limit=0,
-    size_t pass_read_limit=1024000000
+    size_t pass_read_limit=1024000000,
+    size_type nrows=-1,
+    int64_t skip_rows=0,
+    allow_mismatched_pq_schemas=False
 ):
-    # Convert NativeFile buffers to NativeFileDatasource,
-    # but save original buffers in case we need to use
-    # pyarrow for metadata processing
-    # (See: https://github.com/rapidsai/cudf/issues/9599)
-
-    pa_buffers = []
-
-    new_bufs = []
-    for i, datasource in enumerate(filepaths_or_buffers):
-        if isinstance(datasource, NativeFile):
-            new_bufs.append(NativeFileDatasource(datasource))
-        else:
-            new_bufs.append(datasource)
-
     # Note: If this function ever takes accepts filters
     # allow_range_index needs to be False when a filter is passed
     # (see read_parquet)
     allow_range_index = columns is not None and len(columns) != 0
 
     reader = ChunkedParquetReader(
-        plc.io.SourceInfo(new_bufs),
+        plc.io.SourceInfo(filepaths_or_buffers),
         columns,
         row_groups,
-        use_pandas_metadata,
+        use_pandas_metadata=use_pandas_metadata,
         chunk_read_limit=chunk_read_limit,
-        pass_read_limit=pass_read_limit
+        pass_read_limit=pass_read_limit,
+        skip_rows=skip_rows,
+        nrows=nrows,
+        allow_mismatched_pq_schemas=allow_mismatched_pq_schemas,
     )
 
     tbl_w_meta = reader.read_chunk()
@@ -319,14 +315,18 @@ def read_parquet_chunked(
     )
 
     df = _process_metadata(df, column_names, child_names,
                            per_file_user_data, row_groups,
-                           filepaths_or_buffers, pa_buffers,
-                           allow_range_index, use_pandas_metadata)
+                           filepaths_or_buffers,
+                           allow_range_index, use_pandas_metadata,
+                           nrows=nrows, skip_rows=skip_rows)
     return df
 
 
 cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
                    use_pandas_metadata=True,
-                   Expression filters=None):
+                   Expression filters=None,
+                   size_type nrows=-1,
+                   int64_t skip_rows=0,
+                   allow_mismatched_pq_schemas=False):
     """
     Cython function to call into libcudf API, see `read_parquet`.
@@ -339,16 +339,6 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
     cudf.io.parquet.to_parquet
     """
 
-    # Convert NativeFile buffers to NativeFileDatasource,
-    # but save original buffers in case we need to use
-    # pyarrow for metadata processing
-    # (See: https://github.com/rapidsai/cudf/issues/9599)
-    pa_buffers = []
-    for i, datasource in enumerate(filepaths_or_buffers):
-        if isinstance(datasource, NativeFile):
-            pa_buffers.append(datasource)
-            filepaths_or_buffers[i] = NativeFileDatasource(datasource)
-
     allow_range_index = True
     if columns is not None and len(columns) == 0 or filters:
         allow_range_index = False
@@ -362,6 +352,9 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
         filters,
         convert_strings_to_categories = False,
         use_pandas_metadata = use_pandas_metadata,
+        skip_rows = skip_rows,
+        nrows = nrows,
+        allow_mismatched_pq_schemas=allow_mismatched_pq_schemas,
     )
 
     df = cudf.DataFrame._from_data(
@@ -370,8 +363,9 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
 
     df = _process_metadata(df, tbl_w_meta.column_names(include_children=False),
                            tbl_w_meta.child_names, tbl_w_meta.per_file_user_data,
-                           row_groups, filepaths_or_buffers, pa_buffers,
-                           allow_range_index, use_pandas_metadata)
+                           row_groups, filepaths_or_buffers,
+                           allow_range_index, use_pandas_metadata,
+                           nrows=nrows, skip_rows=skip_rows)
     return df
 
 
 cpdef read_parquet_metadata(filepaths_or_buffers):
@@ -383,11 +377,6 @@ cpdef read_parquet_metadata(filepaths_or_buffers):
     cudf.io.parquet.read_parquet
     cudf.io.parquet.to_parquet
     """
-    # Convert NativeFile buffers to NativeFileDatasource
-    for i, datasource in enumerate(filepaths_or_buffers):
-        if isinstance(datasource, NativeFile):
-            filepaths_or_buffers[i] = NativeFileDatasource(datasource)
-
     cdef cudf_io_types.source_info source = make_source_info(filepaths_or_buffers)
 
     args = move(source)
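The new `nrows`/`skip_rows` parameters thread down into `_process_metadata`, where the reconstructed `RangeIndex` must be shifted and clipped to match the rows actually read. The arithmetic from the diff, isolated as plain Python (the function name and sample metadata are illustrative):

    def adjusted_range_index(range_index_meta, skip_rows=0, nrows=-1):
        # Mirrors the fix-up in _process_metadata: shift the start by the
        # skipped rows, and clip the stop when a row limit was requested.
        start = range_index_meta["start"] + skip_rows
        stop = range_index_meta["stop"]
        if nrows != -1:
            stop = start + nrows
        return start, stop

    print(adjusted_range_index({"start": 0, "stop": 100}, skip_rows=10, nrows=25))
    # (10, 35)
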
diff --git a/python/cudf/cudf/_lib/partitioning.pyx b/python/cudf/cudf/_lib/partitioning.pyx
index 708ec4174aa..d94f0e1b564 100644
--- a/python/cudf/cudf/_lib/partitioning.pyx
+++ b/python/cudf/cudf/_lib/partitioning.pyx
@@ -7,19 +7,18 @@ from libcpp.pair cimport pair
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.partitioning cimport partition as cpp_partition
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
+
 from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.partitioning cimport (
-    partition as cpp_partition,
-)
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
 from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 from cudf._lib.reduce import minmax
 from cudf._lib.stream_compaction import distinct_count as cpp_distinct_count
 
-cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
+cimport pylibcudf.libcudf.types as libcudf_types
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/avro.pxd b/python/cudf/cudf/_lib/pylibcudf/io/avro.pxd
deleted file mode 100644
index 3695f36a6e7..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/io/avro.pxd
+++ /dev/null
@@ -1,12 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-from cudf._lib.pylibcudf.io.types cimport SourceInfo, TableWithMetadata
-from cudf._lib.pylibcudf.libcudf.io.avro cimport avro_reader_options
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
-
-
-cpdef TableWithMetadata read_avro(
-    SourceInfo source_info,
-    list columns = *,
-    size_type skip_rows = *,
-    size_type num_rows = *
-)
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/datasource.pxd b/python/cudf/cudf/_lib/pylibcudf/io/datasource.pxd
deleted file mode 100644
index a0a9c3fa0d4..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/io/datasource.pxd
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-
-from libcpp.memory cimport shared_ptr
-
-from cudf._lib.pylibcudf.libcudf.io.arrow_io_source cimport arrow_io_source
-from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource
-
-
-cdef class Datasource:
-    cdef datasource* get_datasource(self) except * nogil
-
-
-cdef class NativeFileDatasource(Datasource):
-    cdef shared_ptr[arrow_io_source] c_datasource
-    cdef datasource* get_datasource(self) nogil
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/datasource.pyx b/python/cudf/cudf/_lib/pylibcudf/io/datasource.pyx
deleted file mode 100644
index 8f265f585de..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/io/datasource.pyx
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-
-from libcpp.memory cimport shared_ptr
-from pyarrow.includes.libarrow cimport CRandomAccessFile
-from pyarrow.lib cimport NativeFile
-
-from cudf._lib.pylibcudf.libcudf.io.arrow_io_source cimport arrow_io_source
-from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource
-
-import warnings
-
-
-cdef class Datasource:
-    cdef datasource* get_datasource(self) except * nogil:
-        with gil:
-            raise NotImplementedError("get_datasource() should not "
-                                      + "be directly invoked here")
-
-cdef class NativeFileDatasource(Datasource):
-
-    def __cinit__(self, NativeFile native_file):
-
-        cdef shared_ptr[CRandomAccessFile] ra_src
-
-        warnings.warn(
-            "Support for reading pyarrow's NativeFile is deprecated "
-            "and will be removed in a future release of cudf.",
-            FutureWarning,
-        )
-
-        ra_src = native_file.get_random_access_file()
-        self.c_datasource.reset(new arrow_io_source(ra_src))
-
-    cdef datasource* get_datasource(self) nogil:
-        return <datasource *>(self.c_datasource.get())
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/interop.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/interop.pxd
deleted file mode 100644
index 2151da28d4b..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/interop.pxd
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-
-from libcpp.memory cimport shared_ptr, unique_ptr
-from libcpp.string cimport string
-from libcpp.vector cimport vector
-from pyarrow.lib cimport CScalar, CTable
-
-from cudf._lib.types import cudf_to_np_types, np_to_cudf_types
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-
-
-cdef extern from "dlpack/dlpack.h" nogil:
-    ctypedef struct DLManagedTensor:
-        void(*deleter)(DLManagedTensor*) except +
-
-
-# The Arrow structs are not namespaced.
-cdef extern from "cudf/interop.hpp" nogil:
-    cdef struct ArrowSchema:
-        void (*release)(ArrowSchema*) noexcept nogil
-
-    cdef struct ArrowArray:
-        void (*release)(ArrowArray*) noexcept nogil
-
-    cdef struct ArrowArrayStream:
-        void (*release)(ArrowArrayStream*) noexcept nogil
-
-
-cdef extern from "cudf/interop.hpp" namespace "cudf" \
-        nogil:
-    cdef unique_ptr[table] from_dlpack(const DLManagedTensor* tensor
-                                       ) except +
-
-    DLManagedTensor* to_dlpack(table_view input_table
-                               ) except +
-
-    cdef unique_ptr[table] from_arrow(CTable input) except +
-    cdef unique_ptr[scalar] from_arrow(CScalar input) except +
-
-    cdef cppclass column_metadata:
-        column_metadata() except +
-        column_metadata(string name_) except +
-        string name
-        vector[column_metadata] children_meta
-
-    cdef shared_ptr[CTable] to_arrow(
-        table_view input,
-        vector[column_metadata] metadata,
-    ) except +
-
-    cdef shared_ptr[CScalar] to_arrow(
-        const scalar& input,
-        column_metadata metadata,
-    ) except +
-
-    cdef unique_ptr[table] from_arrow_stream(ArrowArrayStream* input) except +
-    cdef unique_ptr[column] from_arrow_column(
-        const ArrowSchema* schema,
-        const ArrowArray* input
-    ) except +
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/arrow_io_source.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/arrow_io_source.pxd
deleted file mode 100644
index 1d2138f8d10..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/arrow_io_source.pxd
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-
-from libcpp.memory cimport shared_ptr
-from libcpp.string cimport string
-from pyarrow.includes.libarrow cimport CRandomAccessFile
-
-cimport cudf._lib.pylibcudf.libcudf.io.datasource as cudf_io_datasource
-
-
-cdef extern from "cudf/io/arrow_io_source.hpp" \
-        namespace "cudf::io" nogil:
-
-    cdef cppclass arrow_io_source(cudf_io_datasource.datasource):
-        arrow_io_source(const string& arrow_uri) except +
-        arrow_io_source(shared_ptr[CRandomAccessFile]) except +
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/extract.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/extract.pxd
deleted file mode 100644
index 57903ca27de..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/extract.pxd
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-
-from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-
-
-cdef extern from "cudf/strings/extract.hpp" namespace "cudf::strings" nogil:
-
-    cdef unique_ptr[table] extract(
-        column_view source_strings,
-        regex_program) except +
diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/capitalize.pyx b/python/cudf/cudf/_lib/pylibcudf/strings/capitalize.pyx
deleted file mode 100644
index d3f79088018..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/strings/capitalize.pyx
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-from libcpp.memory cimport unique_ptr
-from libcpp.utility cimport move
-
-from cudf._lib.pylibcudf.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.scalar.scalar_factories cimport (
-    make_string_scalar as cpp_make_string_scalar,
-)
-from cudf._lib.pylibcudf.libcudf.strings cimport capitalize as cpp_capitalize
-from cudf._lib.pylibcudf.scalar cimport Scalar
-from cudf._lib.pylibcudf.strings.char_types cimport string_character_types
-
-from cython.operator import dereference
-
-
-cpdef Column capitalize(
-    Column input,
-    Scalar delimiters=None
-    # TODO: default scalar values
-    # https://github.com/rapidsai/cudf/issues/15505
-):
-
-    cdef unique_ptr[column] c_result
-
-    if delimiters is None:
-        delimiters = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode())
-        )
-
-    cdef const string_scalar* cpp_delimiters = <const string_scalar*>(
-        delimiters.c_obj.get()
-    )
-
-    with nogil:
-        c_result = cpp_capitalize.capitalize(
-            input.view(),
-            dereference(cpp_delimiters)
-        )
-
-    return Column.from_libcudf(move(c_result))
-
-
-cpdef Column title(
-    Column input,
-    string_character_types sequence_type=string_character_types.ALPHA
-):
-    cdef unique_ptr[column] c_result
-    with nogil:
-        c_result = cpp_capitalize.title(input.view(), sequence_type)
-
-    return Column.from_libcudf(move(c_result))
-
-
-cpdef Column is_title(Column input):
-    cdef unique_ptr[column] c_result
-    with nogil:
-        c_result = cpp_capitalize.is_title(input.view())
-
-    return Column.from_libcudf(move(c_result))
diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/char_types.pxd b/python/cudf/cudf/_lib/pylibcudf/strings/char_types.pxd
deleted file mode 100644
index a80e02f520c..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/strings/char_types.pxd
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-from cudf._lib.pylibcudf.libcudf.strings.char_types cimport (
-    string_character_types,
-)
diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/contains.pxd b/python/cudf/cudf/_lib/pylibcudf/strings/contains.pxd
deleted file mode 100644
index 275aa95d97e..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/strings/contains.pxd
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-from cudf._lib.pylibcudf.column cimport Column
-from cudf._lib.pylibcudf.strings.regex_program cimport RegexProgram
-
-
-cpdef Column contains_re(Column input, RegexProgram prog)
diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/regex_flags.pxd b/python/cudf/cudf/_lib/pylibcudf/strings/regex_flags.pxd
deleted file mode 100644
index 79937bf574a..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/strings/regex_flags.pxd
+++ /dev/null
@@ -1,2 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-from cudf._lib.pylibcudf.libcudf.strings.regex_flags cimport regex_flags
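The `.pxd`/`.pyx` files deleted above are not lost functionality: they move wholesale into the standalone `pylibcudf` package, with the `cudf._lib.pylibcudf.*` prefix simply dropped. Import locations after the move, as a runnable sketch using only modules that appear in this diff:

    import pylibcudf as plc
    from pylibcudf.strings import case, contains
    from pylibcudf.strings.regex_program import RegexProgram
    from pylibcudf.null_mask import MaskState
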
diff --git a/python/cudf/cudf/_lib/quantiles.pyx b/python/cudf/cudf/_lib/quantiles.pyx
index 7b50c00919a..7666b7ff8da 100644
--- a/python/cudf/cudf/_lib/quantiles.pyx
+++ b/python/cudf/cudf/_lib/quantiles.pyx
@@ -13,10 +13,11 @@ from cudf._lib.types cimport (
 
 from cudf._lib.types import Interpolation
 
-from cudf._lib.pylibcudf.libcudf.types cimport interpolation, sorted
+from pylibcudf.libcudf.types cimport interpolation, sorted
+
 from cudf._lib.utils cimport columns_from_pylibcudf_table
 
-import cudf._lib.pylibcudf as plc
+import pylibcudf as plc
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx
index 64634b7a6f9..944753d28b8 100644
--- a/python/cudf/cudf/_lib/reduce.pyx
+++ b/python/cudf/cudf/_lib/reduce.pyx
@@ -8,7 +8,8 @@ from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.types cimport dtype_to_pylibcudf_type, is_decimal_type_id
 
-from cudf._lib import pylibcudf
+import pylibcudf
+
 from cudf._lib.aggregation import make_aggregation
 
@@ -61,7 +62,11 @@ def reduce(reduction_op, Column incol, dtype=None, **kwargs):
             result,
             dtype=col_dtype.__class__(precision, scale),
         ).value
-    return DeviceScalar.from_pylibcudf(result).value
+    scalar = DeviceScalar.from_pylibcudf(result).value
+    if isinstance(col_dtype, cudf.StructDtype):
+        # TODO: Utilize column_metadata in libcudf to maintain field labels
+        return dict(zip(col_dtype.fields.keys(), scalar.values()))
+    return scalar
 
 
 @acquire_spill_lock()
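The new branch in `reduce` re-attaches struct field labels that the pylibcudf scalar does not carry. The zip it performs, sketched with stand-in values (the dicts below are illustrative, not real cudf objects):

    # col_dtype.fields maps field label -> dtype; the scalar's payload keeps
    # positional values only, so labels are restored by zipping the two.
    fields = {"a": None, "b": None}     # stand-in for col_dtype.fields
    scalar = {"f0": 1, "f1": 2.5}       # stand-in for the unlabeled struct value
    result = dict(zip(fields.keys(), scalar.values()))
    print(result)  # {'a': 1, 'b': 2.5}
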
diff --git a/python/cudf/cudf/_lib/replace.pyx b/python/cudf/cudf/_lib/replace.pyx
index 2b5f32c7675..b50c6dd25e3 100644
--- a/python/cudf/cudf/_lib/replace.pyx
+++ b/python/cudf/cudf/_lib/replace.pyx
@@ -6,7 +6,8 @@ from cudf.core.buffer import acquire_spill_lock
 from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
 
-from cudf._lib import pylibcudf
+import pylibcudf
+
 from cudf._lib.scalar import as_device_scalar
diff --git a/python/cudf/cudf/_lib/reshape.pyx b/python/cudf/cudf/_lib/reshape.pyx
index 6bba8f0df35..6cebeb2bc16 100644
--- a/python/cudf/cudf/_lib/reshape.pyx
+++ b/python/cudf/cudf/_lib/reshape.pyx
@@ -2,11 +2,12 @@
 
 from cudf.core.buffer import acquire_spill_lock
 
+from pylibcudf.libcudf.types cimport size_type
+
 from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
 from cudf._lib.utils cimport columns_from_pylibcudf_table
 
-import cudf._lib.pylibcudf as plc
+import pylibcudf as plc
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/rolling.pyx b/python/cudf/cudf/_lib/rolling.pyx
index 5439e70fdce..687b261c2c7 100644
--- a/python/cudf/cudf/_lib/rolling.pyx
+++ b/python/cudf/cudf/_lib/rolling.pyx
@@ -4,7 +4,8 @@ from cudf.core.buffer import acquire_spill_lock
 
 from cudf._lib.column cimport Column
 
-from cudf._lib import pylibcudf
+import pylibcudf
+
 from cudf._lib.aggregation import make_aggregation
diff --git a/python/cudf/cudf/_lib/round.pyx b/python/cudf/cudf/_lib/round.pyx
index f8ad57947c8..f961c09e6f6 100644
--- a/python/cudf/cudf/_lib/round.pyx
+++ b/python/cudf/cudf/_lib/round.pyx
@@ -4,8 +4,8 @@ from cudf.core.buffer import acquire_spill_lock
 
 from cudf._lib.column cimport Column
 
-import cudf._lib.pylibcudf as plc
-from cudf._lib.pylibcudf.round import RoundingMethod
+import pylibcudf as plc
+from pylibcudf.round import RoundingMethod
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/scalar.pxd b/python/cudf/cudf/_lib/scalar.pxd
index b57acbb37f1..27095ca02d4 100644
--- a/python/cudf/cudf/_lib/scalar.pxd
+++ b/python/cudf/cudf/_lib/scalar.pxd
@@ -3,10 +3,9 @@
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 
+from pylibcudf.libcudf.scalar.scalar cimport scalar
 from rmm._lib.memory_resource cimport DeviceMemoryResource
 
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
-
 
 cdef class DeviceScalar:
     cdef public object c_value
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index e68398498d1..0dde91316fb 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -11,38 +11,40 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+import pylibcudf
+
 import cudf
-from cudf._lib import pylibcudf
 from cudf._lib.types import LIBCUDF_TO_SUPPORTED_NUMPY_TYPES
 from cudf.core.dtypes import ListDtype, StructDtype
 from cudf.core.missing import NA, NaT
 
-cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
+cimport pylibcudf.libcudf.types as libcudf_types
 # We currently need this cimport because some of the implementations here
 # access the c_obj of the scalar, and because we need to be able to call
 # pylibcudf.Scalar.from_libcudf. Both of those are temporarily acceptable until
 # DeviceScalar is phased out entirely from cuDF Cython (at which point
 # cudf.Scalar will be directly backed by pylibcudf.Scalar).
-from cudf._lib.pylibcudf cimport Scalar as plc_Scalar
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport (
+from pylibcudf cimport Scalar as plc_Scalar
+from pylibcudf.libcudf.scalar.scalar cimport (
     duration_scalar,
     list_scalar,
     scalar,
     struct_scalar,
     timestamp_scalar,
 )
-from cudf._lib.pylibcudf.libcudf.wrappers.durations cimport (
+from pylibcudf.libcudf.wrappers.durations cimport (
     duration_ms,
     duration_ns,
     duration_s,
     duration_us,
 )
-from cudf._lib.pylibcudf.libcudf.wrappers.timestamps cimport (
+from pylibcudf.libcudf.wrappers.timestamps cimport (
     timestamp_ms,
     timestamp_ns,
     timestamp_s,
     timestamp_us,
 )
+
 from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
diff --git a/python/cudf/cudf/_lib/search.pyx b/python/cudf/cudf/_lib/search.pyx
index 1ee73949fd3..8108361052b 100644
--- a/python/cudf/cudf/_lib/search.pyx
+++ b/python/cudf/cudf/_lib/search.pyx
@@ -4,7 +4,7 @@ from cudf.core.buffer import acquire_spill_lock
 
 from cudf._lib.column cimport Column
 
-from cudf._lib import pylibcudf
+import pylibcudf
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx
index ff9565b9a89..185552ede82 100644
--- a/python/cudf/cudf/_lib/sort.pyx
+++ b/python/cudf/cudf/_lib/sort.pyx
@@ -9,18 +9,19 @@ from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
+from pylibcudf.libcudf.aggregation cimport rank_method
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.search cimport lower_bound, upper_bound
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport null_order, order as cpp_order
+
 from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.aggregation cimport rank_method
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.search cimport lower_bound, upper_bound
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport null_order, order as cpp_order
 from cudf._lib.utils cimport (
     columns_from_pylibcudf_table,
     table_view_from_columns,
 )
 
-from cudf._lib import pylibcudf
+import pylibcudf
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/stream_compaction.pyx b/python/cudf/cudf/_lib/stream_compaction.pyx
index 834f91f48d9..1b8831940e3 100644
--- a/python/cudf/cudf/_lib/stream_compaction.pyx
+++ b/python/cudf/cudf/_lib/stream_compaction.pyx
@@ -7,7 +7,7 @@ from libcpp cimport bool
 from cudf._lib.column cimport Column
 from cudf._lib.utils cimport columns_from_pylibcudf_table
 
-from cudf._lib import pylibcudf
+import pylibcudf
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx
index dfad7fd101c..8d463829a19 100644
--- a/python/cudf/cudf/_lib/string_casting.pyx
+++ b/python/cudf/cudf/_lib/string_casting.pyx
@@ -12,39 +12,40 @@ from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.strings.convert.convert_booleans cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.strings.convert.convert_booleans cimport (
     from_booleans as cpp_from_booleans,
     to_booleans as cpp_to_booleans,
 )
-from cudf._lib.pylibcudf.libcudf.strings.convert.convert_datetime cimport (
+from pylibcudf.libcudf.strings.convert.convert_datetime cimport (
     from_timestamps as cpp_from_timestamps,
     is_timestamp as cpp_is_timestamp,
     to_timestamps as cpp_to_timestamps,
 )
-from cudf._lib.pylibcudf.libcudf.strings.convert.convert_durations cimport (
+from pylibcudf.libcudf.strings.convert.convert_durations cimport (
     from_durations as cpp_from_durations,
     to_durations as cpp_to_durations,
 )
-from cudf._lib.pylibcudf.libcudf.strings.convert.convert_floats cimport (
+from pylibcudf.libcudf.strings.convert.convert_floats cimport (
     from_floats as cpp_from_floats,
     to_floats as cpp_to_floats,
 )
-from cudf._lib.pylibcudf.libcudf.strings.convert.convert_integers cimport (
+from pylibcudf.libcudf.strings.convert.convert_integers cimport (
     from_integers as cpp_from_integers,
     hex_to_integers as cpp_hex_to_integers,
     integers_to_hex as cpp_integers_to_hex,
     is_hex as cpp_is_hex,
     to_integers as cpp_to_integers,
 )
-from cudf._lib.pylibcudf.libcudf.strings.convert.convert_ipv4 cimport (
+from pylibcudf.libcudf.strings.convert.convert_ipv4 cimport (
     integers_to_ipv4 as cpp_integers_to_ipv4,
     ipv4_to_integers as cpp_ipv4_to_integers,
     is_ipv4 as cpp_is_ipv4,
 )
-from cudf._lib.pylibcudf.libcudf.types cimport data_type, type_id
+from pylibcudf.libcudf.types cimport data_type, type_id
+
 from cudf._lib.types cimport underlying_type_t_type_id
 
 import cudf
diff --git a/python/cudf/cudf/_lib/strings/attributes.pyx b/python/cudf/cudf/_lib/strings/attributes.pyx
index 1f3d7c4eb1b..fe8c17c9e31 100644
--- a/python/cudf/cudf/_lib/strings/attributes.pyx
+++ b/python/cudf/cudf/_lib/strings/attributes.pyx
@@ -5,15 +5,16 @@ from cudf.core.buffer import acquire_spill_lock
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.strings.attributes cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.strings.attributes cimport (
     code_points as cpp_code_points,
     count_bytes as cpp_count_bytes,
     count_characters as cpp_count_characters,
 )
 
+from cudf._lib.column cimport Column
+
 
 @acquire_spill_lock()
 def count_characters(Column source_strings):
diff --git a/python/cudf/cudf/_lib/strings/capitalize.pyx b/python/cudf/cudf/_lib/strings/capitalize.pyx
index b3ca6a5ac8f..42c40e2e753 100644
--- a/python/cudf/cudf/_lib/strings/capitalize.pyx
+++ b/python/cudf/cudf/_lib/strings/capitalize.pyx
@@ -4,7 +4,7 @@ from cudf.core.buffer import acquire_spill_lock
 
 from cudf._lib.column cimport Column
 
-import cudf._lib.pylibcudf as plc
+import pylibcudf as plc
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/strings/case.pyx b/python/cudf/cudf/_lib/strings/case.pyx
index 38f242a67d6..ad4cbb6f088 100644
--- a/python/cudf/cudf/_lib/strings/case.pyx
+++ b/python/cudf/cudf/_lib/strings/case.pyx
@@ -4,7 +4,7 @@ from cudf.core.buffer import acquire_spill_lock
 
 from cudf._lib.column cimport Column
 
-from cudf._lib.pylibcudf.strings import case
+from pylibcudf.strings import case
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/strings/char_types.pyx b/python/cudf/cudf/_lib/strings/char_types.pyx
index 5b7b6d19d9e..376a6f8af97 100644
--- a/python/cudf/cudf/_lib/strings/char_types.pyx
+++ b/python/cudf/cudf/_lib/strings/char_types.pyx
@@ -7,15 +7,16 @@ from libcpp.utility cimport move
 
 from cudf.core.buffer import acquire_spill_lock
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.strings.char_types cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.strings.char_types cimport (
     all_characters_of_type as cpp_all_characters_of_type,
     filter_characters_of_type as cpp_filter_characters_of_type,
     string_character_types,
 )
+
+from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
diff --git a/python/cudf/cudf/_lib/strings/combine.pyx b/python/cudf/cudf/_lib/strings/combine.pyx
index 288f333d4d8..76cc13db0da 100644
--- a/python/cudf/cudf/_lib/strings/combine.pyx
+++ b/python/cudf/cudf/_lib/strings/combine.pyx
@@ -5,18 +5,19 @@ from cudf.core.buffer import acquire_spill_lock
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.strings.combine cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.strings.combine cimport (
     concatenate as cpp_concatenate,
     join_list_elements as cpp_join_list_elements,
     join_strings as cpp_join_strings,
     output_if_empty_list,
     separator_on_nulls,
 )
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.table.table_view cimport table_view
+
+from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.utils cimport table_view_from_columns
diff --git a/python/cudf/cudf/_lib/strings/contains.pyx b/python/cudf/cudf/_lib/strings/contains.pyx
index 502a1d14696..82f5e06c547 100644
--- a/python/cudf/cudf/_lib/strings/contains.pyx
+++ b/python/cudf/cudf/_lib/strings/contains.pyx
@@ -9,21 +9,22 @@ from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 
-from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.strings.contains cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.strings.contains cimport (
     count_re as cpp_count_re,
     like as cpp_like,
     matches_re as cpp_matches_re,
 )
-from cudf._lib.pylibcudf.libcudf.strings.regex_flags cimport regex_flags
-from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program
+from pylibcudf.libcudf.strings.regex_flags cimport regex_flags
+from pylibcudf.libcudf.strings.regex_program cimport regex_program
+
+from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
 
-from cudf._lib.pylibcudf.strings import contains
-from cudf._lib.pylibcudf.strings.regex_program import RegexProgram
+from pylibcudf.strings import contains
+from pylibcudf.strings.regex_program import RegexProgram
 
 
 @acquire_spill_lock()
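`contains.pyx` now reaches the regex APIs through the standalone package. A small sketch of the new-style call path, assuming `RegexProgram.create`, the `RegexFlags` enum, and `interop.from_arrow` behave as in current pylibcudf (the pattern and data are illustrative; `contains_re` itself is declared in the deleted contains.pxd above):

    import pyarrow as pa
    import pylibcudf as plc
    from pylibcudf.strings.regex_program import RegexProgram

    # Compile a regex once, then run contains_re over a strings column.
    prog = RegexProgram.create(r"\d+", plc.strings.regex_flags.RegexFlags.DEFAULT)
    col = plc.interop.from_arrow(pa.array(["a1", "bb", "3c"]))
    hits = plc.strings.contains.contains_re(col, prog)
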
cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.strings.convert.convert_floats cimport ( is_float as cpp_is_float, ) +from cudf._lib.column cimport Column + @acquire_spill_lock() def is_float(Column source_strings): diff --git a/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx index 081b03cdc0d..8b6da2bfa1c 100644 --- a/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx +++ b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx @@ -5,13 +5,14 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.strings.convert.convert_integers cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.strings.convert.convert_integers cimport ( is_integer as cpp_is_integer, ) +from cudf._lib.column cimport Column + @acquire_spill_lock() def is_integer(Column source_strings): diff --git a/python/cudf/cudf/_lib/strings/convert/convert_lists.pyx b/python/cudf/cudf/_lib/strings/convert/convert_lists.pyx index 4418bf2a72d..73aebf8ab35 100644 --- a/python/cudf/cudf/_lib/strings/convert/convert_lists.pyx +++ b/python/cudf/cudf/_lib/strings/convert/convert_lists.pyx @@ -5,14 +5,15 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar -from cudf._lib.pylibcudf.libcudf.strings.convert.convert_lists cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.strings.convert.convert_lists cimport ( format_list_column as cpp_format_list_column, ) +from cudf._lib.column cimport Column + from cudf._lib.scalar import as_device_scalar from cudf._lib.scalar cimport DeviceScalar diff --git a/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx b/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx index 5f62efe5c00..e52116d6247 100644 --- a/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx +++ b/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx @@ -5,14 +5,15 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.strings.convert.convert_urls cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.strings.convert.convert_urls cimport ( url_decode as cpp_url_decode, url_encode as cpp_url_encode, ) +from cudf._lib.column cimport Column + @acquire_spill_lock() def url_decode(Column source_strings): diff --git a/python/cudf/cudf/_lib/strings/extract.pyx b/python/cudf/cudf/_lib/strings/extract.pyx index 3b80c4f6368..63f4d57e562 100644 --- 
a/python/cudf/cudf/_lib/strings/extract.pyx +++ b/python/cudf/cudf/_lib/strings/extract.pyx @@ -8,12 +8,13 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.strings.extract cimport extract as cpp_extract +from pylibcudf.libcudf.strings.regex_flags cimport regex_flags +from pylibcudf.libcudf.strings.regex_program cimport regex_program +from pylibcudf.libcudf.table.table cimport table + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.strings.extract cimport extract as cpp_extract -from cudf._lib.pylibcudf.libcudf.strings.regex_flags cimport regex_flags -from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program -from cudf._lib.pylibcudf.libcudf.table.table cimport table from cudf._lib.utils cimport data_from_unique_ptr diff --git a/python/cudf/cudf/_lib/strings/find.pyx b/python/cudf/cudf/_lib/strings/find.pyx index 3c0009ee569..2d284d1aa9d 100644 --- a/python/cudf/cudf/_lib/strings/find.pyx +++ b/python/cudf/cudf/_lib/strings/find.pyx @@ -1,10 +1,12 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -import cudf._lib.pylibcudf as plc +import pylibcudf as plc + from cudf.core.buffer import acquire_spill_lock +from pylibcudf.libcudf.types cimport size_type + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.types cimport size_type @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/strings/find_multiple.pyx b/python/cudf/cudf/_lib/strings/find_multiple.pyx index c75f28db21b..1358f8e3c2c 100644 --- a/python/cudf/cudf/_lib/strings/find_multiple.pyx +++ b/python/cudf/cudf/_lib/strings/find_multiple.pyx @@ -5,13 +5,14 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.strings.find_multiple cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.strings.find_multiple cimport ( find_multiple as cpp_find_multiple, ) +from cudf._lib.column cimport Column + @acquire_spill_lock() def find_multiple(Column source_strings, Column target_strings): diff --git a/python/cudf/cudf/_lib/strings/findall.pyx b/python/cudf/cudf/_lib/strings/findall.pyx index 0d409889bc8..3cf2084e30a 100644 --- a/python/cudf/cudf/_lib/strings/findall.pyx +++ b/python/cudf/cudf/_lib/strings/findall.pyx @@ -8,12 +8,13 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.strings.findall cimport findall as cpp_findall +from pylibcudf.libcudf.strings.regex_flags cimport regex_flags +from pylibcudf.libcudf.strings.regex_program cimport regex_program + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.strings.findall cimport findall as cpp_findall -from cudf._lib.pylibcudf.libcudf.strings.regex_flags cimport regex_flags -from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program @acquire_spill_lock() diff --git 
a/python/cudf/cudf/_lib/strings/json.pyx b/python/cudf/cudf/_lib/strings/json.pyx index 560f284b56c..c9b0bba088d 100644 --- a/python/cudf/cudf/_lib/strings/json.pyx +++ b/python/cudf/cudf/_lib/strings/json.pyx @@ -5,14 +5,15 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar -from cudf._lib.pylibcudf.libcudf.strings.json cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.strings.json cimport ( get_json_object as cpp_get_json_object, get_json_object_options, ) + +from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar diff --git a/python/cudf/cudf/_lib/strings/padding.pyx b/python/cudf/cudf/_lib/strings/padding.pyx index 9226810951f..d0239e91ec3 100644 --- a/python/cudf/cudf/_lib/strings/padding.pyx +++ b/python/cudf/cudf/_lib/strings/padding.pyx @@ -6,18 +6,19 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.types cimport size_type + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.types cimport size_type from enum import IntEnum -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.strings.padding cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.strings.padding cimport ( pad as cpp_pad, zfill as cpp_zfill, ) -from cudf._lib.pylibcudf.libcudf.strings.side_type cimport ( +from pylibcudf.libcudf.strings.side_type cimport ( side_type, underlying_type_t_side_type, ) diff --git a/python/cudf/cudf/_lib/strings/repeat.pyx b/python/cudf/cudf/_lib/strings/repeat.pyx index 2b8116848cf..42fcfa5d94e 100644 --- a/python/cudf/cudf/_lib/strings/repeat.pyx +++ b/python/cudf/cudf/_lib/strings/repeat.pyx @@ -5,11 +5,12 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.strings cimport repeat as cpp_repeat +from pylibcudf.libcudf.types cimport size_type + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.strings cimport repeat as cpp_repeat -from cudf._lib.pylibcudf.libcudf.types cimport size_type @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/strings/replace.pyx b/python/cudf/cudf/_lib/strings/replace.pyx index 374831f1833..a260c4e4f45 100644 --- a/python/cudf/cudf/_lib/strings/replace.pyx +++ b/python/cudf/cudf/_lib/strings/replace.pyx @@ -4,11 +4,12 @@ from libc.stdint cimport int32_t from cudf.core.buffer import acquire_spill_lock +from pylibcudf.libcudf.types cimport size_type + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.types cimport size_type from cudf._lib.scalar cimport DeviceScalar -import cudf._lib.pylibcudf as plc +import pylibcudf as plc @acquire_spill_lock() diff --git 
a/python/cudf/cudf/_lib/strings/replace_re.pyx b/python/cudf/cudf/_lib/strings/replace_re.pyx index e13880a6186..fffc8b7c3f6 100644 --- a/python/cudf/cudf/_lib/strings/replace_re.pyx +++ b/python/cudf/cudf/_lib/strings/replace_re.pyx @@ -8,17 +8,18 @@ from libcpp.vector cimport vector from cudf.core.buffer import acquire_spill_lock -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar -from cudf._lib.pylibcudf.libcudf.strings.regex_flags cimport regex_flags -from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program -from cudf._lib.pylibcudf.libcudf.strings.replace_re cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.strings.regex_flags cimport regex_flags +from pylibcudf.libcudf.strings.regex_program cimport regex_program +from pylibcudf.libcudf.strings.replace_re cimport ( replace_re as cpp_replace_re, replace_with_backrefs as cpp_replace_with_backrefs, ) -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.types cimport size_type + +from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar diff --git a/python/cudf/cudf/_lib/strings/split/partition.pyx b/python/cudf/cudf/_lib/strings/split/partition.pyx index be377c0f86b..a81fb18e752 100644 --- a/python/cudf/cudf/_lib/strings/split/partition.pyx +++ b/python/cudf/cudf/_lib/strings/split/partition.pyx @@ -5,14 +5,15 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar -from cudf._lib.pylibcudf.libcudf.strings.split.partition cimport ( +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.strings.split.partition cimport ( partition as cpp_partition, rpartition as cpp_rpartition, ) -from cudf._lib.pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.table.table cimport table + +from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar from cudf._lib.utils cimport data_from_unique_ptr diff --git a/python/cudf/cudf/_lib/strings/split/split.pyx b/python/cudf/cudf/_lib/strings/split/split.pyx index 942235686d7..f481fea4c51 100644 --- a/python/cudf/cudf/_lib/strings/split/split.pyx +++ b/python/cudf/cudf/_lib/strings/split/split.pyx @@ -7,13 +7,12 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar -from cudf._lib.pylibcudf.libcudf.strings.regex_flags cimport regex_flags -from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program -from cudf._lib.pylibcudf.libcudf.strings.split.split cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.strings.regex_flags 
cimport regex_flags +from pylibcudf.libcudf.strings.regex_program cimport regex_program +from pylibcudf.libcudf.strings.split.split cimport ( rsplit as cpp_rsplit, rsplit_re as cpp_rsplit_re, rsplit_record as cpp_rsplit_record, @@ -23,8 +22,10 @@ from cudf._lib.pylibcudf.libcudf.strings.split.split cimport ( split_record as cpp_split_record, split_record_re as cpp_split_record_re, ) -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.types cimport size_type + +from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar from cudf._lib.utils cimport data_from_unique_ptr diff --git a/python/cudf/cudf/_lib/strings/strip.pyx b/python/cudf/cudf/_lib/strings/strip.pyx index 199fa5fc3b6..acf52cb7b9f 100644 --- a/python/cudf/cudf/_lib/strings/strip.pyx +++ b/python/cudf/cudf/_lib/strings/strip.pyx @@ -5,12 +5,13 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.strings.side_type cimport side_type +from pylibcudf.libcudf.strings.strip cimport strip as cpp_strip + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar -from cudf._lib.pylibcudf.libcudf.strings.side_type cimport side_type -from cudf._lib.pylibcudf.libcudf.strings.strip cimport strip as cpp_strip from cudf._lib.scalar cimport DeviceScalar diff --git a/python/cudf/cudf/_lib/strings/substring.pyx b/python/cudf/cudf/_lib/strings/substring.pyx index 706c21c0634..db96d99c7b6 100644 --- a/python/cudf/cudf/_lib/strings/substring.pyx +++ b/python/cudf/cudf/_lib/strings/substring.pyx @@ -10,7 +10,7 @@ from cudf._lib.scalar import as_device_scalar from cudf._lib.scalar cimport DeviceScalar -import cudf._lib.pylibcudf as plc +import pylibcudf as plc @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/strings/translate.pyx b/python/cudf/cudf/_lib/strings/translate.pyx index 8846e2e280d..3fad91bbfc0 100644 --- a/python/cudf/cudf/_lib/strings/translate.pyx +++ b/python/cudf/cudf/_lib/strings/translate.pyx @@ -8,16 +8,17 @@ from libcpp.vector cimport vector from cudf.core.buffer import acquire_spill_lock -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar -from cudf._lib.pylibcudf.libcudf.strings.translate cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.strings.translate cimport ( filter_characters as cpp_filter_characters, filter_type, translate as cpp_translate, ) -from cudf._lib.pylibcudf.libcudf.types cimport char_utf8 +from pylibcudf.libcudf.types cimport char_utf8 + +from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar diff --git a/python/cudf/cudf/_lib/strings/wrap.pyx b/python/cudf/cudf/_lib/strings/wrap.pyx index 92750f21e4d..eed5cf33b10 100644 --- a/python/cudf/cudf/_lib/strings/wrap.pyx +++ 
b/python/cudf/cudf/_lib/strings/wrap.pyx @@ -5,11 +5,12 @@ from libcpp.utility cimport move from cudf.core.buffer import acquire_spill_lock +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.strings.wrap cimport wrap as cpp_wrap +from pylibcudf.libcudf.types cimport size_type + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.strings.wrap cimport wrap as cpp_wrap -from cudf._lib.pylibcudf.libcudf.types cimport size_type @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/strings_udf.pyx b/python/cudf/cudf/_lib/strings_udf.pyx index 7610cad0b40..78fc9f08bd8 100644 --- a/python/cudf/cudf/_lib/strings_udf.pyx +++ b/python/cudf/cudf/_lib/strings_udf.pyx @@ -2,7 +2,7 @@ from libc.stdint cimport uint8_t, uint16_t, uintptr_t -from cudf._lib.pylibcudf.libcudf.strings_udf cimport ( +from pylibcudf.libcudf.strings_udf cimport ( get_character_cases_table as cpp_get_character_cases_table, get_character_flags_table as cpp_get_character_flags_table, get_special_case_mapping_table as cpp_get_special_case_mapping_table, @@ -15,17 +15,17 @@ from libcpp.utility cimport move from cudf.core.buffer import as_buffer -from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer - -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column, column_view -from cudf._lib.pylibcudf.libcudf.strings_udf cimport ( +from pylibcudf.libcudf.column.column cimport column, column_view +from pylibcudf.libcudf.strings_udf cimport ( column_from_udf_string_array as cpp_column_from_udf_string_array, free_udf_string_array as cpp_free_udf_string_array, get_cuda_build_version as cpp_get_cuda_build_version, to_string_view_array as cpp_to_string_view_array, udf_string, ) +from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer + +from cudf._lib.column cimport Column def get_cuda_build_version(): diff --git a/python/cudf/cudf/_lib/text.pyx b/python/cudf/cudf/_lib/text.pyx index 6e63b8758b8..b2c7232f549 100644 --- a/python/cudf/cudf/_lib/text.pyx +++ b/python/cudf/cudf/_lib/text.pyx @@ -8,9 +8,8 @@ from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.utility cimport move -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.io.text cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.io.text cimport ( byte_range_info, data_chunk_source, make_source, @@ -20,6 +19,8 @@ from cudf._lib.pylibcudf.libcudf.io.text cimport ( parse_options, ) +from cudf._lib.column cimport Column + def read_text(object filepaths_or_buffers, object delimiter=None, @@ -85,4 +86,4 @@ def read_text(object filepaths_or_buffers, delim, c_options)) - return {None: Column.from_unique_ptr(move(c_col))} + return Column.from_unique_ptr(move(c_col)) diff --git a/python/cudf/cudf/_lib/timezone.pyx b/python/cudf/cudf/_lib/timezone.pyx index 53977e984c2..bff3b2c4ce4 100644 --- a/python/cudf/cudf/_lib/timezone.pyx +++ b/python/cudf/cudf/_lib/timezone.pyx @@ -5,10 +5,11 @@ from libcpp.optional cimport make_optional from libcpp.string cimport string from libcpp.utility cimport move -from cudf._lib.pylibcudf.libcudf.io.timezone cimport ( +from pylibcudf.libcudf.io.timezone cimport ( make_timezone_transition_table as 
cpp_make_timezone_transition_table, ) -from cudf._lib.pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.table.table cimport table + from cudf._lib.utils cimport columns_from_unique_ptr diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 622725e06a3..baa08a545ec 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -15,23 +15,23 @@ from libcpp.pair cimport pair from libcpp.string cimport string from libcpp.utility cimport move -from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer - -cimport cudf._lib.pylibcudf.libcudf.transform as libcudf_transform -from cudf._lib.column cimport Column -from cudf._lib.pylibcudf cimport transform as plc_transform -from cudf._lib.pylibcudf.expressions cimport Expression -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.expressions cimport expression -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view -from cudf._lib.pylibcudf.libcudf.types cimport ( +cimport pylibcudf.libcudf.transform as libcudf_transform +from pylibcudf cimport transform as plc_transform +from pylibcudf.expressions cimport Expression +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.expressions cimport expression +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.types cimport ( bitmask_type, data_type, size_type, type_id, ) +from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer + +from cudf._lib.column cimport Column from cudf._lib.types cimport underlying_type_t_type_id from cudf._lib.utils cimport ( columns_from_unique_ptr, diff --git a/python/cudf/cudf/_lib/transpose.pyx b/python/cudf/cudf/_lib/transpose.pyx index 82b23439e6a..f78fbd4c844 100644 --- a/python/cudf/cudf/_lib/transpose.pyx +++ b/python/cudf/cudf/_lib/transpose.pyx @@ -4,10 +4,11 @@ from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair from libcpp.utility cimport move +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.transpose cimport transpose as cpp_transpose + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view -from cudf._lib.pylibcudf.libcudf.transpose cimport transpose as cpp_transpose from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns diff --git a/python/cudf/cudf/_lib/types.pxd b/python/cudf/cudf/_lib/types.pxd index 519d5ff8554..4fd3d31841e 100644 --- a/python/cudf/cudf/_lib/types.pxd +++ b/python/cudf/cudf/_lib/types.pxd @@ -3,11 +3,9 @@ from libc.stdint cimport int32_t from libcpp cimport bool -cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport ( - lists_column_view, -) +cimport pylibcudf.libcudf.types as libcudf_types +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view ctypedef bool underlying_type_t_order ctypedef bool underlying_type_t_null_order diff --git 
a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index 253fdf7b0d9..861bb063707 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -7,19 +7,19 @@ import pandas as pd from libcpp.memory cimport make_shared, shared_ptr -cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport ( - lists_column_view, -) +cimport pylibcudf.libcudf.types as libcudf_types +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view + from cudf._lib.types cimport ( underlying_type_t_interpolation, underlying_type_t_order, underlying_type_t_sorted, ) +import pylibcudf + import cudf -from cudf._lib import pylibcudf class TypeId(IntEnum): diff --git a/python/cudf/cudf/_lib/unary.pyx b/python/cudf/cudf/_lib/unary.pyx index 2f58c4512d6..d5602fd5a1c 100644 --- a/python/cudf/cudf/_lib/unary.pyx +++ b/python/cudf/cudf/_lib/unary.pyx @@ -5,7 +5,8 @@ from cudf._lib.types cimport dtype_to_pylibcudf_type import numpy as np -from cudf._lib import pylibcudf +import pylibcudf + from cudf.api.types import is_decimal_dtype from cudf.core.buffer import acquire_spill_lock diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd index 1d55f7218dc..ff97fe80310 100644 --- a/python/cudf/cudf/_lib/utils.pxd +++ b/python/cudf/cudf/_lib/utils.pxd @@ -4,8 +4,8 @@ from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector -from cudf._lib.pylibcudf.libcudf.column.column cimport column_view -from cudf._lib.pylibcudf.libcudf.table.table cimport table, table_view +from pylibcudf.libcudf.column.column cimport column_view +from pylibcudf.libcudf.table.table cimport table, table_view cdef data_from_unique_ptr( diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index f136cd997a7..cae28d02ef4 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -10,11 +10,12 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector +from pylibcudf.libcudf.column.column cimport column, column_view +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.types cimport size_type + from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column, column_view -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view -from cudf._lib.pylibcudf.libcudf.types cimport size_type try: import ujson as json @@ -93,12 +94,12 @@ cpdef generate_pandas_metadata(table, index): materialize_index = False if index is not False: for level, name in enumerate(table._index.names): - if isinstance(table._index, cudf.core.multiindex.MultiIndex): + if isinstance(table._index, cudf.MultiIndex): idx = table.index.get_level_values(level) else: idx = table.index - if isinstance(idx, cudf.core.index.RangeIndex): + if isinstance(idx, cudf.RangeIndex): if index is None: descr = { "kind": "range", @@ -110,7 +111,7 @@ cpdef generate_pandas_metadata(table, index): else: materialize_index = True # When `index=True`, RangeIndex needs to be materialized. 
- materialized_idx = cudf.Index(idx._values, name=idx.name) + materialized_idx = idx._as_int_index() descr = _index_level_name( index_name=materialized_idx.name, level=level, diff --git a/python/cudf/cudf/_typing.py b/python/cudf/cudf/_typing.py index 34c96cc8cb3..6e8ad556b08 100644 --- a/python/cudf/cudf/_typing.py +++ b/python/cudf/cudf/_typing.py @@ -1,7 +1,8 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. import sys -from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, TypeVar, Union +from collections.abc import Callable +from typing import TYPE_CHECKING, Any, Dict, Iterable, TypeVar, Union import numpy as np from pandas import Period, Timedelta, Timestamp diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py index 294ae2fd985..9c436dfad18 100644 --- a/python/cudf/cudf/api/types.py +++ b/python/cudf/cudf/api/types.py @@ -249,7 +249,7 @@ def _union_categoricals( new_categories=sorted_categories ) - return cudf.Index(result_col) + return cudf.CategoricalIndex._from_column(result_col) def is_bool_dtype(arr_or_dtype): diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index c91514202c5..ff114474aa4 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -798,64 +798,6 @@ def fillna(self, value, downcast=None): return super().fillna(value=value) - def to_frame(self, index=True, name=no_default): - """Create a DataFrame with a column containing this Index - - Parameters - ---------- - index : boolean, default True - Set the index of the returned DataFrame as the original Index - name : object, defaults to index.name - The passed name should substitute for the index name (if it has - one). - - Returns - ------- - DataFrame - DataFrame containing the original Index data. - - See Also - -------- - Index.to_series : Convert an Index to a Series. - Series.to_frame : Convert Series to DataFrame. - - Examples - -------- - >>> import cudf - >>> idx = cudf.Index(['Ant', 'Bear', 'Cow'], name='animal') - >>> idx.to_frame() - animal - animal - Ant Ant - Bear Bear - Cow Cow - - By default, the original Index is reused. 
To enforce a new Index: - - >>> idx.to_frame(index=False) - animal - 0 Ant - 1 Bear - 2 Cow - - To override the name of the resulting column, specify `name`: - - >>> idx.to_frame(index=False, name='zoo') - zoo - 0 Ant - 1 Bear - 2 Cow - """ - - if name is no_default: - col_name = 0 if self.name is None else self.name - else: - col_name = name - - return cudf.DataFrame( - {col_name: self._values}, index=self if index else None - ) - def to_arrow(self): """Convert to a suitable Arrow object.""" raise NotImplementedError @@ -1698,7 +1640,7 @@ def join( # in case of MultiIndex if isinstance(lhs, cudf.MultiIndex): on = ( - lhs._data.select_by_index(level).names[0] + lhs._data.get_labels_by_index(level)[0] if isinstance(level, int) else level ) @@ -1979,7 +1921,7 @@ def from_pandas(cls, index: pd.Index, nan_as_null=no_default): name=index.name, ) else: - return cudf.Index( + return cudf.Index._from_column( column.as_column(index, nan_as_null=nan_as_null), name=index.name, ) diff --git a/python/cudf/cudf/core/_internals/expressions.py b/python/cudf/cudf/core/_internals/expressions.py index 63714a78572..67bde5a72b2 100644 --- a/python/cudf/cudf/core/_internals/expressions.py +++ b/python/cudf/cudf/core/_internals/expressions.py @@ -6,8 +6,8 @@ import pyarrow as pa -import cudf._lib.pylibcudf as plc -from cudf._lib.pylibcudf.expressions import ( +import pylibcudf as plc +from pylibcudf.expressions import ( ASTOperator, ColumnReference, Expression, diff --git a/python/cudf/cudf/core/_internals/timezones.py b/python/cudf/cudf/core/_internals/timezones.py index 29cb9d7bd12..fd89904e766 100644 --- a/python/cudf/cudf/core/_internals/timezones.py +++ b/python/cudf/cudf/core/_internals/timezones.py @@ -120,7 +120,7 @@ def _read_tzfile_as_columns( # this happens for UTC-like zones min_date = np.int64(np.iinfo("int64").min + 1).astype("M8[s]") - return (as_column([min_date]), as_column([np.timedelta64(0, "s")])) + return (as_column([min_date]), as_column([np.timedelta64(0, "s")])) # type: ignore[return-value] return tuple(transition_times_and_offsets) # type: ignore[return-value] diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py index 18ab32d2c9e..2199d4d5ba5 100644 --- a/python/cudf/cudf/core/_internals/where.py +++ b/python/cudf/cudf/core/_internals/where.py @@ -106,21 +106,6 @@ def _check_and_cast_columns_with_other( return _normalize_categorical(source_col.astype(common_dtype), other) -def _make_categorical_like(result, column): - if isinstance(column, cudf.core.column.CategoricalColumn): - result = cudf.core.column.build_categorical_column( - categories=column.categories, - codes=cudf.core.column.build_column( - result.base_data, dtype=result.dtype - ), - mask=result.base_mask, - size=result.size, - offset=result.offset, - ordered=column.ordered, - ) - return result - - def _can_cast(from_dtype, to_dtype): """ Utility function to determine if we can cast diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py index 6c69fbd2637..b28fce6d343 100644 --- a/python/cudf/cudf/core/algorithms.py +++ b/python/cudf/cudf/core/algorithms.py @@ -7,8 +7,9 @@ import cupy as cp import numpy as np +import cudf from cudf.core.column import as_column -from cudf.core.index import RangeIndex, ensure_index +from cudf.core.index import Index, RangeIndex from cudf.core.scalar import Scalar from cudf.options import get_option from cudf.utils.dtypes import can_convert_to_column @@ -112,7 +113,9 @@ def factorize(values, sort=False, 
use_na_sentinel=True, size_hint=None): dtype="int64" if get_option("mode.pandas_compatible") else None, ).values - return labels, cats.values if return_cupy_array else ensure_index(cats) + return labels, cats.values if return_cupy_array else Index._from_column( + cats + ) def _interpolation(column: ColumnBase, index: BaseIndex) -> ColumnBase: @@ -143,3 +146,124 @@ def _interpolation(column: ColumnBase, index: BaseIndex) -> ColumnBase: first_nan_idx = valid_locs.values.argmax().item() result[:first_nan_idx] = np.nan return as_column(result) + + +def unique(values): + """ + Return unique values from an array-like. + + Parameters + ---------- + values : 1d array-like + + Returns + ------- + cudf.Index, cudf.Series, or cupy.ndarray + + The return can be: + + * Index : when the input is an Index + * cudf.Series : when the input is a Series + * cupy.ndarray : when the input is a cupy.ndarray + + Return cudf.Index, cudf.Series, or cupy.ndarray. + + See Also + -------- + Index.unique : Return unique values from an Index. + Series.unique : Return unique values of a Series object. + + Examples + -------- + >>> cudf.unique(cudf.Series([2, 1, 3, 3])) + 0 2 + 1 1 + 2 3 + dtype: int64 + + >>> cudf.unique(cudf.Series([2] + [1] * 5)) + 0 2 + 1 1 + dtype: int64 + + >>> cudf.unique(cudf.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])) + 0 2016-01-01 + dtype: datetime64[ns] + + >>> cudf.unique( + ... cudf.Series( + ... [ + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... pd.Timestamp("20160103", tz="US/Eastern"), + ... ] + ... ) + ... ) + 0 2016-01-01 00:00:00-05:00 + 1 2016-01-03 00:00:00-05:00 + dtype: datetime64[ns, US/Eastern] + + >>> cudf.unique( + ... cudf.Index( + ... [ + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... pd.Timestamp("20160103", tz="US/Eastern"), + ... ] + ... ) + ... ) + DatetimeIndex(['2016-01-01 00:00:00-05:00', '2016-01-03 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]') + + An unordered Categorical will return categories in the + order of appearance. + + >>> cudf.unique(cudf.Series(pd.Categorical(list("baabc")))) + 0 b + 1 a + 2 c + dtype: category + Categories (3, object): ['a', 'b', 'c'] + + >>> cudf.unique(cudf.Series(pd.Categorical(list("baabc"), categories=list("abc")))) + 0 b + 1 a + 2 c + dtype: category + Categories (3, object): ['a', 'b', 'c'] + + An ordered Categorical preserves the category ordering. + + >>> cudf.unique( + ... cudf.Series( + ... pd.Categorical(list("baabc"), categories=list("abc"), ordered=True) + ... ) + ... ) + 0 b + 1 a + 2 c + dtype: category + Categories (3, object): ['a' < 'b' < 'c'] + + An array of tuples (supported by pandas only; cudf does not support tuple elements): + + >>> pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values) + array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object) + """ + if not isinstance(values, (cudf.Series, cudf.Index, cp.ndarray)): + raise ValueError( + "Must pass cudf.Series, cudf.Index, or cupy.ndarray object" + ) + if isinstance(values, cp.ndarray): + # pandas.unique will not sort the values in the result + # while cupy.unique documents it will, so we pass cupy.ndarray + # through cudf.Index to maintain the original order.
+ return cp.asarray(cudf.Index(values).unique()) + if isinstance(values, cudf.Series): + if get_option("mode.pandas_compatible"): + if isinstance(values.dtype, cudf.CategoricalDtype): + raise NotImplementedError( + "cudf.Categorical is not implemented" + ) + else: + return cp.asarray(values.unique()) + return values.unique() diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py index 80dbbe4c048..32ae8c5ee53 100644 --- a/python/cudf/cudf/core/buffer/buffer.py +++ b/python/cudf/cudf/core/buffer/buffer.py @@ -11,6 +11,7 @@ import numpy from typing_extensions import Self +import pylibcudf import rmm import cudf @@ -501,7 +502,7 @@ def get_ptr_and_size(array_interface: Mapping) -> tuple[int, int]: shape = array_interface["shape"] or (1,) strides = array_interface["strides"] itemsize = cudf.dtype(array_interface["typestr"]).itemsize - if strides is None or cudf._lib.pylibcudf.column.is_c_contiguous( + if strides is None or pylibcudf.column.is_c_contiguous( shape, strides, itemsize ): nelem = math.prod(shape) diff --git a/python/cudf/cudf/core/byte_pair_encoding.py b/python/cudf/cudf/core/byte_pair_encoding.py index 4c881022ecf..6ca64a0a2be 100644 --- a/python/cudf/cudf/core/byte_pair_encoding.py +++ b/python/cudf/cudf/core/byte_pair_encoding.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. from __future__ import annotations @@ -27,7 +27,7 @@ class BytePairEncoder: def __init__(self, merges_pair: "cudf.Series"): self.merge_pairs = cpp_merge_pairs(merges_pair._column) - def __call__(self, text, separator: str = " "): + def __call__(self, text, separator: str = " ") -> cudf.Series: """ Parameters @@ -56,4 +56,4 @@ def __call__(self, text, separator: str = " "): sep = cudf.Scalar(separator, dtype="str") result = cpp_byte_pair_encoding(text._column, self.merge_pairs, sep) - return cudf.Series(result) + return cudf.Series._from_column(result) diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py index e7119fcdf47..06791df7dc0 100644 --- a/python/cudf/cudf/core/column/__init__.py +++ b/python/cudf/cudf/core/column/__init__.py @@ -8,11 +8,9 @@ from cudf.core.column.column import ( ColumnBase, as_column, - build_categorical_column, build_column, column_empty, column_empty_like, - column_empty_like_same_mask, concat_columns, deserialize_columns, serialize_columns, diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 9433a91b9c6..de5ed15771d 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -52,6 +52,15 @@ _DEFAULT_CATEGORICAL_VALUE = np.int8(-1) +def as_unsigned_codes( + num_cats: int, codes: NumericalColumn +) -> NumericalColumn: + codes_dtype = min_unsigned_type(num_cats) + return cast( + cudf.core.column.numerical.NumericalColumn, codes.astype(codes_dtype) + ) + + class CategoricalAccessor(ColumnMethods): """ Accessor object for categorical properties of the Series values. @@ -123,7 +132,7 @@ def categories(self) -> "cudf.core.index.Index": return self._column.dtype.categories @property - def codes(self) -> "cudf.Series": + def codes(self) -> cudf.Series: """ Return Series of codes as well as the index. 
""" @@ -132,7 +141,7 @@ def codes(self) -> "cudf.Series": if isinstance(self._parent, cudf.Series) else None ) - return cudf.Series(self._column.codes, index=index) + return cudf.Series._from_column(self._column.codes, index=index) @property def ordered(self) -> bool: @@ -465,6 +474,18 @@ def reorder_categories( ) +def validate_categorical_children(children) -> None: + if not ( + len(children) == 1 + and isinstance(children[0], cudf.core.column.numerical.NumericalColumn) + and children[0].dtype.kind in "iu" + ): + # TODO: Enforce unsigned integer? + raise ValueError( + "Must specify exactly one child NumericalColumn of integers for representing the codes." + ) + + class CategoricalColumn(column.ColumnBase): """ Implements operations for Columns of Categorical type @@ -481,8 +502,7 @@ class CategoricalColumn(column.ColumnBase): respectively """ - dtype: cudf.core.dtypes.CategoricalDtype - _codes: NumericalColumn | None + dtype: CategoricalDtype _children: tuple[NumericalColumn] _VALID_REDUCTIONS = { "max", @@ -499,25 +519,29 @@ class CategoricalColumn(column.ColumnBase): def __init__( self, + data: None, + size: int | None, dtype: CategoricalDtype, mask: Buffer | None = None, - size: int | None = None, offset: int = 0, null_count: int | None = None, - children: tuple["column.ColumnBase", ...] = (), + children: tuple[NumericalColumn] = (), # type: ignore[assignment] ): + if data is not None: + raise ValueError(f"{data=} must be None") + validate_categorical_children(children) if size is None: - for child in children: - assert child.offset == 0 - assert child.base_mask is None - size = children[0].size + child = children[0] + assert child.offset == 0 + assert child.base_mask is None + size = child.size size = size - offset - if isinstance(dtype, pd.api.types.CategoricalDtype): - dtype = CategoricalDtype.from_pandas(dtype) if not isinstance(dtype, CategoricalDtype): - raise ValueError("dtype must be instance of CategoricalDtype") + raise ValueError( + f"{dtype=} must be cudf.CategoricalDtype instance." 
+ ) super().__init__( - data=None, + data=data, size=size, dtype=dtype, mask=mask, @@ -525,7 +549,7 @@ def __init__( null_count=null_count, children=children, ) - self._codes = None + self._codes = self.children[0].set_mask(self.mask) @property def base_size(self) -> int: @@ -558,13 +582,14 @@ def _process_values_for_isin( rhs = cudf.core.column.as_column(values, dtype=self.dtype) return lhs, rhs - def set_base_mask(self, value: Buffer | None): + def set_base_mask(self, value: Buffer | None) -> None: super().set_base_mask(value) - self._codes = None + self._codes = self.children[0].set_mask(self.mask) - def set_base_children(self, value: tuple[ColumnBase, ...]): + def set_base_children(self, value: tuple[NumericalColumn]) -> None: # type: ignore[override] super().set_base_children(value) - self._codes = None + validate_categorical_children(value) + self._codes = value[0].set_mask(self.mask) @property def children(self) -> tuple[NumericalColumn]: @@ -572,13 +597,10 @@ def children(self) -> tuple[NumericalColumn]: codes_column = self.base_children[0] start = self.offset * codes_column.dtype.itemsize end = start + self.size * codes_column.dtype.itemsize - codes_column = cast( - cudf.core.column.NumericalColumn, - column.build_column( - data=codes_column.base_data[start:end], - dtype=codes_column.dtype, - size=self.size, - ), + codes_column = cudf.core.column.NumericalColumn( + data=codes_column.base_data[start:end], + dtype=codes_column.dtype, + size=self.size, ) self._children = (codes_column,) return self._children @@ -589,9 +611,7 @@ def categories(self) -> ColumnBase: @property def codes(self) -> NumericalColumn: - if self._codes is None: - self._codes = self.children[0].set_mask(self.mask) - return cast(cudf.core.column.NumericalColumn, self._codes) + return self._codes @property def ordered(self) -> bool: @@ -604,11 +624,13 @@ def __setitem__(self, key, value): to_add_categories = 0 else: if cudf.api.types.is_scalar(value): - arr = [value] + arr = column.as_column(value, length=1, nan_as_null=False) else: - arr = value + arr = column.as_column(value, nan_as_null=False) to_add_categories = len( - cudf.Index(arr, nan_as_null=False).difference(self.categories) + cudf.Index._from_column(arr).difference( + cudf.Index._from_column(self.categories) + ) ) if to_add_categories > 0: @@ -624,13 +646,12 @@ def __setitem__(self, key, value): value = value.codes codes = self.codes codes[key] = value - out = cudf.core.column.build_categorical_column( - categories=self.categories, - codes=codes, - mask=codes.base_mask, + out = type(self)( + data=self.data, size=codes.size, - offset=self.offset, - ordered=self.ordered, + dtype=self.dtype, + mask=codes.base_mask, + children=(codes,), ) self._mimic_inplace(out, inplace=True) @@ -656,18 +677,13 @@ def _fill( def slice(self, start: int, stop: int, stride: int | None = None) -> Self: codes = self.codes.slice(start, stop, stride) - return cast( - Self, - cudf.core.column.build_categorical_column( - categories=self.categories, - codes=cudf.core.column.build_column( - codes.base_data, dtype=codes.dtype - ), - mask=codes.base_mask, - ordered=self.ordered, - size=codes.size, - offset=codes.offset, - ), + return type(self)( + data=self.data, # type: ignore[arg-type] + size=codes.size, + dtype=self.dtype, + mask=codes.base_mask, + offset=codes.offset, + children=(codes,), ) def _reduce( @@ -708,7 +724,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: ) return self.codes._binaryop(other.codes, op) - def normalize_binop_value(self, other: 
ScalarLike) -> CategoricalColumn: + def normalize_binop_value(self, other: ScalarLike) -> Self: if isinstance(other, column.ColumnBase): if not isinstance(other, CategoricalColumn): return NotImplemented @@ -716,30 +732,27 @@ def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: raise TypeError( "Categoricals can only compare with the same type" ) - return other - - ary = column.as_column( + return cast(Self, other) + codes = column.as_column( self._encode(other), length=len(self), dtype=self.codes.dtype ) - return column.build_categorical_column( - categories=self.dtype.categories._values, - codes=column.as_column(ary), + return type(self)( + data=None, + size=self.size, + dtype=self.dtype, mask=self.base_mask, - ordered=self.dtype.ordered, + children=(codes,), # type: ignore[arg-type] ) - def sort_values( - self, ascending: bool = True, na_position="last" - ) -> CategoricalColumn: + def sort_values(self, ascending: bool = True, na_position="last") -> Self: codes = self.codes.sort_values(ascending, na_position) - col = column.build_categorical_column( - categories=self.dtype.categories._values, - codes=column.build_column(codes.base_data, dtype=codes.dtype), - mask=codes.base_mask, + return type(self)( + data=self.data, # type: ignore[arg-type] size=codes.size, - ordered=self.dtype.ordered, + dtype=self.dtype, + mask=codes.base_mask, + children=(codes,), ) - return col def element_indexing(self, index: int) -> ScalarLike: val = self.codes.element_indexing(index) @@ -766,12 +779,12 @@ def to_pandas( if self.categories.dtype.kind == "f": new_mask = bools_to_mask(self.notnull()) - col = column.build_categorical_column( - categories=self.categories, - codes=column.as_column(self.codes, dtype=self.codes.dtype), + col = type(self)( + data=self.data, # type: ignore[arg-type] + size=self.size, + dtype=self.dtype, mask=new_mask, - ordered=self.dtype.ordered, - size=self.codes.size, + children=self.children, ) else: col = self @@ -838,15 +851,15 @@ def data_array_view( ) -> numba.cuda.devicearray.DeviceNDArray: return self.codes.data_array_view(mode=mode) - def unique(self) -> CategoricalColumn: + def unique(self) -> Self: codes = self.codes.unique() - return column.build_categorical_column( - categories=self.categories, - codes=column.build_column(codes.base_data, dtype=codes.dtype), + return type(self)( + data=self.data, # type: ignore[arg-type] + size=codes.size, + dtype=self.dtype, mask=codes.base_mask, offset=codes.offset, - size=codes.size, - ordered=self.ordered, + children=(codes,), ) def _encode(self, value) -> ScalarLike: @@ -914,7 +927,7 @@ def find_and_replace( ) cur_categories = replaced.categories new_categories = cur_categories.apply_boolean_mask( - ~cudf.Series(cur_categories.isin(drop_values)) + cur_categories.isin(drop_values).unary_operator("not") ) replaced = replaced._set_categories(new_categories) df = df.dropna(subset=["new"]) @@ -939,7 +952,7 @@ def find_and_replace( # If a category is being replaced by an existing one, we # want to map it to None. 
If it's totally new, we want to # map it to the new label it is to be replaced by - dtype_replace = cudf.Series._from_data({None: replacement_col}) + dtype_replace = cudf.Series._from_column(replacement_col) dtype_replace[dtype_replace.isin(cats_col)] = None new_cats_col = cats_col.find_and_replace( to_replace_col, dtype_replace._column @@ -973,18 +986,21 @@ def find_and_replace( ) replacement_col = catmap._data["index"].astype(replaced.codes.dtype) - replaced = column.as_column(replaced.codes) + replaced_codes = column.as_column(replaced.codes) output = libcudf.replace.replace( - replaced, to_replace_col, replacement_col + replaced_codes, to_replace_col, replacement_col ) + codes = as_unsigned_codes(len(new_cats["cats"]), output) - result = column.build_categorical_column( - categories=new_cats["cats"], - codes=column.build_column(output.base_data, dtype=output.dtype), - mask=output.base_mask, - offset=output.offset, - size=output.size, - ordered=self.dtype.ordered, + result = type(self)( + data=self.data, # type: ignore[arg-type] + size=codes.size, + dtype=CategoricalDtype( + categories=new_cats["cats"], ordered=self.dtype.ordered + ), + mask=codes.base_mask, + offset=codes.offset, + children=(codes,), ) if result.dtype != self.dtype: warnings.warn( @@ -1053,7 +1069,7 @@ def _validate_fillna_value( raise TypeError( "Cannot set a categorical with non-categorical data" ) - fill_value = fill_value._set_categories( + fill_value = cast(CategoricalColumn, fill_value)._set_categories( self.categories, ) return fill_value.codes.astype(self.codes.dtype) @@ -1071,7 +1087,7 @@ def is_monotonic_increasing(self) -> bool: def is_monotonic_decreasing(self) -> bool: return bool(self.ordered) and self.codes.is_monotonic_decreasing - def as_categorical_column(self, dtype: Dtype) -> CategoricalColumn: + def as_categorical_column(self, dtype: Dtype) -> Self: if isinstance(dtype, str) and dtype == "category": return self if isinstance(dtype, pd.CategoricalDtype): @@ -1088,7 +1104,23 @@ def as_categorical_column(self, dtype: Dtype) -> CategoricalColumn: if not isinstance(self.categories, type(dtype.categories._column)): # If both categories are of different Column types, # return a column full of Nulls. 
- return _create_empty_categorical_column(self, dtype) + codes = cast( + cudf.core.column.numerical.NumericalColumn, + column.as_column( + _DEFAULT_CATEGORICAL_VALUE, + length=self.size, + dtype=self.codes.dtype, + ), + ) + codes = as_unsigned_codes(len(dtype.categories), codes) + return type(self)( + data=self.data, # type: ignore[arg-type] + size=self.size, + dtype=dtype, + mask=self.base_mask, + offset=self.offset, + children=(codes,), + ) return self.set_categories( new_categories=dtype.categories, ordered=bool(dtype.ordered) @@ -1134,7 +1166,7 @@ def _mimic_inplace( ) -> Self | None: out = super()._mimic_inplace(other_col, inplace=inplace) if inplace and isinstance(other_col, CategoricalColumn): - self._codes = other_col._codes + self._codes = other_col.codes return out def view(self, dtype: Dtype) -> ColumnBase: @@ -1174,30 +1206,29 @@ def _concat( codes = [o for o in codes if len(o)] codes_col = libcudf.concat.concat_columns(objs) - return column.build_categorical_column( - categories=column.as_column(cats), - codes=column.build_column( - codes_col.base_data, dtype=codes_col.dtype - ), - mask=codes_col.base_mask, + codes_col = as_unsigned_codes( + len(cats), + cast(cudf.core.column.numerical.NumericalColumn, codes_col), + ) + return CategoricalColumn( + data=None, size=codes_col.size, + dtype=CategoricalDtype(categories=cats), + mask=codes_col.base_mask, offset=codes_col.offset, + children=(codes_col,), # type: ignore[arg-type] ) - def _with_type_metadata( - self: CategoricalColumn, dtype: Dtype - ) -> CategoricalColumn: + def _with_type_metadata(self: Self, dtype: Dtype) -> Self: if isinstance(dtype, CategoricalDtype): - return column.build_categorical_column( - categories=dtype.categories._values, - codes=column.build_column( - self.codes.base_data, dtype=self.codes.dtype - ), - mask=self.codes.base_mask, - ordered=dtype.ordered, + return type(self)( + data=self.data, # type: ignore[arg-type] size=self.codes.size, + dtype=dtype, + mask=self.codes.base_mask, offset=self.codes.offset, null_count=self.codes.null_count, + children=(self.codes,), ) return self @@ -1206,7 +1237,7 @@ def set_categories( new_categories: Any, ordered: bool = False, rename: bool = False, - ) -> CategoricalColumn: + ) -> Self: # See CategoricalAccessor.set_categories. ordered = ordered if ordered is not None else self.ordered @@ -1225,25 +1256,39 @@ def set_categories( "new_categories must have the same " "number of items as old categories" ) - - out_col = column.build_categorical_column( - categories=new_categories, - codes=self.base_children[0], - mask=self.base_mask, + out_col = type(self)( + data=self.data, # type: ignore[arg-type] size=self.size, + dtype=CategoricalDtype( + categories=new_categories, ordered=ordered + ), + mask=self.base_mask, offset=self.offset, - ordered=ordered, + children=(self.codes,), ) else: out_col = self if type(out_col.categories) is not type(new_categories): # If both categories are of different Column types, # return a column full of Nulls. 
- out_col = _create_empty_categorical_column( - self, - CategoricalDtype( + new_codes = cast( + cudf.core.column.numerical.NumericalColumn, + column.as_column( + _DEFAULT_CATEGORICAL_VALUE, + length=self.size, + dtype=self.codes.dtype, + ), + ) + new_codes = as_unsigned_codes(len(new_categories), new_codes) + out_col = type(self)( + data=self.data, # type: ignore[arg-type] + size=self.size, + dtype=CategoricalDtype( categories=new_categories, ordered=ordered ), + mask=self.base_mask, + offset=self.offset, + children=(new_codes,), ) elif ( not out_col._categories_equal(new_categories, ordered=True) @@ -1265,12 +1310,8 @@ def _categories_equal( return False # if order doesn't matter, sort before the equals call below if not ordered: - cur_categories = cudf.Series(cur_categories).sort_values( - ignore_index=True - ) - new_categories = cudf.Series(new_categories).sort_values( - ignore_index=True - ) + cur_categories = cur_categories.sort_values() + new_categories = new_categories.sort_values() return cur_categories.equals(new_categories) def _set_categories( @@ -1332,21 +1373,19 @@ def _set_categories( df.reset_index(drop=True, inplace=True) ordered = ordered if ordered is not None else self.ordered - new_codes = df._data["new_codes"] + new_codes = cast( + cudf.core.column.numerical.NumericalColumn, df._data["new_codes"] + ) # codes can't have masks, so take mask out before moving in - return cast( - Self, - column.build_categorical_column( - categories=new_cats, - codes=column.build_column( - new_codes.base_data, dtype=new_codes.dtype - ), - mask=new_codes.base_mask, - size=new_codes.size, - offset=new_codes.offset, - ordered=ordered, - ), + new_codes = as_unsigned_codes(len(new_cats), new_codes) + return type(self)( + data=self.data, # type: ignore[arg-type] + size=new_codes.size, + dtype=CategoricalDtype(categories=new_cats, ordered=ordered), + mask=new_codes.base_mask, + offset=new_codes.offset, + children=(new_codes,), ) def add_categories(self, new_categories: Any) -> Self: @@ -1424,56 +1463,16 @@ def remove_unused_categories(self) -> Self: "remove_unused_categories is currently not supported." 
) - def as_ordered(self, ordered: bool): + def as_ordered(self, ordered: bool) -> Self: if self.dtype.ordered == ordered: return self - return column.build_categorical_column( - categories=self.categories, - codes=self.codes, - mask=self.base_mask, + return type(self)( + data=self.data, # type: ignore[arg-type] size=self.size, + dtype=CategoricalDtype( + categories=self.categories, ordered=ordered + ), + mask=self.base_mask, offset=self.offset, - ordered=ordered, + children=self.children, ) - - -def _create_empty_categorical_column( - categorical_column: CategoricalColumn, dtype: "CategoricalDtype" -) -> CategoricalColumn: - return column.build_categorical_column( - categories=column.as_column(dtype.categories), - codes=column.as_column( - _DEFAULT_CATEGORICAL_VALUE, - length=categorical_column.size, - dtype=categorical_column.codes.dtype, - ), - offset=categorical_column.offset, - size=categorical_column.size, - mask=categorical_column.base_mask, - ordered=dtype.ordered, - ) - - -def pandas_categorical_as_column( - categorical: ColumnLike, codes: ColumnLike | None = None -) -> CategoricalColumn: - """Creates a CategoricalColumn from a pandas.Categorical - - If ``codes`` is defined, use it instead of ``categorical.codes`` - """ - codes = categorical.codes if codes is None else codes - codes = column.as_column(codes) - - valid_codes = codes != codes.dtype.type(_DEFAULT_CATEGORICAL_VALUE) - - mask = None - if not valid_codes.all(): - mask = bools_to_mask(valid_codes) - - return column.build_categorical_column( - categories=categorical.categories, - codes=column.build_column(codes.base_data, codes.dtype), - size=codes.size, - mask=mask, - ordered=categorical.ordered, - ) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 7e0d8ced595..7674565e2c3 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -352,13 +352,17 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase: codes = libcudf.interop.from_arrow(indices_table)[0] categories = libcudf.interop.from_arrow(dictionaries_table)[0] - - return build_categorical_column( - categories=categories, - codes=codes, - mask=codes.base_mask, + codes = cudf.core.column.categorical.as_unsigned_codes( + len(categories), codes + ) + return cudf.core.column.CategoricalColumn( + data=None, size=codes.size, - ordered=array.type.ordered, + dtype=CategoricalDtype( + categories=categories, ordered=array.type.ordered + ), + mask=codes.base_mask, + children=(codes,), ) result = libcudf.interop.from_arrow(data)[0] @@ -553,7 +557,7 @@ def __setitem__(self, key: Any, value: Any): """ # Normalize value to scalar/column - value_normalized = ( + value_normalized: cudf.Scalar | ColumnBase = ( cudf.Scalar(value, dtype=self.dtype) if is_scalar(value) else as_column(value, dtype=self.dtype) @@ -609,9 +613,12 @@ def _scatter_by_slice( ) # step != 1, create a scatter map with arange - scatter_map = as_column( - rng, - dtype=cudf.dtype(np.int32), + scatter_map = cast( + cudf.core.column.NumericalColumn, + as_column( + rng, + dtype=cudf.dtype(np.int32), + ), ) return self._scatter_by_column(scatter_map, value) @@ -947,10 +954,10 @@ def is_monotonic_decreasing(self) -> bool: ) def sort_values( - self: ColumnBase, + self: Self, ascending: bool = True, na_position: str = "last", - ) -> ColumnBase: + ) -> Self: if (not ascending and self.is_monotonic_decreasing) or ( ascending and self.is_monotonic_increasing ): @@ -1038,12 +1045,16 @@ def as_categorical_column(self, dtype) -> ColumnBase: 
             and dtype._categories is not None
         ):
             cat_col = dtype._categories
-            labels = self._label_encoding(cats=cat_col)
-            return build_categorical_column(
-                categories=cat_col,
-                codes=labels,
+            codes = self._label_encoding(cats=cat_col)
+            codes = cudf.core.column.categorical.as_unsigned_codes(
+                len(cat_col), codes
+            )
+            return cudf.core.column.categorical.CategoricalColumn(
+                data=None,
+                size=None,
+                dtype=dtype,
                 mask=self.mask,
-                ordered=dtype.ordered,
+                children=(codes,),
             )
 
         # Categories must be unique and sorted in ascending order.
@@ -1055,15 +1066,16 @@ def as_categorical_column(self, dtype) -> ColumnBase:
         # columns include null index in factorization; remove:
         if self.has_nulls():
             cats = cats.dropna()
-        min_type = min_unsigned_type(len(cats), 8)
-        if cudf.dtype(min_type).itemsize < labels.dtype.itemsize:
-            labels = labels.astype(min_type)
-        return build_categorical_column(
-            categories=cats,
-            codes=labels,
+        labels = cudf.core.column.categorical.as_unsigned_codes(
+            len(cats), labels
+        )
+        return cudf.core.column.categorical.CategoricalColumn(
+            data=None,
+            size=None,
+            dtype=CategoricalDtype(categories=cats, ordered=ordered),
             mask=self.mask,
-            ordered=ordered,
+            children=(labels,),
         )
 
     def as_numerical_column(
@@ -1111,11 +1123,16 @@ def argsort(
         if (ascending and self.is_monotonic_increasing) or (
             not ascending and self.is_monotonic_decreasing
         ):
-            return as_column(range(len(self)))
+            return cast(
+                cudf.core.column.NumericalColumn, as_column(range(len(self)))
+            )
         elif (ascending and self.is_monotonic_decreasing) or (
             not ascending and self.is_monotonic_increasing
         ):
-            return as_column(range(len(self) - 1, -1, -1))
+            return cast(
+                cudf.core.column.NumericalColumn,
+                as_column(range(len(self) - 1, -1, -1)),
+            )
         else:
             return libcudf.sort.order_by(
                 [self], [ascending], na_position, stable=True
@@ -1178,7 +1195,7 @@ def searchsorted(
             na_position=na_position,
         )
 
-    def unique(self) -> ColumnBase:
+    def unique(self) -> Self:
         """
         Get unique values in the data
         """
@@ -1466,22 +1483,6 @@ def _has_any_nan(arbitrary: pd.Series | np.ndarray) -> bool:
     )
 
 
-def column_empty_like_same_mask(
-    column: ColumnBase, dtype: Dtype
-) -> ColumnBase:
-    """Create a new empty Column with the same length and the same mask.
-
-    Parameters
-    ----------
-    dtype : np.dtype like
-        The dtype of the data buffer.
- """ - result = column_empty_like(column, dtype) - if column.nullable: - result = result.set_mask(column.mask) - return result - - def column_empty( row_count: int, dtype: Dtype = "object", masked: bool = False ) -> ColumnBase: @@ -1506,13 +1507,14 @@ def column_empty( elif isinstance(dtype, CategoricalDtype): data = None children = ( - build_column( + cudf.core.column.NumericalColumn( data=as_buffer( rmm.DeviceBuffer( size=row_count * cudf.dtype(libcudf.types.size_type_dtype).itemsize ) ), + size=None, dtype=libcudf.types.size_type_dtype, ), ) @@ -1577,25 +1579,18 @@ def build_column( return col if isinstance(dtype, CategoricalDtype): - if not len(children) == 1: - raise ValueError( - "Must specify exactly one child column for CategoricalColumn" - ) - if not isinstance(children[0], ColumnBase): - raise TypeError("children must be a tuple of Columns") return cudf.core.column.CategoricalColumn( + data=data, # type: ignore[arg-type] dtype=dtype, mask=mask, size=size, offset=offset, null_count=null_count, - children=children, + children=children, # type: ignore[arg-type] ) elif dtype.type is np.datetime64: - if data is None: - raise TypeError("Must specify data buffer") return cudf.core.column.DatetimeColumn( - data=data, + data=data, # type: ignore[arg-type] dtype=dtype, mask=mask, size=size, @@ -1603,10 +1598,8 @@ def build_column( null_count=null_count, ) elif isinstance(dtype, pd.DatetimeTZDtype): - if data is None: - raise TypeError("Must specify data buffer") return cudf.core.column.datetime.DatetimeTZColumn( - data=data, + data=data, # type: ignore[arg-type] dtype=dtype, mask=mask, size=size, @@ -1614,10 +1607,8 @@ def build_column( null_count=null_count, ) elif dtype.type is np.timedelta64: - if data is None: - raise TypeError("Must specify data buffer") return cudf.core.column.TimeDeltaColumn( - data=data, + data=data, # type: ignore[arg-type] dtype=dtype, mask=mask, size=size, @@ -1635,40 +1626,38 @@ def build_column( ) elif isinstance(dtype, ListDtype): return cudf.core.column.ListColumn( - size=size, + data=None, + size=size, # type: ignore[arg-type] dtype=dtype, mask=mask, offset=offset, null_count=null_count, - children=children, + children=children, # type: ignore[arg-type] ) elif isinstance(dtype, IntervalDtype): return cudf.core.column.IntervalColumn( + data=None, + size=size, # type: ignore[arg-type] dtype=dtype, mask=mask, - size=size, offset=offset, - children=children, null_count=null_count, + children=children, # type: ignore[arg-type] ) elif isinstance(dtype, StructDtype): - if size is None: - raise TypeError("Must specify size") return cudf.core.column.StructColumn( - data=data, + data=None, + size=size, # type: ignore[arg-type] dtype=dtype, - size=size, - offset=offset, mask=mask, + offset=offset, null_count=null_count, - children=children, + children=children, # type: ignore[arg-type] ) elif isinstance(dtype, cudf.Decimal64Dtype): - if size is None: - raise TypeError("Must specify size") return cudf.core.column.Decimal64Column( - data=data, - size=size, + data=data, # type: ignore[arg-type] + size=size, # type: ignore[arg-type] offset=offset, dtype=dtype, mask=mask, @@ -1676,11 +1665,9 @@ def build_column( children=children, ) elif isinstance(dtype, cudf.Decimal32Dtype): - if size is None: - raise TypeError("Must specify size") return cudf.core.column.Decimal32Column( - data=data, - size=size, + data=data, # type: ignore[arg-type] + size=size, # type: ignore[arg-type] offset=offset, dtype=dtype, mask=mask, @@ -1688,11 +1675,9 @@ def build_column( children=children, 
         )
     elif isinstance(dtype, cudf.Decimal128Dtype):
-        if size is None:
-            raise TypeError("Must specify size")
         return cudf.core.column.Decimal128Column(
-            data=data,
-            size=size,
+            data=data,  # type: ignore[arg-type]
+            size=size,  # type: ignore[arg-type]
             offset=offset,
             dtype=dtype,
             mask=mask,
@@ -1703,51 +1688,6 @@ def build_column(
         raise TypeError(f"Unrecognized dtype: {dtype}")
 
 
-def build_categorical_column(
-    categories: ColumnBase,
-    codes: ColumnBase,
-    mask: Buffer | None = None,
-    size: int | None = None,
-    offset: int = 0,
-    null_count: int | None = None,
-    ordered: bool = False,
-) -> "cudf.core.column.CategoricalColumn":
-    """
-    Build a CategoricalColumn
-
-    Parameters
-    ----------
-    categories : Column
-        Column of categories
-    codes : Column
-        Column of codes, the size of the resulting Column will be
-        the size of `codes`
-    mask : Buffer
-        Null mask
-    size : int, optional
-    offset : int, optional
-    ordered : bool, default False
-        Indicates whether the categories are ordered
-    """
-    codes_dtype = min_unsigned_type(len(categories))
-    codes = as_column(codes)
-    if codes.dtype != codes_dtype:
-        codes = codes.astype(codes_dtype)
-
-    dtype = CategoricalDtype(categories=categories, ordered=ordered)
-
-    result = build_column(
-        data=None,
-        dtype=dtype,
-        mask=mask,
-        size=size,
-        offset=offset,
-        null_count=null_count,
-        children=(codes,),
-    )
-    return cast("cudf.core.column.CategoricalColumn", result)
-
-
 def check_invalid_array(shape: tuple, dtype):
     """Invalid ndarrays properties that are not supported"""
     if len(shape) > 1:
@@ -1768,7 +1708,7 @@ def as_column(
     nan_as_null: bool | None = None,
     dtype: Dtype | None = None,
     length: int | None = None,
-):
+) -> ColumnBase:
     """Create a Column from an arbitrary object
 
     Parameters
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 81fbb914842..d0ea4612a1b 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -24,6 +24,7 @@
     get_compatible_timezone,
     get_tz_data,
 )
+from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase, as_column, column, string
 from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
 from cudf.utils.dtypes import _get_base_dtype
@@ -34,10 +35,8 @@
         ColumnBinaryOperand,
         DatetimeLikeScalar,
         Dtype,
-        DtypeObj,
         ScalarLike,
     )
-    from cudf.core.buffer import Buffer
     from cudf.core.column.numerical import NumericalColumn
 
 if PANDAS_GE_220:
@@ -207,30 +206,39 @@ class DatetimeColumn(column.ColumnBase):
     def __init__(
         self,
         data: Buffer,
-        dtype: DtypeObj,
+        size: int | None,
+        dtype: np.dtype | pd.DatetimeTZDtype,
         mask: Buffer | None = None,
-        size: int | None = None,  # TODO: make non-optional
         offset: int = 0,
         null_count: int | None = None,
+        children: tuple = (),
     ):
-        dtype = cudf.dtype(dtype)
-        if dtype.kind != "M":
-            raise TypeError(f"{self.dtype} is not a supported datetime type")
-
+        if not isinstance(data, Buffer):
+            raise ValueError("data must be a Buffer.")
+        dtype = self._validate_dtype_instance(dtype)
         if data.size % dtype.itemsize:
             raise ValueError("Buffer size must be divisible by element size")
         if size is None:
             size = data.size // dtype.itemsize
             size = size - offset
+        if len(children) != 0:
+            raise ValueError(f"{type(self).__name__} must have no children.")
         super().__init__(
-            data,
+            data=data,
             size=size,
             dtype=dtype,
             mask=mask,
             offset=offset,
             null_count=null_count,
+            children=children,
         )
 
+    @staticmethod
+    def _validate_dtype_instance(dtype: np.dtype) -> np.dtype:
+        if not (isinstance(dtype, np.dtype) and dtype.kind == "M"):
+            raise ValueError("dtype must be a datetime numpy dtype.")
+        return dtype
+
     def __contains__(self, item: ScalarLike) -> bool:
         try:
             ts = pd.Timestamp(item).as_unit(self.time_unit)
@@ -250,6 +258,10 @@ def __contains__(self, item: ScalarLike) -> bool:
     def time_unit(self) -> str:
         return np.datetime_data(self.dtype)[0]
 
+    @property
+    def quarter(self) -> ColumnBase:
+        return libcudf.datetime.extract_quarter(self)
+
     @property
     def year(self) -> ColumnBase:
         return self.get_dt_field("year")
@@ -308,7 +320,7 @@ def is_quarter_start(self) -> ColumnBase:
     @property
     def is_year_end(self) -> ColumnBase:
         day_of_year = self.day_of_year
-        leap_dates = libcudf.datetime.is_leap_year(self)
+        leap_dates = self.is_leap_year
 
         leap = day_of_year == cudf.Scalar(366)
         non_leap = day_of_year == cudf.Scalar(365)
@@ -316,6 +328,10 @@ def is_year_end(self) -> ColumnBase:
             False
         )
 
+    @property
+    def is_leap_year(self) -> ColumnBase:
+        return libcudf.datetime.is_leap_year(self)
+
     @property
     def is_year_start(self) -> ColumnBase:
         return (self.day_of_year == 1).fillna(False)
@@ -473,15 +489,15 @@ def as_timedelta_column(self, dtype: Dtype) -> None:  # type: ignore[override]
 
     def as_numerical_column(
         self, dtype: Dtype
-    ) -> "cudf.core.column.NumericalColumn":
-        col = column.build_column(
-            data=self.base_data,
-            dtype=np.int64,
+    ) -> cudf.core.column.NumericalColumn:
+        col = cudf.core.column.NumericalColumn(
+            data=self.base_data,  # type: ignore[arg-type]
+            dtype=np.dtype(np.int64),
             mask=self.base_mask,
             offset=self.offset,
             size=self.size,
         )
-        return cast("cudf.core.column.NumericalColumn", col.astype(dtype))
+        return cast(cudf.core.column.NumericalColumn, col.astype(dtype))
 
     def strftime(self, format: str) -> cudf.core.column.StringColumn:
         if len(self) == 0:
@@ -850,21 +866,30 @@ class DatetimeTZColumn(DatetimeColumn):
     def __init__(
         self,
         data: Buffer,
+        size: int | None,
         dtype: pd.DatetimeTZDtype,
         mask: Buffer | None = None,
-        size: int | None = None,
         offset: int = 0,
         null_count: int | None = None,
+        children: tuple = (),
     ):
         super().__init__(
             data=data,
-            dtype=_get_base_dtype(dtype),
-            mask=mask,
             size=size,
+            dtype=dtype,
+            mask=mask,
             offset=offset,
             null_count=null_count,
+            children=children,
         )
-        self._dtype = get_compatible_timezone(dtype)
+
+    @staticmethod
+    def _validate_dtype_instance(
+        dtype: pd.DatetimeTZDtype,
+    ) -> pd.DatetimeTZDtype:
+        if not isinstance(dtype, pd.DatetimeTZDtype):
+            raise ValueError("dtype must be a pandas.DatetimeTZDtype")
+        return get_compatible_timezone(dtype)
 
     def to_pandas(
         self,
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 6a7f338b065..8803ebd6791 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -31,14 +31,38 @@
 
 if TYPE_CHECKING:
     from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
+    from cudf.core.buffer import Buffer
 
 
 class DecimalBaseColumn(NumericalBaseColumn):
     """Base column for decimal32, decimal64 or decimal128 columns"""
 
-    dtype: DecimalDtype
     _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS
 
+    def __init__(
+        self,
+        data: Buffer,
+        size: int,
+        dtype: DecimalDtype,
+        mask: Buffer | None = None,
+        offset: int = 0,
+        null_count: int | None = None,
+        children: tuple = (),
+    ):
+        if not isinstance(size, int):
+            raise ValueError("Must specify an integer size")
+        if not isinstance(dtype, DecimalDtype):
+            raise ValueError(f"{dtype=} must be a DecimalDtype instance")
+        super().__init__(
+            data=data,
+            size=size,
+            dtype=dtype,
+            mask=mask,
+            offset=offset,
+            null_count=null_count,
+            children=children,
+        )
+
     @property
     def __cuda_array_interface__(self):
         raise NotImplementedError(
@@ -111,9 +135,15 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str):
         # are computed outside of libcudf
         if op in {"__add__", "__sub__", "__mul__", "__div__"}:
             output_type = _get_decimal_type(lhs.dtype, rhs.dtype, op)
+            lhs = lhs.astype(
+                type(output_type)(lhs.dtype.precision, lhs.dtype.scale)
+            )
+            rhs = rhs.astype(
+                type(output_type)(rhs.dtype.precision, rhs.dtype.scale)
+            )
             result = libcudf.binaryop.binaryop(lhs, rhs, op, output_type)
-            # TODO: Why is this necessary? Why isn't the result's
-            # precision already set correctly based on output_type?
+            # libcudf doesn't support precision, so result.dtype doesn't
+            # maintain output_type.precision
             result.dtype.precision = output_type.precision
         elif op in {
             "__eq__",
@@ -205,7 +235,27 @@ def as_numerical_column(
         )
 
 
 class Decimal32Column(DecimalBaseColumn):
-    dtype: Decimal32Dtype
+    def __init__(
+        self,
+        data: Buffer,
+        size: int,
+        dtype: Decimal32Dtype,
+        mask: Buffer | None = None,
+        offset: int = 0,
+        null_count: int | None = None,
+        children: tuple = (),
+    ):
+        if not isinstance(dtype, Decimal32Dtype):
+            raise ValueError(f"{dtype=} must be a Decimal32Dtype instance")
+        super().__init__(
+            data=data,
+            size=size,
+            dtype=dtype,
+            mask=mask,
+            offset=offset,
+            null_count=null_count,
+            children=children,
+        )
 
     @classmethod
     def from_arrow(cls, data: pa.Array):
@@ -266,7 +316,27 @@ def _with_type_metadata(
 
 
 class Decimal128Column(DecimalBaseColumn):
-    dtype: Decimal128Dtype
+    def __init__(
+        self,
+        data: Buffer,
+        size: int,
+        dtype: Decimal128Dtype,
+        mask: Buffer | None = None,
+        offset: int = 0,
+        null_count: int | None = None,
+        children: tuple = (),
+    ):
+        if not isinstance(dtype, Decimal128Dtype):
+            raise ValueError(f"{dtype=} must be a Decimal128Dtype instance")
+        super().__init__(
+            data=data,
+            size=size,
+            dtype=dtype,
+            mask=mask,
+            offset=offset,
+            null_count=null_count,
+            children=children,
+        )
 
     @classmethod
     def from_arrow(cls, data: pa.Array):
@@ -287,7 +357,27 @@ def _with_type_metadata(
 
 
 class Decimal64Column(DecimalBaseColumn):
-    dtype: Decimal64Dtype
+    def __init__(
+        self,
+        data: Buffer,
+        size: int,
+        dtype: Decimal64Dtype,
+        mask: Buffer | None = None,
+        offset: int = 0,
+        null_count: int | None = None,
+        children: tuple = (),
+    ):
+        if not isinstance(dtype, Decimal64Dtype):
+            raise ValueError(f"{dtype=} must be a Decimal64Dtype instance")
+        super().__init__(
+            data=data,
+            size=size,
+            dtype=dtype,
+            mask=mask,
+            offset=offset,
+            null_count=null_count,
+            children=children,
+        )
 
     def __setitem__(self, key, value):
         if isinstance(value, np.integer):
@@ -346,7 +436,11 @@ def _with_type_metadata(
         return self
 
 
-def _get_decimal_type(lhs_dtype, rhs_dtype, op):
+def _get_decimal_type(
+    lhs_dtype: DecimalDtype,
+    rhs_dtype: DecimalDtype,
+    op: str,
+) -> DecimalDtype:
     """
     Returns the resulting decimal type after calculating
     precision & scale when performing the binary operation
@@ -357,6 +451,7 @@
 
     # This should at some point be hooked up to libcudf's
     # binary_operation_fixed_point_scale
+    # Note: libcudf decimal types don't have a concept of precision
     p1, p2 = lhs_dtype.precision, rhs_dtype.precision
     s1, s2 = lhs_dtype.scale, rhs_dtype.scale
 
diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py
index b2f79ef0c65..9147270c289 100644
--- a/python/cudf/cudf/core/column/interval.py
+++ b/python/cudf/cudf/core/column/interval.py
@@ -11,31 +11,46 @@
 from cudf.core.dtypes import IntervalDtype
 
 if TYPE_CHECKING:
+    from typing_extensions import Self
+
+    from cudf._typing import ScalarLike
+    from cudf.core.buffer import Buffer
     from cudf.core.column import ColumnBase
 
 
 class IntervalColumn(StructColumn):
     def __init__(
         self,
-        dtype,
-        mask=None,
-        size=None,
-        offset=0,
-        null_count=None,
-        children=(),
+        data: None,
+        size: int,
+        dtype: IntervalDtype,
+        mask: Buffer | None = None,
+        offset: int = 0,
+        null_count: int | None = None,
+        children: tuple[ColumnBase, ColumnBase] = (),  # type: ignore[assignment]
     ):
+        if len(children) != 2:
+            raise ValueError(
+                "children must be a tuple of two columns (left edges, right edges)."
+            )
         super().__init__(
-            data=None,
+            data=data,
+            size=size,
             dtype=dtype,
             mask=mask,
-            size=size,
             offset=offset,
             null_count=null_count,
             children=children,
         )
 
+    @staticmethod
+    def _validate_dtype_instance(dtype: IntervalDtype) -> IntervalDtype:
+        if not isinstance(dtype, IntervalDtype):
+            raise ValueError("dtype must be an IntervalDtype.")
+        return dtype
+
     @classmethod
-    def from_arrow(cls, data):
+    def from_arrow(cls, data: pa.Array) -> Self:
         new_col = super().from_arrow(data.storage)
         size = len(data)
         dtype = IntervalDtype.from_arrow(data.type)
@@ -47,16 +62,17 @@ def from_arrow(cls, data):
             null_count = data.null_count
         children = new_col.children
 
-        return IntervalColumn(
+        return cls(
+            data=None,
             size=size,
             dtype=dtype,
             mask=mask,
             offset=offset,
             null_count=null_count,
-            children=children,
+            children=children,  # type: ignore[arg-type]
         )
 
-    def to_arrow(self):
+    def to_arrow(self) -> pa.Array:
         typ = self.dtype.to_arrow()
         struct_arrow = super().to_arrow()
         if len(struct_arrow) == 0:
@@ -66,9 +82,14 @@ def to_arrow(self):
         return pa.ExtensionArray.from_storage(typ, struct_arrow)
 
     @classmethod
-    def from_struct_column(cls, struct_column: StructColumn, closed="right"):
+    def from_struct_column(
+        cls,
+        struct_column: StructColumn,
+        closed: Literal["left", "right", "both", "neither"] = "right",
+    ) -> Self:
         first_field_name = next(iter(struct_column.dtype.fields.keys()))
-        return IntervalColumn(
+        return cls(
+            data=None,
             size=struct_column.size,
             dtype=IntervalDtype(
                 struct_column.dtype.fields[first_field_name], closed
@@ -76,12 +97,13 @@ def from_struct_column(cls, struct_column: StructColumn, closed="right"):
             mask=struct_column.base_mask,
             offset=struct_column.offset,
             null_count=struct_column.null_count,
-            children=struct_column.base_children,
+            children=struct_column.base_children,  # type: ignore[arg-type]
         )
 
-    def copy(self, deep=True):
+    def copy(self, deep: bool = True) -> Self:
         struct_copy = super().copy(deep=deep)
-        return IntervalColumn(
+        return IntervalColumn(  # type: ignore[return-value]
+            data=None,
             size=struct_copy.size,
             dtype=IntervalDtype(
                 struct_copy.dtype.fields["left"], self.dtype.closed
@@ -89,7 +111,7 @@ def copy(self, deep=True):
             mask=struct_copy.base_mask,
             offset=struct_copy.offset,
             null_count=struct_copy.null_count,
-            children=struct_copy.base_children,
+            children=struct_copy.base_children,  # type: ignore[arg-type]
         )
 
     @property
@@ -137,25 +159,27 @@ def overlaps(other) -> ColumnBase:
     def set_closed(
         self, closed: Literal["left", "right", "both", "neither"]
-    ) -> IntervalColumn:
-        return IntervalColumn(
+    ) -> Self:
+        return IntervalColumn(  # type: ignore[return-value]
+            data=None,
             size=self.size,
             dtype=IntervalDtype(self.dtype.fields["left"], closed),
             mask=self.base_mask,
             offset=self.offset,
             null_count=self.null_count,
-            children=self.base_children,
+            children=self.base_children,  # type: ignore[arg-type]
         )
 
-    def as_interval_column(self, dtype):
+    def as_interval_column(self, dtype: IntervalDtype) -> Self:  # type: ignore[override]
         if isinstance(dtype, IntervalDtype):
-            return IntervalColumn(
+            return IntervalColumn(  # type: ignore[return-value]
+                data=None,
                 size=self.size,
                 dtype=dtype,
                 mask=self.mask,
                 offset=self.offset,
                 null_count=self.null_count,
-                children=tuple(
+                children=tuple(  # type: ignore[arg-type]
                     child.astype(dtype.subtype) for child in self.children
                 ),
             )
@@ -186,3 +210,16 @@ def element_indexing(self, index: int):
         if cudf.get_option("mode.pandas_compatible"):
             return pd.Interval(**result, closed=self.dtype.closed)
         return result
+
+    def _reduce(
+        self,
+        op: str,
+        skipna: bool | None = None,
+        min_count: int = 0,
+        *args,
+        **kwargs,
+    ) -> ScalarLike:
+        result = super()._reduce(op, skipna, min_count, *args, **kwargs)
+        if cudf.get_option("mode.pandas_compatible"):
+            return pd.Interval(**result, closed=self.dtype.closed)
+        return result
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index 1b7cd95b3d0..c6a39199e3b 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from functools import cached_property
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Sequence, cast
 
 import numpy as np
 import pandas as pd
@@ -29,30 +29,46 @@
 from cudf.api.types import _is_non_decimal_numeric_dtype, is_scalar
 from cudf.core.column import ColumnBase, as_column, column
 from cudf.core.column.methods import ColumnMethods, ParentType
+from cudf.core.column.numerical import NumericalColumn
 from cudf.core.dtypes import ListDtype
 from cudf.core.missing import NA
 
 if TYPE_CHECKING:
     from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
+    from cudf.core.buffer import Buffer
 
 
 class ListColumn(ColumnBase):
-    dtype: ListDtype
     _VALID_BINARY_OPERATIONS = {"__add__", "__radd__"}
 
     def __init__(
         self,
-        size,
-        dtype,
-        mask=None,
-        offset=0,
-        null_count=None,
-        children=(),
+        data: None,
+        size: int,
+        dtype: ListDtype,
+        mask: Buffer | None = None,
+        offset: int = 0,
+        null_count: int | None = None,
+        children: tuple[NumericalColumn, ColumnBase] = (),  # type: ignore[assignment]
     ):
+        if data is not None:
+            raise ValueError("data must be None")
+        if not isinstance(dtype, ListDtype):
+            raise ValueError("dtype must be a cudf.ListDtype")
+        if not (
+            len(children) == 2
+            and isinstance(children[0], NumericalColumn)
+            # TODO: Enforce int32_t (size_type) used in libcudf?
+            and children[0].dtype.kind == "i"
+            and isinstance(children[1], ColumnBase)
+        ):
+            raise ValueError(
+                "children must be a tuple of 2 columns of (signed integer offsets, list values)"
+            )
         super().__init__(
-            None,
-            size,
-            dtype,
+            data=data,
+            size=size,
+            dtype=dtype,
             mask=mask,
             offset=offset,
             null_count=null_count,
@@ -131,7 +147,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
         raise TypeError("can only concatenate list to list")
 
     @property
-    def elements(self):
+    def elements(self) -> ColumnBase:
         """
         Column containing the elements of each list
         (may itself be a ListColumn)
@@ -139,11 +155,11 @@ def elements(self):
         return self.children[1]
 
     @property
-    def offsets(self):
+    def offsets(self) -> NumericalColumn:
         """
         Integer offsets to elements specifying each row of the ListColumn
         """
-        return self.children[0]
+        return cast(NumericalColumn, self.children[0])
 
     def to_arrow(self):
         offsets = self.offsets.to_arrow()
@@ -172,10 +188,9 @@ def set_base_data(self, value):
         else:
             super().set_base_data(value)
 
-    def set_base_children(self, value: tuple[ColumnBase, ...]):
+    def set_base_children(self, value: tuple[NumericalColumn, ColumnBase]):  # type: ignore[override]
         super().set_base_children(value)
-        _, values = value
-        self._dtype = cudf.ListDtype(element_type=values.dtype)
+        self._dtype = cudf.ListDtype(element_type=value[1].dtype)
 
     @property
     def __cuda_array_interface__(self):
@@ -196,12 +211,13 @@ def _with_type_metadata(
                 dtype.element_type
             )
             return ListColumn(
+                data=None,
                 dtype=dtype,
                 mask=self.base_mask,
                 size=self.size,
                 offset=self.offset,
                 null_count=self.null_count,
-                children=(self.base_children[0], elements),
+                children=(self.base_children[0], elements),  # type: ignore[arg-type]
             )
         return self
@@ -226,24 +242,28 @@ def from_sequences(
         """
         data_col = column.column_empty(0)
         mask_col = []
-        offset_col = [0]
+        offset_vals = [0]
         offset = 0
 
         # Build Data, Mask & Offsets
         for data in arbitrary:
             if cudf._lib.scalar._is_null_host_scalar(data):
                 mask_col.append(False)
-                offset_col.append(offset)
+                offset_vals.append(offset)
             else:
                 mask_col.append(True)
                 data_col = data_col.append(as_column(data))
                 offset += len(data)
-                offset_col.append(offset)
+                offset_vals.append(offset)
 
-        offset_col = column.as_column(offset_col, dtype=size_type_dtype)
+        offset_col = cast(
+            NumericalColumn,
+            column.as_column(offset_vals, dtype=size_type_dtype),
+        )
 
         # Build ListColumn
         res = cls(
+            data=None,
             size=len(arbitrary),
             dtype=cudf.ListDtype(data_col.dtype),
             mask=cudf._lib.transform.bools_to_mask(as_column(mask_col)),
@@ -283,12 +303,13 @@ def _transform_leaves(self, func, *args, **kwargs) -> Self:
         for c in cc:
             o = c.children[0]
             lc = cudf.core.column.ListColumn(  # type: ignore
+                data=None,
                 size=c.size,
                 dtype=cudf.ListDtype(lc.dtype),
                 mask=c.mask,
                 offset=c.offset,
                 null_count=c.null_count,
-                children=(o, lc),
+                children=(o, lc),  # type: ignore[arg-type]
             )
         return lc
 
@@ -320,7 +341,7 @@ def __init__(self, parent: ParentType):
 
     def get(
         self,
-        index: int,
+        index: int | ColumnLike,
         default: ScalarLike | ColumnLike | None = None,
     ) -> ParentType:
         """
diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py
index 7c6f4e05577..05a0ab2e09a 100644
--- a/python/cudf/cudf/core/column/methods.py
+++ b/python/cudf/cudf/core/column/methods.py
@@ -7,6 +7,8 @@
 from typing_extensions import Literal
 
 import cudf
+import cudf.core.column
+import cudf.core.column_accessor
 from cudf.utils.utils import NotIterable
 
 ParentType = Union["cudf.Series", "cudf.core.index.Index"]
@@ -63,8 +65,8 @@ def _return_or_inplace(
         """
         if inplace:
             self._parent._mimic_inplace(
-                self._parent.__class__._from_data(
-                    {self._parent.name: new_col}
+                type(self._parent)._from_column(
+                    new_col, name=self._parent.name
                 ),
                 inplace=True,
             )
@@ -84,15 +86,12 @@ def _return_or_inplace(
                     data=table, index=self._parent.index
                 )
         elif isinstance(self._parent, cudf.Series):
-            if retain_index:
-                return cudf.Series(
-                    new_col,
-                    name=self._parent.name,
-                    index=self._parent.index,
-                )
-            else:
-                return cudf.Series(new_col, name=self._parent.name)
+            return cudf.Series._from_column(
+                new_col,
+                name=self._parent.name,
+                index=self._parent.index if retain_index else None,
+            )
         elif isinstance(self._parent, cudf.BaseIndex):
-            return cudf.Index(new_col, name=self._parent.name)
+            return cudf.Index._from_column(new_col, name=self._parent.name)
         else:
             return self._parent._mimic_inplace(new_col, inplace=False)
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index f9404eb3b40..78d2814ed26 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -3,23 +3,18 @@
 from __future__ import annotations
 
 import functools
-from typing import TYPE_CHECKING, Any, Callable, Sequence, cast
+from typing import TYPE_CHECKING, Any, Sequence, cast
 
 import numpy as np
 import pandas as pd
 from typing_extensions import Self
 
+import pylibcudf
+
 import cudf
 from cudf import _lib as libcudf
-from cudf._lib import pylibcudf
 from cudf.api.types import is_integer, is_scalar
-from cudf.core.column import (
-    ColumnBase,
-    as_column,
-    build_column,
-    column,
-    string,
-)
+from cudf.core.column import ColumnBase, as_column, column, string
 from cudf.core.dtypes import CategoricalDtype
 from cudf.core.mixins import BinaryOperand
 from cudf.errors import MixedTypeError
@@ -33,6 +28,8 @@
 from .numerical_base import NumericalBaseColumn
 
 if TYPE_CHECKING:
+    from collections.abc import Callable
+
     from cudf._typing import (
         ColumnBinaryOperand,
         ColumnLike,
@@ -67,25 +64,30 @@ class NumericalColumn(NumericalBaseColumn):
     def __init__(
         self,
         data: Buffer,
-        dtype: DtypeObj,
+        size: int | None,
+        dtype: np.dtype,
         mask: Buffer | None = None,
-        size: int | None = None,  # TODO: make this non-optional
         offset: int = 0,
         null_count: int | None = None,
+        children: tuple = (),
     ):
-        dtype = cudf.dtype(dtype)
+        if not (isinstance(dtype, np.dtype) and dtype.kind in "iufb"):
+            raise ValueError(
+                "dtype must be a floating, integer or boolean numpy dtype."
+            )
 
         if data.size % dtype.itemsize:
             raise ValueError("Buffer size must be divisible by element size")
         if size is None:
             size = (data.size // dtype.itemsize) - offset
         super().__init__(
-            data,
+            data=data,
             size=size,
             dtype=dtype,
             mask=mask,
             offset=offset,
             null_count=null_count,
+            children=children,
        )
 
     def _clear_cache(self):
@@ -142,7 +144,7 @@ def __setitem__(self, key: Any, value: Any):
         """
 
         # Normalize value to scalar/column
-        device_value = (
+        device_value: cudf.Scalar | ColumnBase = (
             cudf.Scalar(
                 value,
                 dtype=self.dtype
@@ -205,16 +207,53 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
             np.bool_: np.float32,
         }
 
+        out_dtype = None
         if op in {"__truediv__", "__rtruediv__"}:
             # Division with integer types results in a suitable float.
             if truediv_type := int_float_dtype_mapping.get(self.dtype.type):
                 return self.astype(truediv_type)._binaryop(other, op)
+        elif op in {
+            "__lt__",
+            "__gt__",
+            "__le__",
+            "__ge__",
+            "__eq__",
+            "__ne__",
+        }:
+            out_dtype = "bool"
+
+            # If `other` is a Python integer and it is out-of-bounds
+            # promotion could fail but we can trivially define the result
+            # in terms of `notnull` or `NULL_NOT_EQUALS`.
+            if type(other) is int and self.dtype.kind in "iu":  # noqa: E721
+                truthiness = None
+                iinfo = np.iinfo(self.dtype)
+                if iinfo.min > other:
+                    truthiness = op in {"__ne__", "__gt__", "__ge__"}
+                elif iinfo.max < other:
+                    truthiness = op in {"__ne__", "__lt__", "__le__"}
+
+                # Compare with minimum value so that the result is true/false
+                if truthiness is True:
+                    other = iinfo.min
+                    op = "__ge__"
+                elif truthiness is False:
+                    other = iinfo.min
+                    op = "__lt__"
+
+        elif op in {"NULL_EQUALS", "NULL_NOT_EQUALS"}:
+            out_dtype = "bool"
 
         reflect, op = self._check_reflected_op(op)
         if (other := self._wrap_binop_normalization(other)) is NotImplemented:
             return NotImplemented
-        out_dtype = self.dtype
-        if other is not None:
+
+        if out_dtype is not None:
+            pass  # out_dtype was already set to bool
+        if other is None:
+            # not a binary operator, so no need to promote
+            out_dtype = self.dtype
+        elif out_dtype is None:
             out_dtype = np.result_type(self.dtype, other.dtype)
             if op in {"__mod__", "__floordiv__"}:
                 tmp = self if reflect else other
@@ -231,17 +270,6 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
                         out_dtype = cudf.dtype("float64")
                 elif is_scalar(tmp) and tmp == 0:
                     out_dtype = cudf.dtype("float64")
-        if op in {
-            "__lt__",
-            "__gt__",
-            "__le__",
-            "__ge__",
-            "__eq__",
-            "__ne__",
-            "NULL_EQUALS",
-            "NULL_NOT_EQUALS",
-        }:
-            out_dtype = "bool"
 
         if op in {"__and__", "__or__", "__xor__"}:
             if self.dtype.kind == "f" or other.dtype.kind == "f":
@@ -253,7 +281,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
             if self.dtype.kind == "b" or other.dtype.kind == "b":
                 out_dtype = "bool"
 
-        if (
+        elif (
             op == "__pow__"
             and self.dtype.kind in "iu"
             and (is_integer(other) or other.dtype.kind in "iu")
@@ -319,8 +347,8 @@ def normalize_binop_value(
         return NotImplemented
 
     def int2ip(self) -> "cudf.core.column.StringColumn":
-        if self.dtype != cudf.dtype("int64"):
-            raise TypeError("Only int64 type can be converted to ip")
+        if self.dtype != cudf.dtype("uint32"):
+            raise TypeError("Only uint32 type can be converted to ip")
 
         return libcudf.string_casting.int2ip(self)
@@ -338,29 +366,23 @@ def as_string_column(self) -> cudf.core.column.StringColumn:
 
     def as_datetime_column(
         self, dtype: Dtype
     ) -> cudf.core.column.DatetimeColumn:
-        return cast(
-            "cudf.core.column.DatetimeColumn",
-            build_column(
-                data=self.astype("int64").base_data,
-                dtype=dtype,
-                mask=self.base_mask,
-                offset=self.offset,
-                size=self.size,
-            ),
+        return cudf.core.column.DatetimeColumn(
+            data=self.astype("int64").base_data,  # type: ignore[arg-type]
+            dtype=dtype,
+            mask=self.base_mask,
+            offset=self.offset,
+            size=self.size,
         )
 
     def as_timedelta_column(
         self, dtype: Dtype
     ) -> cudf.core.column.TimeDeltaColumn:
-        return cast(
-            "cudf.core.column.TimeDeltaColumn",
-            build_column(
-                data=self.astype("int64").base_data,
-                dtype=dtype,
-                mask=self.base_mask,
-                offset=self.offset,
-                size=self.size,
-            ),
+        return cudf.core.column.TimeDeltaColumn(
+            data=self.astype("int64").base_data,  # type: ignore[arg-type]
+            dtype=dtype,
+            mask=self.base_mask,
+            offset=self.offset,
+            size=self.size,
         )
 
     def as_decimal_column(
@@ -532,7 +554,7 @@ def _validate_fillna_value(
     ) -> cudf.Scalar | ColumnBase:
         """Align fill_value for .fillna based on column type."""
         if is_scalar(fill_value):
-            cudf_obj = cudf.Scalar(fill_value)
+            cudf_obj: cudf.Scalar | ColumnBase = cudf.Scalar(fill_value)
             if not as_column(cudf_obj).can_cast_safely(self.dtype):
                 raise TypeError(
                     f"Cannot safely cast non-equivalent "
@@ -567,11 +589,8 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
 
         if self.dtype.kind == "f":
             # Exclude 'np.inf', '-np.inf'
-            s = cudf.Series(self)
-            # TODO: replace np.inf with cudf scalar when
-            # https://github.com/rapidsai/cudf/pull/6297 merges
-            non_infs = s[~((s == np.inf) | (s == -np.inf))]
-            col = non_infs._column
+            not_inf = (self != np.inf) & (self != -np.inf)
+            col = self.apply_boolean_mask(not_inf)
         else:
             col = self
 
@@ -611,8 +630,7 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
             else:
                 filled = self.fillna(0)
                 return (
-                    cudf.Series(filled).astype(to_dtype).astype(filled.dtype)
-                    == cudf.Series(filled)
+                    filled.astype(to_dtype).astype(filled.dtype) == filled
                 ).all()
 
         # want to cast float to int:
@@ -627,24 +645,26 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
             # NOTE(seberg): it would make sense to limit to the mantissa range.
             if (float(self.min()) >= min_) and (float(self.max()) <= max_):
                 filled = self.fillna(0)
-                return (cudf.Series(filled) % 1 == 0).all()
+                return (filled % 1 == 0).all()
             else:
                 return False
 
        return False
 
-    def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase:
+    def _with_type_metadata(self: Self, dtype: Dtype) -> ColumnBase:
         if isinstance(dtype, CategoricalDtype):
-            return column.build_categorical_column(
-                categories=dtype.categories._values,
-                codes=build_column(self.base_data, dtype=self.dtype),
-                mask=self.base_mask,
-                ordered=dtype.ordered,
+            codes = cudf.core.column.categorical.as_unsigned_codes(
+                len(dtype.categories), self
+            )
+            return cudf.core.column.CategoricalColumn(
+                data=None,
                 size=self.size,
+                dtype=dtype,
+                mask=self.base_mask,
                 offset=self.offset,
                 null_count=self.null_count,
+                children=(codes,),
             )
-
         return self
 
     def to_pandas(
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index f41010062c8..3b8dd05c13a 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -9,16 +9,19 @@
 
 import cudf
 from cudf import _lib as libcudf
+from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase
 from cudf.core.missing import NA
 from cudf.core.mixins import Scannable
 
 if TYPE_CHECKING:
     from cudf._typing import ScalarLike
+    from cudf.core.column.decimal import DecimalDtype
 
 
 class NumericalBaseColumn(ColumnBase, Scannable):
-    """A column composed of numerical data.
+    """
+    A column composed of numerical (bool, integer, float, decimal) data.
 
     This class encodes a standard interface for different types of columns
     containing numerical types of data. In particular, mathematical operations
@@ -42,6 +45,30 @@ class NumericalBaseColumn(ColumnBase, Scannable):
         "cummax",
     }
 
+    def __init__(
+        self,
+        data: Buffer,
+        size: int,
+        dtype: DecimalDtype | np.dtype,
+        mask: Buffer | None = None,
+        offset: int = 0,
+        null_count: int | None = None,
+        children: tuple = (),
+    ):
+        if not isinstance(data, Buffer):
+            raise ValueError("data must be a Buffer instance.")
+        if len(children) != 0:
+            raise ValueError(f"{type(self).__name__} must have no children.")
+        super().__init__(
+            data=data,
+            size=size,
+            dtype=dtype,
+            mask=mask,
+            offset=offset,
+            null_count=null_count,
+            children=children,
+        )
+
     def _can_return_nan(self, skipna: bool | None = None) -> bool:
         return not skipna and self.has_nulls()
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index ec95c50f455..16e6908f308 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -358,7 +358,7 @@ def cat(self, others=None, sep=None, na_rep=None):
             )
 
         if len(data) == 1 and data.null_count == 1:
-            data = [""]
+            data = cudf.core.column.as_column("", length=len(data))
         # We only want to keep the index if we are adding something to each
         # row, not if we are joining all the rows into a single string.
         out = self._return_or_inplace(data, retain_index=others is not None)
@@ -549,6 +549,7 @@ def _split_by_character(self):
         offset_col = col.children[0]
 
         return cudf.core.column.ListColumn(
+            data=None,
             size=len(col),
             dtype=cudf.ListDtype(col.dtype),
             mask=col.mask,
@@ -775,11 +776,13 @@ def contains(
             # TODO: we silently ignore the `regex=` flag here
             if case is False:
                 input_column = libstrings.to_lower(self._column)
-                pat = libstrings.to_lower(column.as_column(pat, dtype="str"))
+                col_pat = libstrings.to_lower(
+                    column.as_column(pat, dtype="str")
+                )
             else:
                 input_column = self._column
-                pat = column.as_column(pat, dtype="str")
-            result_col = libstrings.contains_multiple(input_column, pat)
+                col_pat = column.as_column(pat, dtype="str")
+            result_col = libstrings.contains_multiple(input_column, col_pat)
         return self._return_or_inplace(result_col)
 
     def like(self, pat: str, esc: str | None = None) -> SeriesOrIndex:
@@ -3623,7 +3626,7 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex:
         data = libstrings.findall(self._column, pat, flags)
         return self._return_or_inplace(data)
 
-    def find_multiple(self, patterns: SeriesOrIndex) -> "cudf.Series":
+    def find_multiple(self, patterns: SeriesOrIndex) -> cudf.Series:
         """
         Find all first occurrences of patterns in the Series/Index.
@@ -3679,12 +3682,12 @@ def find_multiple(self, patterns: SeriesOrIndex) -> cudf.Series:
                 f"got: {patterns_column.dtype}"
             )
 
-        return cudf.Series(
+        return cudf.Series._from_column(
             libstrings.find_multiple(self._column, patterns_column),
+            name=self._parent.name,
             index=self._parent.index
             if isinstance(self._parent, cudf.Series)
             else self._parent,
-            name=self._parent.name,
         )
 
     def isempty(self) -> SeriesOrIndex:
@@ -4376,14 +4379,9 @@ def code_points(self) -> SeriesOrIndex:
         2    99
         dtype: int32
         """
-
-        new_col = libstrings.code_points(self._column)
-        if isinstance(self._parent, cudf.Series):
-            return cudf.Series(new_col, name=self._parent.name)
-        elif isinstance(self._parent, cudf.BaseIndex):
-            return cudf.Index(new_col, name=self._parent.name)
-        else:
-            return new_col
+        return self._return_or_inplace(
+            libstrings.code_points(self._column), retain_index=False
+        )
 
     def translate(self, table: dict) -> SeriesOrIndex:
         """
@@ -4694,9 +4692,11 @@ def character_tokenize(self) -> SeriesOrIndex:
         if isinstance(self._parent, cudf.Series):
             lengths = self.len().fillna(0)
             index = self._parent.index.repeat(lengths)
-            return cudf.Series(result_col, name=self._parent.name, index=index)
+            return cudf.Series._from_column(
+                result_col, name=self._parent.name, index=index
+            )
         elif isinstance(self._parent, cudf.BaseIndex):
-            return cudf.Index(result_col, name=self._parent.name)
+            return cudf.Index._from_column(result_col, name=self._parent.name)
         else:
             return result_col
@@ -5934,9 +5934,9 @@ def view(self, dtype) -> "cudf.core.column.ColumnBase":
 
         n_bytes_to_view = str_end_byte_offset - str_byte_offset
 
-        to_view = column.build_column(
-            self.base_data,
-            dtype=cudf.api.types.dtype("int8"),
+        to_view = cudf.core.column.NumericalColumn(
+            self.base_data,  # type: ignore[arg-type]
+            dtype=np.dtype(np.int8),
             offset=str_byte_offset,
             size=n_bytes_to_view,
         )
diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py
index c2ce787eeae..2fda3b2c434 100644
--- a/python/cudf/cudf/core/column/struct.py
+++ b/python/cudf/cudf/core/column/struct.py
@@ -14,7 +14,10 @@
 from cudf.core.missing import NA
 
 if TYPE_CHECKING:
+    from typing_extensions import Self
+
     from cudf._typing import Dtype
+    from cudf.core.buffer import Buffer
 
 
 class StructColumn(ColumnBase):
@@ -23,10 +26,39 @@ class StructColumn(ColumnBase):
 
     Every column has n children, where n is the number of fields in the
     Struct Dtype.
-
     """
 
-    dtype: StructDtype
+    def __init__(
+        self,
+        data: None,
+        size: int,
+        dtype: StructDtype,
+        mask: Buffer | None = None,
+        offset: int = 0,
+        null_count: int | None = None,
+        children: tuple[ColumnBase, ...] = (),
+    ):
+        if data is not None:
+            raise ValueError("data must be None.")
+        dtype = self._validate_dtype_instance(dtype)
+        super().__init__(
+            data=data,
+            size=size,
+            dtype=dtype,
+            mask=mask,
+            offset=offset,
+            null_count=null_count,
+            children=children,
+        )
+
+    @staticmethod
+    def _validate_dtype_instance(dtype: StructDtype) -> StructDtype:
+        # IntervalDtype is a subclass of StructDtype, so compare types exactly
+        if type(dtype) is not StructDtype:
+            raise ValueError(
+                f"{type(dtype).__name__} must be a StructDtype exactly."
+            )
+        return dtype
 
     @property
     def base_size(self):
@@ -35,7 +67,7 @@ def base_size(self):
         else:
             return self.size + self.offset
 
-    def to_arrow(self):
+    def to_arrow(self) -> pa.Array:
         children = [
             pa.nulls(len(child))
             if len(child) == child.null_count
@@ -50,7 +82,7 @@ def to_arrow(self):
             }
         )
 
-        if self.nullable:
+        if self.mask is not None:
             buffers = (pa.py_buffer(self.mask.memoryview()),)
         else:
             buffers = (None,)
@@ -73,7 +105,7 @@ def to_pandas(
         return pd.Index(self.to_arrow().tolist(), dtype="object")
 
     @cached_property
-    def memory_usage(self):
+    def memory_usage(self) -> int:
         n = 0
         if self.nullable:
             n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)
@@ -99,7 +131,7 @@ def __setitem__(self, key, value):
             value = cudf.Scalar(value, self.dtype)
         super().__setitem__(key, value)
 
-    def copy(self, deep=True):
+    def copy(self, deep: bool = True) -> Self:
         # Since struct columns are immutable, both deep and
         # shallow copies share the underlying device data and mask.
         result = super().copy(deep=False)
@@ -107,15 +139,15 @@ def copy(self, deep=True):
             result = result._rename_fields(self.dtype.fields.keys())
         return result
 
-    def _rename_fields(self, names):
+    def _rename_fields(self, names) -> Self:
         """
         Return a StructColumn with the same field values as this
         StructColumn, but with the field names equal to `names`.
         """
-        dtype = cudf.core.dtypes.StructDtype(
+        dtype = StructDtype(
             {name: col.dtype for name, col in zip(names, self.children)}
         )
-        return StructColumn(
+        return StructColumn(  # type: ignore[return-value]
             data=None,
             size=self.size,
             dtype=dtype,
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 47c8ed6fd95..6b6f3e517a8 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -75,28 +75,33 @@ class TimeDeltaColumn(ColumnBase):
     def __init__(
         self,
         data: Buffer,
-        dtype: Dtype,
-        size: int | None = None,  # TODO: make non-optional
+        size: int | None,
+        dtype: np.dtype,
         mask: Buffer | None = None,
         offset: int = 0,
         null_count: int | None = None,
+        children: tuple = (),
     ):
-        dtype = cudf.dtype(dtype)
-
-        if dtype.kind != "m":
-            raise TypeError(f"{self.dtype} is not a supported duration type")
+        if not isinstance(data, Buffer):
+            raise ValueError("data must be a Buffer.")
+        if not (isinstance(dtype, np.dtype) and dtype.kind == "m"):
+            raise ValueError("dtype must be a timedelta numpy dtype.")
 
         if data.size % dtype.itemsize:
             raise ValueError("Buffer size must be divisible by element size")
         if size is None:
             size = data.size // dtype.itemsize
             size = size - offset
+        if len(children) != 0:
+            raise ValueError("TimeDeltaColumn must have no children.")
         super().__init__(
-            data,
+            data=data,
             size=size,
             dtype=dtype,
             mask=mask,
             offset=offset,
             null_count=null_count,
+            children=children,
         )
 
     def __contains__(self, item: DatetimeLikeScalar) -> bool:
@@ -265,10 +270,10 @@ def round(self, freq: str) -> ColumnBase:
 
     def as_numerical_column(
         self, dtype: Dtype
-    ) -> "cudf.core.column.NumericalColumn":
-        col = column.build_column(
-            data=self.base_data,
-            dtype=np.int64,
+    ) -> cudf.core.column.NumericalColumn:
+        col = cudf.core.column.NumericalColumn(
+            data=self.base_data,  # type: ignore[arg-type]
+            dtype=np.dtype(np.int64),
             mask=self.base_mask,
             offset=self.offset,
             size=self.size,
diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 819d351b2c4..09b0f453692 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -6,7 +6,7 @@
 import sys
 from collections import abc
 from functools import cached_property, reduce
-from typing import TYPE_CHECKING, Any, Callable, Mapping
+from typing import TYPE_CHECKING, Any, Mapping, cast
 
 import numpy as np
 import pandas as pd
@@ -35,7 +35,7 @@ class _NestedGetItemDict(dict):
     """
 
     @classmethod
-    def from_zip(cls, data):
+    def from_zip(cls, data: abc.Iterator):
         """Create from zip, specialized factory for nesting."""
         obj = cls()
         for key, value in data:
@@ -91,18 +91,18 @@ class ColumnAccessor(abc.MutableMapping):
         column length and data.values() are all Columns
     """
 
-    _data: dict[Any, ColumnBase]
-    _level_names: tuple[Any, ...]
+    _data: dict[abc.Hashable, ColumnBase]
+    _level_names: tuple[abc.Hashable, ...]
 
     def __init__(
         self,
-        data: abc.MutableMapping[Any, ColumnBase] | Self,
+        data: abc.MutableMapping[abc.Hashable, ColumnBase] | Self,
         multiindex: bool = False,
         level_names=None,
         rangeindex: bool = False,
         label_dtype: Dtype | None = None,
         verify: bool = True,
-    ):
+    ) -> None:
         if isinstance(data, ColumnAccessor):
             self._data = data._data
             self._level_names = data.level_names
@@ -141,16 +141,16 @@ def __init__(
                 f"data must be a ColumnAccessor or MutableMapping, not {type(data).__name__}"
             )
 
-    def __iter__(self):
+    def __iter__(self) -> abc.Iterator:
         return iter(self._data)
 
-    def __getitem__(self, key: Any) -> ColumnBase:
+    def __getitem__(self, key: abc.Hashable) -> ColumnBase:
         return self._data[key]
 
-    def __setitem__(self, key: Any, value: Any):
+    def __setitem__(self, key: abc.Hashable, value: ColumnBase) -> None:
         self.set_by_label(key, value)
 
-    def __delitem__(self, key: Any):
+    def __delitem__(self, key: abc.Hashable) -> None:
         old_ncols = len(self._data)
         del self._data[key]
         new_ncols = len(self._data)
@@ -174,7 +174,7 @@ def __repr__(self) -> str:
 
     def _from_columns_like_self(
         self, columns: abc.Iterable[ColumnBase], verify: bool = True
-    ):
+    ) -> Self:
         """
         Return a new ColumnAccessor with columns and the properties of self.
 
@@ -186,7 +186,7 @@ def _from_columns_like_self(
             Whether to verify column length and type.
         """
         if sys.version_info.major >= 3 and sys.version_info.minor >= 10:
-            data = zip(self.names, columns, strict=True)
+            data = zip(self.names, columns, strict=True)  # type: ignore[call-overload]
         else:
             columns = list(columns)
             if len(columns) != len(self.names):
@@ -205,7 +205,7 @@ def _from_columns_like_self(
         )
 
     @property
-    def level_names(self) -> tuple[Any, ...]:
+    def level_names(self) -> tuple[abc.Hashable, ...]:
         if self._level_names is None or len(self._level_names) == 0:
             return tuple((None,) * max(1, self.nlevels))
         else:
@@ -221,7 +221,7 @@ def nlevels(self) -> int:
             return len(next(iter(self.keys())))
 
     @property
-    def name(self) -> Any:
+    def name(self) -> abc.Hashable:
         return self.level_names[-1]
 
     @cached_property
@@ -232,7 +232,7 @@ def nrows(self) -> int:
             return len(next(iter(self.values())))
 
     @cached_property
-    def names(self) -> tuple[Any, ...]:
+    def names(self) -> tuple[abc.Hashable, ...]:
         return tuple(self.keys())
 
     @cached_property
@@ -250,7 +250,7 @@ def _grouped_data(self) -> abc.MutableMapping:
         else:
             return self._data
 
-    def _clear_cache(self, old_ncols: int, new_ncols: int):
+    def _clear_cache(self, old_ncols: int, new_ncols: int) -> None:
         """
         Clear cached attributes.
@@ -291,7 +291,7 @@ def to_pandas_index(self) -> pd.Index:
             )
         elif cudf.api.types.infer_dtype(self.names) == "integer":
             if len(self.names) == 1:
-                start = self.names[0]
+                start = cast(int, self.names[0])
                 return pd.RangeIndex(
                     start=start, stop=start + 1, step=1, name=self.name
                 )
@@ -299,7 +299,9 @@ def to_pandas_index(self) -> pd.Index:
             if len(uniques) == 1 and uniques[0] != 0:
                 diff = uniques[0]
                 new_range = range(
-                    self.names[0], self.names[-1] + diff, diff
+                    cast(int, self.names[0]),
+                    cast(int, self.names[-1]) + diff,
+                    diff,
                 )
                 return pd.RangeIndex(new_range, name=self.name)
         result = pd.Index(
@@ -311,15 +313,15 @@ def to_pandas_index(self) -> pd.Index:
         return result
 
     def insert(
-        self, name: Any, value: Any, loc: int = -1, validate: bool = True
-    ):
+        self, name: abc.Hashable, value: ColumnBase, loc: int = -1
+    ) -> None:
         """
         Insert column into the ColumnAccessor at the specified location.
 
         Parameters
         ----------
         name : Name corresponding to the new column
-        value : column-like
+        value : ColumnBase
         loc : int, optional
             The location to insert the new value at.
             Must be (0 <= loc <= ncols). By default, the column is added
@@ -330,30 +332,35 @@ def insert(
             None, this function operates in-place.
         """
         name = self._pad_key(name)
+        if name in self._data:
+            raise ValueError(f"Cannot insert '{name}', already exists")
 
         old_ncols = len(self._data)
         if loc == -1:
             loc = old_ncols
-        if not (0 <= loc <= old_ncols):
+        elif not (0 <= loc <= old_ncols):
             raise ValueError(
                 f"insert: loc out of bounds: must be 0 <= loc <= {old_ncols}"
             )
+
+        if not isinstance(value, column.ColumnBase):
+            raise ValueError("value must be a Column")
+        elif old_ncols > 0 and len(value) != self.nrows:
+            raise ValueError("All columns must be of equal length")
+
         # TODO: we should move all insert logic here
-        if name in self._data:
-            raise ValueError(f"Cannot insert '{name}', already exists")
         if loc == old_ncols:
-            if validate:
-                value = column.as_column(value)
-                if old_ncols > 0 and len(value) != self.nrows:
-                    raise ValueError("All columns must be of equal length")
             self._data[name] = value
         else:
             new_keys = self.names[:loc] + (name,) + self.names[loc:]
             new_values = self.columns[:loc] + (value,) + self.columns[loc:]
-            self._data = self._data.__class__(zip(new_keys, new_values))
+            self._data = dict(zip(new_keys, new_values))
         self._clear_cache(old_ncols, old_ncols + 1)
+        if old_ncols == 0:
+            # The type(name) may no longer match the prior label_dtype
+            self.label_dtype = None
 
-    def copy(self, deep=False) -> ColumnAccessor:
+    def copy(self, deep: bool = False) -> Self:
         """
         Make a copy of this ColumnAccessor.
         """
@@ -370,7 +377,7 @@ def copy(self, deep=False) -> ColumnAccessor:
             verify=False,
         )
 
-    def select_by_label(self, key: Any) -> ColumnAccessor:
+    def select_by_label(self, key: Any) -> Self:
         """
         Return a subset of this column accessor,
         composed of the keys specified by `key`.
@@ -386,7 +393,7 @@ def select_by_label(self, key: Any) -> Self:
         if isinstance(key, slice):
             return self._select_by_label_slice(key)
         elif pd.api.types.is_list_like(key) and not isinstance(key, tuple):
-            return self._select_by_label_list_like(key)
+            return self._select_by_label_list_like(tuple(key))
         else:
             if isinstance(key, tuple):
                 if any(isinstance(k, slice) for k in key):
@@ -424,9 +431,13 @@ def get_labels_by_index(self, index: Any) -> tuple:
             # TODO: Doesn't handle on-device columns
             return tuple(n for n, keep in zip(self.names, index) if keep)
         else:
+            if len(set(index)) != len(index):
+                raise NotImplementedError(
+                    "Selecting duplicate column labels is not supported."
+                )
             return tuple(self.names[i] for i in index)
 
-    def select_by_index(self, index: Any) -> ColumnAccessor:
+    def select_by_index(self, index: Any) -> Self:
         """
         Return a ColumnAccessor composed of the columns
         specified by index.
@@ -442,13 +453,15 @@ def select_by_index(self, index: Any) -> Self:
         """
         keys = self.get_labels_by_index(index)
         data = {k: self._data[k] for k in keys}
-        return self.__class__(
+        return type(self)(
             data,
             multiindex=self.multiindex,
             level_names=self.level_names,
+            label_dtype=self.label_dtype,
+            verify=False,
         )
 
-    def swaplevel(self, i=-2, j=-1):
+    def swaplevel(self, i: abc.Hashable = -2, j: abc.Hashable = -1) -> Self:
         """
         Swap level i with level j.
         Calling this method does not change the ordering of the values.
@@ -464,6 +477,10 @@ def swaplevel(self, i=-2, j=-1):
         -------
         ColumnAccessor
         """
+        if not self.multiindex:
+            raise ValueError(
+                "swaplevel is only valid for self.multiindex=True"
+            )
         i = _get_level(i, self.nlevels, self.level_names)
         j = _get_level(j, self.nlevels, self.level_names)
 
@@ -473,7 +490,7 @@ def swaplevel(self, i=-2, j=-1):
 
         # swap old keys for i and j
         for n, row in enumerate(self.names):
-            new_keys[n][i], new_keys[n][j] = row[j], row[i]
+            new_keys[n][i], new_keys[n][j] = row[j], row[i]  # type: ignore[call-overload, index]
             new_dict.update({row: tuple(new_keys[n])})
 
         # TODO: Change to deep=False when copy-on-write is default
@@ -481,15 +498,18 @@ def swaplevel(self, i=-2, j=-1):
 
         # swap level_names for i and j
         new_names = list(self.level_names)
-        new_names[i], new_names[j] = new_names[j], new_names[i]
+        new_names[i], new_names[j] = new_names[j], new_names[i]  # type: ignore[call-overload]
 
-        return self.__class__(
-            new_data,
-            multiindex=True,
+        return type(self)(
+            new_data,  # type: ignore[arg-type]
+            multiindex=self.multiindex,
             level_names=new_names,
+            rangeindex=self.rangeindex,
+            label_dtype=self.label_dtype,
+            verify=False,
         )
 
-    def set_by_label(self, key: Any, value: Any, validate: bool = True):
+    def set_by_label(self, key: abc.Hashable, value: ColumnBase) -> None:
         """
         Add (or modify) column by name.
 
@@ -497,26 +517,21 @@ def set_by_label(
         ----------
         key
             name of the column
-        value : column-like
+        value : Column
             The value to insert into the column.
-        validate : bool
-            If True, the provided value will be coerced to a column and
-            validated before setting (Default value = True).
""" key = self._pad_key(key) - if validate: - value = column.as_column(value) - if len(self._data) > 0 and len(value) != self.nrows: - raise ValueError("All columns must be of equal length") + if not isinstance(value, column.ColumnBase): + raise ValueError("value must be a Column") + if len(self) > 0 and len(value) != self.nrows: + raise ValueError("All columns must be of equal length") old_ncols = len(self._data) self._data[key] = value new_ncols = len(self._data) self._clear_cache(old_ncols, new_ncols) - def _select_by_label_list_like(self, key: Any) -> ColumnAccessor: - # Might be a generator - key = tuple(key) + def _select_by_label_list_like(self, key: tuple) -> Self: # Special-casing for boolean mask if (bn := len(key)) > 0 and all(map(is_bool, key)): if bn != (n := len(self.names)): @@ -530,21 +545,28 @@ def _select_by_label_list_like(self, key: Any) -> ColumnAccessor: ) else: data = {k: self._grouped_data[k] for k in key} + if len(data) != len(key): + raise ValueError( + "Selecting duplicate column labels is not supported." + ) if self.multiindex: data = dict(_to_flat_dict_inner(data)) - return self.__class__( + return type(self)( data, multiindex=self.multiindex, level_names=self.level_names, + label_dtype=self.label_dtype, + verify=False, ) - def _select_by_label_grouped(self, key: Any) -> ColumnAccessor: + def _select_by_label_grouped(self, key: abc.Hashable) -> Self: result = self._grouped_data[key] if isinstance(result, column.ColumnBase): # self._grouped_data[key] = self._data[key] so skip validation - return self.__class__( + return type(self)( data={key: result}, multiindex=self.multiindex, + label_dtype=self.label_dtype, verify=False, ) else: @@ -556,9 +578,10 @@ def _select_by_label_grouped(self, key: Any) -> ColumnAccessor: result, multiindex=self.nlevels - len(key) > 1, level_names=self.level_names[len(key) :], + verify=False, ) - def _select_by_label_slice(self, key: slice) -> ColumnAccessor: + def _select_by_label_slice(self, key: slice) -> Self: start, stop = key.start, key.stop if key.step is not None: raise TypeError("Label slicing with step is not supported") @@ -578,23 +601,32 @@ def _select_by_label_slice(self, key: slice) -> ColumnAccessor: stop_idx = len(self.names) - idx break keys = self.names[start_idx:stop_idx] - return self.__class__( + return type(self)( {k: self._data[k] for k in keys}, multiindex=self.multiindex, level_names=self.level_names, + label_dtype=self.label_dtype, verify=False, ) - def _select_by_label_with_wildcard(self, key: Any) -> ColumnAccessor: - key = self._pad_key(key, slice(None)) - return self.__class__( - {k: self._data[k] for k in self._data if _keys_equal(k, key)}, + def _select_by_label_with_wildcard(self, key: tuple) -> Self: + pad_key = self._pad_key(key, slice(None)) + data = { + k: self._data[k] + for k in self.names + if _keys_equal(k, pad_key) # type: ignore[arg-type] + } + return type(self)( + data, multiindex=self.multiindex, level_names=self.level_names, + label_dtype=self.label_dtype, verify=False, ) - def _pad_key(self, key: Any, pad_value="") -> Any: + def _pad_key( + self, key: abc.Hashable, pad_value: str | slice = "" + ) -> abc.Hashable: """ Pad the provided key to a length equal to the number of levels. 
@@ -606,8 +638,10 @@ def _pad_key(self, key: Any, pad_value="") -> Any: return key + (pad_value,) * (self.nlevels - len(key)) def rename_levels( - self, mapper: Mapping[Any, Any] | Callable, level: int | None - ) -> ColumnAccessor: + self, + mapper: Mapping[abc.Hashable, abc.Hashable] | abc.Callable, + level: int | None = None, + ) -> Self: """ Rename the specified levels of the given ColumnAccessor @@ -649,10 +683,7 @@ def rename_column(x): return x if level is None: - raise NotImplementedError( - "Renaming columns with a MultiIndex and level=None is" - "not supported" - ) + level = 0 new_col_names = (rename_column(k) for k in self.keys()) else: @@ -682,14 +713,14 @@ def rename_column(x): verify=False, ) - def droplevel(self, level): + def droplevel(self, level: int) -> None: # drop the nth level if level < 0: level += self.nlevels old_ncols = len(self._data) self._data = { - _remove_key_level(key, level): value + _remove_key_level(key, level): value # type: ignore[arg-type] for key, value in self._data.items() } new_ncols = len(self._data) @@ -697,14 +728,13 @@ def droplevel(self, level): self._level_names[:level] + self._level_names[level + 1 :] ) - if ( - len(self._level_names) == 1 - ): # can't use nlevels, as it depends on multiindex + if len(self._level_names) == 1: + # can't use nlevels, as it depends on multiindex self.multiindex = False self._clear_cache(old_ncols, new_ncols) -def _keys_equal(target: Any, key: Any) -> bool: +def _keys_equal(target: abc.Hashable, key: abc.Iterable) -> bool: """ Compare `key` to `target`. @@ -722,7 +752,7 @@ def _keys_equal(target: Any, key: Any) -> bool: return True -def _remove_key_level(key: Any, level: int) -> Any: +def _remove_key_level(key: tuple, level: int) -> abc.Hashable: """ Remove a level from key. If detupleize is True, and if only a single level remains, convert the tuple to a scalar. @@ -733,7 +763,9 @@ def _remove_key_level(key: Any, level: int) -> Any: return result -def _get_level(x, nlevels, level_names): +def _get_level( + x: abc.Hashable, nlevels: int, level_names: tuple[abc.Hashable, ...] +) -> abc.Hashable: """Get the level index from a level number or name. If given an integer, this function will handle wraparound for diff --git a/python/cudf/cudf/core/copy_types.py b/python/cudf/cudf/core/copy_types.py index 6afbc0bbc65..16d8964f083 100644 --- a/python/cudf/cudf/core/copy_types.py +++ b/python/cudf/cudf/core/copy_types.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. from dataclasses import dataclass from typing import TYPE_CHECKING, Any, cast @@ -44,15 +44,17 @@ class GatherMap: If the map is not in bounds. """ - #: The gather map - column: "NumericalColumn" #: The number of rows the gather map has been validated for nrows: int #: Was the validation for nullify=True? 
nullify: bool def __init__(self, column: Any, nrows: int, *, nullify: bool): - self.column = cudf.core.column.as_column(column) + #: The gather map + self.column = cast( + cudf.core.column.NumericalColumn, + cudf.core.column.as_column(column), + ) self.nrows = nrows self.nullify = nullify if len(self.column) == 0: @@ -135,11 +137,12 @@ class BooleanMask: If the mask has the wrong number of rows """ - #: The boolean mask - column: "NumericalColumn" - def __init__(self, column: Any, nrows: int): - self.column = cudf.core.column.as_column(column) + #: The boolean mask + self.column = cast( + cudf.core.column.NumericalColumn, + cudf.core.column.as_column(column), + ) if self.column.dtype.kind != "b": raise TypeError("Boolean mask must have bool dtype") if len(column) != nrows: diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 197f46ee9fe..c9b1fa2669c 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -8,7 +8,8 @@ import cudf from cudf.api.types import is_list_like -from cudf.core.column import as_column, build_categorical_column +from cudf.core.column import as_column +from cudf.core.column.categorical import CategoricalColumn, as_unsigned_codes from cudf.core.index import IntervalIndex, interval_range @@ -282,17 +283,21 @@ def cut( # should allow duplicate categories. return interval_labels[index_labels] - col = build_categorical_column( - categories=interval_labels, - codes=index_labels, + index_labels = as_unsigned_codes(len(interval_labels), index_labels) + + col = CategoricalColumn( + data=None, + size=index_labels.size, + dtype=cudf.CategoricalDtype( + categories=interval_labels, ordered=ordered + ), mask=index_labels.base_mask, offset=index_labels.offset, - size=index_labels.size, - ordered=ordered, + children=(index_labels,), ) # we return a categorical index, as we don't have a Categorical method - categorical_index = cudf.CategoricalIndex._from_data({None: col}) + categorical_index = cudf.CategoricalIndex._from_column(col) if isinstance(orig_x, (pd.Series, cudf.Series)): # if we have a series input we return a series output diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 52dc29974bf..7a171fe9e05 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -13,8 +13,8 @@ import textwrap import warnings from collections import abc, defaultdict -from collections.abc import Iterator -from typing import TYPE_CHECKING, Any, Callable, Literal, MutableMapping, cast +from collections.abc import Callable, Iterator +from typing import TYPE_CHECKING, Any, Literal, MutableMapping, cast import cupy import numba @@ -48,11 +48,10 @@ ColumnBase, StructColumn, as_column, - build_categorical_column, - build_column, column_empty, concat_columns, ) +from cudf.core.column.categorical import as_unsigned_codes from cudf.core.column_accessor import ColumnAccessor from cudf.core.copy_types import BooleanMask from cudf.core.groupby.groupby import DataFrameGroupBy, groupby_doc_template @@ -326,7 +325,7 @@ def _getitem_tuple_arg(self, arg): range(len(tmp_arg[0])) ) }, - index=cudf.Index(tmp_arg[0]), + index=cudf.Index._from_column(tmp_arg[0]), ) columns_df[cantor_name] = column.as_column( range(len(columns_df)) @@ -382,17 +381,20 @@ def _setitem_tuple_arg(self, key, value): length = len(idx) if idx is not None else 1 value = as_column(value, length=length) - new_col = cudf.Series(value, index=idx) + if isinstance(value, ColumnBase): + new_ser = cudf.Series._from_column(value, 
index=idx) + else: + new_ser = cudf.Series(value, index=idx) if len(self._frame.index) != 0: - new_col = new_col._align_to_index( + new_ser = new_ser._align_to_index( self._frame.index, how="right" ) if len(self._frame.index) == 0: self._frame.index = ( - idx if idx is not None else cudf.RangeIndex(len(new_col)) + idx if idx is not None else cudf.RangeIndex(len(new_ser)) ) - self._frame._data.insert(key[1], new_col) + self._frame._data.insert(key[1], new_ser._column) else: if is_scalar(value): for col in columns_df._column_names: @@ -412,8 +414,9 @@ def _setitem_tuple_arg(self, key, value): ) else: - value = cupy.asarray(value) - if value.ndim == 2: + if not is_column_like(value): + value = cupy.asarray(value) + if getattr(value, "ndim", 1) == 2: # If the inner dimension is 1, it's broadcastable to # all columns of the dataframe. indexed_shape = columns_df.loc[key[0]].shape @@ -470,15 +473,8 @@ def __getitem__(self, arg): ca = self._frame._data index = self._frame.index if col_is_scalar: - s = Series._from_data( - data=ColumnAccessor( - {key: ca._data[key] for key in column_names}, - multiindex=ca.multiindex, - level_names=ca.level_names, - verify=False, - ), - index=index, - ) + name = column_names[0] + s = Series._from_column(ca._data[name], name=name, index=index) return s._getitem_preprocessed(row_spec) if column_names != list(self._frame._column_names): frame = self._frame._from_data( @@ -500,28 +496,33 @@ def __getitem__(self, arg): return frame._slice(row_spec.key) elif isinstance(row_spec, indexing_utils.ScalarIndexer): result = frame._gather(row_spec.key, keep_index=True) + new_name = result.index[0] + new_index = ensure_index(result.keys()) # Attempt to turn into series. - try: - # Behaviour difference from pandas, which will merrily - # turn any heterogeneous set of columns into a series if - # you only ask for one row. - new_name = result.index[0] - result = Series._concat( - [result[name] for name in column_names], - index=result.keys(), - ) - result.name = new_name - return result - except TypeError: - # Couldn't find a common type, Hence: - # Raise in pandas compatibility mode, - # or just return a 1xN dataframe otherwise - if cudf.get_option("mode.pandas_compatible"): - raise TypeError( - "All columns need to be of same type, please " - "typecast to common dtype." + if len(column_names) == 0: + return Series([], index=new_index, name=new_name) + else: + try: + # Behaviour difference from pandas, which will merrily + # turn any heterogeneous set of columns into a series if + # you only ask for one row. + ser = Series._concat( + [result[name] for name in column_names], ) - return result + except TypeError as err: + # Couldn't find a common type, Hence: + # Raise in pandas compatibility mode, + # or just return a 1xN dataframe otherwise + if cudf.get_option("mode.pandas_compatible"): + raise TypeError( + "All columns need to be of same type, please " + "typecast to common dtype." 
+ ) from err + return result + else: + ser.index = new_index + ser.name = new_name + return ser elif isinstance(row_spec, indexing_utils.EmptyIndexer): return frame._empty_like(keep_index=True) assert_never(row_spec) @@ -551,8 +552,9 @@ def _setitem_tuple_arg(self, key, value): else: # TODO: consolidate code path with identical counterpart # in `_DataFrameLocIndexer._setitem_tuple_arg` - value = cupy.asarray(value) - if value.ndim == 2: + if not is_column_like(value): + value = cupy.asarray(value) + if getattr(value, "ndim", 1) == 2: indexed_shape = columns_df.iloc[key[0]].shape if value.shape[1] == 1: if value.shape[0] != indexed_shape[0]: @@ -671,7 +673,9 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin): 3 3 0.3 """ - _PROTECTED_KEYS = frozenset(("_data", "_index")) + _PROTECTED_KEYS = frozenset( + ("_data", "_index", "_ipython_canary_method_should_not_exist_") + ) _accessors: set[Any] = set() _loc_indexer_type = _DataFrameLocIndexer _iloc_indexer_type = _DataFrameIlocIndexer @@ -690,7 +694,7 @@ def __init__( ): if copy is not None: raise NotImplementedError("copy is not currently implemented.") - super().__init__() + super().__init__({}, index=cudf.Index([])) if nan_as_null is no_default: nan_as_null = not cudf.get_option("mode.pandas_compatible") @@ -974,6 +978,7 @@ def _init_from_series_list(self, data, columns, index): self._data.rangeindex = isinstance( columns, (range, cudf.RangeIndex, pd.RangeIndex) ) + self._data.label_dtype = pd.Index(columns).dtype else: self._data.rangeindex = True @@ -1488,14 +1493,14 @@ def __delitem__(self, name): self._drop_column(name) @_performance_tracking - def memory_usage(self, index=True, deep=False): + def memory_usage(self, index=True, deep=False) -> cudf.Series: mem_usage = [col.memory_usage for col in self._data.columns] names = [str(name) for name in self._data.names] if index: mem_usage.append(self.index.memory_usage()) names.append("Index") - return Series._from_data( - data={None: as_column(mem_usage)}, + return Series._from_column( + as_column(mem_usage), index=cudf.Index(names), ) @@ -1750,9 +1755,9 @@ def _concat( for cols in columns: table_index = None if 1 == first_data_column_position: - table_index = cudf.Index(cols[0]) + table_index = cudf.Index._from_column(cols[0]) elif first_data_column_position > 1: - table_index = DataFrame._from_data( + table_index = cudf.MultiIndex._from_data( data=dict( zip( indices[:first_data_column_position], @@ -1802,7 +1807,7 @@ def _concat( if not isinstance(out.index, MultiIndex) and isinstance( out.index.dtype, cudf.CategoricalDtype ): - out = out.set_index(cudf.Index(out.index._values)) + out = out.set_index(out.index) for name, col in out._data.items(): out._data[name] = col._with_type_metadata( tables[0]._data[name].dtype @@ -2647,8 +2652,12 @@ def columns(self, columns): elif isinstance(columns, (cudf.BaseIndex, ColumnBase, Series)): level_names = (getattr(columns, "name", None),) rangeindex = isinstance(columns, cudf.RangeIndex) - columns = as_column(columns) - if columns.distinct_count(dropna=False) != len(columns): + if rangeindex: + unique_count = len(columns) + else: + columns = as_column(columns) + unique_count = columns.distinct_count(dropna=False) + if unique_count != len(columns): raise ValueError("Duplicate column names are not allowed") pd_columns = pd.Index(columns.to_pandas()) label_dtype = pd_columns.dtype @@ -2999,7 +3008,7 @@ def set_index( and not isinstance(keys[0], (cudf.MultiIndex, pd.MultiIndex)) ): # Don't turn single level MultiIndex into an Index - idx = 
cudf.Index(data_to_add[0], name=names[0]) + idx = cudf.Index._from_column(data_to_add[0], name=names[0]) else: idx = MultiIndex._from_data(dict(enumerate(data_to_add))) idx.names = names @@ -3051,7 +3060,6 @@ def where(self, cond, other=None, inplace=False, axis=None, level=None): from cudf.core._internals.where import ( _check_and_cast_columns_with_other, - _make_categorical_like, ) # First process the condition. @@ -3103,7 +3111,7 @@ def where(self, cond, other=None, inplace=False, axis=None, level=None): out = [] for (name, col), other_col in zip(self._data.items(), other_cols): - col, other_col = _check_and_cast_columns_with_other( + source_col, other_col = _check_and_cast_columns_with_other( source_col=col, other=other_col, inplace=inplace, @@ -3111,16 +3119,16 @@ def where(self, cond, other=None, inplace=False, axis=None, level=None): if cond_col := cond._data.get(name): result = cudf._lib.copying.copy_if_else( - col, other_col, cond_col + source_col, other_col, cond_col ) - out.append(_make_categorical_like(result, self._data[name])) + out.append(result._with_type_metadata(col.dtype)) else: out_mask = cudf._lib.null_mask.create_null_mask( - len(col), + len(source_col), state=cudf._lib.null_mask.MaskState.ALL_NULL, ) - out.append(col.set_mask(out_mask)) + out.append(source_col.set_mask(out_mask)) return self._mimic_inplace( self._from_data_like_self(self._data._from_columns_like_self(out)), @@ -3261,9 +3269,6 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): If False, a reindexing operation is performed if `value.index` is not equal to `self.index`. """ - if name in self._data: - raise NameError(f"duplicated column name {name}") - num_cols = self._num_columns if loc < 0: loc += num_cols + 1 @@ -3283,9 +3288,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): # least require a deprecation cycle because we currently support # inserting a pd.Categorical. 
if isinstance(value, pd.Categorical): - value = cudf.core.column.categorical.pandas_categorical_as_column( - value - ) + value = as_column(value) if _is_scalar_or_zero_d_array(value): dtype = None @@ -3803,7 +3806,9 @@ def agg(self, aggs, axis=None): col_empty = column_empty( len(idxs), dtype=col.dtype, masked=True ) - ans = cudf.Series(data=col_empty, index=idxs) + ans = cudf.Series._from_column( + col_empty, index=cudf.Index(idxs) + ) if isinstance(aggs.get(key), abc.Iterable): # TODO : Allow simultaneous pass for multi-aggregation # as a future optimization @@ -4801,7 +4806,7 @@ def _func(x): # pragma: no cover # this could be written as a single kernel result = {} for name, col in self._data.items(): - apply_sr = Series._from_data({None: col}) + apply_sr = Series._from_column(col) result[name] = apply_sr.apply(_func)._column return DataFrame._from_data(result, index=self.index) @@ -5484,14 +5489,9 @@ def from_pandas(cls, dataframe, nan_as_null=no_default): ) if isinstance(dataframe, pd.DataFrame): - if not dataframe.columns.is_unique: - raise ValueError("Duplicate column names are not allowed") - data = { - col_name: column.as_column( - col_value.array, nan_as_null=nan_as_null - ) - for col_name, col_value in dataframe.items() + i: column.as_column(col_value.array, nan_as_null=nan_as_null) + for i, (_, col_value) in enumerate(dataframe.items()) } if isinstance(dataframe.index, pd.MultiIndex): index = cudf.MultiIndex.from_pandas( @@ -5502,14 +5502,8 @@ def from_pandas(cls, dataframe, nan_as_null=no_default): dataframe.index, nan_as_null=nan_as_null ) df = cls._from_data(data, index) - df._data._level_names = tuple(dataframe.columns.names) - - if isinstance(dataframe.columns, pd.RangeIndex): - df._data.rangeindex = True - # Set columns only if it is a MultiIndex - elif isinstance(dataframe.columns, pd.MultiIndex): - df.columns = dataframe.columns - + # Checks duplicate columns and sets column metadata + df.columns = dataframe.columns return df elif hasattr(dataframe, "__dataframe__"): # TODO: Probably should be handled in the constructor as @@ -5830,7 +5824,7 @@ def from_records( df = cls._from_data( ColumnAccessor( - data=ca_data, + data=ca_data, # type: ignore[arg-type] multiindex=isinstance( columns, (pd.MultiIndex, cudf.MultiIndex) ), @@ -6083,8 +6077,8 @@ def quantile( if q_is_number: result = result.transpose() - return Series( - data=result._columns[0], index=result.index, name=q + return Series._from_column( + result._columns[0], name=q, index=result.index ) else: # Ensure that qs is non-scalar so that we always get a column back. @@ -6346,13 +6340,9 @@ def count(self, axis=0, numeric_only=False): if axis != 0: raise NotImplementedError("Only axis=0 is currently supported.") length = len(self) - return Series._from_data( - { - None: as_column( - [length - col.null_count for col in self._columns] - ) - }, - cudf.Index(self._data.names), + return Series._from_column( + as_column([length - col.null_count for col in self._columns]), + index=cudf.Index(self._data.names), ) _SUPPORT_AXIS_LOOKUP = { @@ -6373,8 +6363,11 @@ def _reduce( source = self if axis is None: + assert PANDAS_LT_300, "Replace if/else with just axis=2" + # TODO(pandas3.0): Remove if/else for just axis = 2 if op in {"sum", "product", "std", "var"}: - # Do not remove until pandas 2.0 support is added. 
+ # pandas only raises FutureWarning for these ops + # though it applies for all reductions warnings.warn( f"In a future version, {type(self).__name__}" f".{op}(axis=None) will return a scalar {op} over " @@ -6393,9 +6386,7 @@ def _reduce( if numeric_only: numeric_cols = ( - name - for name in self._data.names - if is_numeric_dtype(self._data[name].dtype) + name for name, dtype in self._dtypes if is_numeric_dtype(dtype) ) source = self._get_columns_by_label(numeric_cols) if source.empty: @@ -6405,62 +6396,41 @@ def _reduce( else source.index, dtype="float64", ) - if axis in {0, 2}: - if axis == 2 and op in ("kurtosis", "kurt", "skew"): - # TODO: concat + op can probably be done in the general case - # for axis == 2. - # https://github.com/rapidsai/cudf/issues/14930 - return getattr(concat_columns(source._data.columns), op)( - **kwargs - ) - try: - result = [ - getattr(source._data[col], op)(**kwargs) - for col in source._data.names - ] - except AttributeError: - numeric_ops = ( - "mean", - "min", - "max", - "sum", - "product", - "prod", - "std", - "var", - "kurtosis", - "kurt", - "skew", - ) - - if op in numeric_ops: + if ( + axis == 2 + and op in {"kurtosis", "skew"} + and self._num_rows < 4 + and self._num_columns > 1 + ): + # Total number of elements may satisfy the min number of values + # to compute skew/kurtosis + return getattr(concat_columns(source._columns), op)(**kwargs) + elif axis == 1: + return source._apply_cupy_method_axis_1(op, **kwargs) + else: + axis_0_results = [] + for col_label, col in source._data.items(): + try: + axis_0_results.append(getattr(col, op)(**kwargs)) + except AttributeError as err: if numeric_only: - try: - result = [ - getattr(source._data[col], op)(**kwargs) - for col in source._data.names - ] - except AttributeError: - raise NotImplementedError( - f"Not all column dtypes support op {op}" - ) - elif any( - not is_numeric_dtype(self._data[name].dtype) - for name in self._data.names - ): + raise NotImplementedError( + f"Column {col_label} with type {col.dtype} does not support {op}" + ) from err + elif not is_numeric_dtype(col.dtype): raise TypeError( "Non numeric columns passed with " "`numeric_only=False`, pass `numeric_only=True` " f"to perform DataFrame.{op}" - ) - else: - raise + ) from err + else: + raise if axis == 2: - return getattr(as_column(result, nan_as_null=False), op)( - **kwargs - ) + return getattr( + as_column(axis_0_results, nan_as_null=False), op + )(**kwargs) else: - source_dtypes = [c.dtype for c in source._data.columns] + source_dtypes = [dtype for _, dtype in source._dtypes] common_dtype = find_common_type(source_dtypes) if ( is_object_dtype(common_dtype) @@ -6474,17 +6444,14 @@ def _reduce( "Columns must all have the same dtype to " f"perform {op=} with {axis=}" ) + pd_index = source._data.to_pandas_index() if source._data.multiindex: - idx = MultiIndex.from_tuples( - source._data.names, names=source._data.level_names - ) + idx = MultiIndex.from_pandas(pd_index) else: - idx = cudf.Index(source._data.names) - return Series._from_data({None: as_column(result)}, idx) - elif axis == 1: - return source._apply_cupy_method_axis_1(op, **kwargs) - else: - raise ValueError(f"Invalid value of {axis=} received for {op}") + idx = cudf.Index.from_pandas(pd_index) + return Series._from_column( + as_column(axis_0_results), index=idx + ) @_performance_tracking def _scan( @@ -6710,11 +6677,7 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs): result = result.set_mask( cudf._lib.transform.bools_to_mask(mask._column) ) - return 
Series( - result, - index=self.index, - dtype=result_dtype, - ) + return Series._from_column(result, index=self.index) else: result_df = DataFrame(result).set_index(self.index) result_df._set_columns_like(prepared._data) @@ -7302,9 +7265,7 @@ def unnamed_group_generator(): # Construct the resulting dataframe / series if not has_unnamed_levels: - result = Series._from_data( - data={None: stacked[0]}, index=new_index - ) + result = Series._from_column(stacked[0], index=new_index) else: if unnamed_level_values.nlevels == 1: unnamed_level_values = unnamed_level_values.get_level_values(0) @@ -7445,10 +7406,8 @@ def to_struct(self, name=None): size=len(self), offset=0, ) - return cudf.Series._from_data( - cudf.core.column_accessor.ColumnAccessor( - {name: col}, verify=False - ), + return cudf.Series._from_column( + col, index=self.index, name=name, ) @@ -7804,8 +7763,8 @@ def interleave_columns(self): "interleave_columns does not support 'category' dtype." ) - return self._constructor_sliced._from_data( - {None: libcudf.reshape.interleave_columns([*self._columns])} + return self._constructor_sliced._from_column( + libcudf.reshape.interleave_columns([*self._columns]) ) @_performance_tracking @@ -7935,12 +7894,10 @@ def eval(self, expr: str, inplace: bool = False, **kwargs): raise ValueError( "Cannot operate inplace if there is no assignment" ) - return Series._from_data( - { - None: libcudf.transform.compute_column( - [*self._columns], self._column_names, statements[0] - ) - } + return Series._from_column( + libcudf.transform.compute_column( + [*self._columns], self._column_names, statements[0] + ) ) targets = [] @@ -8484,7 +8441,9 @@ def _get_non_null_cols_and_dtypes(col_idxs, list_of_columns): return non_null_columns, dtypes -def _find_common_dtypes_and_categories(non_null_columns, dtypes): +def _find_common_dtypes_and_categories( + non_null_columns, dtypes +) -> dict[Any, ColumnBase]: # A mapping of {idx: categories}, where `categories` is a # column of all the unique categorical values from each # categorical column across all input frames @@ -8500,9 +8459,9 @@ def _find_common_dtypes_and_categories(non_null_columns, dtypes): isinstance(col, cudf.core.column.CategoricalColumn) for col in cols ): # Combine and de-dupe the categories - categories[idx] = cudf.Series( - concat_columns([col.categories for col in cols]) - )._column.unique() + categories[idx] = concat_columns( + [col.categories for col in cols] + ).unique() # Set the column dtype to the codes' dtype. 
The categories # will be re-assigned at the end dtypes[idx] = min_signed_type(len(categories[idx])) @@ -8541,14 +8500,16 @@ def _cast_cols_to_common_dtypes(col_idxs, list_of_columns, dtypes, categories): def _reassign_categories(categories, cols, col_idxs): for name, idx in zip(cols, col_idxs): if idx in categories: - cols[name] = build_categorical_column( - categories=categories[idx], - codes=build_column( - cols[name].base_data, dtype=cols[name].dtype + codes = as_unsigned_codes(len(categories[idx]), cols[name]) + cols[name] = CategoricalColumn( + data=None, + size=codes.size, + dtype=cudf.CategoricalDtype( + categories=categories[idx], ordered=False ), - mask=cols[name].base_mask, - offset=cols[name].offset, - size=cols[name].size, + mask=codes.base_mask, + offset=codes.offset, + children=(codes,), ) diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py index a70a42c04af..5250a741d3d 100644 --- a/python/cudf/cudf/core/df_protocol.py +++ b/python/cudf/cudf/core/df_protocol.py @@ -13,7 +13,12 @@ import cudf from cudf.core.buffer import Buffer, as_buffer -from cudf.core.column import as_column, build_categorical_column, build_column +from cudf.core.column import ( + CategoricalColumn, + NumericalColumn, + as_column, + build_column, +) # Implementation of interchange protocol classes # ---------------------------------------------- @@ -830,18 +835,19 @@ def _protocol_to_cudf_column_categorical( assert buffers["data"] is not None, "data buffer should not be None" codes_buffer, codes_dtype = buffers["data"] codes_buffer = _ensure_gpu_buffer(codes_buffer, codes_dtype, allow_copy) - cdtype = protocol_dtype_to_cupy_dtype(codes_dtype) - codes = build_column( - codes_buffer._buf, - cdtype, + cdtype = np.dtype(protocol_dtype_to_cupy_dtype(codes_dtype)) + codes = NumericalColumn( + data=codes_buffer._buf, + size=None, + dtype=cdtype, ) - - cudfcol = build_categorical_column( - categories=categories, - codes=codes, - mask=codes.base_mask, + cudfcol = CategoricalColumn( + data=None, size=codes.size, - ordered=ordered, + dtype=cudf.CategoricalDtype(categories=categories, ordered=ordered), + mask=codes.base_mask, + offset=codes.offset, + children=(codes,), ) return _set_missing_values(col, cudfcol, allow_copy), buffers diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 27afec18b4e..2110e610c37 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -7,7 +7,7 @@ import textwrap import warnings from functools import cached_property -from typing import TYPE_CHECKING, Any, Callable +from typing import TYPE_CHECKING, Any import numpy as np import pandas as pd @@ -27,6 +27,8 @@ PANDAS_NUMPY_DTYPE = pd.core.dtypes.dtypes.PandasDtype if TYPE_CHECKING: + from collections.abc import Callable + from cudf._typing import Dtype from cudf.core.buffer import Buffer @@ -182,7 +184,7 @@ def __init__(self, categories=None, ordered: bool = False) -> None: self._ordered = ordered @property - def categories(self) -> "cudf.core.index.Index": + def categories(self) -> cudf.Index: """ An ``Index`` containing the unique categories allowed. 
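
The categorical hunks in `cut.py` and `dataframe.py` above, in `df_protocol.py` here, and again in `frame.py` below all apply one migration: the removed `build_categorical_column` helper is replaced by constructing `CategoricalColumn` directly, with the codes first narrowed through `as_unsigned_codes`. A hedged sketch of the shared pattern, where `cats` and `codes` stand in for any existing categories column and integer codes column:

    import cudf
    from cudf.core.column.categorical import CategoricalColumn, as_unsigned_codes

    # Narrow the codes to an unsigned dtype able to index the categories.
    codes = as_unsigned_codes(len(cats), codes)
    col = CategoricalColumn(
        data=None,  # categorical columns hold no data buffer of their own
        size=codes.size,
        dtype=cudf.CategoricalDtype(categories=cats, ordered=False),
        mask=codes.base_mask,
        offset=codes.offset,
        children=(codes,),  # the codes travel as the single child column
    )
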
@@ -194,10 +196,12 @@ def categories(self) -> "cudf.core.index.Index": Index(['b', 'a'], dtype='object') """ if self._categories is None: - return cudf.Index( - cudf.core.column.column_empty(0, dtype="object", masked=False) + col = cudf.core.column.column_empty( + 0, dtype="object", masked=False ) - return cudf.Index(self._categories, copy=False) + else: + col = self._categories + return cudf.Index._from_column(col) @property def type(self): @@ -259,7 +263,9 @@ def to_pandas(self) -> pd.CategoricalDtype: categories = self._categories.to_pandas() return pd.CategoricalDtype(categories=categories, ordered=self.ordered) - def _init_categories(self, categories: Any): + def _init_categories( + self, categories: Any + ) -> cudf.core.column.ColumnBase | None: if categories is None: return categories if len(categories) == 0 and not isinstance( diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 32c313e42d3..7b2bc85b13b 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -6,7 +6,7 @@ import pickle import warnings from collections import abc -from typing import TYPE_CHECKING, Any, Callable, Literal, MutableMapping +from typing import TYPE_CHECKING, Any, Literal, MutableMapping # TODO: The `numpy` import is needed for typing purposes during doc builds # only, need to figure out why the `np` alias is insufficient then remove. @@ -24,10 +24,10 @@ from cudf.core.column import ( ColumnBase, as_column, - build_categorical_column, deserialize_columns, serialize_columns, ) +from cudf.core.column.categorical import CategoricalColumn, as_unsigned_codes from cudf.core.column_accessor import ColumnAccessor from cudf.core.mixins import BinaryOperand, Scannable from cudf.utils import ioutils @@ -53,14 +53,10 @@ class Frame(BinaryOperand, Scannable): A Frame representing the (optional) index columns. """ - _data: "ColumnAccessor" - _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS - def __init__(self, data=None): - if data is None: - data = {} - self._data = cudf.core.column_accessor.ColumnAccessor(data) + def __init__(self, data: ColumnAccessor | MutableMapping[Any, ColumnBase]): + self._data = ColumnAccessor(data) @property def _num_columns(self) -> int: @@ -407,7 +403,7 @@ def __arrow_array__(self, type=None): @_performance_tracking def _to_array( self, - get_array: Callable, + get_array: abc.Callable, module: ModuleType, copy: bool, dtype: Dtype | None = None, @@ -893,18 +889,21 @@ def from_arrow(cls, data: pa.Table) -> Self: for name in dict_dictionaries.keys() } - cudf_category_frame = { - name: build_categorical_column( - cudf_dictionaries_columns[name], - codes, - mask=codes.base_mask, + for name, codes in zip( + dict_indices_table.column_names, indices_columns + ): + categories = cudf_dictionaries_columns[name] + codes = as_unsigned_codes(len(categories), codes) + cudf_category_frame[name] = CategoricalColumn( + data=None, size=codes.size, - ordered=dict_ordered[name], - ) - for name, codes in zip( - dict_indices_table.column_names, indices_columns + dtype=cudf.CategoricalDtype( + categories=categories, + ordered=dict_ordered[name], + ), + mask=codes.base_mask, + children=(codes,), ) - } # Handle non-dict arrays cudf_non_category_frame = { @@ -1014,9 +1013,7 @@ def _copy_type_metadata(self: Self, other: Self) -> Self: See `ColumnBase._with_type_metadata` for more information. 
""" for (name, col), (_, dtype) in zip(self._data.items(), other._dtypes): - self._data.set_by_label( - name, col._with_type_metadata(dtype), validate=False - ) + self._data.set_by_label(name, col._with_type_metadata(dtype)) return self diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 3cfbd1d736a..4f283d41b17 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -403,8 +403,7 @@ def indices(self) -> dict[ScalarLike, cp.ndarray]: if len(group_keys) > 1: index = cudf.MultiIndex.from_arrays(group_keys) else: - (group_keys,) = group_keys - index = cudf.Index(group_keys) + index = cudf.Index._from_column(group_keys[0]) return dict( zip(index.to_pandas(), cp.split(indices.values, offsets[1:-1])) ) @@ -458,12 +457,11 @@ def size(self): """ Return the size of each group. """ + col = cudf.core.column.column_empty( + len(self.obj), "int8", masked=False + ) return ( - cudf.Series( - cudf.core.column.column_empty( - len(self.obj), "int8", masked=False - ) - ) + cudf.Series._from_column(col) .groupby(self.grouping, sort=self._sort, dropna=self._dropna) .agg("size") ) @@ -484,7 +482,7 @@ def cumcount(self, ascending: bool = True): "ascending is currently not implemented." ) return ( - cudf.Series( + cudf.Series._from_column( cudf.core.column.column_empty( len(self.obj), "int8", masked=False ), @@ -549,7 +547,7 @@ def _groupby(self): ) @_performance_tracking - def agg(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + def agg(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): """ Apply aggregation(s) to the groups. @@ -649,11 +647,10 @@ def agg(self, func, *args, engine=None, engine_kwargs=None, **kwargs): raise NotImplementedError( "Passing args to func is currently not supported." ) - if kwargs: - raise NotImplementedError( - "Passing kwargs to func is currently not supported." - ) - column_names, columns, normalized_aggs = self._normalize_aggs(func) + + column_names, columns, normalized_aggs = self._normalize_aggs( + func, **kwargs + ) orig_dtypes = tuple(c.dtype for c in columns) # Note: When there are no key columns, the below produces @@ -1069,7 +1066,7 @@ def ngroup(self, ascending=True): # Count descending from num_groups - 1 to 0 groups = range(num_groups - 1, -1, -1) - group_ids = cudf.Series._from_data({None: as_column(groups)}) + group_ids = cudf.Series._from_column(as_column(groups)) if has_null_group: group_ids.iloc[-1] = cudf.NA @@ -1267,11 +1264,11 @@ def _grouped(self, *, include_groups: bool = True): return (group_names, offsets, grouped_keys, grouped_values) def _normalize_aggs( - self, aggs: MultiColumnAggType + self, aggs: MultiColumnAggType, **kwargs ) -> tuple[Iterable[Any], tuple[ColumnBase, ...], list[list[AggType]]]: """ Normalize aggs to a list of list of aggregations, where `out[i]` - is a list of aggregations for column `self.obj[i]`. We support three + is a list of aggregations for column `self.obj[i]`. We support four different form of `aggs` input here: - A single agg, such as "sum". This agg is applied to all value columns. @@ -1280,18 +1277,30 @@ def _normalize_aggs( - A mapping of column name to aggs, such as {"a": ["sum"], "b": ["mean"]}, the aggs are applied to specified column. + - Pairs of column name and agg tuples passed as kwargs + eg. col1=("a", "sum"), col2=("b", "prod"). The output column names are + the keys. The aggs are applied to the corresponding column in the tuple. Each agg can be string or lambda functions. 
""" aggs_per_column: Iterable[AggType | Iterable[AggType]] - if isinstance(aggs, dict): - column_names, aggs_per_column = aggs.keys(), aggs.values() - columns = tuple(self.obj._data[col] for col in column_names) + # TODO: Remove isinstance condition when the legacy dask_cudf API is removed. + # See https://github.com/rapidsai/cudf/pull/16528#discussion_r1715482302 for information. + if aggs or isinstance(aggs, dict): + if isinstance(aggs, dict): + column_names, aggs_per_column = aggs.keys(), aggs.values() + columns = tuple(self.obj._data[col] for col in column_names) + else: + values = self.grouping.values + column_names = values._column_names + columns = values._columns + aggs_per_column = (aggs,) * len(columns) + elif not aggs and kwargs: + column_names, aggs_per_column = kwargs.keys(), kwargs.values() + columns = tuple(self.obj._data[x[0]] for x in kwargs.values()) + aggs_per_column = tuple(x[1] for x in kwargs.values()) else: - values = self.grouping.values - column_names = values._column_names - columns = values._columns - aggs_per_column = (aggs,) * len(columns) + raise TypeError("Must provide at least one aggregation function.") # is_list_like performs type narrowing but type-checkers don't # know it. One could add a TypeGuard annotation to @@ -2573,7 +2582,7 @@ def _mimic_pandas_order( # corresponding output rows in pandas, to do that here # expand the result by reindexing. ri = cudf.RangeIndex(0, len(self.obj)) - result.index = cudf.Index(ordering) + result.index = cudf.Index._from_column(ordering) # This reorders and expands result = result.reindex(ri) else: @@ -3144,7 +3153,9 @@ def keys(self): dict(zip(range(nkeys), self._key_columns)) )._set_names(self.names) else: - return cudf.Index(self._key_columns[0], name=self.names[0]) + return cudf.Index._from_column( + self._key_columns[0], name=self.names[0] + ) @property def values(self) -> cudf.core.frame.Frame: diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index cd879d559cd..b2bd20c4982 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -5,6 +5,7 @@ import operator import pickle import warnings +from collections.abc import Hashable from functools import cache, cached_property from numbers import Number from typing import TYPE_CHECKING, Any, Literal, MutableMapping, cast @@ -17,7 +18,6 @@ import cudf from cudf import _lib as libcudf -from cudf._lib.datetime import extract_quarter, is_leap_year from cudf._lib.filling import sequence from cudf._lib.search import search_sorted from cudf._lib.types import size_type_dtype @@ -46,7 +46,6 @@ from cudf.core.column.column import as_column, concat_columns from cudf.core.column.string import StringMethods as StringMethods from cudf.core.dtypes import IntervalDtype -from cudf.core.frame import Frame from cudf.core.join._join_helpers import _match_join_keys from cudf.core.mixins import BinaryOperand from cudf.core.single_column_frame import SingleColumnFrame @@ -63,6 +62,8 @@ from collections.abc import Generator, Iterable from datetime import tzinfo + from cudf.core.frame import Frame + def ensure_index(index_like: Any) -> BaseIndex: """ @@ -316,7 +317,7 @@ def _num_rows(self) -> int: @cached_property # type: ignore @_performance_tracking - def _values(self): + def _values(self) -> ColumnBase: if len(self) > 0: return column.as_column(self._range, dtype=self.dtype) else: @@ -450,6 +451,16 @@ def __getitem__(self, index): return self.start + index * self.step return self._as_int_index()[index] + def 
_get_columns_by_label(self, labels) -> Index: + # used in .sort_values + if isinstance(labels, Hashable): + if labels == self.name: + return self._as_int_index() + elif is_list_like(labels): + if list(self.names) == list(labels): + return self._as_int_index() + raise KeyError(labels) + @_performance_tracking def equals(self, other) -> bool: if isinstance(other, RangeIndex): @@ -518,6 +529,11 @@ def to_pandas( name=self.name, ) + def to_frame( + self, index: bool = True, name: Hashable = no_default + ) -> cudf.DataFrame: + return self._as_int_index().to_frame(index=index, name=name) + @property def is_unique(self) -> bool: return True @@ -572,7 +588,7 @@ def __rmul__(self, other): return self.__mul__(other) @_performance_tracking - def _as_int_index(self): + def _as_int_index(self) -> Index: # Convert self to an integer index. This method is used to perform ops # that are not defined directly on RangeIndex. return cudf.Index._from_data(self._data) @@ -808,22 +824,23 @@ def sort_values( @_performance_tracking def _gather(self, gather_map, nullify=False, check_bounds=True): gather_map = cudf.core.column.as_column(gather_map) - return cudf.Index._from_data( - {self.name: self._values.take(gather_map, nullify, check_bounds)} + return cudf.Index._from_column( + self._column.take(gather_map, nullify, check_bounds), + name=self.name, ) @_performance_tracking def _apply_boolean_mask(self, boolean_mask): - return cudf.Index._from_data( - {self.name: self._values.apply_boolean_mask(boolean_mask)} + return cudf.Index._from_column( + self._column.apply_boolean_mask(boolean_mask), name=self.name ) def repeat(self, repeats, axis=None): return self._as_int_index().repeat(repeats, axis) def _split(self, splits): - return cudf.Index._from_data( - {self.name: self._as_int_index()._split(splits)} + return cudf.Index._from_column( + self._as_int_index()._split(splits), name=self.name ) def _binaryop(self, other, op: str): @@ -859,12 +876,12 @@ def join( @property # type: ignore @_performance_tracking - def _column(self): + def _column(self) -> ColumnBase: return self._as_int_index()._column @property # type: ignore @_performance_tracking - def _columns(self): + def _columns(self) -> list[ColumnBase]: return self._as_int_index()._columns @property # type: ignore @@ -926,7 +943,7 @@ def notna(self) -> cupy.ndarray: notnull = isna @_performance_tracking - def _minmax(self, meth: str): + def _minmax(self, meth: str) -> int | float: no_steps = len(self) - 1 if no_steps == -1: return np.nan @@ -937,10 +954,10 @@ def _minmax(self, meth: str): return self.start + self.step * no_steps - def min(self): + def min(self) -> int | float: return self._minmax("min") - def max(self): + def max(self) -> int | float: return self._minmax("max") @property @@ -1071,6 +1088,19 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return NotImplemented + @classmethod + @_performance_tracking + def _from_column( + cls, column: ColumnBase, *, name: Hashable = None + ) -> Self: + if cls is Index: + ca = cudf.core.column_accessor.ColumnAccessor( + {name: column}, verify=False + ) + return _index_from_data(ca) + else: + return super()._from_column(column, name=name) + @classmethod @_performance_tracking def _from_data(cls, data: MutableMapping, name: Any = no_default) -> Self: @@ -1091,19 +1121,41 @@ def _from_data_like_self( @classmethod @_performance_tracking - def from_arrow(cls, obj): + def from_arrow(cls, obj) -> Index | cudf.MultiIndex: + """Create from PyArrow Array/ChunkedArray. 
+
+        Parameters
+        ----------
+        array : PyArrow Array/ChunkedArray
+            PyArrow Object which has to be converted.
+
+        Raises
+        ------
+        TypeError for invalid input type.
+
+        Returns
+        -------
+        SingleColumnFrame
+
+        Examples
+        --------
+        >>> import cudf
+        >>> import pyarrow as pa
+        >>> cudf.Index.from_arrow(pa.array(["a", "b", None]))
+        Index(['a', 'b', <NA>], dtype='object')
+        """
         try:
-            return cls(ColumnBase.from_arrow(obj))
+            return cls._from_column(ColumnBase.from_arrow(obj))
         except TypeError:
             # Try interpreting object as a MultiIndex before failing.
             return cudf.MultiIndex.from_arrow(obj)

     @cached_property
-    def is_monotonic_increasing(self):
+    def is_monotonic_increasing(self) -> bool:
         return super().is_monotonic_increasing

     @cached_property
-    def is_monotonic_decreasing(self):
+    def is_monotonic_decreasing(self) -> bool:
         return super().is_monotonic_decreasing

     def _binaryop(
@@ -1145,7 +1197,7 @@ def _binaryop(

     @property  # type: ignore
     @_performance_tracking
-    def _values(self):
+    def _values(self) -> ColumnBase:
         return self._column

     @classmethod
@@ -1180,8 +1232,8 @@ def _concat(cls, objs):
         if all(isinstance(obj, RangeIndex) for obj in non_empties):
             result = _concat_range_index(non_empties)
         else:
-            data = concat_columns([o._values for o in non_empties])
-            result = Index(data)
+            data = concat_columns([o._column for o in non_empties])
+            result = Index._from_column(data)

         names = {obj.name for obj in objs}
         if len(names) == 1:
@@ -1193,12 +1245,12 @@ def _concat(cls, objs):
         return result

     @_performance_tracking
-    def memory_usage(self, deep=False):
+    def memory_usage(self, deep: bool = False) -> int:
         return self._column.memory_usage

     @cached_property  # type: ignore
     @_performance_tracking
-    def is_unique(self):
+    def is_unique(self) -> bool:
         return self._column.is_unique

     @_performance_tracking
@@ -1225,7 +1277,7 @@ def equals(self, other) -> bool:
         return False

     @_performance_tracking
-    def copy(self, name=None, deep=False):
+    def copy(self, name: Hashable = None, deep: bool = False) -> Self:
         """
         Make a copy of this object.

@@ -1242,13 +1294,11 @@ def copy(self, name=None, deep=False):
        New index instance.
""" name = self.name if name is None else name - - return _index_from_data( - {name: self._values.copy(True) if deep else self._values} - ) + col = self._column.copy(deep=True) if deep else self._column + return type(self)._from_column(col, name=name) @_performance_tracking - def astype(self, dtype, copy: bool = True): + def astype(self, dtype, copy: bool = True) -> Index: return super().astype({self.name: dtype}, copy) @_performance_tracking @@ -1297,22 +1347,22 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return _return_get_indexer_result(result.values) scatter_map, indices = libcudf.join.join([lcol], [rcol], how="inner") - (result,) = libcudf.copying.scatter([indices], scatter_map, [result]) - result_series = cudf.Series(result) + result = libcudf.copying.scatter([indices], scatter_map, [result])[0] + result_series = cudf.Series._from_column(result) if method in {"ffill", "bfill", "pad", "backfill"}: result_series = _get_indexer_basic( index=self, positions=result_series, method=method, - target_col=cudf.Series(needle), + target_col=cudf.Series._from_column(needle), tolerance=tolerance, ) elif method == "nearest": result_series = _get_nearest_indexer( index=self, positions=result_series, - target_col=cudf.Series(needle), + target_col=cudf.Series._from_column(needle), tolerance=tolerance, ) elif method is not None: @@ -1359,7 +1409,7 @@ def get_loc(self, key) -> int | slice | cupy.ndarray: return mask @_performance_tracking - def __repr__(self): + def __repr__(self) -> str: max_seq_items = pd.get_option("max_seq_items") or len(self) mr = 0 if 2 * max_seq_items < len(self): @@ -1397,7 +1447,22 @@ def __repr__(self): output[:break_idx].replace("'", "") + output[break_idx:] ) else: - output = repr(preprocess.to_pandas()) + # Too many non-unique categories will cause + # the output to take too long. In this case, we + # split the categories into data and categories + # and generate the repr separately and + # merge them. + pd_cats = pd.Categorical( + preprocess.astype(preprocess.categories.dtype).to_pandas() + ) + pd_preprocess = pd.CategoricalIndex(pd_cats) + data_repr = repr(pd_preprocess).split("\n") + pd_preprocess.dtype._categories = ( + preprocess.categories.to_pandas() + ) + pd_preprocess.dtype._ordered = preprocess.dtype.ordered + cats_repr = repr(pd_preprocess).split("\n") + output = "\n".join(data_repr[:-1] + cats_repr[-1:]) output = output.replace("nan", str(cudf.NA)) elif preprocess._values.nullable: @@ -1440,15 +1505,15 @@ def __repr__(self): keywords.append( f"freq={self._freq._maybe_as_fast_pandas_offset().freqstr!r}" ) - keywords = ", ".join(keywords) - lines.append(f"{prior_to_dtype} {keywords})") + joined_keywords = ", ".join(keywords) + lines.append(f"{prior_to_dtype} {joined_keywords})") return "\n".join(lines) @_performance_tracking def __getitem__(self, index): res = self._get_elements_from_column(index) if isinstance(res, ColumnBase): - res = Index(res, name=self.name) + res = Index._from_column(res, name=self.name) return res @property # type: ignore @@ -1457,47 +1522,47 @@ def dtype(self): """ `dtype` of the underlying values in Index. 
""" - return self._values.dtype + return self._column.dtype @_performance_tracking - def isna(self): + def isna(self) -> cupy.ndarray: return self._column.isnull().values isnull = isna @_performance_tracking - def notna(self): + def notna(self) -> cupy.ndarray: return self._column.notnull().values notnull = notna - def _is_numeric(self): + def _is_numeric(self) -> bool: return ( isinstance(self._values, cudf.core.column.NumericalColumn) and self.dtype.kind != "b" ) - def _is_boolean(self): + def _is_boolean(self) -> bool: return self.dtype.kind == "b" - def _is_integer(self): + def _is_integer(self) -> bool: return self.dtype.kind in "iu" - def _is_floating(self): + def _is_floating(self) -> bool: return self.dtype.kind == "f" - def _is_object(self): - return isinstance(self._values, cudf.core.column.StringColumn) + def _is_object(self) -> bool: + return isinstance(self._column, cudf.core.column.StringColumn) - def _is_categorical(self): + def _is_categorical(self) -> bool: return False - def _is_interval(self): + def _is_interval(self) -> bool: return False @property # type: ignore @_performance_tracking - def hasnans(self): + def hasnans(self) -> bool: return self._column.has_nulls(include_nan=True) @_performance_tracking @@ -1539,13 +1604,13 @@ def argsort( na_position=na_position, ) - def repeat(self, repeats, axis=None): - return self._from_columns_like_self( - Frame._repeat([*self._columns], repeats, axis), self._column_names - ) + def repeat(self, repeats, axis=None) -> Self: + result = super()._repeat([self._column], repeats, axis)[0] + result = result._with_type_metadata(self.dtype) + return type(self)._from_column(result, name=self.name) @_performance_tracking - def where(self, cond, other=None, inplace=False): + def where(self, cond, other=None, inplace=False) -> Index: result_col = super().where(cond, other, inplace) return self._mimic_inplace( _index_from_data({self.name: result_col}), @@ -1553,29 +1618,29 @@ def where(self, cond, other=None, inplace=False): ) @property - def values(self): + def values(self) -> cupy.ndarray: return self._column.values - def __contains__(self, item): + def __contains__(self, item) -> bool: hash(item) - return item in self._values + return item in self._column - def _clean_nulls_from_index(self): + def _clean_nulls_from_index(self) -> Index: if self._values.has_nulls(): fill_value = ( str(cudf.NaT) if isinstance(self, (DatetimeIndex, TimedeltaIndex)) else str(cudf.NA) ) - return cudf.Index( - self._values.astype("str").fillna(fill_value), + return cudf.Index._from_column( + self._column.astype("str").fillna(fill_value), name=self.name, ) return self - def any(self): - return self._values.any() + def any(self) -> bool: + return self._column.any() def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False @@ -1586,6 +1651,58 @@ def to_pandas( result.name = self.name return result + def to_frame( + self, index: bool = True, name: Hashable = no_default + ) -> cudf.DataFrame: + """Create a DataFrame with a column containing this Index + + Parameters + ---------- + index : boolean, default True + Set the index of the returned DataFrame as the original Index + name : object, defaults to index.name + The passed name should substitute for the index name (if it has + one). + + Returns + ------- + DataFrame + DataFrame containing the original Index data. + + See Also + -------- + Index.to_series : Convert an Index to a Series. + Series.to_frame : Convert Series to DataFrame. 
+ + Examples + -------- + >>> import cudf + >>> idx = cudf.Index(['Ant', 'Bear', 'Cow'], name='animal') + >>> idx.to_frame() + animal + animal + Ant Ant + Bear Bear + Cow Cow + + By default, the original Index is reused. To enforce a new Index: + + >>> idx.to_frame(index=False) + animal + 0 Ant + 1 Bear + 2 Cow + + To override the name of the resulting column, specify `name`: + + >>> idx.to_frame(index=False, name='zoo') + zoo + 0 Ant + 1 Bear + 2 Cow + """ + return self._to_frame(name=name, index=self if index else None) + def append(self, other): if is_list_like(other): to_concat = [self] @@ -1630,11 +1747,9 @@ def unique(self, level: int | None = None) -> Self: raise IndexError( f"Too many levels: Index has only 1 level, not {level + 1}" ) - return cudf.core.index._index_from_data( - {self.name: self._values.unique()}, name=self.name - ) + return type(self)._from_column(self._column.unique(), name=self.name) - def isin(self, values, level=None): + def isin(self, values, level=None) -> cupy.ndarray: if level is not None and level > 0: raise IndexError( f"Too many levels: Index has only 1 level, not {level + 1}" @@ -1645,11 +1760,7 @@ def isin(self, values, level=None): f"to isin(), you passed a {type(values).__name__}" ) - return self._values.isin(values).values - - def _indices_of(self, value): - """Return indices of value in index""" - return self._column.indices_of(value) + return self._column.isin(values).values @copy_docstring(StringMethods) # type: ignore @property @@ -1823,6 +1934,17 @@ def _from_data( result._freq = _validate_freq(freq) return result + @classmethod + @_performance_tracking + def _from_column( + cls, column: ColumnBase, *, name: Hashable = None, freq: Any = None + ) -> Self: + if column.dtype.kind != "M": + raise ValueError("column must have a datetime type.") + result = super()._from_column(column, name=name) + result._freq = _validate_freq(freq) + return result + def __getitem__(self, index): value = super().__getitem__(index) if cudf.get_option("mode.pandas_compatible") and isinstance( @@ -1880,8 +2002,8 @@ def strftime(self, date_format: str) -> Index: date_format : str Date format string (e.g. "%Y-%m-%d"). """ - return Index._from_data( - {self.name: self._column.strftime(date_format)} + return Index._from_column( + self._column.strftime(date_format), name=self.name ) @property @@ -1946,7 +2068,9 @@ def to_pydatetime(self) -> np.ndarray: return self.to_pandas().to_pydatetime() def to_julian_date(self) -> Index: - return Index._from_data({self.name: self._column.to_julian_date()}) + return Index._from_column( + self._column.to_julian_date(), name=self.name + ) def to_period(self, freq) -> pd.PeriodIndex: return self.to_pandas().to_period(freq=freq) @@ -1957,7 +2081,9 @@ def normalize(self) -> Self: Currently not implemented. """ - return type(self)._from_data({self.name: self._column.normalize()}) + return type(self)._from_column( + self._column.normalize(), name=self.name + ) @property def time(self) -> np.ndarray: @@ -2041,7 +2167,7 @@ def days_in_month(self) -> Index: """ Get the total number of days in the month that the date falls on. """ - return Index._from_data({self.name: self._column.days_in_month}) + return Index._from_column(self._column.days_in_month, name=self.name) daysinmonth = days_in_month @@ -2050,11 +2176,11 @@ def day_of_week(self) -> Index: """ Get the day of week that the date falls on. 
""" - return Index._from_data({self.name: self._column.day_of_week}) + return Index._from_column(self._column.day_of_week, name=self.name) @property # type: ignore @_performance_tracking - def year(self): + def year(self) -> Index: """ The year of the datetime. @@ -2073,7 +2199,7 @@ def year(self): @property # type: ignore @_performance_tracking - def month(self): + def month(self) -> Index: """ The month as January=1, December=12. @@ -2092,7 +2218,7 @@ def month(self): @property # type: ignore @_performance_tracking - def day(self): + def day(self) -> Index: """ The day of the datetime. @@ -2111,7 +2237,7 @@ def day(self): @property # type: ignore @_performance_tracking - def hour(self): + def hour(self) -> Index: """ The hours of the datetime. @@ -2132,7 +2258,7 @@ def hour(self): @property # type: ignore @_performance_tracking - def minute(self): + def minute(self) -> Index: """ The minutes of the datetime. @@ -2153,7 +2279,7 @@ def minute(self): @property # type: ignore @_performance_tracking - def second(self): + def second(self) -> Index: """ The seconds of the datetime. @@ -2174,7 +2300,7 @@ def second(self): @property # type: ignore @_performance_tracking - def microsecond(self): + def microsecond(self) -> Index: """ The microseconds of the datetime. @@ -2191,21 +2317,21 @@ def microsecond(self): >>> datetime_index.microsecond Index([0, 1, 2], dtype='int32') """ # noqa: E501 - return Index( + return Index._from_column( ( # Need to manually promote column to int32 because # pandas-matching binop behaviour requires that this # __mul__ returns an int16 column. - self._values.get_dt_field("millisecond").astype("int32") + self._column.get_dt_field("millisecond").astype("int32") * cudf.Scalar(1000, dtype="int32") ) - + self._values.get_dt_field("microsecond"), + + self._column.get_dt_field("microsecond"), name=self.name, ) @property # type: ignore @_performance_tracking - def nanosecond(self): + def nanosecond(self) -> Index: """ The nanoseconds of the datetime. @@ -2227,7 +2353,7 @@ def nanosecond(self): @property # type: ignore @_performance_tracking - def weekday(self): + def weekday(self) -> Index: """ The day of the week with Monday=0, Sunday=6. @@ -2249,7 +2375,7 @@ def weekday(self): @property # type: ignore @_performance_tracking - def dayofweek(self): + def dayofweek(self) -> Index: """ The day of the week with Monday=0, Sunday=6. @@ -2271,7 +2397,7 @@ def dayofweek(self): @property # type: ignore @_performance_tracking - def dayofyear(self): + def dayofyear(self) -> Index: """ The day of the year, from 1-365 in non-leap years and from 1-366 in leap years. @@ -2294,7 +2420,7 @@ def dayofyear(self): @property # type: ignore @_performance_tracking - def day_of_year(self): + def day_of_year(self) -> Index: """ The day of the year, from 1-365 in non-leap years and from 1-366 in leap years. @@ -2331,12 +2457,12 @@ def is_leap_year(self) -> cupy.ndarray: ndarray Booleans indicating if dates belong to a leap year. """ - res = is_leap_year(self._values).fillna(False) + res = self._column.is_leap_year.fillna(False) return cupy.asarray(res) @property # type: ignore @_performance_tracking - def quarter(self): + def quarter(self) -> Index: """ Integer indicator for which quarter of the year the date belongs in. 
@@ -2357,8 +2483,7 @@ def quarter(self): >>> gIndex.quarter Index([2, 4], dtype='int8') """ - res = extract_quarter(self._values) - return Index(res, dtype="int8") + return Index._from_column(self._column.quarter.astype("int8")) @_performance_tracking def day_name(self, locale: str | None = None) -> Index: @@ -2371,14 +2496,16 @@ def day_name(self, locale: str | None = None) -> Index: >>> datetime_index = cudf.date_range("2016-12-31", "2017-01-08", freq="D") >>> datetime_index DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03', - '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07'], + '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', + '2017-01-08'], dtype='datetime64[ns]', freq='D') >>> datetime_index.day_name() Index(['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', - 'Friday', 'Saturday'], dtype='object') + 'Friday', 'Saturday', 'Sunday'], + dtype='object') """ day_names = self._column.get_day_names(locale) - return Index._from_data({self.name: day_names}) + return Index._from_column(day_names, name=self.name) @_performance_tracking def month_name(self, locale: str | None = None) -> Index: @@ -2397,7 +2524,7 @@ def month_name(self, locale: str | None = None) -> Index: Index(['December', 'January', 'January', 'January', 'January', 'February'], dtype='object') """ month_names = self._column.get_month_names(locale) - return Index._from_data({self.name: month_names}) + return Index._from_column(month_names, name=self.name) @_performance_tracking def isocalendar(self) -> cudf.DataFrame: @@ -2434,24 +2561,23 @@ def to_pandas( return result @_performance_tracking - def _get_dt_field(self, field): - out_column = self._values.get_dt_field(field) - # column.column_empty_like always returns a Column object - # but we need a NumericalColumn for Index.. - # how should this be handled? - out_column = column.build_column( + def _get_dt_field(self, field: str) -> Index: + """Return an Index of a numerical component of the DatetimeIndex.""" + out_column = self._column.get_dt_field(field) + out_column = NumericalColumn( data=out_column.base_data, + size=out_column.size, dtype=out_column.dtype, mask=out_column.base_mask, offset=out_column.offset, ) - return Index(out_column, name=self.name) + return Index._from_column(out_column, name=self.name) - def _is_boolean(self): + def _is_boolean(self) -> bool: return False @_performance_tracking - def ceil(self, freq): + def ceil(self, freq: str) -> Self: """ Perform ceil operation on the data to the specified freq. @@ -2479,12 +2605,10 @@ def ceil(self, freq): >>> gIndex.ceil("T") DatetimeIndex(['2020-05-31 08:06:00', '1999-12-31 18:41:00'], dtype='datetime64[ns]') """ # noqa: E501 - out_column = self._values.ceil(freq) - - return self.__class__._from_data({self.name: out_column}) + return type(self)._from_column(self._column.ceil(freq), name=self.name) @_performance_tracking - def floor(self, freq): + def floor(self, freq: str) -> Self: """ Perform floor operation on the data to the specified freq. @@ -2512,12 +2636,12 @@ def floor(self, freq): >>> gIndex.floor("T") DatetimeIndex(['2020-05-31 08:59:00', '1999-12-31 18:44:00'], dtype='datetime64[ns]') """ # noqa: E501 - out_column = self._values.floor(freq) - - return self.__class__._from_data({self.name: out_column}) + return type(self)._from_column( + self._column.floor(freq), name=self.name + ) @_performance_tracking - def round(self, freq): + def round(self, freq: str) -> Self: """ Perform round operation on the data to the specified freq. 
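
`ceil`, `floor`, and `round` (whose body follows) now share one construction idiom, `type(self)._from_column(result, name=self.name)` in place of `self.__class__._from_data({self.name: out_column})`, which skips the dict wrapper and keeps subclasses and names intact. A usage sketch consistent with the doctests in these hunks; the timestamps are illustrative:

    import cudf

    dti = cudf.DatetimeIndex(["2001-01-01 00:04:45", "2001-01-01 00:05:04"])
    dti.round("T")  # both values land on 2001-01-01 00:05:00
    dti.floor("T")  # truncates each value to the start of its minute
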
@@ -2552,16 +2676,16 @@ def round(self, freq): >>> dt_idx.round('T') DatetimeIndex(['2001-01-01 00:05:00', '2001-01-01 00:05:00', '2001-01-01 00:05:00'], dtype='datetime64[ns]') """ # noqa: E501 - out_column = self._values.round(freq) - - return self.__class__._from_data({self.name: out_column}) + return type(self)._from_column( + self._column.round(freq), name=self.name + ) def tz_localize( self, tz: str | None, ambiguous: Literal["NaT"] = "NaT", nonexistent: Literal["NaT"] = "NaT", - ): + ) -> Self: """ Localize timezone-naive data to timezone-aware data. @@ -2604,11 +2728,11 @@ def tz_localize( to 'NaT'. """ # noqa: E501 result_col = self._column.tz_localize(tz, ambiguous, nonexistent) - return DatetimeIndex._from_data( - {self.name: result_col}, freq=self._freq + return DatetimeIndex._from_column( + result_col, name=self.name, freq=self._freq ) - def tz_convert(self, tz: str | None): + def tz_convert(self, tz: str | None) -> Self: """ Convert tz-aware datetimes from one time zone to another. @@ -2641,9 +2765,9 @@ def tz_convert(self, tz: str | None): dtype='datetime64[ns, Europe/London]') """ # noqa: E501 result_col = self._column.tz_convert(tz) - return DatetimeIndex._from_data({self.name: result_col}) + return DatetimeIndex._from_column(result_col, name=self.name) - def repeat(self, repeats, axis=None): + def repeat(self, repeats, axis=None) -> Self: res = super().repeat(repeats, axis=axis) res._freq = None return res @@ -2751,6 +2875,15 @@ def __init__( super().__init__(data, name=name) + @classmethod + @_performance_tracking + def _from_column( + cls, column: ColumnBase, *, name: Hashable = None, freq: Any = None + ) -> Self: + if column.dtype.kind != "m": + raise ValueError("column must have a timedelta type.") + return super()._from_column(column, name=name) + def __getitem__(self, index): value = super().__getitem__(index) if cudf.get_option("mode.pandas_compatible") and isinstance( @@ -2833,7 +2966,7 @@ def ceil(self, freq: str) -> Self: This method is currently not implemented. """ - return type(self)._from_data({self.name: self._column.ceil(freq)}) + return type(self)._from_column(self._column.ceil(freq), name=self.name) def floor(self, freq: str) -> Self: """ @@ -2841,7 +2974,9 @@ def floor(self, freq: str) -> Self: This method is currently not implemented. """ - return type(self)._from_data({self.name: self._column.floor(freq)}) + return type(self)._from_column( + self._column.floor(freq), name=self.name + ) def round(self, freq: str) -> Self: """ @@ -2849,45 +2984,55 @@ def round(self, freq: str) -> Self: This method is currently not implemented. """ - return type(self)._from_data({self.name: self._column.round(freq)}) + return type(self)._from_column( + self._column.round(freq), name=self.name + ) @property # type: ignore @_performance_tracking - def days(self): + def days(self) -> cudf.Index: """ Number of days for each element. """ # Need to specifically return `int64` to avoid overflow. - return Index(self._values.days, name=self.name, dtype="int64") + return Index._from_column( + self._column.days.astype("int64"), name=self.name + ) @property # type: ignore @_performance_tracking - def seconds(self): + def seconds(self) -> cudf.Index: """ Number of seconds (>= 0 and less than 1 day) for each element. 
""" - return Index(self._values.seconds, name=self.name, dtype="int32") + return Index._from_column( + self._column.seconds.astype("int32"), name=self.name + ) @property # type: ignore @_performance_tracking - def microseconds(self): + def microseconds(self) -> cudf.Index: """ Number of microseconds (>= 0 and less than 1 second) for each element. """ - return Index(self._values.microseconds, name=self.name, dtype="int32") + return Index._from_column( + self._column.microseconds.astype("int32"), name=self.name + ) @property # type: ignore @_performance_tracking - def nanoseconds(self): + def nanoseconds(self) -> cudf.Index: """ Number of nanoseconds (>= 0 and less than 1 microsecond) for each element. """ - return Index(self._values.nanoseconds, name=self.name, dtype="int32") + return Index._from_column( + self._column.nanoseconds.astype("int32"), name=self.name + ) @property # type: ignore @_performance_tracking - def components(self): + def components(self) -> cudf.DataFrame: """ Return a dataframe of the components (days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas. @@ -2908,7 +3053,7 @@ def inferred_freq(self): """ raise NotImplementedError("inferred_freq is not yet supported") - def _is_boolean(self): + def _is_boolean(self) -> bool: return False @@ -2985,22 +3130,8 @@ def __init__( name = _getdefault_name(data, name=name) if isinstance(data, CategoricalColumn): data = data - elif isinstance(data, pd.Series) and ( - isinstance(data.dtype, pd.CategoricalDtype) - ): - codes_data = column.as_column(data.cat.codes.values) - data = column.build_categorical_column( - categories=data.cat.categories, - codes=codes_data, - ordered=data.cat.ordered, - ) - elif isinstance(data, (pd.Categorical, pd.CategoricalIndex)): - codes_data = column.as_column(data.codes) - data = column.build_categorical_column( - categories=data.categories, - codes=codes_data, - ordered=data.ordered, - ) + elif isinstance(getattr(data, "dtype", None), pd.CategoricalDtype): + data = column.as_column(data) else: data = column.as_column( data, dtype="category" if dtype is None else dtype @@ -3018,30 +3149,39 @@ def __init__( data = data.as_ordered(ordered=False) super().__init__(data, name=name) + @classmethod + @_performance_tracking + def _from_column( + cls, column: ColumnBase, *, name: Hashable = None, freq: Any = None + ) -> Self: + if not isinstance(column.dtype, cudf.CategoricalDtype): + raise ValueError("column must have a categorial type.") + return super()._from_column(column, name=name) + @property def ordered(self) -> bool: return self._column.ordered @property # type: ignore @_performance_tracking - def codes(self): + def codes(self) -> cudf.Index: """ The category codes of this categorical. """ - return Index(self._values.codes) + return Index._from_column(self._column.codes) @property # type: ignore @_performance_tracking - def categories(self): + def categories(self) -> cudf.Index: """ The categories of this categorical. """ return self.dtype.categories - def _is_boolean(self): + def _is_boolean(self) -> bool: return False - def _is_categorical(self): + def _is_categorical(self) -> bool: return True def add_categories(self, new_categories) -> Self: @@ -3051,24 +3191,24 @@ def add_categories(self, new_categories) -> Self: `new_categories` will be included at the last/highest place in the categories and will be unused directly after this call. 
""" - return type(self)._from_data( - {self.name: self._column.add_categories(new_categories)} + return type(self)._from_column( + self._column.add_categories(new_categories), name=self.name ) def as_ordered(self) -> Self: """ Set the Categorical to be ordered. """ - return type(self)._from_data( - {self.name: self._column.as_ordered(ordered=True)} + return type(self)._from_column( + self._column.as_ordered(ordered=True), name=self.name ) def as_unordered(self) -> Self: """ Set the Categorical to be unordered. """ - return type(self)._from_data( - {self.name: self._column.as_ordered(ordered=False)} + return type(self)._from_column( + self._column.as_ordered(ordered=False), name=self.name ) def remove_categories(self, removals) -> Self: @@ -3082,8 +3222,8 @@ def remove_categories(self, removals) -> Self: removals : category or list of categories The categories which should be removed. """ - return type(self)._from_data( - {self.name: self._column.remove_categories(removals)} + return type(self)._from_column( + self._column.remove_categories(removals), name=self.name ) def remove_unused_categories(self) -> Self: @@ -3092,8 +3232,8 @@ def remove_unused_categories(self) -> Self: This method is currently not supported. """ - return type(self)._from_data( - {self.name: self._column.remove_unused_categories()} + return type(self)._from_column( + self._column.remove_unused_categories(), name=self.name ) def rename_categories(self, new_categories) -> Self: @@ -3102,8 +3242,8 @@ def rename_categories(self, new_categories) -> Self: This method is currently not supported. """ - return type(self)._from_data( - {self.name: self._column.rename_categories(new_categories)} + return type(self)._from_column( + self._column.rename_categories(new_categories), name=self.name ) def reorder_categories(self, new_categories, ordered=None) -> Self: @@ -3121,12 +3261,9 @@ def reorder_categories(self, new_categories, ordered=None) -> Self: Whether or not the categorical is treated as a ordered categorical. If not given, do not change the ordered information. """ - return type(self)._from_data( - { - self.name: self._column.reorder_categories( - new_categories, ordered=ordered - ) - } + return type(self)._from_column( + self._column.reorder_categories(new_categories, ordered=ordered), + name=self.name, ) def set_categories( @@ -3148,12 +3285,11 @@ def set_categories( considered as a rename of the old categories or as reordered categories. """ - return type(self)._from_data( - { - self.name: self._column.set_categories( - new_categories, ordered=ordered, rename=rename - ) - } + return type(self)._from_column( + self._column.set_categories( + new_categories, ordered=ordered, rename=rename + ), + name=self.name, ) @@ -3165,7 +3301,7 @@ def interval_range( freq=None, name=None, closed="right", -) -> "IntervalIndex": +) -> IntervalIndex: """ Returns a fixed frequency IntervalIndex. 
@@ -3260,20 +3396,7 @@ def interval_range( init=start.device_value, step=freq.device_value, ) - left_col = bin_edges.slice(0, len(bin_edges) - 1) - right_col = bin_edges.slice(1, len(bin_edges)) - - if len(right_col) == 0 or len(left_col) == 0: - dtype = IntervalDtype("int64", closed) - data = column.column_empty_like_same_mask(left_col, dtype) - return IntervalIndex(data, closed=closed) - - interval_col = IntervalColumn( - dtype=IntervalDtype(left_col.dtype, closed), - size=len(left_col), - children=(left_col, right_col), - ) - return IntervalIndex(interval_col, closed=closed) + return IntervalIndex.from_breaks(bin_edges, closed=closed, name=name) class IntervalIndex(Index): @@ -3340,6 +3463,7 @@ def __init__( elif isinstance(data.dtype, (pd.IntervalDtype, IntervalDtype)): data = np.array([], dtype=data.dtype.subtype) interval_col = IntervalColumn( + None, dtype=IntervalDtype(data.dtype, closed), size=len(data), children=(as_column(data), as_column(data)), @@ -3351,12 +3475,13 @@ def __init__( if copy: col = col.copy() interval_col = IntervalColumn( + data=None, dtype=IntervalDtype(col.dtype.subtype, closed), mask=col.mask, size=col.size, offset=col.offset, null_count=col.null_count, - children=col.children, + children=col.children, # type: ignore[arg-type] ) if dtype: @@ -3365,9 +3490,18 @@ def __init__( super().__init__(interval_col, name=name) @property - def closed(self): + def closed(self) -> Literal["left", "right", "neither", "both"]: return self.dtype.closed + @classmethod + @_performance_tracking + def _from_column( + cls, column: ColumnBase, *, name: Hashable = None, freq: Any = None + ) -> Self: + if not isinstance(column.dtype, cudf.IntervalDtype): + raise ValueError("column must have an interval type.") + return super()._from_column(column, name=name) + @classmethod @_performance_tracking def from_breaks( @@ -3377,7 +3511,7 @@ def from_breaks( name=None, copy: bool = False, dtype=None, - ): + ) -> Self: """ Construct an IntervalIndex from an array of splits. @@ -3412,7 +3546,7 @@ def from_breaks( left_col = breaks.slice(0, len(breaks) - 1) right_col = breaks.slice(1, len(breaks)) # For indexing, children should both have 0 offset - right_col = column.build_column( + right_col = type(right_col)( data=right_col.data, dtype=right_col.dtype, size=right_col.size, @@ -3423,11 +3557,12 @@ def from_breaks( ) interval_col = IntervalColumn( + data=None, dtype=IntervalDtype(left_col.dtype, closed), size=len(left_col), children=(left_col, right_col), ) - return IntervalIndex(interval_col, name=name, closed=closed) + return IntervalIndex._from_column(interval_col, name=name) @classmethod def from_arrays( @@ -3448,7 +3583,7 @@ def from_tuples( name=None, copy: bool = False, dtype=None, - ) -> IntervalIndex: + ) -> Self: piidx = pd.IntervalIndex.from_tuples( data, closed=closed, name=name, copy=copy, dtype=dtype ) @@ -3459,13 +3594,13 @@ def __getitem__(self, index): "Getting a scalar from an IntervalIndex is not yet supported" ) - def _is_interval(self): + def _is_interval(self) -> bool: return True - def _is_boolean(self): + def _is_boolean(self) -> bool: return False - def _clean_nulls_from_index(self): + def _clean_nulls_from_index(self) -> Self: return self @property @@ -3550,8 +3685,8 @@ def set_closed( Whether the intervals are closed on the left-side, right-side, both or neither.
""" - return type(self)._from_data( - {self.name: self._column.set_closed(closed)} + return type(self)._from_column( + self._column.set_closed(closed), name=self.name ) def to_tuples(self, na_tuple: bool = True) -> pd.Index: @@ -3637,15 +3772,7 @@ def as_index( elif isinstance(arbitrary, BaseIndex): idx = arbitrary.copy(deep=copy).rename(name) elif isinstance(arbitrary, ColumnBase): - idx = _index_from_data({name: arbitrary}) - elif isinstance(arbitrary, cudf.Series): - return as_index( - arbitrary._column, - nan_as_null=nan_as_null, - copy=copy, - name=name, - dtype=dtype, - ) + raise ValueError("Use cudf.Index._from_column instead.") elif isinstance(arbitrary, (pd.RangeIndex, range)): idx = RangeIndex( start=arbitrary.start, @@ -3665,11 +3792,9 @@ def as_index( elif isinstance(arbitrary, cudf.DataFrame) or is_scalar(arbitrary): raise ValueError("Index data must be 1-dimensional and list-like") else: - return as_index( + return Index._from_column( column.as_column(arbitrary, dtype=dtype, nan_as_null=nan_as_null), - copy=copy, name=name, - dtype=dtype, ) if dtype is not None: idx = idx.astype(dtype) @@ -3706,7 +3831,9 @@ def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex: elif step is None: # First non-empty index had only one element if obj.start == start: - result = Index(concat_columns([x._values for x in indexes])) + result = Index._from_column( + concat_columns([x._column for x in indexes]) + ) return result step = obj.start - start @@ -3714,7 +3841,9 @@ def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex: next_ is not None and obj.start != next_ ) if non_consecutive: - result = Index(concat_columns([x._values for x in indexes])) + result = Index._from_column( + concat_columns([x._column for x in indexes]) + ) return result if step is not None: next_ = obj[-1] + step diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 0678ebfdd81..fd6bf37f0e6 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -24,6 +24,8 @@ import pandas as pd from typing_extensions import Self +import pylibcudf + import cudf import cudf._lib as libcudf import cudf.core @@ -35,9 +37,10 @@ is_list_like, is_scalar, ) +from cudf.core._base_index import BaseIndex from cudf.core._compat import PANDAS_LT_300 from cudf.core.buffer import acquire_spill_lock -from cudf.core.column import ColumnBase, as_column +from cudf.core.column import ColumnBase, NumericalColumn, as_column from cudf.core.column_accessor import ColumnAccessor from cudf.core.copy_types import BooleanMask, GatherMap from cudf.core.dtypes import ListDtype @@ -67,7 +70,6 @@ Dtype, NotImplementedType, ) - from cudf.core._base_index import BaseIndex doc_reset_index_template = """ @@ -171,22 +173,17 @@ def _drop_columns(f: Frame, columns: abc.Iterable, errors: str): def _indices_from_labels(obj, labels): if not isinstance(labels, cudf.MultiIndex): labels = cudf.core.column.as_column(labels) - - if isinstance(obj.index.dtype, cudf.CategoricalDtype): - labels = labels.astype("category") - codes = labels.codes.astype(obj.index.codes.dtype) - labels = cudf.core.column.build_categorical_column( - categories=labels.dtype.categories, - codes=codes, - ordered=labels.dtype.ordered, - ) - else: - labels = labels.astype(obj.index.dtype) + labels = labels.astype(obj.index.dtype) + idx_labels = cudf.Index._from_column(labels) + else: + idx_labels = labels # join is not guaranteed to maintain the index ordering # so we will sort it with its initial ordering 
which is stored # in column "__" - lhs = cudf.DataFrame({"__": as_column(range(len(labels)))}, index=labels) + lhs = cudf.DataFrame( + {"__": as_column(range(len(idx_labels)))}, index=idx_labels + ) rhs = cudf.DataFrame({"_": as_column(range(len(obj)))}, index=obj.index) return lhs.join(rhs).sort_values(by=["__", "_"])["_"] @@ -260,7 +257,6 @@ class IndexedFrame(Frame): # mypy can't handle bound type variables as class members _loc_indexer_type: type[_LocIndexerClass] # type: ignore _iloc_indexer_type: type[_IlocIndexerClass] # type: ignore - _index: cudf.core.index.BaseIndex _groupby = GroupBy _resampler = _Resampler @@ -279,18 +275,21 @@ class IndexedFrame(Frame): "cummax": {"op_name": "cumulative max"}, } - def __init__(self, data=None, index=None): + def __init__( + self, + data: ColumnAccessor | MutableMapping[Any, ColumnBase], + index: BaseIndex, + ): super().__init__(data=data) - # TODO: Right now it is possible to initialize an IndexedFrame without - # an index. The code's correctness relies on the subclass constructors - # assigning the attribute after the fact. We should restructure those - # to ensure that this constructor is always invoked with an index. + if not isinstance(index, cudf.core._base_index.BaseIndex): + raise ValueError( + f"index must be a cudf index not {type(index).__name__}" + ) self._index = index @property def _num_rows(self) -> int: # Important to use the index because the data may be empty. - # TODO: Remove once DataFrame.__init__ is cleaned up return len(self.index) @property @@ -304,6 +303,10 @@ def _from_data( index: BaseIndex | None = None, ): out = super()._from_data(data) + if not (index is None or isinstance(index, BaseIndex)): + raise ValueError( + f"index must be None or a cudf.Index not {type(index).__name__}" + ) out._index = RangeIndex(out._data.nrows) if index is None else index return out @@ -1373,11 +1376,6 @@ def sum( a 10 b 34 dtype: int64 - - .. pandas-compat:: - :meth:`pandas.DataFrame.sum`, :meth:`pandas.Series.sum` - - Parameters currently not supported are `level`, `numeric_only`. """ return self._reduce( "sum", @@ -1434,11 +1432,6 @@ def product( a 24 b 5040 dtype: int64 - - .. pandas-compat:: - :meth:`pandas.DataFrame.product`, :meth:`pandas.Series.product` - - Parameters currently not supported are level`, `numeric_only`. """ return self._reduce( @@ -1495,7 +1488,9 @@ def mean(self, axis=0, skipna=True, numeric_only=False, **kwargs): **kwargs, ) - def median(self, axis=None, skipna=True, numeric_only=None, **kwargs): + def median( + self, axis=no_default, skipna=True, numeric_only=None, **kwargs + ): """ Return the median of the values for the requested axis. @@ -1529,11 +1524,6 @@ def median(self, axis=None, skipna=True, numeric_only=None, **kwargs): dtype: int64 >>> ser.median() 17.0 - - .. pandas-compat:: - :meth:`pandas.DataFrame.median`, :meth:`pandas.Series.median` - - Parameters currently not supported are `level` and `numeric_only`. """ return self._reduce( "median", @@ -1585,12 +1575,6 @@ def std( a 1.290994 b 1.290994 dtype: float64 - - .. pandas-compat:: - :meth:`pandas.DataFrame.std`, :meth:`pandas.Series.std` - - Parameters currently not supported are `level` and - `numeric_only` """ return self._reduce( @@ -1644,12 +1628,6 @@ def var( a 1.666667 b 1.666667 dtype: float64 - - .. 
pandas-compat:: - :meth:`pandas.DataFrame.var`, :meth:`pandas.Series.var` - - Parameters currently not supported are `level` and - `numeric_only` """ return self._reduce( "var", @@ -1700,11 +1678,6 @@ def kurtosis(self, axis=0, skipna=True, numeric_only=False, **kwargs): a -1.2 b -1.2 dtype: float64 - - .. pandas-compat:: - :meth:`pandas.DataFrame.kurtosis` - - Parameters currently not supported are `level` and `numeric_only` """ if axis not in (0, "index", None, no_default): raise NotImplementedError("Only axis=0 is currently supported.") @@ -2934,8 +2907,8 @@ def hash_values(self, method="murmur3", seed=None): # Note that both Series and DataFrame return Series objects from this # calculation, necessitating the unfortunate circular reference to the # child class here. - return cudf.Series._from_data( - {None: libcudf.hash.hash([*self._columns], method, seed)}, + return cudf.Series._from_column( + libcudf.hash.hash([*self._columns], method, seed), index=self.index, ) @@ -3025,9 +2998,12 @@ def _slice(self, arg: slice, keep_index: bool = True) -> Self: if stride != 1: return self._gather( GatherMap.from_column_unchecked( - as_column( - range(start, stop, stride), - dtype=libcudf.types.size_type_dtype, + cast( + NumericalColumn, + as_column( + range(start, stop, stride), + dtype=libcudf.types.size_type_dtype, + ), ), len(self), nullify=False, @@ -3212,20 +3188,22 @@ def duplicated(self, subset=None, keep="first"): """ subset = self._preprocess_subset(subset) + name = None if isinstance(self, cudf.Series): columns = [self._column] + name = self.name else: columns = [self._data[n] for n in subset] distinct = libcudf.stream_compaction.distinct_indices( columns, keep=keep ) - (result,) = libcudf.copying.scatter( + result = libcudf.copying.scatter( [cudf.Scalar(False, dtype=bool)], distinct, [as_column(True, length=len(self), dtype=bool)], bounds_check=False, - ) - return cudf.Series(result, index=self.index) + )[0] + return cudf.Series._from_column(result, index=self.index, name=name) @_performance_tracking def _empty_like(self, keep_index=True) -> Self: @@ -3506,7 +3484,7 @@ def _apply(self, func, kernel_getter, *args, **kwargs): col = _post_process_output_col(ans_col, retty) col.set_base_mask(libcudf.transform.bools_to_mask(ans_mask)) - result = cudf.Series._from_data({None: col}, self.index) + result = cudf.Series._from_column(col, index=self.index) return result @@ -3588,10 +3566,34 @@ def sort_values( if len(self) == 0: return self + try: + by_in_columns = self._get_columns_by_label(by) + except KeyError: + by_in_columns = None + if self.ndim == 1: + # For Series case, we're never selecting an index level. + by_in_index = None + else: + try: + by_in_index = self.index._get_columns_by_label(by) + except KeyError: + by_in_index = None + + if by_in_columns is not None and by_in_index is not None: + raise ValueError( + f"{by=} appears in the {type(self).__name__} columns " + "and as an index level which is ambiguous." 
+ ) + elif by_in_columns is not None: + by_columns = by_in_columns + elif by_in_index is not None: + by_columns = by_in_index + else: + raise KeyError(by) # argsort the `by` column out = self._gather( GatherMap.from_column_unchecked( - self._get_columns_by_label(by)._get_sorted_inds( + by_columns._get_sorted_inds( ascending=ascending, na_position=na_position ), len(self), @@ -4754,10 +4756,13 @@ def _sample_axis_0( ): try: gather_map = GatherMap.from_column_unchecked( - cudf.core.column.as_column( - random_state.choice( - len(self), size=n, replace=replace, p=weights - ) + cast( + NumericalColumn, + cudf.core.column.as_column( + random_state.choice( + len(self), size=n, replace=replace, p=weights + ) + ), ), len(self), nullify=False, @@ -6276,7 +6281,7 @@ def rank( if method not in {"average", "min", "max", "first", "dense"}: raise KeyError(method) - method_enum = libcudf.pylibcudf.aggregation.RankMethod[method.upper()] + method_enum = pylibcudf.aggregation.RankMethod[method.upper()] if na_option not in {"keep", "top", "bottom"}: raise ValueError( "na_option must be one of 'keep', 'top', or 'bottom'" @@ -6436,7 +6441,7 @@ def _get_replacement_values_for_columns( to_replace_columns = {col: [to_replace] for col in columns_dtype_map} values_columns = {col: [value] for col in columns_dtype_map} elif cudf.api.types.is_list_like(to_replace) or isinstance( - to_replace, ColumnBase + to_replace, (ColumnBase, BaseIndex) ): if is_scalar(value): to_replace_columns = {col: to_replace for col in columns_dtype_map} @@ -6450,7 +6455,9 @@ def _get_replacement_values_for_columns( ) for col in columns_dtype_map } - elif cudf.api.types.is_list_like(value): + elif cudf.api.types.is_list_like( + value + ) or cudf.utils.dtypes.is_column_like(value): if len(to_replace) != len(value): raise ValueError( f"Replacement lists must be " @@ -6462,9 +6469,6 @@ def _get_replacement_values_for_columns( col: to_replace for col in columns_dtype_map } values_columns = {col: value for col in columns_dtype_map} - elif cudf.utils.dtypes.is_column_like(value): - to_replace_columns = {col: to_replace for col in columns_dtype_map} - values_columns = {col: value for col in columns_dtype_map} else: raise TypeError( "value argument must be scalar, list-like or Series" @@ -6559,12 +6563,13 @@ def _get_replacement_values_for_columns( return all_na_columns, to_replace_columns, values_columns -def _is_series(obj): +def _is_series(obj: Any) -> bool: """ Checks if the `obj` is of type `cudf.Series` instead of checking for isinstance(obj, cudf.Series) + to avoid circular imports. """ - return isinstance(obj, Frame) and obj.ndim == 1 and obj.index is not None + return isinstance(obj, IndexedFrame) and obj.ndim == 1 @_performance_tracking @@ -6592,7 +6597,7 @@ def _drop_rows_by_labels( level = 0 levels_index = obj.index.get_level_values(level) - if errors == "raise" and not labels.isin(levels_index).all(): + if errors == "raise" and not labels.isin(levels_index).all(): # type: ignore[union-attr] raise KeyError("One or more values not found in axis") if isinstance(level, int): @@ -6614,7 +6619,11 @@ def _drop_rows_by_labels( # 3. 
Use "leftanti" join to drop # TODO: use internal API with "leftanti" and specify left and right # join keys to bypass logic check - to_join = cudf.DataFrame(index=cudf.Index(labels, name=level)) + if isinstance(labels, ColumnBase): + join_index = cudf.Index._from_column(labels, name=level) + else: + join_index = cudf.Index(labels, name=level) + to_join = cudf.DataFrame(index=join_index) join_res = working_df.join(to_join, how="leftanti") # 4. Reconstruct original layout, and rename @@ -6638,15 +6647,14 @@ def _drop_rows_by_labels( ) else: - if errors == "raise" and not labels.isin(obj.index).all(): + if errors == "raise" and not labels.isin(obj.index).all(): # type: ignore[union-attr] raise KeyError("One or more values not found in axis") - key_df = cudf.DataFrame._from_data( - data={}, - index=cudf.Index( - labels, name=getattr(labels, "name", obj.index.name) - ), - ) + if isinstance(labels, ColumnBase): + idx = cudf.Index._from_column(labels, name=obj.index.name) + else: + idx = cudf.Index(labels, name=labels.name) + key_df = cudf.DataFrame._from_data(data={}, index=idx) if isinstance(obj, cudf.DataFrame): res = obj.join(key_df, how="leftanti") else: diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py index a0089242909..8182e5cede2 100644 --- a/python/cudf/cudf/core/indexing_utils.py +++ b/python/cudf/cudf/core/indexing_utils.py @@ -152,10 +152,6 @@ def destructure_dataframe_iloc_indexer( column_names: ColumnLabels = list( frame._data.get_labels_by_index(cols) ) - if len(set(column_names)) != len(column_names): - raise NotImplementedError( - "cudf DataFrames do not support repeated column names" - ) except TypeError: raise TypeError( "Column indices must be integers, slices, " diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py index 32c84763401..854c44ff1a1 100644 --- a/python/cudf/cudf/core/join/_join_helpers.py +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -37,16 +37,16 @@ class _ColumnIndexer(_Indexer): def get(self, obj: cudf.DataFrame) -> ColumnBase: return obj._data[self.name] - def set(self, obj: cudf.DataFrame, value: ColumnBase, validate=False): - obj._data.set_by_label(self.name, value, validate=validate) + def set(self, obj: cudf.DataFrame, value: ColumnBase): + obj._data.set_by_label(self.name, value) class _IndexIndexer(_Indexer): def get(self, obj: cudf.DataFrame) -> ColumnBase: return obj.index._data[self.name] - def set(self, obj: cudf.DataFrame, value: ColumnBase, validate=False): - obj.index._data.set_by_label(self.name, value, validate=validate) + def set(self, obj: cudf.DataFrame, value: ColumnBase): + obj.index._data.set_by_label(self.name, value) def _match_join_keys( diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index ce81c1fc5b1..b65bc7af832 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -272,8 +272,8 @@ def perform_merge(self) -> cudf.DataFrame: lcol_casted = lcol_casted.astype("category") rcol_casted = rcol_casted.astype("category") - left_key.set(self.lhs, lcol_casted, validate=False) - right_key.set(self.rhs, rcol_casted, validate=False) + left_key.set(self.lhs, lcol_casted) + right_key.set(self.rhs, rcol_casted) left_rows, right_rows = self._gather_maps( left_join_cols, right_join_cols @@ -329,7 +329,6 @@ def _merge_results( lkey.set( left_result, lkey.get(left_result).fillna(rkey.get(right_result)), - validate=False, ) # All columns from the left table make it into the 
output. Non-key diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 2788455aebf..e00890ac5c3 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -247,7 +247,7 @@ def to_series(self, index=None, name=None): ) @_performance_tracking - def astype(self, dtype, copy: bool = True): + def astype(self, dtype, copy: bool = True) -> Self: if not is_object_dtype(dtype): raise TypeError( "Setting a MultiIndex dtype to anything other than object is " @@ -256,7 +256,7 @@ def astype(self, dtype, copy: bool = True): return self @_performance_tracking - def rename(self, names, inplace=False): + def rename(self, names, inplace: bool = False) -> Self | None: """ Alter MultiIndex level names @@ -303,7 +303,9 @@ def rename(self, names, inplace=False): return self.set_names(names, level=None, inplace=inplace) @_performance_tracking - def set_names(self, names, level=None, inplace=False): + def set_names( + self, names, level=None, inplace: bool = False + ) -> Self | None: names_is_list_like = is_list_like(names) level_is_list_like = is_list_like(level) @@ -345,7 +347,7 @@ def _from_data( cls, data: MutableMapping, name: Any = None, - ) -> MultiIndex: + ) -> Self: """ Use when you have a ColumnAccessor-like mapping but no codes and levels. """ @@ -394,7 +396,7 @@ def copy( names=None, deep=False, name=None, - ): + ) -> Self: """Returns copy of MultiIndex object. Returns a copy of `MultiIndex`. The `levels` and `codes` value can be @@ -457,7 +459,7 @@ def copy( ) @_performance_tracking - def __repr__(self): + def __repr__(self) -> str: max_seq_items = pd.get_option("display.max_seq_items") or len(self) if len(self) > max_seq_items: @@ -503,7 +505,7 @@ def __repr__(self): @property # type: ignore @_external_only_api("Use ._codes instead") @_performance_tracking - def codes(self): + def codes(self) -> pd.core.indexes.frozen.FrozenList: """ Returns the codes of the underlying MultiIndex. @@ -531,7 +533,7 @@ def get_slice_bound(self, label, side): @property # type: ignore @_performance_tracking - def nlevels(self): + def nlevels(self) -> int: """Integer number of levels in this MultiIndex.""" return self._num_columns @@ -590,7 +592,7 @@ def _get_level_label(self, level): return self.names[level] @_performance_tracking - def isin(self, values, level=None): + def isin(self, values, level=None) -> cp.ndarray: """Return a boolean array where the index values are in values. Compute boolean array of whether each index value is found in @@ -702,12 +704,8 @@ def _compute_validity_mask(self, index, row_tuple, max_length): data_table = cudf.concat( [ frame, - cudf.DataFrame( - { - "idx": cudf.Series( - column.as_column(range(len(frame))) - ) - } + cudf.DataFrame._from_data( + {"idx": column.as_column(range(len(frame)))} ), ], axis=1, @@ -786,7 +784,7 @@ def _index_and_downcast(self, result, index, index_key): out_index.insert( out_index._num_columns, k, - cudf.Series._from_data({None: index._data.columns[k]}), + cudf.Series._from_column(index._data.columns[k]), ) # determine if we should downcast from a DataFrame to a Series @@ -815,8 +813,9 @@ def _index_and_downcast(self, result, index, index_key): # it into an Index and name the final index values according # to that column's name. 
*_, last_column = index._data.columns - out_index = cudf.Index(last_column) - out_index.name = index.names[-1] + out_index = cudf.Index._from_column( + last_column, name=index.names[-1] + ) index = out_index elif out_index._num_columns > 1: # Otherwise pop the leftmost levels, names, and codes from the @@ -852,7 +851,10 @@ def _get_row_major( valid_indices = self._get_valid_indices_by_tuple( df.index, row_tuple, len(df.index) ) - indices = cudf.Series(valid_indices) + if isinstance(valid_indices, column.ColumnBase): + indices = cudf.Series._from_column(valid_indices) + else: + indices = cudf.Series(valid_indices) result = df.take(indices) final = self._index_and_downcast(result, result.index, row_tuple) return final @@ -864,7 +866,7 @@ def _validate_indexer( | slice | tuple[Any, ...] | list[tuple[Any, ...]], - ): + ) -> None: if isinstance(indexer, numbers.Number): return if isinstance(indexer, tuple): @@ -900,12 +902,12 @@ def __eq__(self, other): @property # type: ignore @_performance_tracking - def size(self): + def size(self) -> int: # The size of a MultiIndex is only dependent on the number of rows. return self._num_rows @_performance_tracking - def take(self, indices): + def take(self, indices) -> Self: if isinstance(indices, cudf.Series) and indices.has_nulls: raise ValueError("Column must have no nulls.") obj = super().take(indices) @@ -957,7 +959,12 @@ def __getitem__(self, index): return result @_performance_tracking - def to_frame(self, index=True, name=no_default, allow_duplicates=False): + def to_frame( + self, + index: bool = True, + name=no_default, + allow_duplicates: bool = False, + ) -> cudf.DataFrame: """ Create a DataFrame with the levels of the MultiIndex as columns. @@ -1034,7 +1041,7 @@ def to_frame(self, index=True, name=no_default, allow_duplicates=False): ) @_performance_tracking - def get_level_values(self, level): + def get_level_values(self, level) -> cudf.Index: """ Return the values at the requested level @@ -1062,35 +1069,35 @@ def get_level_values(self, level): raise KeyError(f"Level not found: '{level}'") else: level_idx = colnames.index(level) - level_values = cudf.Index( + level_values = cudf.Index._from_column( self._data[level], name=self.names[level_idx] ) return level_values - def _is_numeric(self): + def _is_numeric(self) -> bool: return False - def _is_boolean(self): + def _is_boolean(self) -> bool: return False - def _is_integer(self): + def _is_integer(self) -> bool: return False - def _is_floating(self): + def _is_floating(self) -> bool: return False - def _is_object(self): + def _is_object(self) -> bool: return False - def _is_categorical(self): + def _is_categorical(self) -> bool: return False - def _is_interval(self): + def _is_interval(self) -> bool: return False @classmethod @_performance_tracking - def _concat(cls, objs): + def _concat(cls, objs) -> Self: source_data = [o.to_frame(index=False) for o in objs] # TODO: Verify if this is really necessary or if we can rely on @@ -1100,17 +1107,19 @@ def _concat(cls, objs): for obj in source_data[1:]: obj.columns = colnames - source_data = cudf.DataFrame._concat(source_data) + source_df = cudf.DataFrame._concat(source_data) try: # Only set names if all objs have the same names (names,) = {o.names for o in objs} - {None} except ValueError: - names = [None] * source_data._num_columns - return cudf.MultiIndex.from_frame(source_data, names=names) + names = [None] * source_df._num_columns + return cudf.MultiIndex.from_frame(source_df, names=names) @classmethod @_performance_tracking - def 
from_tuples(cls, tuples, sortorder: int | None = None, names=None): + def from_tuples( + cls, tuples, sortorder: int | None = None, names=None + ) -> Self: """ Convert list of tuples to MultiIndex. @@ -1153,7 +1162,7 @@ def from_tuples(cls, tuples, sortorder: int | None = None, names=None): return cls.from_pandas(pdi) @_performance_tracking - def to_numpy(self): + def to_numpy(self) -> np.ndarray: return self.values_host def to_flat_index(self): @@ -1167,7 +1176,7 @@ def to_flat_index(self): @property # type: ignore @_performance_tracking - def values_host(self): + def values_host(self) -> np.ndarray: """ Return a numpy representation of the MultiIndex. @@ -1195,7 +1204,7 @@ def values_host(self): @property # type: ignore @_performance_tracking - def values(self): + def values(self) -> cp.ndarray: """ Return a CuPy representation of the MultiIndex. @@ -1236,7 +1245,7 @@ def from_frame( df: pd.DataFrame | cudf.DataFrame, sortorder: int | None = None, names=None, - ): + ) -> Self: """ Make a MultiIndex from a DataFrame. @@ -1303,7 +1312,9 @@ def from_frame( @classmethod @_performance_tracking - def from_product(cls, iterables, sortorder: int | None = None, names=None): + def from_product( + cls, iterables, sortorder: int | None = None, names=None + ) -> Self: """ Make a MultiIndex from the cartesian product of multiple iterables. @@ -1355,7 +1366,7 @@ def from_arrays( arrays, sortorder=None, names=None, - ) -> MultiIndex: + ) -> Self: """ Convert arrays to MultiIndex. @@ -1395,18 +1406,22 @@ def from_arrays( raise TypeError(error_msg) codes = [] levels = [] + names_from_arrays = [] for array in arrays: if not (is_list_like(array) or is_column_like(array)): raise TypeError(error_msg) code, level = factorize(array, sort=True) codes.append(code) levels.append(level) + names_from_arrays.append(getattr(array, "name", None)) + if names is None: + names = names_from_arrays return cls( codes=codes, levels=levels, sortorder=sortorder, names=names ) @_performance_tracking - def _poplevels(self, level): + def _poplevels(self, level) -> None | MultiIndex | cudf.Index: """ Remove and return the specified levels from self. @@ -1457,7 +1472,7 @@ def _poplevels(self, level): return popped @_performance_tracking - def swaplevel(self, i=-2, j=-1): + def swaplevel(self, i=-2, j=-1) -> Self: """ Swap level i with level j. Calling this method does not change the ordering of the values. @@ -1508,7 +1523,7 @@ def swaplevel(self, i=-2, j=-1): return midx @_performance_tracking - def droplevel(self, level=-1): + def droplevel(self, level=-1) -> MultiIndex | cudf.Index: """ Removes the specified levels from the MultiIndex. @@ -1594,7 +1609,9 @@ def to_pandas( @classmethod @_performance_tracking - def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default): + def from_pandas( + cls, multiindex: pd.MultiIndex, nan_as_null=no_default + ) -> Self: """ Convert from a Pandas MultiIndex @@ -1629,11 +1646,11 @@ def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default): @cached_property # type: ignore @_performance_tracking - def is_unique(self): + def is_unique(self) -> bool: return len(self) == len(self.unique()) @property - def dtype(self): + def dtype(self) -> np.dtype: return np.dtype("O") @_performance_tracking @@ -1702,7 +1719,7 @@ def is_monotonic_decreasing(self) -> bool: ) @_performance_tracking - def fillna(self, value): + def fillna(self, value) -> Self: """ Fill null values with the specified value. 
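One behavior change worth noting from the `from_arrays` hunk above: when `names` is not supplied, level names are now inferred from each input array's `name` attribute, falling back to `None`. A minimal sketch of the expected effect, assuming a pandas-style `FrozenList` repr for `names`:

>>> import cudf
>>> s = cudf.Series([1, 2], name="x")
>>> cudf.MultiIndex.from_arrays([s, ["a", "b"]]).names
FrozenList(['x', None])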
@@ -1754,7 +1771,7 @@ def nunique(self, dropna: bool = True) -> int: mi = self.dropna(how="all") if dropna else self return len(mi.unique()) - def _clean_nulls_from_index(self): + def _clean_nulls_from_index(self) -> Self: """ Convert all na values (if any) in MultiIndex object to `<NA>` as a preprocessing step to `__repr__` methods. @@ -1765,20 +1782,20 @@ def _clean_nulls_from_index(self): ) @_performance_tracking - def memory_usage(self, deep=False): + def memory_usage(self, deep: bool = False) -> int: usage = sum(col.memory_usage for col in self._columns) usage += sum(level.memory_usage(deep=deep) for level in self._levels) usage += sum(code.memory_usage for code in self._codes) return usage @_performance_tracking - def difference(self, other, sort=None): + def difference(self, other, sort=None) -> Self: if hasattr(other, "to_pandas"): other = other.to_pandas() return cudf.from_pandas(self.to_pandas().difference(other, sort)) @_performance_tracking - def append(self, other): + def append(self, other) -> Self: """ Append a collection of MultiIndex objects together @@ -1925,8 +1942,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): *join_keys, how="inner", ) - (result,) = libcudf.copying.scatter([indices], scatter_map, [result]) - result_series = cudf.Series(result) + result = libcudf.copying.scatter([indices], scatter_map, [result])[0] + result_series = cudf.Series._from_column(result) if method in {"ffill", "bfill", "pad", "backfill"}: result_series = _get_indexer_basic( @@ -1996,7 +2013,7 @@ def get_loc(self, key): mask[true_inds] = True return mask - def _get_reconciled_name_object(self, other) -> MultiIndex: + def _get_reconciled_name_object(self, other) -> Self: """ If the result of a set operation will be self, return self, unless the names change, in which @@ -2022,7 +2039,7 @@ def _maybe_match_names(self, other): ] @_performance_tracking - def union(self, other, sort=None): + def union(self, other, sort=None) -> Self: if not isinstance(other, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" try: @@ -2046,7 +2063,7 @@ def union(self, other, sort=None): return self._union(other, sort=sort) @_performance_tracking - def _union(self, other, sort=None): + def _union(self, other, sort=None) -> Self: # TODO: When to_frame is refactored to return a # deep copy in future, we should push most of the common # logic between MultiIndex._union & BaseIndex._union into @@ -2072,7 +2089,7 @@ def _union(self, other, sort=None): return midx @_performance_tracking - def _intersection(self, other, sort=None): + def _intersection(self, other, sort=None) -> Self: if self.names != other.names: deep = True col_names = list(range(0, self.nlevels)) @@ -2163,7 +2180,7 @@ def _columns_for_reset_index( else: yield from self._split_columns_by_levels(levels, in_levels=True) - def repeat(self, repeats, axis=None): + def repeat(self, repeats, axis=None) -> Self: return self._from_data( self._data._from_columns_like_self( super()._repeat([*self._columns], repeats, axis) diff --git a/python/cudf/cudf/core/resample.py b/python/cudf/cudf/core/resample.py index 715bbf89b15..e0aee28bfeb 100644 --- a/python/cudf/cudf/core/resample.py +++ b/python/cudf/cudf/core/resample.py @@ -145,7 +145,9 @@ def copy(self, deep=True): def keys(self): index = super().keys if self._freq is not None and isinstance(index, cudf.DatetimeIndex): - return cudf.DatetimeIndex._from_data(index._data, freq=self._freq) + return cudf.DatetimeIndex._from_column( + index._column, name=index.name, freq=self._freq
+ ) return index def serialize(self): diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index e7248977b1d..3d205957126 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -3,7 +3,7 @@ import itertools import warnings -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import numpy as np import pandas as pd @@ -14,7 +14,7 @@ from cudf.api.extensions import no_default from cudf.core._compat import PANDAS_LT_300 from cudf.core.column import ColumnBase, as_column, column_empty_like -from cudf.core.column.categorical import CategoricalColumn +from cudf.core.column_accessor import ColumnAccessor from cudf.utils.dtypes import min_unsigned_type if TYPE_CHECKING: @@ -101,7 +101,9 @@ def _get_combined_index(indexes, intersect: bool = False, sort=None): return index -def _normalize_series_and_dataframe(objs, axis): +def _normalize_series_and_dataframe( + objs: list[cudf.Series | cudf.DataFrame], axis: Literal[0, 1] +) -> None: """Convert any cudf.Series objects in objs to DataFrames in place.""" # Default to naming series by a numerical id if they are not named. sr_name = 0 @@ -118,7 +120,17 @@ def _normalize_series_and_dataframe(objs, axis): objs[idx] = obj.to_frame(name=name) -def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): +def concat( + objs, + axis=0, + join="outer", + ignore_index=False, + keys=None, + levels=None, + names=None, + verify_integrity=False, + sort=None, +): """Concatenate DataFrames, Series, or Indices row-wise. Parameters @@ -132,6 +144,21 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): ignore_index : bool, default False Set True to ignore the index of the *objs* and provide a default range index instead. + keys : sequence, default None + If multiple levels passed, should contain tuples. Construct + hierarchical index using the passed keys as the outermost level. + Currently not supported. + levels : list of sequences, default None + Specific levels (unique values) to use for constructing a + MultiIndex. Otherwise they will be inferred from the keys. + Currently not supported. + names : list, default None + Names for the levels in the resulting hierarchical index. + Currently not supported. + verify_integrity : bool, default False + Check whether the new concatenated axis contains duplicates. This can + be very expensive relative to the actual data concatenation. + Currently not supported. sort : bool, default False Sort non-concatenation axis if it is not already aligned. @@ -243,6 +270,12 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): 0 a 1 c 3 1 b 2 d 4 """ + if keys is not None: + raise NotImplementedError("keys is currently not supported") + if levels is not None: + raise NotImplementedError("levels is currently not supported") + if names is not None: + raise NotImplementedError("names is currently not supported") # TODO: Do we really need to have different error messages for an empty # list and a list of None? 
if not objs: @@ -260,7 +293,7 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): f"Can only concatenate dictionary input along axis=1, not {axis}" ) objs = {k: obj for k, obj in objs.items() if obj is not None} - keys = list(objs) + keys_objs = list(objs) objs = list(objs.values()) if any(isinstance(o, cudf.BaseIndex) for o in objs): raise TypeError( @@ -268,7 +301,7 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): ) else: objs = [obj for obj in objs if obj is not None] - keys = None + keys_objs = None if not objs: raise ValueError("All objects passed were None") @@ -304,7 +337,7 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): result = obj.to_frame() else: result = obj.copy(deep=True) - result.columns = pd.RangeIndex(len(result._data)) + result.columns = cudf.RangeIndex(len(result._data)) else: result = type(obj)._from_data( data=obj._data.copy(deep=True), @@ -317,9 +350,9 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): result = obj.to_frame() else: result = obj.copy(deep=True) - if keys is not None and isinstance(result, cudf.DataFrame): - k = keys[0] - result.columns = cudf.MultiIndex.from_tuples( + if keys_objs is not None and isinstance(result, cudf.DataFrame): + k = keys_objs[0] + result.columns = pd.MultiIndex.from_tuples( [ (k, *c) if isinstance(c, tuple) else (k, c) for c in result._column_names @@ -338,7 +371,6 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): raise TypeError( "Can only concatenate Series and DataFrame objects when axis=1" ) - df = cudf.DataFrame() _normalize_series_and_dataframe(objs, axis=axis) any_empty = any(obj.empty for obj in objs) @@ -362,18 +394,23 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): objs = [obj for obj in objs if obj.shape != (0, 0)] if len(objs) == 0: - return df + # TODO: https://github.com/rapidsai/cudf/issues/16550 + return cudf.DataFrame() # Don't need to align indices of all `objs` since we # would anyway return an empty dataframe below if not empty_inner: objs = _align_objs(objs, how=join, sort=sort) - df.index = objs[0].index + result_index = objs[0].index + else: + result_index = None - if keys is None: + result_data = {} + result_columns = None + if keys_objs is None: for o in objs: for name, col in o._data.items(): - if name in df._data: + if name in result_data: raise NotImplementedError( f"A Column with duplicate name found: {name}, cuDF " f"doesn't support having multiple columns with " @@ -383,11 +420,11 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): # if join is inner and it contains an empty df # we return an empty df, hence creating an empty # column with dtype metadata retained. - df[name] = cudf.core.column.column_empty_like( + result_data[name] = cudf.core.column.column_empty_like( col, newsize=0 ) else: - df[name] = col + result_data[name] = col result_columns = ( objs[0] @@ -408,9 +445,9 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): "label types in cuDF at this time. You must convert " "the labels to the same type." 
) - for k, o in zip(keys, objs): + for k, o in zip(keys_objs, objs): for name, col in o._data.items(): - # if only series, then only keep keys as column labels + # if only series, then only keep keys_objs as column labels # if the existing column is multiindex, prepend it # to handle cases where dfs and srs are concatenated if only_series: @@ -420,21 +457,21 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): else: col_label = (k, name) if empty_inner: - df[col_label] = cudf.core.column.column_empty_like( - col, newsize=0 + result_data[col_label] = ( + cudf.core.column.column_empty_like(col, newsize=0) ) else: - df[col_label] = col + result_data[col_label] = col - if keys is None: - df.columns = result_columns.unique() - if ignore_index: - df.columns = cudf.RangeIndex(len(result_columns.unique())) - elif ignore_index: - # with ignore_index the column names change to numbers - df.columns = cudf.RangeIndex(len(result_columns)) + df = cudf.DataFrame._from_data( + ColumnAccessor(result_data, verify=False), index=result_index + ) + if ignore_index: + df.columns = cudf.RangeIndex(df._num_columns) + elif result_columns is not None: + df.columns = result_columns elif not only_series: - df.columns = cudf.MultiIndex.from_tuples(df._column_names) + df.columns = pd.MultiIndex.from_tuples(df._column_names) if empty_inner: # if join is inner and it contains an empty df @@ -455,11 +492,12 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): if len(objs) == 0: # If objs is empty, that indicates all of # objs are empty dataframes. + # TODO: https://github.com/rapidsai/cudf/issues/16550 return cudf.DataFrame() elif len(objs) == 1: obj = objs[0] result = cudf.DataFrame._from_data( - data=None if join == "inner" else obj._data.copy(deep=True), + data={} if join == "inner" else obj._data.copy(deep=True), index=cudf.RangeIndex(len(obj)) if ignore_index else obj.index.copy(deep=True), @@ -484,13 +522,11 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): if len(new_objs) == 1 and not ignore_index: return new_objs[0] else: - return cudf.Series._concat( - objs, axis=axis, index=None if ignore_index else True - ) + return cudf.Series._concat(objs, axis=axis, index=not ignore_index) elif typ is cudf.MultiIndex: return cudf.MultiIndex._concat(objs) elif issubclass(typ, cudf.Index): - return cudf.core.index.Index._concat(objs) + return cudf.Index._concat(objs) else: raise TypeError(f"cannot concatenate object of type {typ}") @@ -603,18 +639,19 @@ def melt( value_vars = [c for c in frame._column_names if c not in unique_id] # Error for unimplemented support for datatype - dtypes = [frame[col].dtype for col in id_vars + value_vars] - if any(isinstance(typ, cudf.CategoricalDtype) for typ in dtypes): + if any( + isinstance(frame[col].dtype, cudf.CategoricalDtype) + for col in id_vars + value_vars + ): raise NotImplementedError( "Categorical columns are not yet supported for function" ) # Check dtype homogeneity in value_var # Because heterogeneous concat is unimplemented - dtypes = [frame[col].dtype for col in value_vars] - if len(dtypes) > 0: - dtype = dtypes[0] - if any(t != dtype for t in dtypes): + if len(value_vars) > 1: + dtype = frame[value_vars[0]].dtype + if any(frame[col].dtype != dtype for col in value_vars): raise ValueError("all cols in value_vars must have the same dtype") # overlap @@ -632,7 +669,7 @@ def melt( def _tile(A, reps): series_list = [A] * reps if reps > 0: - return cudf.Series._concat(objs=series_list, index=None) + return 
cudf.Series._concat(objs=series_list, index=False) else: return cudf.Series([], dtype=A.dtype) @@ -661,14 +698,14 @@ def _tile(A, reps): # Step 3: add values mdata[value_name] = cudf.Series._concat( - objs=[frame[val] for val in value_vars], index=None + objs=[frame[val] for val in value_vars], index=False ) return cudf.DataFrame(mdata) def get_dummies( - df, + data, prefix=None, prefix_sep="_", dummy_na=False, @@ -683,7 +720,7 @@ def get_dummies( Parameters ---------- - df : array-like, Series, or DataFrame + data : array-like, Series, or DataFrame Data of which to get dummy indicators. prefix : str, dict, or sequence, optional Prefix to append. Either a str (to apply a constant prefix), dict @@ -761,17 +798,22 @@ def get_dummies( if cats is None: cats = {} + else: + warnings.warn( + "cats is deprecated and will be removed in a future version.", + FutureWarning, + ) if sparse: raise NotImplementedError("sparse is not supported yet") if drop_first: raise NotImplementedError("drop_first is not supported yet") - if isinstance(df, cudf.DataFrame): + if isinstance(data, cudf.DataFrame): encode_fallback_dtypes = ["object", "category"] if columns is None or len(columns) == 0: - columns = df.select_dtypes( + columns = data.select_dtypes( include=encode_fallback_dtypes )._column_names @@ -798,33 +840,33 @@ def get_dummies( # If we have no columns to encode, we need to drop # fallback columns(if any) if len(columns) == 0: - return df.select_dtypes(exclude=encode_fallback_dtypes) + return data.select_dtypes(exclude=encode_fallback_dtypes) else: result_data = { col_name: col - for col_name, col in df._data.items() + for col_name, col in data._data.items() if col_name not in columns } for name in columns: if name not in cats: unique = _get_unique( - column=df._data[name], dummy_na=dummy_na + column=data._data[name], dummy_na=dummy_na ) else: unique = as_column(cats[name]) col_enc_data = _one_hot_encode_column( - column=df._data[name], + column=data._data[name], categories=unique, prefix=prefix_map.get(name, prefix), prefix_sep=prefix_sep_map.get(name, prefix_sep), dtype=dtype, ) result_data.update(col_enc_data) - return cudf.DataFrame._from_data(result_data, index=df.index) + return cudf.DataFrame._from_data(result_data, index=data.index) else: - ser = cudf.Series(df) + ser = cudf.Series(data) unique = _get_unique(column=ser._column, dummy_na=dummy_na) data = _one_hot_encode_column( column=ser._column, @@ -935,37 +977,39 @@ def _pivot(df, index, columns): index_labels, index_idx = index._encode() column_labels = columns_labels.to_pandas().to_flat_index() - def as_tuple(x): - return x if isinstance(x, tuple) else (x,) - result = {} - for v in df: - names = [as_tuple(v) + as_tuple(name) for name in column_labels] + if len(index_labels) != 0 and len(columns_labels) != 0: + + def as_tuple(x): + return x if isinstance(x, tuple) else (x,) + nrows = len(index_labels) - ncols = len(names) - num_elements = nrows * ncols - if num_elements > 0: - col = df._data[v] + for col_label, col in df._data.items(): + names = [ + as_tuple(col_label) + as_tuple(name) for name in column_labels + ] + new_size = nrows * len(names) scatter_map = (columns_idx * np.int32(nrows)) + index_idx - target = cudf.DataFrame._from_data( - { - None: cudf.core.column.column_empty_like( - col, masked=True, newsize=nrows * ncols - ) - } + target_col = cudf.core.column.column_empty_like( + col, masked=True, newsize=new_size ) - target._data[None][scatter_map] = col - result_frames = target._split(range(nrows, nrows * ncols, nrows)) + 
target_col[scatter_map] = col + target = cudf.Index._from_column(target_col) result.update( { - name: next(iter(f._columns)) - for name, f in zip(names, result_frames) + name: idx._column + for name, idx in zip( + names, target._split(range(nrows, new_size, nrows)) + ) } ) # the result of pivot always has a multicolumn - ca = cudf.core.column_accessor.ColumnAccessor( - result, multiindex=True, level_names=(None,) + columns._data.names + ca = ColumnAccessor( + result, + multiindex=True, + level_names=(None,) + columns._data.names, + verify=False, ) return cudf.DataFrame._from_data( ca, index=cudf.Index(index_labels, name=index.name) @@ -1036,19 +1080,20 @@ def pivot(data, columns=None, index=no_default, values=no_default): if index is no_default: index = df.index else: - index = cudf.core.index.Index(df.loc[:, index]) + index = cudf.Index(df.loc[:, index]) columns = cudf.Index(df.loc[:, columns]) # Create a DataFrame composed of columns from both # columns and index - columns_index = {} - columns_index = { - i: col - for i, col in enumerate( - itertools.chain(index._data.columns, columns._data.columns) - ) - } - columns_index = cudf.DataFrame(columns_index) + ca = ColumnAccessor( + dict( + enumerate( + itertools.chain(index._data.columns, columns._data.columns) + ) + ), + verify=False, + ) + columns_index = cudf.DataFrame._from_data(ca) # Check that each row is unique: if len(columns_index) != len(columns_index.drop_duplicates()): @@ -1191,13 +1236,13 @@ def unstack(df, level, fill_value=None, sort: bool = True): return result -def _get_unique(column, dummy_na): +def _get_unique(column: ColumnBase, dummy_na: bool) -> ColumnBase: """ Returns unique values in a column, if dummy_na is False, nan's are also dropped. """ - if isinstance(column, cudf.core.column.CategoricalColumn): - unique = column.categories + if isinstance(column.dtype, cudf.CategoricalDtype): + unique = column.categories # type: ignore[attr-defined] else: unique = column.unique().sort_values() if not dummy_na: @@ -1217,11 +1262,11 @@ def _one_hot_encode_column( `prefix`, separated with category name with `prefix_sep`. The encoding columns may be coerced into `dtype`.
""" - if isinstance(column, CategoricalColumn): + if isinstance(column.dtype, cudf.CategoricalDtype): if column.size == column.null_count: column = column_empty_like(categories, newsize=column.size) else: - column = column._get_decategorized_column() + column = column._get_decategorized_column() # type: ignore[attr-defined] if column.size * categories.size >= np.iinfo(size_type_dtype).max: raise ValueError( @@ -1502,7 +1547,7 @@ def pivot_table( table_columns = tuple( map(lambda column: column[1:], table._data.names) ) - table.columns = cudf.MultiIndex.from_tuples( + table.columns = pd.MultiIndex.from_tuples( tuples=table_columns, names=column_names ) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 929af5cd981..48445f018d3 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -38,7 +38,9 @@ as_column, ) from cudf.core.column.categorical import ( + _DEFAULT_CATEGORICAL_VALUE, CategoricalAccessor as CategoricalAccessor, + CategoricalColumn, ) from cudf.core.column.column import concat_columns from cudf.core.column.lists import ListMethods @@ -69,6 +71,8 @@ from cudf.utils.performance_tracking import _performance_tracking if TYPE_CHECKING: + import pyarrow as pa + from cudf._typing import ( ColumnLike, DataFrameOrSeries, @@ -294,8 +298,8 @@ def __getitem__(self, arg: Any) -> ScalarLike | DataFrameOrSeries: return result try: arg = self._loc_to_iloc(arg) - except (TypeError, KeyError, IndexError, ValueError): - raise KeyError(arg) + except (TypeError, KeyError, IndexError, ValueError) as err: + raise KeyError(arg) from err return self._frame.iloc[arg] @@ -373,7 +377,10 @@ def _loc_to_iloc(self, arg): warnings.warn(warn_msg, FutureWarning) return arg try: - indices = self._frame.index._indices_of(arg) + if isinstance(self._frame.index, RangeIndex): + indices = self._frame.index._indices_of(arg) + else: + indices = self._frame.index._column.indices_of(arg) if (n := len(indices)) == 0: raise KeyError("Label scalar is out of bounds") elif n == 1: @@ -394,8 +401,10 @@ def _loc_to_iloc(self, arg): return _indices_from_labels(self._frame, arg) else: - arg = cudf.core.series.Series(cudf.core.column.as_column(arg)) - if arg.dtype in (bool, np.bool_): + arg = cudf.core.series.Series._from_column( + cudf.core.column.as_column(arg) + ) + if arg.dtype.kind == "b": return arg else: indices = _indices_from_labels(self._frame, arg) @@ -507,10 +516,53 @@ def from_categorical(cls, categorical, codes=None): dtype: category Categories (3, object): ['a', 'b', 'c'] """ # noqa: E501 - col = cudf.core.column.categorical.pandas_categorical_as_column( - categorical, codes=codes - ) - return Series(data=col) + col = as_column(categorical) + if codes is not None: + codes = as_column(codes) + + valid_codes = codes != codes.dtype.type(_DEFAULT_CATEGORICAL_VALUE) + + mask = None + if not valid_codes.all(): + mask = libcudf.transform.bools_to_mask(valid_codes) + col = CategoricalColumn( + data=col.data, + size=codes.size, + dtype=col.dtype, + mask=mask, + children=(codes,), + ) + return Series._from_column(col) + + @classmethod + @_performance_tracking + def from_arrow(cls, array: pa.Array) -> Self: + """Create from PyArrow Array/ChunkedArray. + + Parameters + ---------- + array : PyArrow Array/ChunkedArray + PyArrow Object which has to be converted. + + Raises + ------ + TypeError for invalid input type. 
+ + Returns + ------- + SingleColumnFrame + + Examples + -------- + >>> import cudf + >>> import pyarrow as pa + >>> cudf.Series.from_arrow(pa.array(["a", "b", None])) + 0 a + 1 b + 2 + dtype: object + """ + return cls._from_column(ColumnBase.from_arrow(array)) @classmethod @_performance_tracking @@ -559,8 +611,7 @@ def from_masked_array(cls, data, mask, null_count=None): 4 14 dtype: int64 """ - col = as_column(data).set_mask(mask) - return cls(data=col) + return cls._from_column(as_column(data).set_mask(mask)) @_performance_tracking def __init__( @@ -586,10 +637,10 @@ def __init__( column = as_column(data, nan_as_null=nan_as_null, dtype=dtype) if isinstance(data, (pd.Series, Series)): index_from_data = ensure_index(data.index) - elif isinstance(data, ColumnAccessor): + elif isinstance(data, (ColumnAccessor, ColumnBase)): raise TypeError( "Use cudf.Series._from_data for constructing a Series from " - "ColumnAccessor" + "ColumnAccessor or a ColumnBase" ) elif isinstance(data, dict): if not data: @@ -656,6 +707,18 @@ def __init__( self._index = second_index self._check_data_index_length_match() + @classmethod + @_performance_tracking + def _from_column( + cls, + column: ColumnBase, + *, + name: abc.Hashable = None, + index: BaseIndex | None = None, + ) -> Self: + ca = ColumnAccessor({name: column}, verify=False) + return cls._from_data(ca, index=index) + @classmethod @_performance_tracking def _from_data( @@ -754,14 +817,17 @@ def dt(self): >>> s.dt.hour 0 12 1 13 + 2 14 dtype: int16 >>> s.dt.second 0 0 1 0 + 2 0 dtype: int16 >>> s.dt.day 0 3 1 3 + 2 3 dtype: int16 Returns @@ -1082,7 +1148,7 @@ def reset_index( if name is no_default: name = 0 if self.name is None else self.name data[name] = data.pop(self.name) - return cudf.core.dataframe.DataFrame._from_data(data, index) + return self._constructor_expanddim._from_data(data, index) # For ``name`` behavior, see: # https://github.com/pandas-dev/pandas/issues/44575 # ``name`` has to be ignored when `drop=True` @@ -1092,7 +1158,7 @@ def reset_index( ) @_performance_tracking - def to_frame(self, name=None): + def to_frame(self, name: abc.Hashable = no_default) -> cudf.DataFrame: """Convert Series into a DataFrame Parameters @@ -1124,15 +1190,7 @@ def to_frame(self, name=None): 13 15 d """ # noqa: E501 - - if name is not None: - col = name - elif self.name is None: - col = 0 - else: - col = self.name - - return cudf.DataFrame({col: self._column}, index=self.index) + return self._to_frame(name=name, index=self.index) @_performance_tracking def memory_usage(self, index=True, deep=False): @@ -1535,17 +1593,21 @@ def dtype(self): @classmethod @_performance_tracking - def _concat(cls, objs, axis=0, index=True): + def _concat(cls, objs, axis=0, index: bool = True): # Concatenate index if not provided if index is True: if isinstance(objs[0].index, cudf.MultiIndex): - index = cudf.MultiIndex._concat([o.index for o in objs]) + result_index = cudf.MultiIndex._concat([o.index for o in objs]) else: with warnings.catch_warnings(): warnings.simplefilter("ignore", FutureWarning) - index = cudf.core.index.Index._concat( + result_index = cudf.core.index.Index._concat( [o.index for o in objs] ) + elif index is False: + result_index = None + else: + raise ValueError(f"{index=} must be a bool") names = {obj.name for obj in objs} if len(names) == 1: @@ -1597,7 +1659,7 @@ def _concat(cls, objs, axis=0, index=True): if len(objs): col = col._with_type_metadata(objs[0].dtype) - return cls(data=col, index=index, name=name) + return cls._from_column(col, name=name, 
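# Internal-API sketch: Series._from_column wraps a single column while
# skipping the validation ColumnAccessor normally performs, replacing the
# old Series(data=col) pattern (private API, subject to change).
import cudf
from cudf.core.column import as_column
col = as_column([1, 2, 3])
s = cudf.Series._from_column(col, name="x")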
index=result_index) @property # type: ignore @_performance_tracking @@ -1911,7 +1973,9 @@ def between(self, left, right, inclusive="both") -> Series: "Inclusive has to be either string of 'both', " "'left', 'right', or 'neither'." ) - return self._from_data({self.name: lmask & rmask}, self.index) + return self._from_column( + lmask & rmask, name=self.name, index=self.index + ) @_performance_tracking def all(self, axis=0, bool_only=None, skipna=True, **kwargs): @@ -2709,8 +2773,8 @@ def mode(self, dropna=True): if len(val_counts) > 0: val_counts = val_counts[val_counts == val_counts.iloc[0]] - return Series._from_data( - {self.name: val_counts.index.sort_values()._column}, name=self.name + return Series._from_column( + val_counts.index.sort_values()._column, name=self.name ) @_performance_tracking @@ -2999,8 +3063,8 @@ def isin(self, values): f"to isin(), you passed a [{type(values).__name__}]" ) - return Series._from_data( - {self.name: self._column.isin(values)}, index=self.index + return Series._from_column( + self._column.isin(values), name=self.name, index=self.index ) @_performance_tracking @@ -3036,7 +3100,7 @@ def unique(self): res = self._column.unique() if cudf.get_option("mode.pandas_compatible"): return res.values - return Series(res, name=self.name) + return Series._from_column(res, name=self.name) @_performance_tracking def value_counts( @@ -3189,8 +3253,8 @@ def value_counts( interval_col = IntervalColumn.from_struct_column( res.index._column._get_decategorized_column() ) - res.index = cudf.IntervalIndex._from_data( - {res.index.name: interval_col} + res.index = cudf.IntervalIndex._from_column( + interval_col, name=res.index.name ) res.name = result_name return res @@ -3268,8 +3332,9 @@ def quantile( if return_scalar: return result - return Series._from_data( - data={self.name: result}, + return Series._from_column( + result, + name=self.name, index=cudf.Index(np_array_q) if quant_index else None, ) @@ -3351,8 +3416,9 @@ def digitize(self, bins, right=False): 3 2 dtype: int32 """ - return Series( - cudf.core.column.numerical.digitize(self._column, bins, right) + return Series._from_column( + cudf.core.column.numerical.digitize(self._column, bins, right), + name=self.name, ) @_performance_tracking @@ -3531,6 +3597,10 @@ def rename( raise NotImplementedError("level is currently not supported.") if errors != "ignore": raise NotImplementedError("errors is currently not supported.") + if not is_scalar(index): + raise NotImplementedError( + ".rename does not currently support relabeling the index." 
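# Behavior sketch for the new rename guard: only scalar (or None) values are
# accepted; a mapping that would relabel the index now raises.
import cudf
s = cudf.Series([1, 2], name="a")
s.rename("b")        # OK: sets the Series name
# s.rename({0: 10})  # raises NotImplementedError (index relabeling)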
+ ) out_data = self._data.copy(deep=copy) return Series._from_data(out_data, self.index, name=index) @@ -5293,10 +5363,10 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): elif b_col.null_count: null_values = b_col.isnull() else: - return Series(result_col, index=index) + return Series._from_column(result_col, index=index) result_col[null_values] = False if equal_nan is True and a_col.null_count and b_col.null_count: result_col[equal_nulls] = True - return Series(result_col, index=index) + return Series._from_column(result_col, index=index) diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index a5ff1223791..0e66f383ca0 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -15,11 +15,14 @@ is_numeric_dtype, ) from cudf.core.column import ColumnBase, as_column +from cudf.core.column_accessor import ColumnAccessor from cudf.core.frame import Frame from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import NotIterable if TYPE_CHECKING: + from collections.abc import Hashable + import cupy import numpy import pyarrow as pa @@ -112,35 +115,17 @@ def values_host(self) -> numpy.ndarray: # noqa: D102 @classmethod @_performance_tracking - def from_arrow(cls, array) -> Self: - """Create from PyArrow Array/ChunkedArray. - - Parameters - ---------- - array : PyArrow Array/ChunkedArray - PyArrow Object which has to be converted. + def _from_column( + cls, column: ColumnBase, *, name: Hashable = None + ) -> Self: + """Constructor for a single Column.""" + ca = ColumnAccessor({name: column}, verify=False) + return cls._from_data(ca) - Raises - ------ - TypeError for invalid input type. - - Returns - ------- - SingleColumnFrame - - Examples - -------- - >>> import cudf - >>> import pyarrow as pa - >>> cudf.Index.from_arrow(pa.array(["a", "b", None])) - Index(['a', 'b', None], dtype='object') - >>> cudf.Series.from_arrow(pa.array(["a", "b", None])) - 0 a - 1 b - 2 - dtype: object - """ - return cls(ColumnBase.from_arrow(array)) + @classmethod + @_performance_tracking + def from_arrow(cls, array) -> Self: + raise NotImplementedError @_performance_tracking def to_arrow(self) -> pa.Array: @@ -173,6 +158,17 @@ def to_arrow(self) -> pa.Array: """ return self._column.to_arrow() + def _to_frame( + self, name: Hashable, index: cudf.Index | None + ) -> cudf.DataFrame: + """Helper function for Series.to_frame, Index.to_frame""" + if name is no_default: + col_name = 0 if self.name is None else self.name + else: + col_name = name + ca = ColumnAccessor({col_name: self._column}, verify=False) + return cudf.DataFrame._from_data(ca, index=index) + @property # type: ignore @_performance_tracking def is_unique(self) -> bool: @@ -365,7 +361,6 @@ def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase: def where(self, cond, other=None, inplace=False): from cudf.core._internals.where import ( _check_and_cast_columns_with_other, - _make_categorical_like, ) if isinstance(other, cudf.DataFrame): @@ -381,14 +376,12 @@ def where(self, cond, other=None, inplace=False): if not cudf.api.types.is_scalar(other): other = cudf.core.column.as_column(other) - self_column = self._column input_col, other = _check_and_cast_columns_with_other( - source_col=self_column, other=other, inplace=inplace + source_col=self._column, other=other, inplace=inplace ) result = cudf._lib.copying.copy_if_else(input_col, other, cond) - - return _make_categorical_like(result, self_column) + 
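# Name-resolution sketch for the shared _to_frame helper: with the
# no_default sentinel an unnamed Series becomes column 0, otherwise the
# existing name (or an explicit one) is used.
import cudf
cudf.Series([1, 2]).to_frame().columns                    # [0]
cudf.Series([1, 2], name="x").to_frame().columns          # ["x"]
cudf.Series([1, 2], name="x").to_frame(name="y").columns  # ["y"]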
return result._with_type_metadata(self.dtype) @_performance_tracking def transpose(self): diff --git a/python/cudf/cudf/core/tokenize_vocabulary.py b/python/cudf/cudf/core/tokenize_vocabulary.py index afb3496311b..99d85c0c5c0 100644 --- a/python/cudf/cudf/core/tokenize_vocabulary.py +++ b/python/cudf/cudf/core/tokenize_vocabulary.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. from __future__ import annotations @@ -22,7 +22,9 @@ class TokenizeVocabulary: def __init__(self, vocabulary: "cudf.Series"): self.vocabulary = cpp_tokenize_vocabulary(vocabulary._column) - def tokenize(self, text, delimiter: str = "", default_id: int = -1): + def tokenize( + self, text, delimiter: str = "", default_id: int = -1 + ) -> cudf.Series: """ Parameters ---------- @@ -45,4 +47,4 @@ def tokenize(self, text, delimiter: str = "", default_id: int = -1): text._column, self.vocabulary, delim, default_id ) - return cudf.Series(result) + return cudf.Series._from_column(result) diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index c6e2b5d10e1..7197560b5a4 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -18,6 +18,7 @@ ) from cudf.api.types import is_integer, is_scalar from cudf.core import column +from cudf.core.index import ensure_index # https://github.com/pandas-dev/pandas/blob/2.2.x/pandas/core/tools/datetimes.py#L1112 _unit_map = { @@ -275,7 +276,7 @@ def to_datetime( format=format, utc=utc, ) - return cudf.Series(col, index=arg.index) + return cudf.Series._from_column(col, index=arg.index) else: col = _process_col( col=column.as_column(arg), @@ -286,13 +287,15 @@ def to_datetime( utc=utc, ) if isinstance(arg, (cudf.BaseIndex, pd.Index)): - return cudf.Index(col, name=arg.name) + return cudf.DatetimeIndex._from_column(col, name=arg.name) elif isinstance(arg, (cudf.Series, pd.Series)): - return cudf.Series(col, index=arg.index, name=arg.name) + return cudf.Series._from_column( + col, name=arg.name, index=ensure_index(arg.index) + ) elif is_scalar(arg): return col.element_indexing(0) else: - return cudf.Index(col) + return cudf.Index._from_column(col) except Exception as e: if errors == "raise": raise e @@ -782,7 +785,7 @@ def date_range( tz=None, normalize: bool = False, name=None, - closed: Literal["left", "right", "both", "neither"] = "both", + inclusive: Literal["left", "right", "both", "neither"] = "both", *, unit: str | None = None, ): @@ -820,7 +823,7 @@ def date_range( name : str, default None Name of the resulting DatetimeIndex - closed : {"left", "right", "both", "neither"}, default "both" + inclusive : {"left", "right", "both", "neither"}, default "both" Whether to set each bound as closed or open. Currently only "both" is supported @@ -836,7 +839,7 @@ def date_range( ----- Of the four parameters `start`, `end`, `periods`, and `freq`, exactly three must be specified. If `freq` is omitted, the resulting DatetimeIndex will - have periods linearly spaced elements between start and end (closed on both + have periods linearly spaced elements between start and end (inclusive on both sides). 
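# The keyword now follows the pandas >= 1.4 spelling; anything other than
# inclusive="both" still raises NotImplementedError:
import cudf
cudf.date_range(start="2001-01-01", periods=3, freq="D", inclusive="both")
# DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'], dtype='datetime64[ns]')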
cudf supports `freq` specified with either fixed-frequency offset @@ -863,8 +866,8 @@ def date_range( '2026-04-23 08:00:00'], dtype='datetime64[ns]') """ - if closed != "both": - raise NotImplementedError(f"{closed=} is currently unsupported.") + if inclusive != "both": + raise NotImplementedError(f"{inclusive=} is currently unsupported.") if unit is not None: raise NotImplementedError(f"{unit=} is currently unsupported.") if normalize is not False: @@ -895,7 +898,9 @@ def date_range( end = cudf.Scalar(end, dtype=dtype).value.astype("int64") arr = np.linspace(start=start, stop=end, num=periods) result = cudf.core.column.as_column(arr).astype("datetime64[ns]") - return cudf.DatetimeIndex._from_data({name: result}).tz_localize(tz) + return cudf.DatetimeIndex._from_column(result, name=name).tz_localize( + tz + ) # The code logic below assumes `freq` is defined. It is first normalized # into `DateOffset` for further computation with timestamps. @@ -946,7 +951,7 @@ def date_range( end = cudf.Scalar(end, dtype=dtype) _is_increment_sequence = end >= start - periods = math.ceil( + periods = math.floor( int(end - start) / _offset_to_nanoseconds_lower_bound(offset) ) @@ -954,9 +959,10 @@ def date_range( # Mismatched sign between (end-start) and offset, return empty # column periods = 0 - elif periods == 0: - # end == start, return exactly 1 timestamp (start) - periods = 1 + else: + # If end == start, periods == 0 and we return exactly 1 timestamp (start). + # Otherwise, since inclusive="both", we ensure the end point is included. + periods += 1 # We compute `end_estim` (the estimated upper bound of the date # range) below, but don't always use it. We do this to ensure @@ -995,9 +1001,9 @@ def date_range( "datetime64[ns]" ) - return cudf.DatetimeIndex._from_data({name: res}, freq=freq).tz_localize( - tz - ) + return cudf.DatetimeIndex._from_column( + res, name=name, freq=freq + ).tz_localize(tz) def _has_fixed_frequency(freq: DateOffset) -> bool: diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index 07158e4ee61..6cecf3fa170 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -1,6 +1,8 @@ # Copyright (c) 2018-2024, NVIDIA CORPORATION. +from __future__ import annotations import warnings +from typing import TYPE_CHECKING import numpy as np import pandas as pd @@ -11,10 +13,14 @@ from cudf.api.types import _is_non_decimal_numeric_dtype, is_string_dtype from cudf.core.column import as_column from cudf.core.dtypes import CategoricalDtype +from cudf.core.index import ensure_index from cudf.utils.dtypes import can_convert_to_column +if TYPE_CHECKING: + from cudf.core.column import ColumnBase -def to_numeric(arg, errors="raise", downcast=None): + +def to_numeric(arg, errors="raise", downcast=None, dtype_backend=None): """ Convert argument into numerical types. @@ -42,6 +48,8 @@ def to_numeric(arg, errors="raise", downcast=None): Note that downcast behavior is decoupled from parsing. Errors encountered during downcast is raised regardless of ``errors`` parameter. + dtype_backend : None + Not implemented. Returns ------- @@ -87,7 +95,10 @@ def to_numeric(arg, errors="raise", downcast=None): For example ``[1, 'a']``. A ``TypeError`` will be raised when such input is received, regardless of ``errors`` parameter. """ - + if dtype_backend is not None: + raise NotImplementedError( + "dtype_backend is not currently implemented." 
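# Worked example of the ceil -> floor(+1) change for a fixed frequency
# (start=2020-01-01, end=2020-01-10, freq=3 days, so end - start = 9 days):
#   old: periods = ceil(9 / 3) = 3      -> 01-01, 01-04, 01-07 (end dropped)
#   new: periods = floor(9 / 3) + 1 = 4 -> 01-01, 01-04, 01-07, 01-10
# Because only inclusive="both" is supported, the end point must be emitted
# whenever it falls exactly on the frequency grid.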
+ ) if errors not in {"raise", "ignore", "coerce"}: raise ValueError("invalid error value specified") elif errors == "ignore": @@ -164,7 +175,9 @@ def to_numeric(arg, errors="raise", downcast=None): break if isinstance(arg, (cudf.Series, pd.Series)): - return cudf.Series(col, index=arg.index, name=arg.name) + return cudf.Series._from_column( + col, name=arg.name, index=ensure_index(arg.index) + ) else: if col.has_nulls(): # To match pandas, always return a floating type filled with nan. @@ -226,25 +239,10 @@ def _convert_str_col(col, errors, _downcast=None): raise ValueError("Unable to convert some strings to numerics.") -def _proc_inf_empty_strings(col): +def _proc_inf_empty_strings(col: ColumnBase) -> ColumnBase: """Handles empty and infinity strings""" col = libstrings.to_lower(col) - col = _proc_empty_strings(col) - col = _proc_inf_strings(col) - return col - - -def _proc_empty_strings(col): - """Replaces empty strings with NaN""" - s = cudf.Series(col) - s = s.where(s != "", "NaN") - return s._column - - -def _proc_inf_strings(col): - """Convert "inf/infinity" strings into "Inf", the native string - representing infinity in libcudf - """ + col = col.find_and_replace(as_column([""]), as_column(["NaN"])) # TODO: This can be handled by libcudf in # future see StringColumn.as_numerical_column col = libstrings.replace_multi( diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index d616761cb3b..6d7362952c9 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -3,7 +3,7 @@ import functools import os -from typing import Any, Callable +from typing import TYPE_CHECKING, Any import cachetools import cupy as cp @@ -41,6 +41,9 @@ from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import initfunc +if TYPE_CHECKING: + from collections.abc import Callable + # Maximum size of a string column is 2 GiB _STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get("STRINGS_UDF_HEAP_SIZE", 2**31) _heap_size = 0 diff --git a/python/cudf/cudf/datasets.py b/python/cudf/cudf/datasets.py index 7b183d5f1a3..dbabaacf6b5 100644 --- a/python/cudf/cudf/datasets.py +++ b/python/cudf/cudf/datasets.py @@ -5,7 +5,6 @@ import cudf from cudf._lib.transform import bools_to_mask -from cudf.core.column_accessor import ColumnAccessor __all__ = ["timeseries", "randomdata"] @@ -73,9 +72,7 @@ def timeseries( ) mask_buf = bools_to_mask(cudf.core.column.as_column(mask)) masked_col = gdf[col]._column.set_mask(mask_buf) - gdf[col] = cudf.Series._from_data( - ColumnAccessor({None: masked_col}), index=gdf.index - ) + gdf[col] = cudf.Series._from_column(masked_col, index=gdf.index) return gdf diff --git a/python/cudf/cudf/io/avro.py b/python/cudf/cudf/io/avro.py index 728b34045bf..964bd02b03e 100644 --- a/python/cudf/cudf/io/avro.py +++ b/python/cudf/cudf/io/avro.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. 
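# Column-level sketch of the consolidated helper (private column API):
# empty strings are rewritten to "NaN" in a single find_and_replace call
# instead of a Series round-trip through `where`.
from cudf.core.column import as_column
col = as_column(["", "inf", "3.5"])
col = col.find_and_replace(as_column([""]), as_column(["NaN"]))
# col now holds ["NaN", "inf", "3.5"]; "inf"/"infinity" are normalized separately.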
import cudf from cudf import _lib as libcudf @@ -15,22 +15,13 @@ def read_avro( ): """{docstring}""" - is_single_filepath_or_buffer = ioutils.ensure_single_filepath_or_buffer( + filepath_or_buffer = ioutils.get_reader_filepath_or_buffer( path_or_data=filepath_or_buffer, storage_options=storage_options, ) - if not is_single_filepath_or_buffer: - raise NotImplementedError( - "`read_avro` does not yet support reading multiple files" - ) - - filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer( - path_or_data=filepath_or_buffer, - compression=None, - storage_options=storage_options, + filepath_or_buffer = ioutils._select_single_source( + filepath_or_buffer, "read_avro" ) - if compression is not None: - ValueError("URL content-encoding decompression is not supported") return cudf.DataFrame._from_data( *libcudf.avro.read_avro( diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index 0f2820a01e9..a9c20150930 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -5,7 +5,6 @@ from io import BytesIO, StringIO import numpy as np -from pyarrow.lib import NativeFile import cudf from cudf import _lib as libcudf @@ -50,7 +49,6 @@ def read_csv( comment=None, delim_whitespace=False, byte_range=None, - use_python_file_object=None, storage_options=None, bytes_per_thread=None, ): @@ -63,32 +61,18 @@ def read_csv( FutureWarning, ) - if use_python_file_object and bytes_per_thread is not None: - raise ValueError( - "bytes_per_thread is only supported when " - "`use_python_file_object=False`" - ) - if bytes_per_thread is None: bytes_per_thread = ioutils._BYTES_PER_THREAD_DEFAULT - is_single_filepath_or_buffer = ioutils.ensure_single_filepath_or_buffer( + filepath_or_buffer = ioutils.get_reader_filepath_or_buffer( path_or_data=filepath_or_buffer, - storage_options=storage_options, - ) - if not is_single_filepath_or_buffer: - raise NotImplementedError( - "`read_csv` does not yet support reading multiple files" - ) - - filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer( - path_or_data=filepath_or_buffer, - compression=compression, - iotypes=(BytesIO, StringIO, NativeFile), - use_python_file_object=use_python_file_object, + iotypes=(BytesIO, StringIO), storage_options=storage_options, bytes_per_thread=bytes_per_thread, ) + filepath_or_buffer = ioutils._select_single_source( + filepath_or_buffer, "read_csv" + ) if na_values is not None and is_scalar(na_values): na_values = [na_values] diff --git a/python/cudf/cudf/io/dlpack.py b/python/cudf/cudf/io/dlpack.py index d3d99aab0cd..1347b2cc38f 100644 --- a/python/cudf/cudf/io/dlpack.py +++ b/python/cudf/cudf/io/dlpack.py @@ -71,7 +71,7 @@ def to_dlpack(cudf_obj): if isinstance(cudf_obj, (cudf.DataFrame, cudf.Series, cudf.BaseIndex)): gdf = cudf_obj elif isinstance(cudf_obj, ColumnBase): - gdf = cudf.Series._from_data({None: cudf_obj}) + gdf = cudf.Series._from_column(cudf_obj) else: raise TypeError( f"Input of type {type(cudf_obj)} cannot be converted " diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py index fc3387d5117..d86db656fd0 100644 --- a/python/cudf/cudf/io/json.py +++ b/python/cudf/cudf/io/json.py @@ -9,7 +9,6 @@ import cudf from cudf._lib import json as libjson -from cudf.api.types import is_list_like from cudf.utils import ioutils from cudf.utils.dtypes import _maybe_convert_to_default_type @@ -62,37 +61,15 @@ def read_json( f"following positional arguments: {list(args)}" ) - # Multiple sources are passed as a list. 
If a single source is passed, - # wrap it in a list for unified processing downstream. - if not is_list_like(path_or_buf): - path_or_buf = [path_or_buf] - - filepaths_or_buffers = [] - for source in path_or_buf: - if ioutils.is_directory( - path_or_data=source, storage_options=storage_options - ): - fs = ioutils._ensure_filesystem( - passed_filesystem=None, - path=source, - storage_options=storage_options, - ) - source = ioutils.stringify_pathlike(source) - source = fs.sep.join([source, "*.json"]) - - tmp_source, compression = ioutils.get_reader_filepath_or_buffer( - path_or_data=source, - compression=compression, - iotypes=(BytesIO, StringIO), - allow_raw_text_input=True, - storage_options=storage_options, - warn_on_raw_text_input=True, - warn_meta=("json", "read_json"), - ) - if isinstance(tmp_source, list): - filepaths_or_buffers.extend(tmp_source) - else: - filepaths_or_buffers.append(tmp_source) + filepaths_or_buffers = ioutils.get_reader_filepath_or_buffer( + path_or_buf, + iotypes=(BytesIO, StringIO), + allow_raw_text_input=True, + storage_options=storage_options, + warn_on_raw_text_input=True, + warn_meta=("json", "read_json"), + expand_dir_pattern="*.json", + ) df = libjson.read_json( filepaths_or_buffers=filepaths_or_buffers, @@ -111,25 +88,18 @@ def read_json( "be GPU accelerated in the future" ) - if not ioutils.ensure_single_filepath_or_buffer( - path_or_data=path_or_buf, - storage_options=storage_options, - ): - raise NotImplementedError( - "`read_json` does not yet support reading " - "multiple files via pandas" - ) - - path_or_buf, compression = ioutils.get_reader_filepath_or_buffer( + filepath_or_buffer = ioutils.get_reader_filepath_or_buffer( path_or_data=path_or_buf, - compression=compression, iotypes=(BytesIO, StringIO), allow_raw_text_input=True, storage_options=storage_options, ) + filepath_or_buffer = ioutils._select_single_source( + filepath_or_buffer, "read_json (via pandas)" + ) pd_value = pd.read_json( - path_or_buf, + filepath_or_buffer, lines=lines, dtype=dtype, compression=compression, diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py index 289292b5182..fd246c6215f 100644 --- a/python/cudf/cudf/io/orc.py +++ b/python/cudf/cudf/io/orc.py @@ -4,13 +4,11 @@ import warnings import pyarrow as pa -from fsspec.utils import stringify_path import cudf from cudf._lib import orc as liborc from cudf.api.types import is_list_like from cudf.utils import ioutils -from cudf.utils.utils import maybe_filter_deprecation def _make_empty_df(filepath_or_buffer, columns): @@ -171,8 +169,11 @@ def read_orc_statistics( files_statistics = [] stripes_statistics = [] for source in filepaths_or_buffers: - path_or_buf, _ = ioutils.get_reader_filepath_or_buffer( - path_or_data=source, compression=None, **kwargs + path_or_buf = ioutils.get_reader_filepath_or_buffer( + path_or_data=source, **kwargs + ) + path_or_buf = ioutils._select_single_source( + path_or_buf, "read_orc_statistics" ) ( column_names, @@ -281,7 +282,6 @@ def read_orc( num_rows=None, use_index=True, timestamp_type=None, - use_python_file_object=None, storage_options=None, bytes_per_thread=None, ): @@ -320,37 +320,12 @@ def read_orc( "A list of stripes must be provided for each input source" ) - filepaths_or_buffers = [] - have_nativefile = any( - isinstance(source, pa.NativeFile) for source in filepath_or_buffer + filepaths_or_buffers = ioutils.get_reader_filepath_or_buffer( + path_or_data=filepath_or_buffer, + storage_options=storage_options, + bytes_per_thread=bytes_per_thread, + 
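# Net-effect sketch of the consolidated IO plumbing (file names are
# placeholders): get_reader_filepath_or_buffer now always returns a list of
# sources, and single-source readers narrow it with _select_single_source.
import cudf
cudf.read_json(["part0.json", "part1.json"], lines=True)  # lists handled directly
cudf.read_csv("data.csv")  # a multi-source input here would raise instead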
expand_dir_pattern="*.orc", ) - for source in filepath_or_buffer: - if ioutils.is_directory( - path_or_data=source, storage_options=storage_options - ): - fs = ioutils._ensure_filesystem( - passed_filesystem=None, - path=source, - storage_options=storage_options, - ) - source = stringify_path(source) - source = fs.sep.join([source, "*.orc"]) - - tmp_source, compression = ioutils.get_reader_filepath_or_buffer( - path_or_data=source, - compression=None, - use_python_file_object=use_python_file_object, - storage_options=storage_options, - bytes_per_thread=bytes_per_thread, - ) - if compression is not None: - raise ValueError( - "URL content-encoding decompression is not supported" - ) - if isinstance(tmp_source, list): - filepaths_or_buffers.extend(tmp_source) - else: - filepaths_or_buffers.append(tmp_source) if filters is not None: selected_stripes = _filter_stripes( @@ -364,24 +339,17 @@ def read_orc( stripes = selected_stripes if engine == "cudf": - # Don't want to warn if use_python_file_object causes us to get - # a NativeFile (there is a separate deprecation warning for that) - with maybe_filter_deprecation( - not have_nativefile, - message="Support for reading pyarrow's NativeFile is deprecated", - category=FutureWarning, - ): - return DataFrame._from_data( - *liborc.read_orc( - filepaths_or_buffers, - columns, - stripes, - skiprows, - num_rows, - use_index, - timestamp_type, - ) + return DataFrame._from_data( + *liborc.read_orc( + filepaths_or_buffers, + columns, + stripes, + skiprows, + num_rows, + use_index, + timestamp_type, ) + ) else: from pyarrow import orc diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 7dab2f20100..62be7378e9e 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -10,21 +10,24 @@ from collections import defaultdict from contextlib import ExitStack from functools import partial, reduce -from typing import Callable +from typing import TYPE_CHECKING from uuid import uuid4 import numpy as np import pandas as pd -import pyarrow as pa from pyarrow import dataset as ds import cudf from cudf._lib import parquet as libparquet from cudf.api.types import is_list_like -from cudf.core.column import as_column, build_categorical_column, column_empty +from cudf.core.column import as_column, column_empty +from cudf.core.column.categorical import CategoricalColumn, as_unsigned_codes from cudf.utils import ioutils from cudf.utils.performance_tracking import _performance_tracking -from cudf.utils.utils import maybe_filter_deprecation + +if TYPE_CHECKING: + from collections.abc import Callable + BYTE_SIZES = { "kb": 1000, @@ -331,41 +334,12 @@ def write_to_dataset( @_performance_tracking def read_parquet_metadata(filepath_or_buffer): """{docstring}""" - # Multiple sources are passed as a list. If a single source is passed, - # wrap it in a list for unified processing downstream. 
- if not is_list_like(filepath_or_buffer): - filepath_or_buffer = [filepath_or_buffer] - - # Start by trying to construct a filesystem object - fs, paths = ioutils._get_filesystem_and_paths( - path_or_data=filepath_or_buffer, storage_options=None - ) - - # Check if filepath or buffer - filepath_or_buffer = paths if paths else filepath_or_buffer # List of filepaths or buffers - filepaths_or_buffers = [] - - for source in filepath_or_buffer: - tmp_source, compression = ioutils.get_reader_filepath_or_buffer( - path_or_data=source, - compression=None, - fs=fs, - use_python_file_object=None, - open_file_options=None, - storage_options=None, - bytes_per_thread=None, - ) - - if compression is not None: - raise ValueError( - "URL content-encoding decompression is not supported" - ) - if isinstance(tmp_source, list): - filepath_or_buffer.extend(tmp_source) - else: - filepaths_or_buffers.append(tmp_source) + filepaths_or_buffers = ioutils.get_reader_filepath_or_buffer( + path_or_data=filepath_or_buffer, + bytes_per_thread=None, + ) return libparquet.read_parquet_metadata(filepaths_or_buffers) @@ -531,14 +505,16 @@ def read_parquet( engine="cudf", columns=None, storage_options=None, + filesystem=None, filters=None, row_groups=None, use_pandas_metadata=True, - use_python_file_object=None, categorical_partitions=True, - open_file_options=None, bytes_per_thread=None, dataset_kwargs=None, + nrows=None, + skip_rows=None, + allow_mismatched_pq_schemas=False, *args, **kwargs, ): @@ -547,16 +523,6 @@ def read_parquet( raise ValueError( f"Only supported engines are {{'cudf', 'pyarrow'}}, got {engine=}" ) - # Do not allow the user to set file-opening options - # when `use_python_file_object=False` is specified - if use_python_file_object is False: - if open_file_options: - raise ValueError( - "open_file_options is not currently supported when " - "use_python_file_object is set to False." - ) - open_file_options = {} - if bytes_per_thread is None: bytes_per_thread = ioutils._BYTES_PER_THREAD_DEFAULT @@ -581,7 +547,9 @@ def read_parquet( # Start by trying construct a filesystem object, so we # can apply filters on remote file-systems fs, paths = ioutils._get_filesystem_and_paths( - path_or_data=filepath_or_buffer, storage_options=storage_options + path_or_data=filepath_or_buffer, + storage_options=storage_options, + filesystem=filesystem, ) # Normalize and validate filters @@ -609,36 +577,52 @@ def read_parquet( ) filepath_or_buffer = paths if paths else filepath_or_buffer - filepaths_or_buffers = [] - if use_python_file_object: - open_file_options = _default_open_file_options( - open_file_options=open_file_options, - columns=columns, - row_groups=row_groups, - fs=fs, - ) - have_nativefile = any( - isinstance(source, pa.NativeFile) for source in filepath_or_buffer - ) - for source in filepath_or_buffer: - tmp_source, compression = ioutils.get_reader_filepath_or_buffer( - path_or_data=source, - compression=None, - fs=fs, - use_python_file_object=use_python_file_object, - open_file_options=open_file_options, - storage_options=storage_options, - bytes_per_thread=bytes_per_thread, - ) - - if compression is not None: - raise ValueError( - "URL content-encoding decompression is not supported" + # Prepare remote-IO options + prefetch_options = kwargs.pop("prefetch_options", {}) + if not ioutils._is_local_filesystem(fs): + # The default prefetch method depends on the + # `row_groups` argument. In most cases we will use + # method="all" by default, because it is fastest + # when we need to read most of the file(s). 
+ # If a (simple) `row_groups` selection is made, we + # use method="parquet" to avoid transferring the + # entire file over the network + method = prefetch_options.get("method") + _row_groups = None + if method in (None, "parquet"): + if row_groups is None: + # If the user didn't specify a method, don't use + # 'parquet' prefetcher for column projection alone. + method = method or "all" + elif all(r == row_groups[0] for r in row_groups): + # Row group selection means we are probably + # reading half the file or less. We should + # avoid a full file transfer by default. + method = "parquet" + _row_groups = row_groups[0] + elif (method := method or "all") == "parquet": + raise ValueError( + "The 'parquet' prefetcher requires a uniform " + "row-group selection for all paths within the " + "same `read_parquet` call. " + f"Got: {row_groups}" + ) + if method == "parquet": + prefetch_options.update( + { + "method": method, + "columns": columns, + "row_groups": _row_groups, + } ) - if isinstance(tmp_source, list): - filepath_or_buffer.extend(tmp_source) - else: - filepaths_or_buffers.append(tmp_source) + + filepaths_or_buffers = ioutils.get_reader_filepath_or_buffer( + path_or_data=filepath_or_buffer, + fs=fs, + storage_options=storage_options, + bytes_per_thread=bytes_per_thread, + prefetch_options=prefetch_options, + ) # Warn user if they are not using cudf for IO # (There is a good chance this was not the intention) @@ -667,26 +651,21 @@ ) # Convert parquet data to a cudf.DataFrame - - # Don't want to warn if use_python_file_object causes us to get - # a NativeFile (there is a separate deprecation warning for that) - with maybe_filter_deprecation( - not have_nativefile, - message="Support for reading pyarrow's NativeFile is deprecated", - category=FutureWarning, - ): - df = _parquet_to_frame( - filepaths_or_buffers, - engine, - *args, - columns=columns, - row_groups=row_groups, - use_pandas_metadata=use_pandas_metadata, - partition_keys=partition_keys, - partition_categories=partition_categories, - dataset_kwargs=dataset_kwargs, - **kwargs, - ) + df = _parquet_to_frame( + filepaths_or_buffers, + engine, + *args, + columns=columns, + row_groups=row_groups, + use_pandas_metadata=use_pandas_metadata, + partition_keys=partition_keys, + partition_categories=partition_categories, + dataset_kwargs=dataset_kwargs, + nrows=nrows, + skip_rows=skip_rows, + allow_mismatched_pq_schemas=allow_mismatched_pq_schemas, + **kwargs, + ) # Apply filters row-wise (if any are defined), and return df = _apply_post_filters(df, filters) if projected_columns: @@ -813,6 +792,8 @@ def _parquet_to_frame( partition_keys=None, partition_categories=None, dataset_kwargs=None, + nrows=None, + skip_rows=None, **kwargs, ): # If this is not a partitioned read, only need @@ -820,11 +801,18 @@ if not partition_keys: return _read_parquet( paths_or_buffers, + nrows=nrows, + skip_rows=skip_rows, *args, row_groups=row_groups, **kwargs, ) + if nrows is not None or skip_rows is not None: + raise NotImplementedError( + "nrows/skip_rows is not supported when reading a partitioned parquet dataset" + ) + partition_meta = None partitioning = (dataset_kwargs or {}).get("partitioning", None) if hasattr(partitioning, "schema"): @@ -866,12 +854,17 @@ partition_categories[name].index(value), length=_len, ) - dfs[-1][name] = build_categorical_column( - categories=partition_categories[name], - codes=codes, + codes = as_unsigned_codes( + len(partition_categories[name]), 
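# Hypothetical remote-read sketch (bucket/paths are placeholders): a uniform
# per-file row-group selection lets the "parquet" prefetch method transfer
# only the required byte ranges rather than whole objects.
import cudf
df = cudf.read_parquet(
    ["s3://bucket/a.parquet", "s3://bucket/b.parquet"],
    row_groups=[[0], [0]],  # same selection for every path
    prefetch_options={"method": "parquet"},
)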
codes + ) + dfs[-1][name] = CategoricalColumn( + data=None, size=codes.size, + dtype=cudf.CategoricalDtype( + categories=partition_categories[name], ordered=False + ), offset=codes.offset, - ordered=False, + children=(codes,), ) else: # Not building categorical columns, so @@ -912,6 +905,9 @@ def _read_parquet( columns=None, row_groups=None, use_pandas_metadata=None, + nrows=None, + skip_rows=None, + allow_mismatched_pq_schemas=False, *args, **kwargs, ): @@ -934,13 +930,23 @@ def _read_parquet( columns=columns, row_groups=row_groups, use_pandas_metadata=use_pandas_metadata, + nrows=nrows if nrows is not None else -1, + skip_rows=skip_rows if skip_rows is not None else 0, + allow_mismatched_pq_schemas=allow_mismatched_pq_schemas, ) else: + if nrows is None: + nrows = -1 + if skip_rows is None: + skip_rows = 0 return libparquet.read_parquet( filepaths_or_buffers, columns=columns, row_groups=row_groups, use_pandas_metadata=use_pandas_metadata, + nrows=nrows, + skip_rows=skip_rows, + allow_mismatched_pq_schemas=allow_mismatched_pq_schemas, ) else: if ( @@ -1547,44 +1553,6 @@ def __exit__(self, *args): self.close() -def _default_open_file_options( - open_file_options, columns, row_groups, fs=None -): - """ - Set default fields in open_file_options. - - Copies and updates `open_file_options` to - include column and row-group information - under the "precache_options" key. By default, - we set "method" to "parquet", but precaching - will be disabled if the user chooses `method=None` - - Parameters - ---------- - open_file_options : dict or None - columns : list - row_groups : list - fs : fsspec.AbstractFileSystem, Optional - """ - if fs and ioutils._is_local_filesystem(fs): - # Quick return for local fs - return open_file_options or {} - # Assume remote storage if `fs` was not specified - open_file_options = (open_file_options or {}).copy() - precache_options = open_file_options.pop("precache_options", {}).copy() - if precache_options.get("method", "parquet") == "parquet": - precache_options.update( - { - "method": "parquet", - "engine": precache_options.get("engine", "pyarrow"), - "columns": columns, - "row_groups": row_groups, - } - ) - open_file_options["precache_options"] = precache_options - return open_file_options - - def _hive_dirname(name, val): # Simple utility to produce hive directory name if pd.isna(val): diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py index 4329480bb2c..5ce738cae0e 100644 --- a/python/cudf/cudf/io/text.py +++ b/python/cudf/cudf/io/text.py @@ -24,14 +24,16 @@ def read_text( if delimiter is None: raise ValueError("delimiter needs to be provided") - filepath_or_buffer, _ = ioutils.get_reader_filepath_or_buffer( + filepath_or_buffer = ioutils.get_reader_filepath_or_buffer( path_or_data=filepath_or_buffer, - compression=None, iotypes=(BytesIO, StringIO), storage_options=storage_options, ) + filepath_or_buffer = ioutils._select_single_source( + filepath_or_buffer, "read_text" + ) - return cudf.Series._from_data( + return cudf.Series._from_column( libtext.read_text( filepath_or_buffer, delimiter=delimiter, diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py index 94e73021cec..df7bbe22a61 100644 --- a/python/cudf/cudf/options.py +++ b/python/cudf/cudf/options.py @@ -5,10 +5,10 @@ import textwrap from contextlib import ContextDecorator from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Callable +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from collections.abc import Container + from 
collections.abc import Callable, Container @dataclass diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index a6667a7bcd9..7954e7f242e 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -5,10 +5,9 @@ import os import warnings +import pylibcudf import rmm.mr -from cudf._lib import pylibcudf - from .fast_slow_proxy import is_proxy_object from .magics import load_ipython_extension from .profiler import Profiler diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index 3b012169676..d5e669cb58f 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -7,13 +7,16 @@ import cupy import cupy._core.flags import numpy -import numpy.core.multiarray +from packaging import version from ..fast_slow_proxy import ( + _fast_slow_function_call, _FastSlowAttribute, + is_proxy_object, make_final_proxy_type, make_intermediate_proxy_type, ) +from ..proxy_base import ProxyNDarrayBase from .common import ( array_interface, array_method, @@ -105,18 +108,38 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): return super(cls, cls)._fsproxy_wrap(arr, constructor) +def ndarray__array_ufunc__(self, ufunc, method, *inputs, **kwargs): + result, _ = _fast_slow_function_call( + getattr(ufunc, method), + *inputs, + **kwargs, + ) + if isinstance(result, tuple): + if is_proxy_object(result[0]) and isinstance( + result[0]._fsproxy_wrapped, numpy.ndarray + ): + return tuple(numpy.asarray(x) for x in result) + elif is_proxy_object(result) and isinstance( + result._fsproxy_wrapped, numpy.ndarray + ): + return numpy.asarray(result) + return result + + ndarray = make_final_proxy_type( "ndarray", cupy.ndarray, numpy.ndarray, fast_to_slow=cupy.ndarray.get, slow_to_fast=cupy.asarray, + bases=(ProxyNDarrayBase,), additional_attributes={ "__array__": array_method, # So that pa.array(wrapped-numpy-array) works "__arrow_array__": arrow_array_method, "__cuda_array_interface__": cuda_array_interface, "__array_interface__": array_interface, + "__array_ufunc__": ndarray__array_ufunc__, # ndarrays are unhashable "__hash__": None, # iter(cupy-array) produces an iterable of zero-dim device @@ -141,10 +164,15 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): }, ) +if version.parse(numpy.__version__) >= version.parse("2.0"): + # NumPy 2 introduced `_core` and gives warnings for access to `core`. 
+ from numpy._core.multiarray import flagsobj as _numpy_flagsobj +else: + from numpy.core.multiarray import flagsobj as _numpy_flagsobj # Mapping flags between slow and fast types _ndarray_flags = make_intermediate_proxy_type( "_ndarray_flags", cupy._core.flags.Flags, - numpy.core.multiarray.flagsobj, + _numpy_flagsobj, ) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 478108f36f1..6d03063fa27 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -61,6 +61,12 @@ TimeGrouper as pd_TimeGrouper, ) +try: + from IPython import get_ipython + + ipython_shell = get_ipython() +except ImportError: + ipython_shell = None cudf.set_option("mode.pandas_compatible", True) @@ -208,6 +214,12 @@ def _DataFrame__dir__(self): ] +def ignore_ipython_canary_check(self, **kwargs): + raise AttributeError( + "_ipython_canary_method_should_not_exist_ doesn't exist" + ) + + DataFrame = make_final_proxy_type( "DataFrame", cudf.DataFrame, @@ -220,10 +232,26 @@ def _DataFrame__dir__(self): "_constructor": _FastSlowAttribute("_constructor"), "_constructor_sliced": _FastSlowAttribute("_constructor_sliced"), "_accessors": set(), + "_ipython_canary_method_should_not_exist_": ignore_ipython_canary_check, }, ) +def custom_repr_html(obj): + # This custom method is needed to register an html formatter + # for ipython + return _fast_slow_function_call( + lambda obj: obj._repr_html_(), + obj, + )[0] + + +if ipython_shell: + # See: https://ipython.readthedocs.io/en/stable/config/integrating.html#formatters-for-third-party-types + html_formatter = ipython_shell.display_formatter.formatters["text/html"] + html_formatter.for_type(DataFrame, custom_repr_html) + + Series = make_final_proxy_type( "Series", cudf.Series, diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index bb678fd1efe..afa1ce5f86c 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -10,15 +10,16 @@ import pickle import types import warnings -from collections.abc import Iterator +from collections.abc import Callable, Iterator from enum import IntEnum -from typing import Any, Callable, Literal, Mapping +from typing import Any, Literal, Mapping import numpy as np from ..options import _env_get_bool from ..testing import assert_eq from .annotation import nvtx +from .proxy_base import ProxyNDarrayBase def call_operator(fn, args, kwargs): @@ -564,7 +565,17 @@ def _fsproxy_wrap(cls, value, func): _FinalProxy subclasses can override this classmethod if they need particular behaviour when wrapped up. """ - proxy = object.__new__(cls) + # TODO: Replace the if-elif-else using a singledispatch helper function + base_class = _get_proxy_base_class(cls) + if base_class is object: + proxy = base_class.__new__(cls) + elif base_class is ProxyNDarrayBase: + proxy = base_class.__new__(cls, value) + else: + raise TypeError( + f"Cannot create a proxy instance of {cls.__name__} using base class {base_class.__name__}. 
" + f"Expected either 'object' or another type in 'PROXY_BASE_CLASSES'" + ) proxy._fsproxy_wrapped = value return proxy @@ -1193,6 +1204,19 @@ def is_proxy_object(obj: Any) -> bool: return False +def _get_proxy_base_class(cls): + """Returns the proxy base class if one exists""" + for proxy_class in PROXY_BASE_CLASSES: + if proxy_class in cls.__mro__: + return proxy_class + return object + + +PROXY_BASE_CLASSES: set[type] = { + ProxyNDarrayBase, +} + + NUMPY_TYPES: set[str] = set(np.sctypeDict.values()) diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py new file mode 100644 index 00000000000..6f732834e94 --- /dev/null +++ b/python/cudf/cudf/pandas/proxy_base.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import cupy as cp +import numpy as np + + +class ProxyNDarrayBase(np.ndarray): + def __new__(cls, arr): + if isinstance(arr, cp.ndarray): + arr = arr.get() + if not isinstance(arr, np.ndarray): + raise TypeError( + "Unsupported array type. Must be numpy.ndarray or cupy.ndarray" + ) + return np.asarray(arr, dtype=arr.dtype).view(cls) + + def __array_finalize__(self, obj): + if obj is None: + return + self._fsproxy_wrapped = getattr(obj, "_fsproxy_wrapped", obj) diff --git a/python/cudf/cudf/pylibcudf_tests/test_column_from_device.py b/python/cudf/cudf/pylibcudf_tests/test_column_from_device.py deleted file mode 100644 index c4ff7bb43a5..00000000000 --- a/python/cudf/cudf/pylibcudf_tests/test_column_from_device.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -import pyarrow as pa -import pytest -from utils import assert_column_eq - -import cudf -from cudf._lib import pylibcudf as plc - -VALID_TYPES = [ - pa.int8(), - pa.int16(), - pa.int32(), - pa.int64(), - pa.uint8(), - pa.uint16(), - pa.uint32(), - pa.uint64(), - pa.float32(), - pa.float64(), - pa.bool_(), - pa.timestamp("s"), - pa.timestamp("ms"), - pa.timestamp("us"), - pa.timestamp("ns"), - pa.duration("s"), - pa.duration("ms"), - pa.duration("us"), - pa.duration("ns"), -] - - -@pytest.fixture(params=VALID_TYPES, ids=repr) -def valid_type(request): - return request.param - - -@pytest.fixture -def valid_column(valid_type): - if valid_type == pa.bool_(): - return pa.array([True, False, True], type=valid_type) - return pa.array([1, 2, 3], type=valid_type) - - -def test_from_cuda_array_interface(valid_column): - col = plc.column.Column.from_cuda_array_interface_obj( - cudf.Series(valid_column) - ) - expect = valid_column - - assert_column_eq(expect, col) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index a6a2d4eea00..8cb9efa873c 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -2,6 +2,7 @@ import itertools import string +import time from collections import abc from contextlib import contextmanager from decimal import Decimal @@ -368,3 +369,24 @@ def sv_to_udf_str_testing_lowering(context, builder, sig, args): return cast_string_view_to_udf_string( context, builder, sig.args[0], sig.return_type, args[0] ) + + +class cudf_timeout: + """ + Context manager to raise a TimeoutError after a specified number of seconds. 
+ """ + + def __init__(self, timeout): + self.timeout = timeout + + def __enter__(self): + self.start_time = time.perf_counter() + + def __exit__(self, *args): + elapsed_time = ( + time.perf_counter() - self.start_time + ) # Calculate elapsed time + if elapsed_time >= self.timeout: + raise TimeoutError( + f"Expected to finish in {self.timeout=} seconds but took {elapsed_time=} seconds" + ) diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index c2072d90e98..31ad24a4664 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -398,8 +398,12 @@ def assert_index_equal( ) for level in range(left.nlevels): - llevel = cudf.Index(left._columns[level], name=left.names[level]) - rlevel = cudf.Index(right._columns[level], name=right.names[level]) + llevel = cudf.Index._from_column( + left._columns[level], name=left.names[level] + ) + rlevel = cudf.Index._from_column( + right._columns[level], name=right.names[level] + ) mul_obj = f"MultiIndex level [{level}]" assert_index_equal( llevel, diff --git a/python/cudf/cudf/tests/groupby/test_agg.py b/python/cudf/cudf/tests/groupby/test_agg.py index f8847f02d5a..99e7523031b 100644 --- a/python/cudf/cudf/tests/groupby/test_agg.py +++ b/python/cudf/cudf/tests/groupby/test_agg.py @@ -3,6 +3,7 @@ import pytest import cudf +from cudf.testing import assert_eq @pytest.mark.parametrize( @@ -26,3 +27,32 @@ def test_series_agg(attr): pd_agg = getattr(pdf.groupby(["a"])["a"], attr)("count") assert agg.ndim == pd_agg.ndim + + +@pytest.mark.parametrize("func", ["sum", "prod", "mean", "count"]) +@pytest.mark.parametrize("attr", ["agg", "aggregate"]) +def test_dataframe_agg(attr, func): + df = cudf.DataFrame({"a": [1, 2, 1, 2], "b": [0, 0, 0, 0]}) + pdf = df.to_pandas() + + agg = getattr(df.groupby("a"), attr)(func) + pd_agg = getattr(pdf.groupby(["a"]), attr)(func) + + assert_eq(agg, pd_agg) + + agg = getattr(df.groupby("a"), attr)({"b": func}) + pd_agg = getattr(pdf.groupby(["a"]), attr)({"b": func}) + + assert_eq(agg, pd_agg) + + agg = getattr(df.groupby("a"), attr)([func]) + pd_agg = getattr(pdf.groupby(["a"]), attr)([func]) + + assert_eq(agg, pd_agg) + + agg = getattr(df.groupby("a"), attr)(foo=("b", func), bar=("a", func)) + pd_agg = getattr(pdf.groupby(["a"]), attr)( + foo=("b", func), bar=("a", func) + ) + + assert_eq(agg, pd_agg) diff --git a/python/cudf/cudf/tests/indexes/test_interval.py b/python/cudf/cudf/tests/indexes/test_interval.py index 3b3a9f96543..25edf788daf 100644 --- a/python/cudf/cudf/tests/indexes/test_interval.py +++ b/python/cudf/cudf/tests/indexes/test_interval.py @@ -149,6 +149,10 @@ def test_interval_range_periods_basic_dtype(start_t, end_t, periods_t): assert_eq(pindex, gindex) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Does not warn on older versions of pandas", +) def test_interval_range_periods_warnings(): start_val, end_val, periods_val = 0, 4, 1.0 @@ -401,3 +405,15 @@ def test_from_tuples(): result = cudf.IntervalIndex.from_tuples(data, closed="left", name="a") expected = pd.IntervalIndex.from_tuples(data, closed="left", name="a") assert_eq(result, expected) + + +def test_interval_range_name(): + expected = pd.interval_range(start=0, periods=5, freq=2, name="foo") + result = cudf.interval_range(start=0, periods=5, freq=2, name="foo") + assert_eq(result, expected) + + +def test_from_interval_range_indexing(): + result = cudf.interval_range(start=0, end=1, name="a").repeat(2) + expected = pd.interval_range(start=0, end=1, 
name="a").repeat(2) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_apply_rows.py b/python/cudf/cudf/tests/test_apply_rows.py index a11022c1a17..f9b0d9c1e78 100644 --- a/python/cudf/cudf/tests/test_apply_rows.py +++ b/python/cudf/cudf/tests/test_apply_rows.py @@ -27,8 +27,12 @@ def test_dataframe_apply_rows(dtype, has_nulls, pessimistic): gdf_series_expected = gdf_series_a * gdf_series_b else: # optimistically ignore the null masks - a = cudf.Series(column.build_column(gdf_series_a.data, dtype)) - b = cudf.Series(column.build_column(gdf_series_b.data, dtype)) + a = cudf.Series._from_column( + column.build_column(gdf_series_a.data, dtype) + ) + b = cudf.Series._from_column( + column.build_column(gdf_series_b.data, dtype) + ) gdf_series_expected = a * b df_expected = cudf.DataFrame( diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py index 2ec1d1d2f28..9d69e626c3d 100644 --- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py +++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py @@ -23,6 +23,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing import assert_eq from cudf.testing.dataset_generator import rand_dataframe @@ -302,6 +303,10 @@ def get_days_from_epoch(date: datetime.date | None) -> int | None: @pytest.mark.parametrize("namespace", [None, "root_ns"]) @pytest.mark.parametrize("nullable", [True, False]) @pytest.mark.parametrize("prepend_null", [True, False]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas (datetime(9999, ...) too large)", +) def test_can_parse_avro_date_logical_type(namespace, nullable, prepend_null): avro_type = {"logicalType": "date", "type": "int"} if nullable: diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 503b1a975b4..2e8519509e2 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -13,7 +13,11 @@ import cudf from cudf import Index, Series -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) from cudf.core.buffer.spill_manager import get_global_manager from cudf.testing import _utils as utils, assert_eq from cudf.utils.dtypes import ( @@ -290,6 +294,47 @@ def test_series_compare(cmpop, obj_class, dtype): np.testing.assert_equal(result3.to_numpy(), cmpop(arr1, arr2)) +@pytest.mark.parametrize( + "dtype,val", + [("int8", 200), ("int32", 2**32), ("uint8", -128), ("uint64", -1)], +) +@pytest.mark.parametrize( + "op", + [ + operator.eq, + operator.ne, + operator.lt, + operator.le, + operator.gt, + operator.ge, + ], +) +@pytest.mark.parametrize("reverse", [False, True]) +def test_series_compare_integer(dtype, val, op, reverse): + # Tests that these actually work, even though they are out of bound. + force_cast_val = np.array(val).astype(dtype) + sr = Series( + [np.iinfo(dtype).min, np.iinfo(dtype).max, force_cast_val, None], + dtype=dtype, + ) + + if reverse: + _op = op + + def op(x, y): + return _op(y, x) + + # We expect the same result as comparing to a value within range (e.g. 
 def _series_compare_nulls_typegen():
     return [
         *combinations_with_replacement(DATETIME_TYPES, 2),
@@ -1740,6 +1785,20 @@ def test_datetime_dateoffset_binaryop(
             reason="https://github.com/pandas-dev/pandas/issues/57448",
         )
     )
+    if (
+        not PANDAS_GE_220
+        and dtype in {"datetime64[ms]", "datetime64[s]"}
+        and frequency in ("microseconds", "nanoseconds")
+        and n_periods != 0
+    ):
+        pytest.skip(reason="https://github.com/pandas-dev/pandas/pull/55595")
+    if (
+        not PANDAS_GE_220
+        and dtype == "datetime64[us]"
+        and frequency == "nanoseconds"
+        and n_periods != 0
+    ):
+        pytest.skip(reason="https://github.com/pandas-dev/pandas/pull/55595")

     date_col = [
         f"2000-01-01 00:00:{components}",
@@ -1793,7 +1852,11 @@ def test_datetime_dateoffset_binaryop(
     "ignore:Discarding nonzero nanoseconds:UserWarning"
 )
 @pytest.mark.parametrize("op", [operator.add, operator.sub])
-def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op):
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
+def test_datetime_dateoffset_binaryop_multiple(request, date_col, kwargs, op):
     gsr = cudf.Series(date_col, dtype="datetime64[ns]")
     psr = gsr.to_pandas()

@@ -1832,6 +1895,21 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op):
 def test_datetime_dateoffset_binaryop_reflected(
     n_periods, frequency, dtype, components
 ):
+    if (
+        not PANDAS_GE_220
+        and dtype in {"datetime64[ms]", "datetime64[s]"}
+        and frequency in ("microseconds", "nanoseconds")
+        and n_periods != 0
+    ):
+        pytest.skip(reason="https://github.com/pandas-dev/pandas/pull/55595")
+    if (
+        not PANDAS_GE_220
+        and dtype == "datetime64[us]"
+        and frequency == "nanoseconds"
+        and n_periods != 0
+    ):
+        pytest.skip(reason="https://github.com/pandas-dev/pandas/pull/55595")
+
     date_col = [
         f"2000-01-01 00:00:{components}",
         f"2000-01-31 00:00:{components}",
diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py
index ae58af8ebce..cd1ad21ae59 100644
--- a/python/cudf/cudf/tests/test_categorical.py
+++ b/python/cudf/cudf/tests/test_categorical.py
@@ -11,6 +11,7 @@
 import pytest

 import cudf
+from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.testing import assert_eq
 from cudf.testing._utils import NUMERIC_TYPES, assert_exceptions_equal

@@ -858,6 +859,10 @@ def test_cat_from_scalar(scalar):
     assert_eq(ps, gs)


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Does not warn on older versions of pandas",
+)
 def test_cat_groupby_fillna():
     ps = pd.Series(["a", "b", "c"], dtype="category")
     gs = cudf.from_pandas(ps)
diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py
index c288155112c..4aa7fb27c9b 100644
--- a/python/cudf/cudf/tests/test_column.py
+++ b/python/cudf/cudf/tests/test_column.py
@@ -95,7 +95,7 @@ def test_column_offset_and_size(pandas_input, offset, size):
     else:
         assert col.size == (col.data.size / col.dtype.itemsize)

-    got = cudf.Series(col)
+    got = cudf.Series._from_column(col)

     if offset is None:
         offset = 0
@@ -112,8 +112,8 @@ def column_slicing_test(col, offset, size, cast_to_float=False):
     col_slice = col.slice(offset, offset + size)
-    series = cudf.Series(col)
-    sliced_series = cudf.Series(col_slice)
+    series = cudf.Series._from_column(col)
+    sliced_series = cudf.Series._from_column(col_slice)

     if cast_to_float:
         pd_series = series.astype(float).to_pandas()
@@ -208,7 +208,9 @@ def test_as_column_scalar_with_nan(nan_as_null, scalar, size):
     )

     got = (
-        cudf.Series(as_column(scalar, length=size, nan_as_null=nan_as_null))
+        cudf.Series._from_column(
+            as_column(scalar, length=size, nan_as_null=nan_as_null)
+        )
         .dropna()
         .to_numpy()
     )
@@ -250,12 +252,18 @@ def test_column_chunked_array_creation():
     actual_column = cudf.core.column.as_column(chunked_array, dtype="float")
     expected_column = cudf.core.column.as_column(pyarrow_array, dtype="float")

-    assert_eq(cudf.Series(actual_column), cudf.Series(expected_column))
+    assert_eq(
+        cudf.Series._from_column(actual_column),
+        cudf.Series._from_column(expected_column),
+    )

     actual_column = cudf.core.column.as_column(chunked_array)
     expected_column = cudf.core.column.as_column(pyarrow_array)

-    assert_eq(cudf.Series(actual_column), cudf.Series(expected_column))
+    assert_eq(
+        cudf.Series._from_column(actual_column),
+        cudf.Series._from_column(expected_column),
+    )


 @pytest.mark.parametrize(
@@ -287,7 +295,7 @@ def test_column_view_valid_numeric_to_numeric(data, from_dtype, to_dtype):
     gpu_data_view = gpu_data.view(to_dtype)

     expect = pd.Series(cpu_data_view, dtype=cpu_data_view.dtype)
-    got = cudf.Series(gpu_data_view, dtype=gpu_data_view.dtype)
+    got = cudf.Series._from_column(gpu_data_view).astype(gpu_data_view.dtype)

     gpu_ptr = gpu_data.data.get_ptr(mode="read")
     assert gpu_ptr == got._column.data.get_ptr(mode="read")
@@ -327,7 +335,7 @@ def test_column_view_invalid_numeric_to_numeric(data, from_dtype, to_dtype):
     ],
 )
 def test_column_view_valid_string_to_numeric(data, to_dtype):
-    expect = cudf.Series(cudf.Series(data)._column.view(to_dtype))
+    expect = cudf.Series._from_column(cudf.Series(data)._column.view(to_dtype))
     got = cudf.Series(str_host_view(data, to_dtype))

     assert_eq(expect, got)
@@ -342,7 +350,7 @@ def test_column_view_nulls_widths_even():
     sr = cudf.Series(data, dtype="int32")

     expect = cudf.Series(expect_data, dtype="float32")
-    got = cudf.Series(sr._column.view("float32"))
+    got = cudf.Series._from_column(sr._column.view("float32"))

     assert_eq(expect, got)

@@ -354,7 +362,7 @@ def test_column_view_nulls_widths_even():
     sr = cudf.Series(data, dtype="float64")

     expect = cudf.Series(expect_data, dtype="int64")
-    got = cudf.Series(sr._column.view("int64"))
+    got = cudf.Series._from_column(sr._column.view("int64"))

     assert_eq(expect, got)

@@ -365,7 +373,9 @@ def test_column_view_numeric_slice(slc):
     sr = cudf.Series(data)

     expect = cudf.Series(data[slc].view("int64"))
-    got = cudf.Series(sr._column.slice(slc.start, slc.stop).view("int64"))
+    got = cudf.Series._from_column(
+        sr._column.slice(slc.start, slc.stop).view("int64")
+    )

     assert_eq(expect, got)

@@ -376,7 +386,7 @@ def test_column_view_numeric_slice(slc):
 )
 def test_column_view_string_slice(slc):
     data = ["a", "bcde", "cd", "efg", "h"]

-    expect = cudf.Series(
+    expect = cudf.Series._from_column(
         cudf.Series(data)._column.slice(slc.start, slc.stop).view("int8")
     )
     got = cudf.Series(str_host_view(data[slc], "int8"))
@@ -409,7 +419,10 @@ def test_as_column_buffer(data, expected):
     actual_column = cudf.core.column.as_column(
         cudf.core.buffer.as_buffer(data), dtype=data.dtype
     )
-    assert_eq(cudf.Series(actual_column), cudf.Series(expected))
+    assert_eq(
+        cudf.Series._from_column(actual_column),
+        cudf.Series._from_column(expected),
+    )
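Most of the churn in this file is one mechanical substitution: wrapping a bare column in the public `cudf.Series(...)` constructor is replaced by the private classmethod. A sketch of the pattern (internal API, shown only to illustrate the rename):

```python
import cudf
from cudf.core.column import as_column

col = as_column([1, 2, 3])
s = cudf.Series._from_column(col)  # new spelling used throughout this diff
# s = cudf.Series(col)             # old spelling being migrated away from
```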


 @pytest.mark.parametrize(
@@ -436,7 +449,10 @@ def test_as_column_arrow_array(data, pyarrow_kwargs, cudf_kwargs):
     pyarrow_data = pa.array(data, **pyarrow_kwargs)
     cudf_from_pyarrow = as_column(pyarrow_data)
     expected = as_column(data, **cudf_kwargs)
-    assert_eq(cudf.Series(cudf_from_pyarrow), cudf.Series(expected))
+    assert_eq(
+        cudf.Series._from_column(cudf_from_pyarrow),
+        cudf.Series._from_column(expected),
+    )


 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_column_accessor.py b/python/cudf/cudf/tests/test_column_accessor.py
index e84e1433c10..5cef077c18d 100644
--- a/python/cudf/cudf/tests/test_column_accessor.py
+++ b/python/cudf/cudf/tests/test_column_accessor.py
@@ -362,11 +362,15 @@ def test_replace_level_values_MultiColumn():
     got = ca.rename_levels(mapper={"a": "f"}, level=0)
     check_ca_equal(expect, got)

+    # passing without level kwarg assumes level=0
+    got = ca.rename_levels(mapper={"a": "f"})
+    check_ca_equal(expect, got)
+

 def test_clear_nrows_empty_before():
     ca = ColumnAccessor({})
     assert ca.nrows == 0
-    ca.insert("new", [1])
+    ca.insert("new", as_column([1]))
     assert ca.nrows == 1

diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py
index c1c03de48d4..8da589ba45b 100644
--- a/python/cudf/cudf/tests/test_concat.py
+++ b/python/cudf/cudf/tests/test_concat.py
@@ -9,6 +9,7 @@
 import pytest

 import cudf
+from cudf.core._compat import PANDAS_GE_220
 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype
 from cudf.testing import assert_eq
 from cudf.testing._utils import assert_exceptions_equal, expect_warning_if
@@ -451,45 +452,75 @@ def test_concat_mixed_input():
         [pd.Series([1, 2, 3]), pd.DataFrame({"a": []})],
         [pd.Series([], dtype="float64"), pd.DataFrame({"a": []})],
         [pd.Series([], dtype="float64"), pd.DataFrame({"a": [1, 2]})],
-        [
-            pd.Series([1, 2, 3.0, 1.2], name="abc"),
-            pd.DataFrame({"a": [1, 2]}),
-        ],
-        [
-            pd.Series(
-                [1, 2, 3.0, 1.2], name="abc", index=[100, 110, 120, 130]
-            ),
-            pd.DataFrame({"a": [1, 2]}),
-        ],
-        [
-            pd.Series(
-                [1, 2, 3.0, 1.2], name="abc", index=["a", "b", "c", "d"]
+        pytest.param(
+            [
+                pd.Series([1, 2, 3.0, 1.2], name="abc"),
+                pd.DataFrame({"a": [1, 2]}),
+            ],
+            marks=pytest.mark.skipif(
+                not PANDAS_GE_220,
+                reason="https://github.com/pandas-dev/pandas/pull/56365",
             ),
+        ),
+        pytest.param(
+            [
+                pd.Series(
+                    [1, 2, 3.0, 1.2], name="abc", index=[100, 110, 120, 130]
+                ),
+                pd.DataFrame({"a": [1, 2]}),
+            ],
+            marks=pytest.mark.skipif(
+                not PANDAS_GE_220,
+                reason="https://github.com/pandas-dev/pandas/pull/56365",
             ),
-            pd.DataFrame({"a": [1, 2]}, index=["a", "b"]),
-        ],
-        [
-            pd.Series(
-                [1, 2, 3.0, 1.2, 8, 100],
-                name="New name",
-                index=["a", "b", "c", "d", "e", "f"],
+        ),
+        pytest.param(
+            [
+                pd.Series(
+                    [1, 2, 3.0, 1.2], name="abc", index=["a", "b", "c", "d"]
+                ),
+                pd.DataFrame({"a": [1, 2]}, index=["a", "b"]),
+            ],
+            marks=pytest.mark.skipif(
+                not PANDAS_GE_220,
+                reason="https://github.com/pandas-dev/pandas/pull/56365",
             ),
-            pd.DataFrame(
-                {"a": [1, 2, 4, 10, 11, 12]},
-                index=["a", "b", "c", "d", "e", "f"],
+        ),
+        pytest.param(
+            [
+                pd.Series(
+                    [1, 2, 3.0, 1.2, 8, 100],
+                    name="New name",
+                    index=["a", "b", "c", "d", "e", "f"],
+                ),
+                pd.DataFrame(
+                    {"a": [1, 2, 4, 10, 11, 12]},
+                    index=["a", "b", "c", "d", "e", "f"],
+                ),
+            ],
+            marks=pytest.mark.skipif(
+                not PANDAS_GE_220,
reason="https://github.com/pandas-dev/pandas/pull/56365", ), - pd.DataFrame( - {"a": [1, 2, 4, 10, 11, 12]}, - index=["a", "b", "c", "d", "e", "f"], + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2, 8, 100], + name="New name", + index=["a", "b", "c", "d", "e", "f"], + ), + pd.DataFrame( + {"a": [1, 2, 4, 10, 11, 12]}, + index=["a", "b", "c", "d", "e", "f"], + ), + ] + * 7, + marks=pytest.mark.skipif( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", ), - ] - * 7, + ), ], ) def test_concat_series_dataframe_input(objs): diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 6a21cb1b9d7..cee3d23eadc 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -13,13 +13,16 @@ import numpy as np import pandas as pd import pytest -from pyarrow import fs as pa_fs import cudf from cudf import read_csv -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) from cudf.testing import assert_eq -from cudf.testing._utils import assert_exceptions_equal +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if def make_numeric_dataframe(nrows, dtype): @@ -1080,18 +1083,6 @@ def test_csv_reader_filepath_or_buffer(tmpdir, path_or_buf, src): assert_eq(expect, got) -def test_csv_reader_arrow_nativefile(path_or_buf): - # Check that we can read a file opened with the - # Arrow FileSystem interface - expect = cudf.read_csv(path_or_buf("filepath")) - fs, path = pa_fs.FileSystem.from_uri(path_or_buf("filepath")) - with pytest.warns(FutureWarning): - with fs.open_input_file(path) as fil: - got = cudf.read_csv(fil) - - assert_eq(expect, got) - - def test_small_zip(tmpdir): df = pd.DataFrame( { @@ -1283,14 +1274,14 @@ def test_csv_reader_delim_whitespace(): # with header row with pytest.warns(FutureWarning): cu_df = read_csv(StringIO(buffer), delim_whitespace=True) - with pytest.warns(FutureWarning): + with expect_warning_if(PANDAS_GE_220): pd_df = pd.read_csv(StringIO(buffer), delim_whitespace=True) assert_eq(pd_df, cu_df) # without header row with pytest.warns(FutureWarning): cu_df = read_csv(StringIO(buffer), delim_whitespace=True, header=None) - with pytest.warns(FutureWarning): + with expect_warning_if(PANDAS_GE_220): pd_df = pd.read_csv( StringIO(buffer), delim_whitespace=True, header=None ) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index e2ce5c03b70..f4d1578bda7 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -26,7 +26,11 @@ import cudf from cudf.api.extensions import no_default -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column import column from cudf.errors import MixedTypeError @@ -3561,8 +3565,11 @@ def test_dataframe_empty_sort_index(): @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.parametrize("na_position", ["first", "last"]) def test_dataframe_sort_index( - index, axis, ascending, inplace, ignore_index, na_position + request, index, axis, ascending, inplace, ignore_index, na_position ): + if not PANDAS_GE_220 and axis in (1, "columns") and ignore_index: + pytest.skip(reason="Bug fixed in 
pandas-2.2") + pdf = pd.DataFrame( {"b": [1, 3, 2], "a": [1, 4, 3], "c": [4, 1, 5]}, index=index, @@ -3612,6 +3619,10 @@ def test_dataframe_sort_index( @pytest.mark.parametrize("ignore_index", [True, False]) @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.parametrize("na_position", ["first", "last"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) def test_dataframe_mulitindex_sort_index( request, axis, level, ascending, inplace, ignore_index, na_position ): @@ -4264,34 +4275,36 @@ def test_empty_dataframe_describe(): def test_as_column_types(): col = column.as_column(cudf.Series([], dtype="float64")) assert_eq(col.dtype, np.dtype("float64")) - gds = cudf.Series(col) + gds = cudf.Series._from_column(col) pds = pd.Series(pd.Series([], dtype="float64")) assert_eq(pds, gds) col = column.as_column(cudf.Series([], dtype="float64"), dtype="float32") assert_eq(col.dtype, np.dtype("float32")) - gds = cudf.Series(col) + gds = cudf.Series._from_column(col) pds = pd.Series(pd.Series([], dtype="float32")) assert_eq(pds, gds) col = column.as_column(cudf.Series([], dtype="float64"), dtype="str") assert_eq(col.dtype, np.dtype("object")) - gds = cudf.Series(col) + gds = cudf.Series._from_column(col) pds = pd.Series(pd.Series([], dtype="str")) assert_eq(pds, gds) col = column.as_column(cudf.Series([], dtype="float64"), dtype="object") assert_eq(col.dtype, np.dtype("object")) - gds = cudf.Series(col) + gds = cudf.Series._from_column(col) pds = pd.Series(pd.Series([], dtype="object")) assert_eq(pds, gds) pds = pd.Series(np.array([1, 2, 3]), dtype="float32") - gds = cudf.Series(column.as_column(np.array([1, 2, 3]), dtype="float32")) + gds = cudf.Series._from_column( + column.as_column(np.array([1, 2, 3]), dtype="float32") + ) assert_eq(pds, gds) @@ -4301,23 +4314,25 @@ def test_as_column_types(): assert_eq(pds, gds) pds = pd.Series([], dtype="float64") - gds = cudf.Series(column.as_column(pds)) + gds = cudf.Series._from_column(column.as_column(pds)) assert_eq(pds, gds) pds = pd.Series([1, 2, 4], dtype="int64") - gds = cudf.Series(column.as_column(cudf.Series([1, 2, 4]), dtype="int64")) + gds = cudf.Series._from_column( + column.as_column(cudf.Series([1, 2, 4]), dtype="int64") + ) assert_eq(pds, gds) pds = pd.Series([1.2, 18.0, 9.0], dtype="float32") - gds = cudf.Series( + gds = cudf.Series._from_column( column.as_column(cudf.Series([1.2, 18.0, 9.0]), dtype="float32") ) assert_eq(pds, gds) pds = pd.Series([1.2, 18.0, 9.0], dtype="str") - gds = cudf.Series( + gds = cudf.Series._from_column( column.as_column(cudf.Series([1.2, 18.0, 9.0]), dtype="str") ) @@ -6521,7 +6536,9 @@ def test_from_pandas_for_series_nan_as_null(nan_as_null): data = [np.nan, 2.0, 3.0] psr = pd.Series(data) - expected = cudf.Series(column.as_column(data, nan_as_null=nan_as_null)) + expected = cudf.Series._from_column( + column.as_column(data, nan_as_null=nan_as_null) + ) got = cudf.from_pandas(psr, nan_as_null=nan_as_null) assert_eq(expected, got) @@ -6741,6 +6758,10 @@ def test_dataframe_init_from_arrays_cols(data, cols, index): None, ], ) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) def test_dataframe_assign_scalar(request, col_data, assign_val): request.applymarker( pytest.mark.xfail( @@ -9403,7 +9424,6 @@ def test_rename_for_level_RangeIndex_dataframe(): assert_eq(expect, got) -@pytest_xfail(reason="level=None not implemented yet") def 
 def test_rename_for_level_is_None_MC():
     gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
     gdf.columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)])
@@ -11109,3 +11129,9 @@ def test_bool_raises():
         lfunc_args_and_kwargs=[[cudf.DataFrame()]],
         rfunc_args_and_kwargs=[[pd.DataFrame()]],
     )
+
+
+def test_from_pandas_preserve_column_dtype():
+    df = pd.DataFrame([[1, 2]], columns=pd.Index([1, 2], dtype="int8"))
+    result = cudf.DataFrame.from_pandas(df)
+    pd.testing.assert_index_equal(result.columns, df.columns, exact=True)
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index 6bc775d2a2c..4a2345fc009 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -14,7 +14,11 @@
 import cudf
 import cudf.testing.dataset_generator as dataset_generator
 from cudf import DataFrame, Series
-from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
+from cudf.core._compat import (
+    PANDAS_CURRENT_SUPPORTED_VERSION,
+    PANDAS_GE_220,
+    PANDAS_VERSION,
+)
 from cudf.core.index import DatetimeIndex
 from cudf.testing import assert_eq
 from cudf.testing._utils import (
@@ -801,6 +805,10 @@ def test_to_datetime_different_formats_notimplemented():
         cudf.to_datetime(["2015-02-01", "2015-02-01 10:10:10"])


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas.",
+)
 def test_datetime_can_cast_safely():
     sr = cudf.Series(
         ["1679-01-01", "2000-01-31", "2261-01-01"], dtype="datetime64[ms]"
@@ -847,6 +855,10 @@ def test_datetime_array_timeunit_cast(dtype):


 @pytest.mark.parametrize("timeunit", ["D", "W", "M", "Y"])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_datetime_scalar_timeunit_cast(timeunit):
     testscalar = np.datetime64("2016-11-20", timeunit)

@@ -1535,6 +1547,10 @@ def test_date_range_start_end_periods(start, end, periods):
     )


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_date_range_start_end_freq(start, end, freq):
     if isinstance(freq, str):
         _gfreq = _pfreq = freq
@@ -1551,6 +1567,10 @@ def test_date_range_start_end_freq(start, end, freq):
     )


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_date_range_start_freq_periods(start, freq, periods):
     if isinstance(freq, str):
         _gfreq = _pfreq = freq
@@ -1643,6 +1663,9 @@ def test_date_range_raise_overflow():
     ],
 )
 def test_date_range_raise_unsupported(freqstr_unsupported):
+    if not PANDAS_GE_220 and freqstr_unsupported.endswith("E"):
+        pytest.skip(reason="YE, etc. support was added in pandas 2.2")
+
support was added in pandas 2.2") + s, e = "2001-01-01", "2008-01-31" pd.date_range(start=s, end=e, freq=freqstr_unsupported) with pytest.raises(ValueError, match="does not yet support"): @@ -1654,7 +1677,7 @@ def test_date_range_raise_unsupported(freqstr_unsupported): if freqstr_unsupported != "3MS": freqstr_unsupported = freqstr_unsupported.lower() with pytest.raises(ValueError, match="does not yet support"): - with pytest.warns(FutureWarning): + with expect_warning_if(PANDAS_GE_220): cudf.date_range(start=s, end=e, freq=freqstr_unsupported) @@ -1995,6 +2018,10 @@ def test_first(idx, offset): ) ], ) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) def test_first_start_at_end_of_month(idx, offset): p = pd.Series(range(len(idx)), index=idx) g = cudf.from_pandas(p) @@ -2319,6 +2346,10 @@ def test_datetime_to_str(data, dtype): assert_eq(actual.to_pandas(nullable=True), expected) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) def test_datetime_string_to_datetime_resolution_loss_raises(): data = ["2020-01-01 00:00:00.00001"] dtype = "datetime64[s]" @@ -2536,3 +2567,9 @@ def test_dti_methods(method, kwargs): result = getattr(cudf_dti, method)(**kwargs) expected = getattr(pd_dti, method)(**kwargs) assert_eq(result, expected) + + +def test_date_range_start_end_divisible_by_freq(): + result = cudf.date_range("2011-01-01", "2011-01-02", freq="h") + expected = pd.date_range("2011-01-01", "2011-01-02", freq="h") + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index 65f739bc74a..048b3a656e3 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -106,7 +106,7 @@ def test_typecast_from_float_to_decimal(request, data, from_dtype, to_dtype): pa_arr = got.to_arrow().cast( pa.decimal128(to_dtype.precision, to_dtype.scale) ) - expected = cudf.Series(Decimal64Column.from_arrow(pa_arr)) + expected = cudf.Series._from_column(Decimal64Column.from_arrow(pa_arr)) got = got.astype(to_dtype) @@ -146,7 +146,7 @@ def test_typecast_from_int_to_decimal(data, from_dtype, to_dtype): .cast("float64") .cast(pa.decimal128(to_dtype.precision, to_dtype.scale)) ) - expected = cudf.Series(Decimal64Column.from_arrow(pa_arr)) + expected = cudf.Series._from_column(Decimal64Column.from_arrow(pa_arr)) got = got.astype(to_dtype) @@ -206,9 +206,9 @@ def test_typecast_to_from_decimal(data, from_dtype, to_dtype): pa.decimal128(to_dtype.precision, to_dtype.scale), safe=False ) if isinstance(to_dtype, Decimal32Dtype): - expected = cudf.Series(Decimal32Column.from_arrow(pa_arr)) + expected = cudf.Series._from_column(Decimal32Column.from_arrow(pa_arr)) elif isinstance(to_dtype, Decimal64Dtype): - expected = cudf.Series(Decimal64Column.from_arrow(pa_arr)) + expected = cudf.Series._from_column(Decimal64Column.from_arrow(pa_arr)) with expect_warning_if(to_dtype.scale < s.dtype.scale, UserWarning): got = s.astype(to_dtype) @@ -245,7 +245,7 @@ def test_typecast_from_decimal(data, from_dtype, to_dtype): pa_arr = got.to_arrow().cast(to_dtype, safe=False) got = got.astype(to_dtype) - expected = cudf.Series(NumericalColumn.from_arrow(pa_arr)) + expected = cudf.Series._from_column(NumericalColumn.from_arrow(pa_arr)) assert_eq(got, expected) assert_eq(got.dtype, expected.dtype) @@ -398,3 +398,13 @@ def test_decimal_overflow(): s = cudf.Series([1, 2], 
diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index 7f48e414180..44270d20d59 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -78,7 +78,7 @@ def assert_buffer_equal(buffer_and_dtype: tuple[_CuDFBuffer, Any], cudfcol):
     # FIXME: In gh-10202 some minimal fixes were added to unblock CI. But
     # currently only non-null values are compared, null positions are
     # unchecked.
-    non_null_idxs = ~cudf.Series(cudfcol).isna()
+    non_null_idxs = cudfcol.notnull()
     assert_eq(
         col_from_buf.apply_boolean_mask(non_null_idxs),
         cudfcol.apply_boolean_mask(non_null_idxs),
@@ -86,8 +86,8 @@ def assert_buffer_equal(buffer_and_dtype: tuple[_CuDFBuffer, Any], cudfcol):
     array_from_dlpack = cp.from_dlpack(buf.__dlpack__()).get()
     col_array = cp.asarray(cudfcol.data_array_view(mode="read")).get()
     assert_eq(
-        array_from_dlpack[non_null_idxs.to_numpy()].flatten(),
-        col_array[non_null_idxs.to_numpy()].flatten(),
+        array_from_dlpack[non_null_idxs.values_host].flatten(),
+        col_array[non_null_idxs.values_host].flatten(),
     )


diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index 794660cffcb..5d3d18cbe95 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -11,6 +11,7 @@
 from packaging import version

 import cudf
+from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION

 pytestmark = pytest.mark.filterwarnings("ignore::FutureWarning")

@@ -96,6 +97,10 @@ def prinoptions(cls):
         itertools.chain(*[_find_doctests_in_obj(mod) for mod in tests]),
         ids=lambda docstring: docstring.name,
     )
+    @pytest.mark.skipif(
+        PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+        reason="Doctests not expected to pass on older versions of pandas",
+    )
     def test_docstring(self, docstring):
         # We ignore differences in whitespace in the doctest output, and enable
         # the use of an ellipsis "..." to match any string in the doctest
diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py
index 28fdfb5c2f1..82ecd356bbf 100644
--- a/python/cudf/cudf/tests/test_gcs.py
+++ b/python/cudf/cudf/tests/test_gcs.py
@@ -42,12 +42,8 @@ def mock_size(*args):
     monkeypatch.setattr(gcsfs.core.GCSFileSystem, "size", mock_size)

     # Test read from explicit path.
-    # Since we are monkey-patching, we cannot use
-    # use_python_file_object=True, because the pyarrow
-    # `open_input_file` command will fail (since it doesn't
-    # use the monkey-patched `open` definition)
     with pytest.warns(FutureWarning):
-        got = cudf.read_csv(f"gcs://{fpath}", use_python_file_object=False)
+        got = cudf.read_csv(f"gcs://{fpath}")
     assert_eq(pdf, got)

     # AbstractBufferedFile -> PythonFile conversion
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index 74f04c0584f..0aaa71e50d7 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -188,6 +188,10 @@ def test_groupby_as_index_single_agg(pdf, gdf, as_index):

 @pytest.mark.parametrize("engine", ["cudf", "jit"])
 @pytest.mark.parametrize("as_index", [True, False])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Include groups missing on old versions of pandas",
+)
 def test_groupby_as_index_apply(pdf, gdf, as_index, engine):
     gdf = gdf.groupby("y", as_index=as_index).apply(
         lambda df: df["x"].mean(), engine=engine
@@ -298,6 +302,10 @@ def assert_values_equal(arr):
         assert_values_equal(pddf[k].values)


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply():
     np.random.seed(0)
     df = DataFrame()
@@ -338,6 +346,10 @@ def f3(df, k, L, m):


 @pytest.mark.parametrize("func,args", create_test_groupby_apply_args_params())
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_args(func, args):
     np.random.seed(0)
     df = DataFrame()
@@ -500,6 +512,10 @@ def func(df):
     "func", ["min", "max", "sum", "mean", "var", "std", "idxmin", "idxmax"]
 )
 @pytest.mark.parametrize("dataset", ["small", "large", "nans"])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Include groups missing on old versions of pandas",
+)
 def test_groupby_apply_jit_unary_reductions(
     func, dtype, dataset, groupby_jit_datasets
 ):
@@ -530,6 +546,10 @@ def func(df):


 # test unary index reductions for special values
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def groupby_apply_jit_idx_reductions_special_vals_inner(
     func, data, dtype, special_val
 ):
@@ -555,6 +575,10 @@ def func(df):
 @pytest.mark.parametrize("func", ["min", "max", "sum", "mean", "var", "std"])
 @pytest.mark.parametrize("special_val", [np.nan, np.inf, -np.inf])
 @pytest.mark.parametrize("dataset", ["small", "large", "nans"])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Include groups missing on old versions of pandas",
+)
 def test_groupby_apply_jit_reductions_special_vals(
     func, dtype, dataset, groupby_jit_datasets, special_val
 ):
@@ -583,6 +607,10 @@ def test_groupby_apply_jit_reductions_special_vals(
     ],
 )
 @pytest.mark.parametrize("dataset", ["small", "large", "nans"])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="include_groups keyword new in pandas 2.2",
+)
 def test_groupby_apply_jit_idx_reductions_special_vals(
     func, dtype, dataset, groupby_jit_datasets, special_val
 ):
@@ -593,6 +621,10 @@ def test_groupby_apply_jit_idx_reductions_special_vals(


 @pytest.mark.parametrize("dtype", ["int32"])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_jit_sum_integer_overflow(dtype):
     max = np.iinfo(dtype).max

@@ -627,6 +659,10 @@ def func(group):
         "large",
     ],
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_jit_correlation(dataset, groupby_jit_datasets, dtype):
     dataset = groupby_jit_datasets[dataset]

@@ -653,6 +689,10 @@ def func(group):


 @pytest.mark.parametrize("dtype", ["int32", "int64"])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_jit_correlation_zero_variance(dtype):
     # pearson correlation is undefined when the variance of either
     # variable is zero. This test ensures that the jit implementation
@@ -711,6 +751,10 @@ def func(group):


 @pytest.mark.parametrize("dtype", ["uint8", "str"])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_unsupported_dtype(dtype):
     df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
     df["b"] = df["b"].astype(dtype)
@@ -739,6 +783,10 @@ def func(group):
         lambda df: df["val1"].mean() + df["val2"].std(),
     ],
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_jit_basic(func, groupby_jit_data_small):
     run_groupby_apply_jit_test(groupby_jit_data_small, func, ["key1", "key2"])

@@ -759,12 +807,20 @@ def f3(df, k, L, m):
 @pytest.mark.parametrize(
     "func,args", create_test_groupby_apply_jit_args_params()
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_jit_args(func, args, groupby_jit_data_small):
     run_groupby_apply_jit_test(
         groupby_jit_data_small, func, ["key1", "key2"], *args
     )


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_jit_block_divergence():
     # https://github.com/rapidsai/cudf/issues/12686
     df = cudf.DataFrame(
@@ -782,6 +838,10 @@ def diverging_block(grp_df):
     run_groupby_apply_jit_test(df, diverging_block, ["a"])


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_caching():
     # Make sure similar functions that differ
     # by simple things like constants actually
@@ -818,6 +878,10 @@ def f(group):
     assert precompiled.currsize == 3


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_no_bytecode_fallback():
     # tests that a function which contains no bytecode
     # attribute, but would still be executable using
@@ -836,6 +900,10 @@ def f(group):
     assert_groupby_results_equal(expect, got)


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_return_col_from_df():
     # tests a UDF that consists of purely colwise
     # ops, such as `lambda group: group.x + group.y`
@@ -862,6 +930,10 @@ def func(df):


 @pytest.mark.parametrize("func", [lambda group: group.sum()])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_return_df(func):
     # tests a UDF that reduces over a dataframe
     # and produces a series with the original column names
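Every `skipif` in this file keys off the same two values from `cudf.core._compat`; roughly (the exact pinned version below is an assumption for illustration, not taken from this diff):

```python
import pandas as pd
from packaging import version

PANDAS_VERSION = version.parse(pd.__version__)
PANDAS_GE_220 = PANDAS_VERSION >= version.parse("2.2.0")
# the "current supported" pin tracks the newest pandas that cudf tests
# against; "2.2.2" is assumed here purely for illustration
PANDAS_CURRENT_SUPPORTED_VERSION = version.parse("2.2.2")
```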
@@ -1940,6 +2012,10 @@ def test_groupby_agg_combinations(agg):
     )


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Include groups missing on old versions of pandas",
+)
 def test_groupby_apply_noempty_group():
     pdf = pd.DataFrame(
         {"a": [1, 1, 2, 2], "b": [1, 2, 1, 2], "c": [1, 2, 3, 4]}
@@ -2208,6 +2284,10 @@ def f3(x, k, L, m):
 @pytest.mark.parametrize(
     "func,args", create_test_groupby_apply_return_scalars_params()
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_groupby_apply_return_scalars(func, args):
     pdf = pd.DataFrame(
         {
@@ -2266,6 +2346,10 @@ def f5(x, k, L, m):
 @pytest.mark.parametrize(
     "func,args", create_test_groupby_apply_return_series_dataframe_params()
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Include groups missing on old versions of pandas",
+)
 def test_groupby_apply_return_series_dataframe(func, args):
     pdf = pd.DataFrame(
         {"key": [0, 0, 1, 1, 2, 2, 2], "val": [0, 1, 2, 3, 4, 5, 6]}
@@ -2744,6 +2828,10 @@ def test_groupby_diff_row_zero_shift(nelem):

 # TODO: test for category columns when cudf.Scalar supports category type
 @pytest.mark.parametrize("nelem", [10, 100, 1000])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="warning not present in older pandas versions",
+)
 def test_groupby_fillna_multi_value(nelem):
     t = rand_dataframe(
         dtypes_meta=[
@@ -2790,6 +2878,10 @@ def test_groupby_fillna_multi_value(nelem):

 # TODO: test for category columns when cudf.Scalar supports category type
 # TODO: cudf.fillna does not support decimal column to column fill yet
 @pytest.mark.parametrize("nelem", [10, 100, 1000])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="warning not present in older pandas versions",
+)
 def test_groupby_fillna_multi_value_df(nelem):
     t = rand_dataframe(
         dtypes_meta=[
@@ -2843,6 +2935,10 @@ def test_groupby_fillna_multi_value_df(nelem):
     "data", [[1, None, 2, None, 3, None], [1, 2, 3, 4, 5, 6]]
 )
 @pytest.mark.parametrize("args", [{"value": 42}, {"method": "ffill"}])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="warning not present in older pandas versions",
+)
 def test_groupby_various_by_fillna(by, data, args):
     ps = pd.Series(data)
     gs = cudf.from_pandas(ps)
@@ -3146,6 +3242,10 @@ def test_groupby_freq_s(label, closed):
         ),
     ],
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Warnings only given on newer versions.",
+)
 def test_groupby_get_group(pdf, group, name, obj):
     gdf = cudf.from_pandas(pdf)

@@ -3644,6 +3744,10 @@ def test_group_by_pandas_sort_order(groups, sort):
         "last",
     ],
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_group_by_empty_reduction(dtype, reduce_op):
     gdf = cudf.DataFrame({"a": [], "b": [], "c": []}, dtype=dtype)
     pdf = gdf.to_pandas()
@@ -3664,6 +3768,10 @@ def test_group_by_empty_reduction(dtype, reduce_op):
     "apply_op",
     ["sum", "min", "max", "idxmax"],
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_group_by_empty_apply(request, dtype, apply_op):
     request.applymarker(
         pytest.mark.xfail(
@@ -3719,6 +3827,10 @@ def test_groupby_consecutive_operations():
     assert_groupby_results_equal(actual, expected, check_dtype=False)


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Warning only given on newer versions.",
+)
 def test_categorical_grouping_pandas_compatibility():
     gdf = cudf.DataFrame(
         {
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index 722a64cb553..3f483219423 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -16,6 +16,11 @@

 import cudf
 from cudf.api.extensions import no_default
+from cudf.core._compat import (
+    PANDAS_CURRENT_SUPPORTED_VERSION,
+    PANDAS_GE_220,
+    PANDAS_VERSION,
+)
 from cudf.core.index import CategoricalIndex, DatetimeIndex, Index, RangeIndex
 from cudf.testing import assert_eq
 from cudf.testing._utils import (
@@ -791,9 +796,27 @@ def test_index_to_series(data):
     "name_data,name_other",
     [("abc", "c"), (None, "abc"), ("abc", pd.NA), ("abc", "abc")],
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_index_difference(data, other, sort, name_data, name_other):
     pd_data = pd.Index(data, name=name_data)
     pd_other = pd.Index(other, name=name_other)
+    if (
+        not PANDAS_GE_220
+        and isinstance(pd_data.dtype, pd.CategoricalDtype)
+        and not isinstance(pd_other.dtype, pd.CategoricalDtype)
+        and pd_other.isnull().any()
+    ):
+        pytest.skip(reason="https://github.com/pandas-dev/pandas/issues/57318")
+
+    if (
+        not PANDAS_GE_220
+        and len(pd_other) == 0
+        and len(pd_data) != len(pd_data.unique())
+    ):
+        pytest.skip(reason="Bug fixed in pandas-2.2+")

     gd_data = cudf.from_pandas(pd_data)
     gd_other = cudf.from_pandas(pd_other)
@@ -1017,6 +1040,10 @@ def test_index_equal_misc(data, other):
         ["abcd", "defgh", "werty", "poiu"],
     ],
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Does not warn on older versions of pandas",
+)
 def test_index_append(data, other):
     pd_data = pd.Index(data)
     pd_other = pd.Index(other)
@@ -1220,6 +1247,10 @@ def test_index_append_error(data, other):
         ),
     ],
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Does not warn on older versions of pandas",
+)
 def test_index_append_list(data, other):
     pd_data = data
     pd_other = other
@@ -2084,6 +2115,10 @@ def test_get_indexer_multi_numeric_deviate(key, method):


 @pytest.mark.parametrize("method", ["ffill", "bfill"])
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_get_indexer_multi_error(method):
     pi = pd.MultiIndex.from_tuples(
         [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 10), (1, 1, 1), (2, 2, 1)]
@@ -2527,7 +2562,7 @@ def test_isin_index(index, values):
     )
     with expect_warning_if(is_dt_str):
         got = gidx.isin(values)
-    with expect_warning_if(is_dt_str):
+    with expect_warning_if(PANDAS_GE_220 and is_dt_str):
         expected = pidx.isin(values)

     assert_eq(got, expected)
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 7005cbc6834..00ae99466bb 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -1016,6 +1016,10 @@ def test_series_setitem_iloc(key, value, nulls):
         (slice(0, 2), [0.5, 0.25]),
     ],
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_series_setitem_dtype(key, value):
     psr = pd.Series([1, 2, 3], dtype="int32")
     gsr = cudf.from_pandas(psr)
@@ -1634,6 +1638,10 @@ def test_dataframe_loc_iloc_inplace_update_with_RHS_dataframe(
     assert_eq(expected, actual)


+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="No warning in older versions of pandas",
+)
 def test_dataframe_loc_inplace_update_with_invalid_RHS_df_columns():
     gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
     pdf = gdf.to_pandas()
@@ -2361,3 +2369,21 @@ def test_sliced_categorical_as_ordered():
         name="a",
     )
     assert_eq(result, expected)
+
+
+def test_duplicate_labels_raises():
+    df = cudf.DataFrame([[1, 2]], columns=["a", "b"])
+    with pytest.raises(ValueError):
+        df[["a", "a"]]
+    with pytest.raises(ValueError):
+        df.loc[:, ["a", "a"]]
+
+
+@pytest.mark.parametrize("indexer", ["iloc", "loc"])
+@pytest.mark.parametrize("dtype", ["category", "timedelta64[ns]"])
+def test_loc_iloc_setitem_col_slice_non_cupy_types(indexer, dtype):
+    df_pd = pd.DataFrame(range(2), dtype=dtype)
+    df_cudf = cudf.DataFrame.from_pandas(df_pd)
+    getattr(df_pd, indexer)[:, 0] = getattr(df_pd, indexer)[:, 0]
+    getattr(df_cudf, indexer)[:, 0] = getattr(df_cudf, indexer)[:, 0]
+    assert_eq(df_pd, df_cudf)
diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
index a4f0b9fc97e..c76a49103e2 100644
--- a/python/cudf/cudf/tests/test_interpolate.py
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -125,6 +125,10 @@ def test_interpolate_series_values_or_index(data, index, method):
         ),
     ],
 )
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Does not fail on older versions of pandas",
+)
 def test_interpolate_dataframe_error_cases(data, kwargs):
     gsr = cudf.DataFrame(data)
     psr = gsr.to_pandas()
diff --git a/python/cudf/cudf/tests/test_interval.py b/python/cudf/cudf/tests/test_interval.py
index 5eeea87d8e0..5e1dd33fbf1 100644
--- a/python/cudf/cudf/tests/test_interval.py
+++ b/python/cudf/cudf/tests/test_interval.py
@@ -6,6 +6,7 @@
 import pytest

 import cudf
+from cudf.core._compat import PANDAS_GE_220
 from cudf.testing import assert_eq


@@ -168,6 +169,10 @@ def test_interval_index_unique():

 @pytest.mark.parametrize("box", [pd.Series, pd.IntervalIndex])
 @pytest.mark.parametrize("tz", ["US/Eastern", None])
+@pytest.mark.skipif(
+    condition=not PANDAS_GE_220,
+    reason="ME frequency new in pandas 2.2",
+)
 def test_interval_with_datetime(tz, box):
     dti = pd.date_range(
         start=pd.Timestamp("20180101", tz=tz),
@@ -194,3 +199,14 @@ def test_intervaldtype_eq_string_with_attributes():
     dtype = cudf.IntervalDtype("int64", closed="left")
     assert dtype == "interval"
     assert dtype == "interval[int64, left]"
+
+
+def test_reduction_return_interval_pandas_compatible():
+    ii = pd.IntervalIndex.from_tuples(
+        [("2017-01-03", "2017-01-04")], dtype="interval[datetime64[ns], right]"
+    )
+    cudf_ii = cudf.IntervalIndex.from_pandas(ii)
+    with cudf.option_context("mode.pandas_compatible", True):
+        result = cudf_ii.min()
+    expected = ii.min()
+    assert result == expected
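`test_reduction_return_interval_pandas_compatible` above is the user-visible shape of the new behavior: inside the compatibility option, reductions hand back pandas-style scalars. Mirroring the test in user code:

```python
import pandas as pd

import cudf

ii = pd.IntervalIndex.from_tuples(
    [("2017-01-03", "2017-01-04")], dtype="interval[datetime64[ns], right]"
)
cudf_ii = cudf.IntervalIndex.from_pandas(ii)
with cudf.option_context("mode.pandas_compatible", True):
    result = cudf_ii.min()  # a pd.Interval, matching ii.min()
```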
diff --git a/python/cudf/cudf/tests/test_join_order.py b/python/cudf/cudf/tests/test_join_order.py
index 9ea4ba007d2..9a95f0e01ab 100644
--- a/python/cudf/cudf/tests/test_join_order.py
+++ b/python/cudf/cudf/tests/test_join_order.py
@@ -1,13 +1,19 @@
 # Copyright (c) 2023-2024, NVIDIA CORPORATION.

 import itertools
+import operator
 import string
+from collections import defaultdict

 import numpy as np
 import pytest

 import cudf
-from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
+from cudf.core._compat import (
+    PANDAS_CURRENT_SUPPORTED_VERSION,
+    PANDAS_GE_220,
+    PANDAS_VERSION,
+)
 from cudf.testing import assert_eq

@@ -35,10 +41,124 @@ def right():
 # Behaviour in sort=False case didn't match documentation in many
 # cases prior to https://github.com/pandas-dev/pandas/pull/54611
 # (released as part of pandas 2.2)
-def expected(left, right, sort, *, how):
-    left = left.to_pandas()
-    right = right.to_pandas()
-    return left.merge(right, on="key", how=how, sort=sort)
+if PANDAS_GE_220:
+    # Behaviour in sort=False case didn't match documentation in many
+    # cases prior to https://github.com/pandas-dev/pandas/pull/54611
+    # (released as part of pandas 2.2)
+    def expected(left, right, sort, *, how):
+        left = left.to_pandas()
+        right = right.to_pandas()
+        return left.merge(right, on="key", how=how, sort=sort)
+
+else:
+
+    def expect_inner(left, right, sort):
+        left_key = left.key.values_host.tolist()
+        left_val = left.val.values_host.tolist()
+        right_key = right.key.values_host.tolist()
+        right_val = right.val.values_host.tolist()
+
+        right_have = defaultdict(list)
+        for i, k in enumerate(right_key):
+            right_have[k].append(i)
+        keys = []
+        val_x = []
+        val_y = []
+        for k, v in zip(left_key, left_val):
+            if k not in right_have:
+                continue
+            for i in right_have[k]:
+                keys.append(k)
+                val_x.append(v)
+                val_y.append(right_val[i])
+
+        if sort:
+            # Python sort is stable, so this will preserve input order for
+            # equal items.
+            keys, val_x, val_y = zip(
+                *sorted(zip(keys, val_x, val_y), key=operator.itemgetter(0))
+            )
+        return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y})
+
+    def expect_left(left, right, sort):
+        left_key = left.key.values_host.tolist()
+        left_val = left.val.values_host.tolist()
+        right_key = right.key.values_host.tolist()
+        right_val = right.val.values_host.tolist()
+
+        right_have = defaultdict(list)
+        for i, k in enumerate(right_key):
+            right_have[k].append(i)
+        keys = []
+        val_x = []
+        val_y = []
+        for k, v in zip(left_key, left_val):
+            if k not in right_have:
+                right_vals = [None]
+            else:
+                right_vals = [right_val[i] for i in right_have[k]]
+
+            for rv in right_vals:
+                keys.append(k)
+                val_x.append(v)
+                val_y.append(rv)
+
+        if sort:
+            # Python sort is stable, so this will preserve input order for
+            # equal items.
+            keys, val_x, val_y = zip(
+                *sorted(zip(keys, val_x, val_y), key=operator.itemgetter(0))
+            )
+        return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y})
+
+    def expect_outer(left, right, sort):
+        left_key = left.key.values_host.tolist()
+        left_val = left.val.values_host.tolist()
+        right_key = right.key.values_host.tolist()
+        right_val = right.val.values_host.tolist()
+        right_have = defaultdict(list)
+        for i, k in enumerate(right_key):
+            right_have[k].append(i)
+        keys = []
+        val_x = []
+        val_y = []
+        for k, v in zip(left_key, left_val):
+            if k not in right_have:
+                right_vals = [None]
+            else:
+                right_vals = [right_val[i] for i in right_have[k]]
+            for rv in right_vals:
+                keys.append(k)
+                val_x.append(v)
+                val_y.append(rv)
+        left_have = set(left_key)
+        for k, v in zip(right_key, right_val):
+            if k not in left_have:
+                keys.append(k)
+                val_x.append(None)
+                val_y.append(v)
+
+        # Python sort is stable, so this will preserve input order for
+        # equal items.
+        # outer joins are always sorted, but we test both sort values
+        keys, val_x, val_y = zip(
+            *sorted(zip(keys, val_x, val_y), key=operator.itemgetter(0))
+        )
+        return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y})
+
+    def expected(left, right, sort, *, how):
+        if how == "inner":
+            return expect_inner(left, right, sort)
+        elif how == "outer":
+            return expect_outer(left, right, sort)
+        elif how == "left":
+            return expect_left(left, right, sort)
+        elif how == "right":
+            return expect_left(right, left, sort).rename(
+                {"val_x": "val_y", "val_y": "val_x"}, axis=1
+            )
+        else:
+            raise NotImplementedError()
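The "Python sort is stable" comments in the reference implementations above are doing real work: rows with equal keys must keep their input order. A two-line demonstration of the guarantee being relied on:

```python
from operator import itemgetter

rows = [(1, "x"), (0, "y"), (1, "z")]
print(sorted(rows, key=itemgetter(0)))
# [(0, 'y'), (1, 'x'), (1, 'z')] -- the two key == 1 rows keep input order
```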


 @pytest.mark.parametrize("how", ["inner", "left", "right", "outer"])
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index 36bcaa66d7d..7d87fc73621 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -928,6 +928,7 @@ def test_empty_nested_list_uninitialized_offsets_memory_usage():
     col = column_empty(0, cudf.ListDtype(cudf.ListDtype("int64")))
     nested_col = col.children[1]
     empty_inner = type(nested_col)(
+        data=None,
         size=nested_col.size,
         dtype=nested_col.dtype,
         mask=nested_col.mask,
@@ -939,6 +940,7 @@ def test_empty_nested_list_uninitialized_offsets_memory_usage():
         ),
     )
     col_empty_offset = type(col)(
+        data=None,
         size=col.size,
         dtype=col.dtype,
         mask=col.mask,
@@ -946,5 +948,5 @@ def test_empty_nested_list_uninitialized_offsets_memory_usage():
         null_count=col.null_count,
         children=(column_empty(0, col.children[0].dtype), empty_inner),
     )
-    ser = cudf.Series._from_data({None: col_empty_offset})
+    ser = cudf.Series._from_column(col_empty_offset)
     assert ser.memory_usage() == 8
diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py
index b7314a36e73..b1e095e8853 100644
--- a/python/cudf/cudf/tests/test_multiindex.py
+++ b/python/cudf/cudf/tests/test_multiindex.py
@@ -167,7 +167,9 @@ def test_string_index():
     pdf.index = stringIndex.to_pandas()
     gdf.index = stringIndex
     assert_eq(pdf, gdf)
-    stringIndex = cudf.Index(as_column(["a", "b", "c", "d", "e"]), name="name")
+    stringIndex = cudf.Index._from_column(
+        as_column(["a", "b", "c", "d", "e"]), name="name"
+    )
     pdf.index = stringIndex.to_pandas()
     gdf.index = stringIndex
     assert_eq(pdf, gdf)
@@ -2179,3 +2181,13 @@ def test_unique_level():
     result = pd_mi.unique(level=1)
     expected = cudf_mi.unique(level=1)
     assert_eq(result, expected)
+
+
+@pytest.mark.parametrize(
+    "idx", [pd.Index, pd.CategoricalIndex, pd.DatetimeIndex, pd.TimedeltaIndex]
+)
+def test_from_arrays_infer_names(idx):
+    arrays = [idx([1], name="foo"), idx([2], name="bar")]
+    expected = pd.MultiIndex.from_arrays(arrays)
+    result = cudf.MultiIndex.from_arrays(arrays)
+    assert_eq(result, expected)
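`test_from_arrays_infer_names` above pins down name inference; in user terms, mirroring the pandas behavior the test compares against:

```python
import pandas as pd

import cudf

arrays = [pd.Index([1], name="foo"), pd.Index([2], name="bar")]
mi = cudf.MultiIndex.from_arrays(arrays)
print(mi.names)  # ['foo', 'bar'] -- inferred from the input indexes
```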
diff --git a/python/cudf/cudf/tests/test_mvc.py b/python/cudf/cudf/tests/test_mvc.py
index 7dd25ebc500..055bc5757b3 100644
--- a/python/cudf/cudf/tests/test_mvc.py
+++ b/python/cudf/cudf/tests/test_mvc.py
@@ -1,8 +1,9 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 import subprocess
 import sys

 import pytest
+from packaging import version

 IS_CUDA_11 = False
 IS_CUDA_12 = False
@@ -14,9 +15,12 @@
 # do not test cuda 12 if pynvjitlink isn't present
 HAVE_PYNVJITLINK = False
 try:
+    import numba
     import pynvjitlink  # noqa: F401

-    HAVE_PYNVJITLINK = True
+    HAVE_PYNVJITLINK = version.parse(numba.__version__) >= version.parse(
+        "0.58"
+    )
 except ModuleNotFoundError:
     pass
diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py
index 1b0589254f5..b1a2f081cd2 100644
--- a/python/cudf/cudf/tests/test_numerical.py
+++ b/python/cudf/cudf/tests/test_numerical.py
@@ -5,6 +5,7 @@
 import pytest

 import cudf
+from cudf.core._compat import PANDAS_GE_220
 from cudf.testing import assert_eq
 from cudf.testing._utils import NUMERIC_TYPES, expect_warning_if
 from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes
@@ -373,7 +374,7 @@ def test_to_numeric_error(data, errors):
     ):
         cudf.to_numeric(data, errors=errors)
     else:
-        with expect_warning_if(errors == "ignore"):
+        with expect_warning_if(PANDAS_GE_220 and errors == "ignore"):
             expect = pd.to_numeric(data, errors=errors)
         with expect_warning_if(errors == "ignore"):
             got = cudf.to_numeric(data, errors=errors)
diff --git a/python/cudf/cudf/tests/test_onehot.py b/python/cudf/cudf/tests/test_onehot.py
index 154e1e19072..cc17dc46e0a 100644
--- a/python/cudf/cudf/tests/test_onehot.py
+++ b/python/cudf/cudf/tests/test_onehot.py
@@ -155,3 +155,9 @@ def test_get_dummies_array_like_with_nan():
     actual = cudf.get_dummies(ser, dummy_na=True, prefix="a", prefix_sep="_")

     assert_eq(expected, actual)
+
+
+def test_get_dummies_cats_deprecated():
+    df = cudf.DataFrame(range(3))
+    with pytest.warns(FutureWarning):
+        cudf.get_dummies(df, cats={0: [0, 1, 2]})
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index e0884a5819a..c2a30b76bea 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -1679,7 +1679,13 @@ def run_orc_columns_and_index_param(index_obj, index, columns):
     "columns",
     [
         None,
-        [],
+        pytest.param(
+            [],
+            marks=pytest.mark.skipif(
+                PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+                reason="Bug in older version of pandas",
+            ),
+        ),
     ],
 )
 def test_orc_columns_and_index_param(index_obj, index, columns):
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 3806b901b10..8b59a7eef08 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -19,10 +19,11 @@
 import pytest
 from fsspec.core import get_fs_token_paths
 from packaging import version
-from pyarrow import fs as pa_fs, parquet as pq
+from pyarrow import parquet as pq

 import cudf
 from cudf._lib.parquet import read_parquet_chunked
+from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.io.parquet import (
     ParquetDatasetWriter,
     ParquetWriter,
@@ -515,10 +516,6 @@ def test_parquet_read_filtered_multiple_files(tmpdir):
     )


-@pytest.mark.skipif(
-    version.parse(pa.__version__) < version.parse("1.0.1"),
-    reason="pyarrow 1.0.0 needed for various operators and operand types",
-)
 @pytest.mark.parametrize(
     "predicate,expected_len",
     [
@@ -705,40 +702,17 @@ def test_parquet_reader_filepath_or_buffer(parquet_path_or_buf, src):
     assert_eq(expect, got)


-def test_parquet_reader_arrow_nativefile(parquet_path_or_buf):
-    # Check that we can read a file opened with the
-    # Arrow FileSystem interface
-    expect = cudf.read_parquet(parquet_path_or_buf("filepath"))
-    fs, path = pa_fs.FileSystem.from_uri(parquet_path_or_buf("filepath"))
-    with fs.open_input_file(path) as fil:
-        with pytest.warns(FutureWarning):
-            got = cudf.read_parquet(fil)
-
-    assert_eq(expect, got)
-
-
-@pytest.mark.parametrize("use_python_file_object", [True, False])
-def test_parquet_reader_use_python_file_object(
-    parquet_path_or_buf, use_python_file_object
-):
-    # Check that the non-default `use_python_file_object=True`
-    # option works as expected
+def test_parquet_reader_file_types(parquet_path_or_buf):
     expect = cudf.read_parquet(parquet_path_or_buf("filepath"))
     fs, _, paths = get_fs_token_paths(parquet_path_or_buf("filepath"))

     # Pass open fsspec file
-    with pytest.warns(FutureWarning):
-        with fs.open(paths[0], mode="rb") as fil:
-            got1 = cudf.read_parquet(
-                fil, use_python_file_object=use_python_file_object
-            )
+    with fs.open(paths[0], mode="rb") as fil:
+        got1 = cudf.read_parquet(fil)
     assert_eq(expect, got1)

     # Pass path only
-    with pytest.warns(FutureWarning):
-        got2 = cudf.read_parquet(
-            paths[0], use_python_file_object=use_python_file_object
-        )
+    got2 = cudf.read_parquet(paths[0])
     assert_eq(expect, got2)


@@ -1978,6 +1952,25 @@ def test_parquet_partitioned(tmpdir_factory, cols, filename):
     assert fn == filename


+@pytest.mark.parametrize("kwargs", [{"nrows": 1}, {"skip_rows": 1}])
+def test_parquet_partitioned_notimplemented(tmpdir_factory, kwargs):
+    # Checks that write_to_dataset is wrapping to_parquet
+    # as expected
+    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
+    size = 100
+    pdf = pd.DataFrame(
+        {
+            "a": np.arange(0, stop=size, dtype="int64"),
+            "b": np.random.choice(list("abcd"), size=size),
+            "c": np.random.choice(np.arange(4), size=size),
+        }
+    )
+    pdf.to_parquet(pdf_dir, index=False, partition_cols=["b"])
+
+    with pytest.raises(NotImplementedError):
+        cudf.read_parquet(pdf_dir, **kwargs)
+
+
 @pytest.mark.parametrize("return_meta", [True, False])
 def test_parquet_writer_chunked_partitioned(tmpdir_factory, return_meta):
     pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
@@ -2397,6 +2390,10 @@ def test_parquet_writer_list_large_mixed(tmpdir):

 @pytest.mark.parametrize("store_schema", [True, False])
 def test_parquet_writer_list_chunked(tmpdir, store_schema):
+    if store_schema and version.parse(pa.__version__) < version.parse(
+        "15.0.0"
+    ):
+        pytest.skip("https://github.com/apache/arrow/pull/37792")
     table1 = cudf.DataFrame(
         {
             "a": list_gen(string_gen, 128, 80, 50),
@@ -2582,6 +2579,10 @@ def normalized_equals(value1, value2):
 @pytest.mark.parametrize("add_nulls", [True, False])
 @pytest.mark.parametrize("store_schema", [True, False])
 def test_parquet_writer_statistics(tmpdir, pdf, add_nulls, store_schema):
+    if store_schema and version.parse(pa.__version__) < version.parse(
+        "15.0.0"
+    ):
+        pytest.skip("https://github.com/apache/arrow/pull/37792")
     file_path = tmpdir.join("cudf.parquet")
     if "col_category" in pdf.columns:
         pdf = pdf.drop(columns=["col_category", "col_bool"])
@@ -2961,6 +2962,10 @@ def test_per_column_options_string_col(tmpdir, encoding):
     assert encoding in fmd.row_group(0).column(0).encodings


+@pytest.mark.skipif(
+    version.parse(pa.__version__) < version.parse("16.0.0"),
+    reason="https://github.com/apache/arrow/pull/39748",
+)
 @pytest.mark.parametrize(
     "num_rows",
     [200, 10000],
@@ -3030,6 +3035,10 @@ def test_parquet_reader_rle_boolean(datadir):
 # a list column in a schema, the cudf reader was confusing
 # nesting information between a list column and a subsequent
 # string column, ultimately causing a crash.
+@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Older versions of pandas do not have DataFrame.map()", +) def test_parquet_reader_one_level_list2(datadir): # we are reading in a file containing binary types, but cudf returns # those as strings. so we have to massage the pandas data to get @@ -3561,6 +3570,10 @@ def test_parquet_reader_roundtrip_structs_with_arrow_schema(tmpdir, data): @pytest.mark.parametrize("index", [None, True, False]) +@pytest.mark.skipif( + version.parse(pa.__version__) < version.parse("15.0.0"), + reason="https://github.com/apache/arrow/pull/37792", +) def test_parquet_writer_roundtrip_with_arrow_schema(index): # Ensure that the concrete and nested types are faithfully being roundtripped # across Parquet with arrow schema @@ -3711,6 +3724,10 @@ def test_parquet_writer_int96_timestamps_and_arrow_schema(): ], ) @pytest.mark.parametrize("index", [None, True, False]) +@pytest.mark.skipif( + version.parse(pa.__version__) < version.parse("15.0.0"), + reason="https://github.com/apache/arrow/pull/37792", +) def test_parquet_writer_roundtrip_structs_with_arrow_schema( tmpdir, data, index ): @@ -3768,6 +3785,26 @@ def test_parquet_chunked_reader( assert_eq(expected, actual) +@pytest.mark.parametrize( + "nrows,skip_rows", + [ + (0, 0), + (1000, 0), + (0, 1000), + (1000, 10000), + ], +) +def test_parquet_reader_nrows_skiprows(nrows, skip_rows): + df = pd.DataFrame( + {"a": [1, 2, 3, 4] * 100000, "b": ["av", "qw", "hi", "xyz"] * 100000} + ) + expected = df[skip_rows : skip_rows + nrows] + buffer = BytesIO() + df.to_parquet(buffer) + got = cudf.read_parquet(buffer, nrows=nrows, skip_rows=skip_rows) + assert_eq(expected, got) + + def test_parquet_reader_pandas_compatibility(): df = pd.DataFrame( {"a": [1, 2, 3, 4] * 10000, "b": ["av", "qw", "hi", "xyz"] * 10000} @@ -3777,3 +3814,251 @@ def test_parquet_reader_pandas_compatibility(): with cudf.option_context("io.parquet.low_memory", True): expected = cudf.read_parquet(buffer) assert_eq(expected, df) + + +@pytest.mark.parametrize("store_schema", [True, False]) +def test_parquet_reader_with_mismatched_tables(store_schema): + # cuDF tables with mixed types + df1 = cudf.DataFrame( + { + "i32": cudf.Series([None, None, None], dtype="int32"), + "i64": cudf.Series([1234, None, 123], dtype="int64"), + "list": list([[1, 2], [None, 4], [5, 6]]), + "time": cudf.Series([1234, 123, 4123], dtype="datetime64[ms]"), + "str": ["vfd", None, "ghu"], + "d_list": list( + [ + [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], + [None, pd.Timedelta(minutes=3)], + [pd.Timedelta(minutes=8), None], + ] + ), + } + ) + + df2 = cudf.DataFrame( + { + "str": ["abc", "def", None], + "i64": cudf.Series([None, 65, 98], dtype="int64"), + "times": cudf.Series([1234, None, 4123], dtype="datetime64[us]"), + "list": list([[7, 8], [9, 10], [None, 12]]), + "d_list": list( + [ + [pd.Timedelta(minutes=4), None], + [None, None], + [pd.Timedelta(minutes=6), None], + ] + ), + } + ) + + # IO buffers + buf1 = BytesIO() + buf2 = BytesIO() + + # Write Parquet with and without arrow schema + df1.to_parquet(buf1, store_schema=store_schema) + df2.to_parquet(buf2, store_schema=store_schema) + + # Read mismatched Parquet files + got = cudf.read_parquet( + [buf1, buf2], + columns=["list", "d_list", "str"], + filters=[("i64", ">", 20)], + allow_mismatched_pq_schemas=True, + ) + + # Construct the expected table + expected = cudf.concat( + [ + df1[df1["i64"] > 20][["list", "d_list", "str"]], + df2[df2["i64"] > 20][["list", "d_list", 
"str"]], + ] + ).reset_index(drop=True) + + # Read with chunked reader (filter columns not supported) + got_chunked = read_parquet_chunked( + [buf1, buf2], + columns=["list", "d_list", "str"], + chunk_read_limit=240, + pass_read_limit=240, + allow_mismatched_pq_schemas=True, + ) + + # Construct the expected table without filter columns + expected_chunked = cudf.concat( + [df1[["list", "d_list", "str"]], df2[["list", "d_list", "str"]]] + ).reset_index(drop=True) + + # Check results + assert_eq(expected, got) + assert_eq(expected_chunked, got_chunked) + + +def test_parquet_reader_with_mismatched_structs(): + data1 = [ + { + "a": 1, + "b": { + "inner_a": 10, + "inner_b": {"inner_inner_b": 1, "inner_inner_a": 2}, + }, + "c": 2, + }, + { + "a": 3, + "b": {"inner_a": 30, "inner_b": {"inner_inner_a": 210}}, + "c": 4, + }, + {"a": 5, "b": {"inner_a": 50, "inner_b": None}, "c": 6}, + {"a": 7, "b": None, "c": 8}, + {"a": None, "b": {"inner_a": None, "inner_b": None}, "c": None}, + None, + { + "a": None, + "b": { + "inner_a": None, + "inner_b": {"inner_inner_b": None, "inner_inner_a": 10}, + }, + "c": 10, + }, + ] + + data2 = [ + {"a": 1, "b": {"inner_b": {"inner_inner_a": None}}}, + {"a": 3, "b": {"inner_b": {"inner_inner_a": 1}}}, + {"a": 5, "b": {"inner_b": None}}, + {"a": 7, "b": {"inner_b": {"inner_inner_b": 1, "inner_inner_a": 0}}}, + {"a": None, "b": {"inner_b": None}}, + None, + {"a": None, "b": {"inner_b": {"inner_inner_a": 1}}}, + ] + + # cuDF tables from struct data + df1 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data1})) + df2 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data2})) + + # Buffers + buf1 = BytesIO() + buf2 = BytesIO() + + # Write to parquet + df1.to_parquet(buf1) + df2.to_parquet(buf2) + + # Read the struct.b.inner_b.inner_inner_a column from parquet + got = cudf.read_parquet( + [buf1, buf2], + columns=["struct.b.inner_b.inner_inner_a"], + allow_mismatched_pq_schemas=True, + ) + got = ( + cudf.Series(got["struct"]) + .struct.field("b") + .struct.field("inner_b") + .struct.field("inner_inner_a") + ) + + # Read with chunked reader + got_chunked = read_parquet_chunked( + [buf1, buf2], + columns=["struct.b.inner_b.inner_inner_a"], + chunk_read_limit=240, + pass_read_limit=240, + allow_mismatched_pq_schemas=True, + ) + got_chunked = ( + cudf.Series(got_chunked["struct"]) + .struct.field("b") + .struct.field("inner_b") + .struct.field("inner_inner_a") + ) + + # Construct the expected series + expected = cudf.concat( + [ + cudf.Series(df1["struct"]) + .struct.field("b") + .struct.field("inner_b") + .struct.field("inner_inner_a"), + cudf.Series(df2["struct"]) + .struct.field("b") + .struct.field("inner_b") + .struct.field("inner_inner_a"), + ] + ).reset_index(drop=True) + + # Check results + assert_eq(expected, got) + assert_eq(expected, got_chunked) + + +def test_parquet_reader_with_mismatched_schemas_error(): + df1 = cudf.DataFrame( + { + "millis": cudf.Series([123, 3454, 123], dtype="timedelta64[ms]"), + "i64": cudf.Series([123, 3454, 123], dtype="int64"), + "i32": cudf.Series([123, 3454, 123], dtype="int32"), + } + ) + df2 = cudf.DataFrame( + { + "i64": cudf.Series([123, 3454, 123], dtype="int64"), + "millis": cudf.Series([123, 3454, 123], dtype="timedelta64[ms]"), + } + ) + + buf1 = BytesIO() + buf2 = BytesIO() + + df1.to_parquet(buf1, store_schema=True) + df2.to_parquet(buf2, store_schema=False) + + with pytest.raises( + ValueError, + match="Encountered mismatching SchemaElement properties for a column in the selected path", + ): + 
cudf.read_parquet( + [buf1, buf2], columns=["millis"], allow_mismatched_pq_schemas=True + ) + + data1 = [ + {"a": 1, "b": {"inner_a": 1, "inner_b": 6}}, + {"a": 3, "b": {"inner_a": None, "inner_b": 2}}, + ] + data2 = [ + {"b": {"inner_a": 1}, "c": "str"}, + {"b": {"inner_a": None}, "c": None}, + ] + + # cuDF tables from struct data + df1 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data1})) + df2 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data2})) + + # Buffers + buf1 = BytesIO() + buf2 = BytesIO() + + # Write to parquet + df1.to_parquet(buf1) + df2.to_parquet(buf2) + + with pytest.raises( + IndexError, + match="Encountered mismatching number of children for a column in the selected path", + ): + cudf.read_parquet( + [buf1, buf2], + columns=["struct.b"], + allow_mismatched_pq_schemas=True, + ) + + with pytest.raises( + IndexError, + match="Encountered mismatching schema tree depths across data sources", + ): + cudf.read_parquet( + [buf1, buf2], + columns=["struct.b.inner_b"], + allow_mismatched_pq_schemas=True, + ) diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py index 719e8a33285..0f13a9e173a 100644 --- a/python/cudf/cudf/tests/test_pickling.py +++ b/python/cudf/cudf/tests/test_pickling.py @@ -127,7 +127,7 @@ def test_pickle_categorical_column(slices): pickled = pickle.dumps(input_col) out = pickle.loads(pickled) - assert_eq(Series(out), Series(input_col)) + assert_eq(Series._from_column(out), Series._from_column(input_col)) @pytest.mark.parametrize( @@ -148,4 +148,4 @@ def test_pickle_string_column(slices): pickled = pickle.dumps(input_col) out = pickle.loads(pickled) - assert_eq(Series(out), Series(input_col)) + assert_eq(Series._from_column(out), Series._from_column(input_col)) diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py index 8be6463c699..f276f394cd0 100644 --- a/python/cudf/cudf/tests/test_reductions.py +++ b/python/cudf/cudf/tests/test_reductions.py @@ -10,6 +10,7 @@ import cudf from cudf import Series +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype from cudf.testing import _utils as utils, assert_eq from cudf.testing._utils import NUMERIC_TYPES, expect_warning_if, gen_rand @@ -342,6 +343,10 @@ def test_any_all_axis_none(data, op): "median", ], ) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Warning not given on older versions of pandas", +) def test_reductions_axis_none_warning(op): df = cudf.DataFrame({"a": [1, 2, 3], "b": [10, 2, 3]}) pdf = df.to_pandas() @@ -358,6 +363,30 @@ def test_reductions_axis_none_warning(op): assert_eq(expected, actual, check_dtype=False) +@pytest.mark.parametrize( + "op", + [ + "sum", + "product", + "std", + "var", + "kurt", + "kurtosis", + "skew", + "min", + "max", + "mean", + "median", + ], +) +def test_dataframe_reduction_no_args(op): + df = cudf.DataFrame({"a": range(10), "b": range(10)}) + pdf = df.to_pandas() + result = getattr(df, op)() + expected = getattr(pdf, op)() + assert_eq(result, expected) + + def test_reduction_column_multiindex(): idx = cudf.MultiIndex.from_tuples( [("a", 1), ("a", 2)], names=["foo", "bar"] @@ -374,3 +403,14 @@ def test_dtype_deprecated(op): with pytest.warns(FutureWarning): result = getattr(ser, op)(dtype=np.dtype(np.int8)) assert isinstance(result, np.int8) + + +@pytest.mark.parametrize( + "columns", [pd.RangeIndex(2), pd.Index([0, 1], 
dtype="int8")] +) +def test_dataframe_axis_0_preserve_column_type_in_index(columns): + pd_df = pd.DataFrame([[1, 2]], columns=columns) + cudf_df = cudf.DataFrame.from_pandas(pd_df) + result = cudf_df.sum(axis=0) + expected = pd_df.sum(axis=0) + assert_eq(result, expected, check_index_type=True) diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index d4fe5ff3bb5..3a8928297c0 100644 --- a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -10,7 +10,11 @@ import pytest import cudf -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype from cudf.testing import assert_eq from cudf.testing._utils import ( @@ -66,7 +70,7 @@ def test_series_replace_all(gsr, to_replace, value): ) with expect_warning_if(expect_warn): actual = gsr.replace(to_replace=gd_to_replace, value=gd_value) - with expect_warning_if(expect_warn): + with expect_warning_if(expect_warn and PANDAS_GE_220): if pd_value is None: # TODO: Remove this workaround once cudf # introduces `no_default` values @@ -91,7 +95,7 @@ def test_series_replace(): # Categorical psr3 = pd.Series(["one", "two", "three"], dtype="category") - with pytest.warns(FutureWarning): + with expect_warning_if(PANDAS_GE_220, FutureWarning): psr4 = psr3.replace("one", "two") sr3 = cudf.from_pandas(psr3) with pytest.warns(FutureWarning): @@ -100,7 +104,7 @@ def test_series_replace(): psr4.sort_values().reset_index(drop=True), sr4.sort_values().reset_index(drop=True), ) - with pytest.warns(FutureWarning): + with expect_warning_if(PANDAS_GE_220, FutureWarning): psr5 = psr3.replace("one", "five") with pytest.warns(FutureWarning): sr5 = sr3.replace("one", "five") @@ -517,7 +521,7 @@ def test_fillna_categorical(psr_data, fill_value, inplace): pd.date_range( "2010-01-01", "2020-01-10", - freq="1YE", + freq="1YE" if PANDAS_GE_220 else "1y", ) ), pd.Series(["2010-01-01", None, "2011-10-10"], dtype="datetime64[ns]"), @@ -564,7 +568,7 @@ def test_fillna_categorical(psr_data, fill_value, inplace): pd.date_range( "2010-01-01", "2020-01-10", - freq="1YE", + freq="1YE" if PANDAS_GE_220 else "1y", ) ) + pd.Timedelta("1d"), @@ -817,12 +821,12 @@ def test_fillna_string(ps_data, fill_value, inplace): def test_series_fillna_invalid_dtype(data_dtype): gdf = cudf.Series([1, 2, None, 3], dtype=data_dtype) fill_value = 2.5 - with pytest.raises(TypeError) as raises: - gdf.fillna(fill_value) - raises.match( + msg = ( f"Cannot safely cast non-equivalent" f" {type(fill_value).__name__} to {gdf.dtype.type.__name__}" ) + with pytest.raises(TypeError, match=msg): + gdf.fillna(fill_value) @pytest.mark.parametrize("data_dtype", NUMERIC_TYPES) @@ -1069,6 +1073,10 @@ def test_numeric_series_replace_dtype(series_dtype, replacement): ), ], ) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Warning not given on older versions of pandas", +) def test_replace_inplace(pframe, replace_args): gpu_frame = cudf.from_pandas(pframe) pandas_frame = pframe.copy() @@ -1378,3 +1386,9 @@ def test_fillna_nan_and_null(): result = ser.fillna(2.2) expected = cudf.Series([2.2, 2.2, 1.1]) assert_eq(result, expected) + + +def test_replace_with_index_objects(): + result = cudf.Series([1, 2]).replace(cudf.Index([1]), cudf.Index([2])) + expected = pd.Series([1, 2]).replace(pd.Index([1]), pd.Index([2])) + 
assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index a013745f71e..95e19fae501 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -1480,3 +1480,22 @@ def test_interval_index_repr(): gi = cudf.from_pandas(pi) assert repr(pi) == repr(gi) + + +def test_large_unique_categories_repr(): + # Unfortunately, this is a long running test (takes about 1 minute) + # and there is no way we can reduce the time + pi = pd.CategoricalIndex(range(100_000_000)) + gi = cudf.CategoricalIndex(range(100_000_000)) + expected_repr = repr(pi) + with utils.cudf_timeout(6): + actual_repr = repr(gi) + assert expected_repr == actual_repr + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_categorical_index_ordered(ordered): + pi = pd.CategoricalIndex(range(10), ordered=ordered) + gi = cudf.CategoricalIndex(range(10), ordered=ordered) + + assert repr(pi) == repr(gi) diff --git a/python/cudf/cudf/tests/test_resampling.py b/python/cudf/cudf/tests/test_resampling.py index 95fa8e9a50a..a61477981f8 100644 --- a/python/cudf/cudf/tests/test_resampling.py +++ b/python/cudf/cudf/tests/test_resampling.py @@ -5,6 +5,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing import assert_eq @@ -147,6 +148,10 @@ def test_dataframe_resample_level(): ("10D", "1D", "s"), ], ) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) def test_resampling_frequency_conversion(in_freq, sampling_freq, out_freq): # test that we cast to the appropriate frequency # when resampling: @@ -164,6 +169,10 @@ def test_resampling_frequency_conversion(in_freq, sampling_freq, out_freq): assert got.index.dtype == np.dtype(f"datetime64[{out_freq}]") +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) def test_resampling_downsampling_ms(): pdf = pd.DataFrame( { diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 50db4302b75..4235affd4d1 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -8,10 +8,19 @@ import pytest import cudf -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) from cudf.core.buffer.spill_manager import get_global_manager from cudf.testing import assert_eq -from cudf.testing._utils import ALL_TYPES, DATETIME_TYPES, NUMERIC_TYPES +from cudf.testing._utils import ( + ALL_TYPES, + DATETIME_TYPES, + NUMERIC_TYPES, + expect_warning_if, +) pytest_xfail = pytest.mark.xfail pytestmark = pytest.mark.spilling @@ -220,7 +229,7 @@ def test_df_stack_multiindex_column_axis(columns, index, level, dropna): with pytest.warns(FutureWarning): got = gdf.stack(level=level, dropna=dropna, future_stack=False) - with pytest.warns(FutureWarning): + with expect_warning_if(PANDAS_GE_220, FutureWarning): expect = pdf.stack(level=level, dropna=dropna, future_stack=False) assert_eq(expect, got, check_dtype=False) @@ -265,7 +274,7 @@ def test_df_stack_multiindex_column_axis_pd_example(level): df = pd.DataFrame(np.random.randn(4, 4), columns=columns) - with pytest.warns(FutureWarning): + with expect_warning_if(PANDAS_GE_220, FutureWarning): expect = df.stack(level=level, future_stack=False) gdf = cudf.from_pandas(df) 
with pytest.warns(FutureWarning): diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index 3ae318d3bf5..0958b68084d 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -7,7 +7,6 @@ import numpy as np import pandas as pd -import pyarrow.fs as pa_fs import pytest from fsspec.core import get_fs_token_paths @@ -138,48 +137,17 @@ def test_read_csv(s3_base, s3so, pdf, bytes_per_thread): buffer = pdf.to_csv(index=False) # Use fsspec file object - with pytest.warns(FutureWarning): - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_csv( - f"s3://{bucket}/{fname}", - storage_options=s3so, - bytes_per_thread=bytes_per_thread, - use_python_file_object=False, - ) - assert_eq(pdf, got) - - # Use Arrow PythonFile object - with pytest.warns(FutureWarning): - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_csv( - f"s3://{bucket}/{fname}", - storage_options=s3so, - use_python_file_object=True, - ) - assert_eq(pdf, got) - - -def test_read_csv_arrow_nativefile(s3_base, s3so, pdf): - # Write to buffer - fname = "test_csv_reader_arrow_nativefile.csv" - bucket = "csv" - buffer = pdf.to_csv(index=False) with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - fs = pa_fs.S3FileSystem( - endpoint_override=s3so["client_kwargs"]["endpoint_url"], + got = cudf.read_csv( + f"s3://{bucket}/{fname}", + storage_options=s3so, + bytes_per_thread=bytes_per_thread, ) - with pytest.warns(FutureWarning): - with fs.open_input_file(f"{bucket}/{fname}") as fil: - got = cudf.read_csv(fil) - assert_eq(pdf, got) @pytest.mark.parametrize("bytes_per_thread", [32, 1024]) -@pytest.mark.parametrize("use_python_file_object", [True, False]) -def test_read_csv_byte_range( - s3_base, s3so, pdf, bytes_per_thread, use_python_file_object -): +def test_read_csv_byte_range(s3_base, s3so, pdf, bytes_per_thread): # Write to buffer fname = "test_csv_reader_byte_range.csv" bucket = "csv" @@ -187,18 +155,14 @@ def test_read_csv_byte_range( # Use fsspec file object with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - with pytest.warns(FutureWarning): - got = cudf.read_csv( - f"s3://{bucket}/{fname}", - storage_options=s3so, - byte_range=(74, 73), - bytes_per_thread=bytes_per_thread - if not use_python_file_object - else None, - header=None, - names=["Integer", "Float", "Integer2", "String", "Boolean"], - use_python_file_object=use_python_file_object, - ) + got = cudf.read_csv( + f"s3://{bucket}/{fname}", + storage_options=s3so, + byte_range=(74, 73), + bytes_per_thread=bytes_per_thread, + header=None, + names=["Integer", "Float", "Integer2", "String", "Boolean"], + ) assert_eq(pdf.iloc[-2:].reset_index(drop=True), got) @@ -226,16 +190,12 @@ def test_write_csv(s3_base, s3so, pdf, chunksize): @pytest.mark.parametrize("bytes_per_thread", [32, 1024]) @pytest.mark.parametrize("columns", [None, ["Float", "String"]]) -@pytest.mark.parametrize("precache", [None, "parquet"]) -@pytest.mark.parametrize("use_python_file_object", [True, False]) def test_read_parquet( s3_base, s3so, pdf, bytes_per_thread, columns, - precache, - use_python_file_object, ): fname = "test_parquet_reader.parquet" bucket = "parquet" @@ -245,19 +205,12 @@ def test_read_parquet( # Check direct path handling buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - with pytest.warns(FutureWarning): - got1 = cudf.read_parquet( - f"s3://{bucket}/{fname}", - open_file_options=( 
- {"precache_options": {"method": precache}} - if use_python_file_object - else None - ), - storage_options=s3so, - bytes_per_thread=bytes_per_thread, - columns=columns, - use_python_file_object=use_python_file_object, - ) + got1 = cudf.read_parquet( + f"s3://{bucket}/{fname}", + storage_options=s3so, + bytes_per_thread=bytes_per_thread, + columns=columns, + ) expect = pdf[columns] if columns else pdf assert_eq(expect, got1) @@ -268,16 +221,61 @@ def test_read_parquet( f"s3://{bucket}/{fname}", storage_options=s3so )[0] with fs.open(f"s3://{bucket}/{fname}", mode="rb") as f: - with pytest.warns(FutureWarning): - got2 = cudf.read_parquet( - f, - bytes_per_thread=bytes_per_thread, - columns=columns, - use_python_file_object=use_python_file_object, - ) + got2 = cudf.read_parquet( + f, + bytes_per_thread=bytes_per_thread, + columns=columns, + ) assert_eq(expect, got2) +@pytest.mark.parametrize("method", ["all", "parquet"]) +@pytest.mark.parametrize("blocksize", [1024 * 1024, 1024]) +def test_read_parquet_prefetch_options( + s3_base, + s3so, + pdf, + method, + blocksize, +): + bucket = "parquet" + fname_1 = "test_parquet_reader_prefetch_options_1.parquet" + buffer_1 = BytesIO() + pdf.to_parquet(path=buffer_1) + buffer_1.seek(0) + + fname_2 = "test_parquet_reader_prefetch_options_2.parquet" + buffer_2 = BytesIO() + pdf_2 = pdf.copy() + pdf_2["Integer"] += 1 + pdf_2.to_parquet(path=buffer_2) + buffer_2.seek(0) + + with s3_context( + s3_base=s3_base, + bucket=bucket, + files={ + fname_1: buffer_1, + fname_2: buffer_2, + }, + ): + got = cudf.read_parquet( + [ + f"s3://{bucket}/{fname_1}", + f"s3://{bucket}/{fname_2}", + ], + storage_options=s3so, + prefetch_options={ + "method": method, + "blocksize": blocksize, + }, + columns=["String", "Integer"], + ) + + expect = pd.concat([pdf, pdf_2], ignore_index=True)[["String", "Integer"]] + assert_eq(expect, got) + + @pytest.mark.parametrize("bytes_per_thread", [32, 1024]) @pytest.mark.parametrize("columns", [None, ["List", "Struct"]]) @pytest.mark.parametrize("index", [None, "Integer"]) @@ -318,6 +316,28 @@ def test_read_parquet_ext( assert_eq(expect, got1) +def test_read_parquet_filesystem(s3_base, s3so, pdf): + fname = "data.0.parquet" + # NOTE: Need a unique bucket name when a glob pattern + # is used, otherwise fsspec seems to cache the bucket + # contents, and later tests using the same bucket name + # will fail. 
+ bucket = "test_read_parquet_filesystem" + buffer = BytesIO() + pdf.to_parquet(path=buffer) + buffer.seek(0) + fs = get_fs_token_paths("s3://", mode="rb", storage_options=s3so)[0] + with s3_context( + s3_base=s3_base, + bucket=bucket, + files={fname: buffer}, + ): + # Check that a glob pattern works + path = f"s3://{bucket}/{'data.*.parquet'}" + got = cudf.read_parquet(path, filesystem=fs) + assert_eq(pdf, got) + + def test_read_parquet_multi_file(s3_base, s3so, pdf): fname_1 = "test_parquet_reader_multi_file_1.parquet" buffer_1 = BytesIO() @@ -350,28 +370,7 @@ def test_read_parquet_multi_file(s3_base, s3so, pdf): assert_eq(expect, got) -@pytest.mark.parametrize("columns", [None, ["Float", "String"]]) -def test_read_parquet_arrow_nativefile(s3_base, s3so, pdf, columns): - # Write to buffer - fname = "test_parquet_reader_arrow_nativefile.parquet" - bucket = "parquet" - buffer = BytesIO() - pdf.to_parquet(path=buffer) - buffer.seek(0) - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - with pytest.warns(FutureWarning): - fs = pa_fs.S3FileSystem( - endpoint_override=s3so["client_kwargs"]["endpoint_url"], - ) - with fs.open_input_file(f"{bucket}/{fname}") as fil: - got = cudf.read_parquet(fil, columns=columns) - - expect = pdf[columns] if columns else pdf - assert_eq(expect, got) - - -@pytest.mark.parametrize("precache", [None, "parquet"]) -def test_read_parquet_filters(s3_base, s3so, pdf_ext, precache): +def test_read_parquet_filters(s3_base, s3so, pdf_ext): fname = "test_parquet_reader_filters.parquet" bucket = "parquet" buffer = BytesIO() @@ -379,13 +378,11 @@ def test_read_parquet_filters(s3_base, s3so, pdf_ext, precache): buffer.seek(0) filters = [("String", "==", "Omega")] with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - with pytest.warns(FutureWarning): - got = cudf.read_parquet( - f"s3://{bucket}/{fname}", - storage_options=s3so, - filters=filters, - open_file_options={"precache_options": {"method": precache}}, - ) + got = cudf.read_parquet( + f"s3://{bucket}/{fname}", + storage_options=s3so, + filters=filters, + ) # All row-groups should be filtered out assert_eq(pdf_ext.iloc[:0], got.reset_index(drop=True)) @@ -445,33 +442,8 @@ def test_read_json(s3_base, s3so): assert_eq(expect, got) -@pytest.mark.parametrize("use_python_file_object", [False, True]) -@pytest.mark.parametrize("columns", [None, ["string1"]]) -def test_read_orc(s3_base, s3so, datadir, use_python_file_object, columns): - source_file = str(datadir / "orc" / "TestOrcFile.testSnappy.orc") - fname = "test_orc_reader.orc" - bucket = "orc" - expect = pd.read_orc(source_file) - - with open(source_file, "rb") as f: - buffer = f.read() - - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - with pytest.warns(FutureWarning): - got = cudf.read_orc( - f"s3://{bucket}/{fname}", - columns=columns, - storage_options=s3so, - use_python_file_object=use_python_file_object, - ) - - if columns: - expect = expect[columns] - assert_eq(expect, got) - - @pytest.mark.parametrize("columns", [None, ["string1"]]) -def test_read_orc_arrow_nativefile(s3_base, s3so, datadir, columns): +def test_read_orc(s3_base, s3so, datadir, columns): source_file = str(datadir / "orc" / "TestOrcFile.testSnappy.orc") fname = "test_orc_reader.orc" bucket = "orc" @@ -481,12 +453,11 @@ def test_read_orc_arrow_nativefile(s3_base, s3so, datadir, columns): buffer = f.read() with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - fs = pa_fs.S3FileSystem( - 
endpoint_override=s3so["client_kwargs"]["endpoint_url"], + got = cudf.read_orc( + f"s3://{bucket}/{fname}", + columns=columns, + storage_options=s3so, ) - with pytest.warns(FutureWarning): - with fs.open_input_file(f"{bucket}/{fname}") as fil: - got = cudf.read_orc(fil, columns=columns) if columns: expect = expect[columns] diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 8ed78d804bf..a24002dc38e 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -2041,7 +2041,7 @@ def test_series_ordered_dedup(): sr = cudf.Series(np.random.randint(0, 100, 1000)) # pandas unique() preserves order expect = pd.Series(sr.to_pandas().unique()) - got = cudf.Series(sr._column.unique()) + got = cudf.Series._from_column(sr._column.unique()) assert_eq(expect.values, got.values) @@ -2115,8 +2115,9 @@ def test_series_hasnans(data): ], ) @pytest.mark.parametrize("keep", ["first", "last", False]) -def test_series_duplicated(data, index, keep): - gs = cudf.Series(data, index=index) +@pytest.mark.parametrize("name", [None, "a"]) +def test_series_duplicated(data, index, keep, name): + gs = cudf.Series(data, index=index, name=name) ps = gs.to_pandas() assert_eq(gs.duplicated(keep=keep), ps.duplicated(keep=keep)) @@ -2289,6 +2290,13 @@ def test_series_rename(initial_name, name): assert_eq(actual, expected) +@pytest.mark.parametrize("index", [lambda x: x * 2, {1: 2}]) +def test_rename_index_not_supported(index): + ser = cudf.Series(range(2)) + with pytest.raises(NotImplementedError): + ser.rename(index=index) + + @pytest.mark.parametrize( "data", [ @@ -2549,6 +2557,13 @@ def test_series_arrow_list_types_roundtrip(): cudf.from_pandas(pdf) +@pytest.mark.parametrize("base_name", [None, "a"]) +def test_series_to_frame_none_name(base_name): + result = cudf.Series(range(1), name=base_name).to_frame(name=None) + expected = pd.Series(range(1), name=base_name).to_frame(name=None) + assert_eq(result, expected) + + @pytest.mark.parametrize("klass", [cudf.Index, cudf.Series]) @pytest.mark.parametrize( "data", [pa.array([float("nan")]), pa.chunked_array([[float("nan")]])] @@ -2697,7 +2712,9 @@ def test_series_duplicate_index_reindex(): def test_list_category_like_maintains_dtype(): dtype = cudf.CategoricalDtype(categories=[1, 2, 3, 4], ordered=True) data = [1, 2, 3] - result = cudf.Series(cudf.core.column.as_column(data, dtype=dtype)) + result = cudf.Series._from_column( + cudf.core.column.as_column(data, dtype=dtype) + ) expected = pd.Series(data, dtype=dtype.to_pandas()) assert_eq(result, expected) @@ -2705,7 +2722,9 @@ def test_list_category_like_maintains_dtype(): def test_list_interval_like_maintains_dtype(): dtype = cudf.IntervalDtype(subtype=np.int8) data = [pd.Interval(1, 2)] - result = cudf.Series(cudf.core.column.as_column(data, dtype=dtype)) + result = cudf.Series._from_column( + cudf.core.column.as_column(data, dtype=dtype) + ) expected = pd.Series(data, dtype=dtype.to_pandas()) assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 69122cdbafa..5406836ba61 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -178,13 +178,19 @@ def test_column_set_equal_length_object_by_mask(): bool_col = cudf.Series([True, True, True, True, True])._column data[bool_col] = replace_data - assert_eq(cudf.Series(data), cudf.Series(replace_data)) + assert_eq( + cudf.Series._from_column(data), + cudf.Series._from_column(replace_data), + ) 
data = cudf.Series([0, 0, 1, 1, 1])._column bool_col = cudf.Series([True, False, True, False, True])._column data[bool_col] = replace_data - assert_eq(cudf.Series(data), cudf.Series([100, 0, 300, 1, 500])) + assert_eq( + cudf.Series._from_column(data), + cudf.Series([100, 0, 300, 1, 500]), + ) def test_column_set_unequal_length_object_by_mask(): diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index a8ffce6e88b..2cf2259d9ec 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -405,3 +405,23 @@ def test_dataframe_scatter_by_map_empty(): df = DataFrame({"a": [], "b": []}, dtype="float64") scattered = df.scatter_by_map(df["a"]) assert len(scattered) == 0 + + +def test_sort_values_by_index_level(): + df = pd.DataFrame({"a": [1, 3, 2]}, index=pd.Index([1, 3, 2], name="b")) + cudf_df = DataFrame.from_pandas(df) + result = cudf_df.sort_values("b") + expected = df.sort_values("b") + assert_eq(result, expected) + + +def test_sort_values_by_ambiguous(): + df = pd.DataFrame({"a": [1, 3, 2]}, index=pd.Index([1, 3, 2], name="a")) + cudf_df = DataFrame.from_pandas(df) + + assert_exceptions_equal( + lfunc=df.sort_values, + rfunc=cudf_df.sort_values, + lfunc_args_and_kwargs=(["a"], {}), + rfunc_args_and_kwargs=(["a"], {}), + ) diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index d5f63fdab77..f952cea07f8 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -447,6 +447,10 @@ def test_cov1d(data1, data2): ], ) @pytest.mark.parametrize("method", ["spearman", "pearson"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Warnings missing on older pandas (scipy version seems unrelated?)", +) def test_corr1d(data1, data2, method): if method == "spearman": # Pandas uses scipy.stats.spearmanr code-path @@ -585,6 +589,10 @@ def test_min_count_ops(data, ops, skipna, min_count): ], ) @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) def test_cov_corr_datetime_timedelta(data1, data2, dtype): gsr1 = cudf.Series(data1, dtype=dtype) gsr2 = cudf.Series(data2, dtype=dtype) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index f447759d010..cc88cc79769 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -978,6 +978,22 @@ def test_string_split_re(data, pat, n, expand): assert_eq(expect, got) +@pytest.mark.parametrize("pat", [None, "\\s+"]) +@pytest.mark.parametrize("regex", [False, True]) +@pytest.mark.parametrize("expand", [False, True]) +def test_string_split_all_empty(pat, regex, expand): + ps = pd.Series(["", "", "", ""], dtype="str") + gs = cudf.Series(["", "", "", ""], dtype="str") + + expect = ps.str.split(pat=pat, expand=expand, regex=regex) + got = gs.str.split(pat=pat, expand=expand, regex=regex) + + if isinstance(got, cudf.DataFrame): + assert_eq(expect, got, check_column_type=False) + else: + assert_eq(expect, got) + + @pytest.mark.parametrize( "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]] ) @@ -1076,7 +1092,7 @@ def test_string_index(): pdf.index = stringIndex.to_pandas() gdf.index = stringIndex assert_eq(pdf, gdf) - stringIndex = cudf.Index( + stringIndex = cudf.Index._from_column( cudf.core.column.as_column(["a", "b", "c", "d", "e"]), 
name="name" ) pdf.index = stringIndex.to_pandas() @@ -2672,12 +2688,14 @@ def test_string_ip4_to_int(): def test_string_int_to_ipv4(): - gsr = cudf.Series([0, None, 0, 698875905, 2130706433, 700776449]) + gsr = cudf.Series([0, None, 0, 698875905, 2130706433, 700776449]).astype( + "uint32" + ) expected = cudf.Series( ["0.0.0.0", None, "0.0.0.0", "41.168.0.1", "127.0.0.1", "41.197.0.1"] ) - got = cudf.Series(gsr._column.int2ip()) + got = cudf.Series._from_column(gsr._column.int2ip()) assert_eq(expected, got) @@ -2718,7 +2736,7 @@ def test_string_isipv4(): @pytest.mark.parametrize( - "dtype", sorted(list(dtypeutils.NUMERIC_TYPES - {"int64", "uint64"})) + "dtype", sorted(list(dtypeutils.NUMERIC_TYPES - {"uint32"})) ) def test_string_int_to_ipv4_dtype_fail(dtype): gsr = cudf.Series([1, 2, 3, 4, 5]).astype(dtype) diff --git a/python/cudf/cudf/tests/test_string_udfs.py b/python/cudf/cudf/tests/test_string_udfs.py index 4432d2afc8e..69876d97aad 100644 --- a/python/cudf/cudf/tests/test_string_udfs.py +++ b/python/cudf/cudf/tests/test_string_udfs.py @@ -96,7 +96,7 @@ def run_udf_test(data, func, dtype): else: result = output - got = cudf.Series(result, dtype=dtype) + got = cudf.Series._from_column(result.astype(dtype)) assert_eq(expect, got, check_dtype=False) with _CUDFNumbaConfig(): udf_str_kernel.forall(len(data))(str_views, output) @@ -105,7 +105,7 @@ def run_udf_test(data, func, dtype): else: result = output - got = cudf.Series(result, dtype=dtype) + got = cudf.Series._from_column(result.astype(dtype)) assert_eq(expect, got, check_dtype=False) diff --git a/python/cudf/cudf/tests/test_unique.py b/python/cudf/cudf/tests/test_unique.py new file mode 100644 index 00000000000..699b3340521 --- /dev/null +++ b/python/cudf/cudf/tests/test_unique.py @@ -0,0 +1,117 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import cupy as cp +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.fixture +def df(): + df = cudf.DataFrame() + np.random.seed(0) + + arr = np.random.randint(2, size=10, dtype=np.int64) + df["foo"] = arr + df["bar"] = cudf.Series([pd.Timestamp(x) for x in arr]) + + return df + + +@pytest.fixture(params=["foo", "bar"]) +def series_test_vals(request, df): + actual = cudf.unique(df[request.param]) + expected = pd.unique(df[request.param].to_pandas()) + return actual, expected + + +def test_unique_series_obj(series_test_vals): + actual, expected = series_test_vals + + assert isinstance(expected, np.ndarray) + assert isinstance(actual, cudf.Series) + assert_eq(actual, pd.Series(expected, name=actual.name)) + + +@pytest.mark.parametrize( + "index", + [ + (cudf.Index, pd.Index), + (cudf.MultiIndex, pd.MultiIndex), + (cudf.DatetimeIndex, pd.DatetimeIndex), + (cudf.CategoricalIndex, pd.CategoricalIndex), + ], +) +@pytest.mark.parametrize("col", ["foo", "bar"]) +def test_unique_index_obj(index, col, df): + if index[0] == cudf.MultiIndex: + df.index = cudf.MultiIndex.from_arrays([df[col], df[col]]) + else: + df.index = index[0](df[col]) + actual = cudf.unique(df.index) + expected = pd.unique(df.index.to_pandas()) + + assert isinstance(expected, np.ndarray) + assert isinstance(actual, index[0]) + + if index[0] == cudf.MultiIndex: + expect = index[1].from_arrays( + [ + [x[0] for x in expected], + [x[1] for x in expected], + ], + names=actual.names, + ) + assert_eq(actual, expect) + else: + assert_eq(actual, index[1](expected, name=actual.name)) + + +def test_unique_cupy_ndarray(df): + arr = np.asarray(df["foo"].to_pandas()) + garr = cp.asarray(df["foo"]) + + expected = pd.unique(arr) + actual = cudf.unique(garr) + + assert isinstance(expected, np.ndarray) + assert isinstance(actual, cp.ndarray) + assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "data", + [ + ["abc", "def", "abc", "a", "def", None], + [10, 20, 100, -10, 0, 1, None, 10, 100], + ], +) +def test_category_dtype_unique(data): + gs = cudf.Series(data, dtype="category") + ps = gs.to_pandas() + + actual = cudf.unique(gs) + expected = pd.unique(ps) + + assert isinstance(expected, pd.Categorical) + assert isinstance(actual, cudf.Series) + assert_eq(actual, pd.Series(expected)) + + +def test_unique_fails_value_error(df): + with pytest.raises( + ValueError, + match="Must pass cudf.Series, cudf.Index, or cupy.ndarray object", + ): + cudf.unique(df) + + +def test_unique_fails_not_implemented_error(df): + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises( + NotImplementedError, match="cudf.Categorical is not implemented" + ): + cudf.unique(cudf.Series(["foo", "foo"], dtype="category")) diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 80555750b3a..1627107b57d 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -1,22 +1,22 @@ # Copyright (c) 2019-2024, NVIDIA CORPORATION.
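The `ioutils.py` rework that follows turns `get_reader_filepath_or_buffer` into a list-in/list-out helper and drops the pyarrow `NativeFile` plumbing. Assuming the keyword-only signature introduced below, the new contract looks roughly like this (paths and storage options hypothetical):

```python
# Internal helper; list in, list out, no more (data, compression) tuple.
from cudf.utils.ioutils import get_reader_filepath_or_buffer

sources = get_reader_filepath_or_buffer(
    ["s3://bucket/a.parquet", "s3://bucket/b.parquet"],  # hypothetical paths
    storage_options={"anon": True},
    prefetch_options={"method": "all", "blocksize": 256 * 1024 * 1024},
)
assert isinstance(sources, list) and len(sources) == 2
```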
import datetime +import functools +import operator import os import urllib import warnings +from collections.abc import Callable from io import BufferedWriter, BytesIO, IOBase, TextIOWrapper from threading import Thread -from typing import Callable import fsspec import fsspec.implementations.local import numpy as np import pandas as pd -from fsspec.core import get_fs_token_paths -from pyarrow import PythonFile as ArrowPythonFile -from pyarrow.lib import NativeFile +from fsspec.core import expand_paths_if_needed, get_fs_token_paths -from cudf.api.extensions import no_default +from cudf.api.types import is_list_like from cudf.core._compat import PANDAS_LT_300 from cudf.utils.docutils import docfmt_partial @@ -148,6 +148,9 @@ For other URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more details. +filesystem : fsspec.AbstractFileSystem, default None + Filesystem object to use when reading the parquet data. This argument + should not be used at the same time as `storage_options`. filters : list of tuple, list of lists of tuples, default None If not None, specifies a filter predicate used to filter out row groups using statistics stored for each row group as Parquet metadata. Row groups @@ -173,32 +176,30 @@ use_pandas_metadata : boolean, default True If True and dataset has custom PANDAS schema metadata, ensure that index columns are also loaded. -use_python_file_object : boolean, default True - If True, Arrow-backed PythonFile objects will be used in place of fsspec - AbstractBufferedFile objects at IO time. - - .. deprecated:: 24.08 - `use_python_file_object` is deprecated and will be removed in a future - version of cudf, as PyArrow NativeFiles will no longer be accepted as - input/output in cudf readers/writers in the future. -open_file_options : dict, optional - Dictionary of key-value pairs to pass to the function used to open remote - files. By default, this will be `fsspec.parquet.open_parquet_file`. To - deactivate optimized precaching, set the "method" to `None` under the - "precache_options" key. Note that the `open_file_func` key can also be - used to specify a custom file-open function. - - .. deprecated:: 24.08 - `open_file_options` is deprecated as it was intended for - pyarrow file inputs, which will no longer be accepted as - input/output cudf readers/writers in the future. bytes_per_thread : int, default None Determines the number of bytes to be allocated per thread to read the files in parallel. When there is a file of large size, we get slightly better throughput by decomposing it and transferring multiple "blocks" in parallel (using a python thread pool). Default allocation is {bytes_per_thread} bytes. - This parameter is functional only when `use_python_file_object=False`. +skiprows : int, default None + If not None, the number of rows to skip from the start of the file. + + .. note:: + This option is not supported when the low-memory mode is on. +nrows : int, default None + If not None, the total number of rows to read. + + .. note:: + This option is not supported when the low-memory mode is on. +allow_mismatched_pq_schemas : boolean, default False + If True, enables reading (matching) columns specified in `columns` and `filters` + options from the input files with otherwise mismatched schemas. +prefetch_options : dict, default None + WARNING: This is an experimental feature and may be removed at any + time without warning or deprecation period.
+ Dictionary of options to use to prefetch bytes from remote storage. + These options are passed through to `get_reader_filepath_or_buffer`. Returns ------- @@ -475,14 +476,6 @@ This parameter is deprecated. use_index : bool, default True If True, use row index if available for faster seeking. -use_python_file_object : boolean, default True - If True, Arrow-backed PythonFile objects will be used in place of fsspec - AbstractBufferedFile objects at IO time. - - .. deprecated:: 24.08 - `use_python_file_object` is deprecated and will be removed in a future - version of cudf, as PyArrow NativeFiles will no longer be accepted as - input/output in cudf readers/writers in the future. storage_options : dict, optional, default None Extra options that make sense for a particular storage connection, e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value @@ -496,7 +489,6 @@ better throughput by decomposing it and transferring multiple "blocks" in parallel (using a python thread pool). Default allocation is {bytes_per_thread} bytes. - This parameter is functional only when `use_python_file_object=False`. Returns ------- @@ -824,7 +816,7 @@ k1 k2 0 1.0 [1] """ # noqa: E501 -doc_read_json = docfmt_partial(docstring=_docstring_read_json) +doc_read_json: Callable = docfmt_partial(docstring=_docstring_read_json) _docstring_to_json = """ Convert the cuDF object to a JSON string. @@ -894,7 +886,7 @@ -------- cudf.read_json """ -doc_to_json = docfmt_partial(docstring=_docstring_to_json) +doc_to_json: Callable = docfmt_partial(docstring=_docstring_to_json) _docstring_read_hdf = """ Read from the store, close it if we opened it. @@ -1199,14 +1191,6 @@ size to zero to read all data after the offset location. Reads the row that starts before or at the end of the range, even if it ends after the end of the range. -use_python_file_object : boolean, default True - If True, Arrow-backed PythonFile objects will be used in place of fsspec - AbstractBufferedFile objects at IO time. - - .. deprecated:: 24.08 - `use_python_file_object` is deprecated and will be removed in a future - version of cudf, as PyArrow NativeFiles will no longer be accepted as - input/output in cudf readers/writers in the future. storage_options : dict, optional, default None Extra options that make sense for a particular storage connection, e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value @@ -1220,7 +1204,6 @@ better throughput by decomposing it and transferring multiple "blocks" in parallel (using a python thread pool). Default allocation is {bytes_per_thread} bytes. - This parameter is functional only when `use_python_file_object=False`. Returns ------- GPU ``DataFrame`` object. @@ -1433,33 +1416,18 @@ Return either a filepath string to data, or a memory buffer of data. If filepath, then the source filepath is expanded to user's environment. If buffer, then data is returned in-memory as bytes or a ByteIO object. +This function is designed to process multiple data sources of the same +type at once. If path_or_data is a list, the output will also be a list. Parameters ---------- -path_or_data : str, file-like object, bytes, ByteIO - Path to data or the data itself. -compression : str - Type of compression algorithm for the content +path_or_data : str, file-like object, bytes, ByteIO, list + Path to data or the data itself. Pass in a list to process multiple + sources of the same type at once. 
mode : str Mode in which file is opened iotypes : (), default (BytesIO) Object type to exclude from file-like check -use_python_file_object : boolean, default False - If True, Arrow-backed PythonFile objects will be used in place - of fsspec AbstractBufferedFile objects. - - .. deprecated:: 24.08 - `use_python_file_object` is deprecated and will be removed in a future - version of cudf, as PyArrow NativeFiles will no longer be accepted as - input/output in cudf readers/writers. -open_file_options : dict, optional - Optional dictionary of keyword arguments to pass to - `_open_remote_files` (used for remote storage only). - - .. deprecated:: 24.08 - `open_file_options` is deprecated as it was intended for - pyarrow file inputs, which will no longer be accepted as - input/output cudf readers/writers in the future. allow_raw_text_input : boolean, default False If True, this indicates the input `path_or_data` could be a raw text input and will not check for its existence in the filesystem. If False, @@ -1480,15 +1448,23 @@ better throughput by decomposing it and transferring multiple "blocks" in parallel (using a Python thread pool). Default allocation is {bytes_per_thread} bytes. - This parameter is functional only when `use_python_file_object=False`. +expand_dir_pattern : str, default None + Glob pattern to use when expanding directories into file paths + (e.g. "*.json"). If this parameter is not specified, directories + will not be expanded. +prefetch_options : dict, default None + WARNING: This is an experimental feature and may be removed at any + time without warning or deprecation period. + Dictionary of options to use to prefetch bytes from remote storage. + These options are only used when `path_or_data` is a list of remote + paths. If 'method' is set to 'all' (the default), the only supported + option is 'blocksize' (default 256 MB). If method is set to 'parquet', + 'columns' and 'row_groups' are also supported (default None). Returns ------- -filepath_or_buffer : str, bytes, BytesIO, list - Filepath string or in-memory buffer of data or a - list of Filepath strings or in-memory buffers of data. -compression : str - Type of compression algorithm for the content +List[str, bytes, BytesIO] + List of filepath strings or in-memory data buffers. """.format(bytes_per_thread=_BYTES_PER_THREAD_DEFAULT) @@ -1545,29 +1521,15 @@ def _is_local_filesystem(fs): return isinstance(fs, fsspec.implementations.local.LocalFileSystem) -def ensure_single_filepath_or_buffer(path_or_data, storage_options=None): - """Return False if `path_or_data` resolves to multiple filepaths or - buffers. +def _select_single_source(sources: list, caller: str): + """Select the first element from a list of sources. 
+ Raise an error if sources contains multiple elements """ - path_or_data = stringify_pathlike(path_or_data) - if isinstance(path_or_data, str): - path_or_data = os.path.expanduser(path_or_data) - try: - fs, _, paths = get_fs_token_paths( - path_or_data, mode="rb", storage_options=storage_options - ) - except ValueError as e: - if str(e).startswith("Protocol not known"): - return True - else: - raise e - - if len(paths) > 1: - return False - elif isinstance(path_or_data, (list, tuple)) and len(path_or_data) > 1: - return False - - return True + if len(sources) > 1: + raise ValueError( + f"{caller} does not support multiple sources, got: {sources}" + ) + return sources[0] def is_directory(path_or_data, storage_options=None): @@ -1590,11 +1552,18 @@ def is_directory(path_or_data, storage_options=None): return False -def _get_filesystem_and_paths(path_or_data, storage_options): +def _get_filesystem_and_paths( + path_or_data, + storage_options, + *, + filesystem=None, +): # Returns a filesystem object and the filesystem-normalized # paths. If `path_or_data` does not correspond to a path or # list of paths (or if the protocol is not supported), the # return will be `None` for the fs and `[]` for the paths. + # If a filesystem object is already available, it can be + # passed with the `filesystem` argument. fs = None return_paths = path_or_data @@ -1611,188 +1580,99 @@ def _get_filesystem_and_paths(path_or_data, storage_options): else: path_or_data = [path_or_data] - try: - fs, _, fs_paths = get_fs_token_paths( - path_or_data, mode="rb", storage_options=storage_options - ) - return_paths = fs_paths - except ValueError as e: - if str(e).startswith("Protocol not known"): - return None, [] - else: - raise e - - return fs, return_paths - - -def _set_context(obj, stack): - # Helper function to place open file on context stack - if stack is None: - return obj - return stack.enter_context(obj) - + if filesystem is None: + try: + fs, _, fs_paths = get_fs_token_paths( + path_or_data, mode="rb", storage_options=storage_options + ) + return_paths = fs_paths + except ValueError as e: + if str(e).startswith("Protocol not known"): + return None, [] + else: + raise e + else: + if not isinstance(filesystem, fsspec.AbstractFileSystem): + raise ValueError( + f"Expected fsspec.AbstractFileSystem. Got {filesystem}" + ) -def _open_remote_files( - paths, - fs, - context_stack=None, - open_file_func=None, - precache_options=None, - **kwargs, -): - """Return a list of open file-like objects given - a list of remote file paths. + if storage_options: + raise ValueError( + f"Cannot specify storage_options when an explicit " + f"filesystem object is specified. Got: {storage_options}" + ) - Parameters - ---------- - paths : list(str) - List of file-path strings. - fs : fsspec.AbstractFileSystem - Fsspec file-system object. - context_stack : contextlib.ExitStack, Optional - Context manager to use for open files. - open_file_func : Callable, Optional - Call-back function to use for opening. If this argument - is specified, all other arguments will be ignored. - precache_options : dict, optional - Dictionary of key-word arguments to pass to use for - precaching. Unless the input contains ``{"method": None}``, - ``fsspec.parquet.open_parquet_file`` will be used for remote - storage. - **kwargs : - Key-word arguments to be passed to format-specific - open functions. 
- """ + fs = filesystem + return_paths = [ + fs._strip_protocol(u) + for u in expand_paths_if_needed( + path_or_data, "rb", 1, fs, None + ) + ] - # Just use call-back function if one was specified - if open_file_func is not None: - return [ - _set_context(open_file_func(path, **kwargs), context_stack) - for path in paths - ] + return fs, return_paths - # Check if the "precache" option is supported. - # In the future, fsspec should do this check for us - precache_options = (precache_options or {}).copy() - precache = precache_options.pop("method", None) - if precache not in ("parquet", None): - raise ValueError(f"{precache} not a supported `precache` option.") - - # Check that "parts" caching (used for all format-aware file handling) - # is supported by the installed fsspec/s3fs version - if precache == "parquet" and not fsspec_parquet: - warnings.warn( - f"This version of fsspec ({fsspec.__version__}) does " - f"not support parquet-optimized precaching. Please upgrade " - f"to the latest fsspec version for better performance." - ) - precache = None - - if precache == "parquet": - # Use fsspec.parquet module. - # TODO: Use `cat_ranges` to collect "known" - # parts for all files at once. - row_groups = precache_options.pop("row_groups", None) or ( - [None] * len(paths) - ) - return [ - ArrowPythonFile( - _set_context( - fsspec_parquet.open_parquet_file( - path, - fs=fs, - row_groups=rgs, - **precache_options, - **kwargs, - ), - context_stack, - ) - ) - for path, rgs in zip(paths, row_groups) - ] - # Avoid top-level pyarrow.fs import. - # Importing pyarrow.fs initializes a S3 SDK with a finalizer - # that runs atexit. In some circumstances it appears this - # runs a call into a logging system that is already shutdown. - # To avoid this, we only import this subsystem if it is - # really needed. - # See https://github.com/aws/aws-sdk-cpp/issues/2681 - from pyarrow.fs import FSSpecHandler, PyFileSystem - - # Default open - Use pyarrow filesystem API - pa_fs = PyFileSystem(FSSpecHandler(fs)) - return [ - _set_context(pa_fs.open_input_file(fpath), context_stack) - for fpath in paths - ] +def _maybe_expand_directories(paths, glob_pattern, fs): + # Expand directory paths using a glob pattern. 
+ # This is a no-op if either glob_pattern or fs is None + if fs is None or glob_pattern is None: + return paths + expanded_paths = [] + for path in paths: + if fs.isdir(path): + expanded_paths.extend(fs.glob(fs.sep.join([path, glob_pattern]))) + else: + expanded_paths.append(path) + return expanded_paths @doc_get_reader_filepath_or_buffer() def get_reader_filepath_or_buffer( path_or_data, - compression, + *, mode="rb", fs=None, - iotypes=(BytesIO, NativeFile), - # no_default aliases to False - use_python_file_object=no_default, - open_file_options=None, + iotypes=(BytesIO,), allow_raw_text_input=False, storage_options=None, bytes_per_thread=_BYTES_PER_THREAD_DEFAULT, warn_on_raw_text_input=None, warn_meta=None, + expand_dir_pattern=None, + prefetch_options=None, ): """{docstring}""" - path_or_data = stringify_pathlike(path_or_data) - - if use_python_file_object is no_default: - use_python_file_object = False - elif use_python_file_object is not None: - warnings.warn( - "The 'use_python_file_object' keyword is deprecated and " - "will be removed in a future version.", - FutureWarning, + # Convert path_or_data to a list of input data sources + input_sources = [ + stringify_pathlike(source) + for source in ( + path_or_data if is_list_like(path_or_data) else [path_or_data] ) - else: - # Preserve the readers (e.g. read_csv) default of True - # if no use_python_file_object option is specified by the user - # for now (note: this is different from the default for this - # function of False) - # TODO: when non-pyarrow file reading perf is good enough - # we can default this to False - use_python_file_object = True - - if open_file_options is not None: - warnings.warn( - "The 'open_file_options' keyword is deprecated and " - "will be removed in a future version.", - FutureWarning, - ) - - if isinstance(path_or_data, str): - # Get a filesystem object if one isn't already available - paths = [path_or_data] + ] + if not input_sources: + raise ValueError(f"Empty input source list: {input_sources}.") + + filepaths_or_buffers = [] + string_paths = [isinstance(source, str) for source in input_sources] + if any(string_paths): + # Sources are all strings. These strings are typically + # file paths, but they may also be raw text strings. + + # Don't allow a mix of source types + if not all(string_paths): + raise ValueError(f"Invalid input source list: {input_sources}.") + + # Make sure we define a filesystem (if possible) + paths = input_sources + raw_text_input = False if fs is None: - fs, paths = _get_filesystem_and_paths( - path_or_data, storage_options - ) - if fs is None: - if warn_on_raw_text_input: - # Do not remove until pandas 3.0 support is added. - assert ( - PANDAS_LT_300 - ), "Need to drop after pandas-3.0 support is added." - warnings.warn( - f"Passing literal {warn_meta[0]} to {warn_meta[1]} is " - "deprecated and will be removed in a future version. 
" - "To read from a literal string, wrap it in a " - "'StringIO' object.", - FutureWarning, - ) - return path_or_data, compression + fs, paths = _get_filesystem_and_paths(paths, storage_options) + + # Expand directories (if necessary) + paths = _maybe_expand_directories(paths, expand_dir_pattern, fs) if _is_local_filesystem(fs): # Doing this as `read_json` accepts a json string @@ -1814,7 +1694,7 @@ def get_reader_filepath_or_buffer( if len(paths): if fs.exists(paths[0]): - path_or_data = paths if len(paths) > 1 else paths[0] + filepaths_or_buffers = paths # raise FileNotFound if path looks like json # following pandas @@ -1824,21 +1704,29 @@ def get_reader_filepath_or_buffer( tuple(f".json{c}" for c in compression_extensions) ): raise FileNotFoundError( - f"{path_or_data} could not be resolved to any files" + f"{input_sources} could not be resolved to any files" ) - elif warn_on_raw_text_input: - # Do not remove until pandas 3.0 support is added. - assert ( - PANDAS_LT_300 - ), "Need to drop after pandas-3.0 support is added." - warnings.warn( - f"Passing literal {warn_meta[0]} to {warn_meta[1]} is " - "deprecated and will be removed in a future version. " - "To read from a literal string, wrap it in a " - "'StringIO' object.", - FutureWarning, - ) - elif warn_on_raw_text_input: + else: + raw_text_input = True + else: + raw_text_input = True + + elif fs is not None: + if len(paths) == 0: + raise FileNotFoundError( + f"{input_sources} could not be resolved to any files" + ) + filepaths_or_buffers = _prefetch_remote_buffers( + paths, + fs, + **(prefetch_options or {}), + ) + else: + raw_text_input = True + + if raw_text_input: + filepaths_or_buffers = input_sources + if warn_on_raw_text_input: # Do not remove until pandas 3.0 support is added. assert ( PANDAS_LT_300 @@ -1851,45 +1739,25 @@ def get_reader_filepath_or_buffer( FutureWarning, ) - else: - if len(paths) == 0: - raise FileNotFoundError( - f"{path_or_data} could not be resolved to any files" - ) - if use_python_file_object: - path_or_data = _open_remote_files( - paths, - fs, - **(open_file_options or {}), - ) - else: - path_or_data = [ + else: + # Sources are already buffers or file-like objects + for source in input_sources: + if not isinstance(source, iotypes) and is_file_like(source): + if isinstance(source, TextIOWrapper): + source = source.buffer + filepaths_or_buffers.append( BytesIO( _fsspec_data_transfer( - fpath, - fs=fs, + source, mode=mode, bytes_per_thread=bytes_per_thread, ) ) - for fpath in paths - ] - if len(path_or_data) == 1: - path_or_data = path_or_data[0] - - elif not isinstance(path_or_data, iotypes) and is_file_like(path_or_data): - if isinstance(path_or_data, TextIOWrapper): - path_or_data = path_or_data.buffer - if use_python_file_object: - path_or_data = ArrowPythonFile(path_or_data) - else: - path_or_data = BytesIO( - _fsspec_data_transfer( - path_or_data, mode=mode, bytes_per_thread=bytes_per_thread ) - ) + else: + filepaths_or_buffers.append(source) - return path_or_data, compression + return filepaths_or_buffers def get_writer_filepath_or_buffer(path_or_data, mode, storage_options=None): @@ -2242,3 +2110,101 @@ def _read_byte_ranges( for worker in workers: worker.join() + + +def _get_remote_bytes_all( + remote_paths, fs, *, blocksize=_BYTES_PER_THREAD_DEFAULT +): + # TODO: Experiment with a heuristic to avoid the fs.sizes + # call when we are reading many files at once (the latency + # of collecting the file sizes is unnecessary in this case) + if max(sizes := fs.sizes(remote_paths)) <= blocksize: + # 
Don't bother breaking up individual files + return fs.cat_ranges(remote_paths, None, None) + else: + # Construct list of paths, starts, and ends + paths, starts, ends = map( + list, + zip( + *( + (r, j, min(j + blocksize, s)) + for r, s in zip(remote_paths, sizes) + for j in range(0, s, blocksize) + ) + ), + ) + + # Collect the byte ranges + chunks = fs.cat_ranges(paths, starts, ends) + + # Construct local byte buffers + # (Need to make sure path offsets are ordered correctly) + unique_count = dict(zip(*np.unique(paths, return_counts=True))) + offset = np.cumsum([0] + [unique_count[p] for p in remote_paths]) + buffers = [ + functools.reduce(operator.add, chunks[offset[i] : offset[i + 1]]) + for i in range(len(remote_paths)) + ] + return buffers + + +def _get_remote_bytes_parquet( + remote_paths, + fs, + *, + columns=None, + row_groups=None, + blocksize=_BYTES_PER_THREAD_DEFAULT, +): + if fsspec_parquet is None or (columns is None and row_groups is None): + return _get_remote_bytes_all(remote_paths, fs, blocksize=blocksize) + + sizes = fs.sizes(remote_paths) + data = fsspec_parquet._get_parquet_byte_ranges( + remote_paths, + fs, + columns=columns, + row_groups=row_groups, + max_block=blocksize, + ) + + buffers = [] + for size, path in zip(sizes, remote_paths): + path_data = data[path] + buf = np.empty(size, dtype="b") + for range_offset in path_data.keys(): + chunk = path_data[range_offset] + buf[range_offset[0] : range_offset[1]] = np.frombuffer( + chunk, dtype="b" + ) + buffers.append(buf.tobytes()) + return buffers + + +def _prefetch_remote_buffers( + paths, + fs, + *, + method="all", + **prefetch_options, +): + # Gather bytes ahead of time for remote filesystems + if fs and paths and not _is_local_filesystem(fs): + try: + prefetcher = { + "parquet": _get_remote_bytes_parquet, + "all": _get_remote_bytes_all, + }[method] + except KeyError: + raise ValueError( + f"{method} is not a supported remote-data prefetcher." + " Expected 'parquet' or 'all'." + ) + return prefetcher( + paths, + fs, + **prefetch_options, + ) + + else: + return paths diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index c9b343e0f9f..7347ec7866a 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -6,7 +6,6 @@ import os import traceback import warnings -from contextlib import contextmanager import numpy as np import pandas as pd @@ -404,28 +403,3 @@ def _all_bools_with_nulls(lhs, rhs, bool_fill_value): if result_mask is not None: result_col = result_col.set_mask(result_mask.as_mask()) return result_col - - -@contextmanager -def maybe_filter_deprecation( - condition: bool, message: str, category: type[Warning] -): - """Conditionally filter a warning category. 
- - Parameters - ---------- - condition - If true, filter the warning - message - Message to match, passed to :func:`warnings.filterwarnings` - category - Category of warning, passed to :func:`warnings.filterwarnings` - """ - with warnings.catch_warnings(): - if condition: - warnings.filterwarnings( - "ignore", - message, - category=category, - ) - yield diff --git a/python/cudf/cudf_pandas_tests/data/repr_slow_down_test.ipynb b/python/cudf/cudf_pandas_tests/data/repr_slow_down_test.ipynb new file mode 100644 index 00000000000..c7d39b78810 --- /dev/null +++ b/python/cudf/cudf_pandas_tests/data/repr_slow_down_test.ipynb @@ -0,0 +1,69 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext cudf.pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "np.random.seed(0)\n", + "\n", + "num_rows = 25_000_000\n", + "num_columns = 12\n", + "\n", + "# Create a DataFrame with random data\n", + "df = pd.DataFrame(np.random.randint(0, 100, size=(num_rows, num_columns)),\n", + " columns=[f'Column_{i}' for i in range(1, num_columns + 1)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 6292022d8e4..c4ab4b0a853 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -14,14 +14,20 @@ import types from io import BytesIO, StringIO +import cupy as cp +import jupyter_client +import nbformat import numpy as np import pyarrow as pa import pytest -from numba import NumbaDeprecationWarning +from nbconvert.preprocessors import ExecutePreprocessor +from numba import NumbaDeprecationWarning, vectorize from pytz import utc +from cudf.core._compat import PANDAS_GE_220 from cudf.pandas import LOADED, Profiler from cudf.pandas.fast_slow_proxy import _Unusable, is_proxy_object +from cudf.testing import assert_eq if not LOADED: raise ImportError("These tests must be run with cudf.pandas loaded") @@ -42,6 +48,8 @@ get_calendar, ) +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION + # Accelerated pandas has the real pandas and cudf modules as attributes pd = xpd._fsproxy_slow cudf = xpd._fsproxy_fast @@ -531,12 +539,15 @@ def test_array_ufunc(series): @pytest.mark.xfail(strict=False, reason="Fails in CI, passes locally.") def test_groupby_apply_func_returns_series(dataframe): pdf, df = dataframe + if PANDAS_GE_220: + kwargs = {"include_groups": False} + else: + kwargs = {} + expect = pdf.groupby("a").apply( - lambda group: pd.Series({"x": 1}), include_groups=False - ) - got = df.groupby("a").apply( - lambda group: xpd.Series({"x": 1}), include_groups=False + lambda group: pd.Series({"x": 1}), **kwargs 
) + got = df.groupby("a").apply(lambda group: xpd.Series({"x": 1}), **kwargs) tm.assert_equal(expect, got) @@ -607,6 +618,10 @@ def test_array_function_series_fallback(series): tm.assert_equal(expect, got) +@pytest.mark.xfail( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) def test_timedeltaproperties(series): psr, sr = series psr, sr = psr.astype("timedelta64[ns]"), sr.astype("timedelta64[ns]") @@ -666,6 +681,10 @@ def test_maintain_container_subclasses(multiindex): assert isinstance(got, xpd.core.indexes.frozen.FrozenList) +@pytest.mark.xfail( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas due to unsupported boxcar window type", +) def test_rolling_win_type(): pdf = pd.DataFrame(range(5)) df = xpd.DataFrame(range(5)) @@ -1281,6 +1300,10 @@ def max_times_two(self): assert s.max_times_two() == 6 +@pytest.mark.xfail( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="DatetimeArray.__floordiv__ missing in pandas-2.0.0", +) def test_floordiv_array_vs_df(): xarray = xpd.Series([1, 2, 3], dtype="datetime64[ns]").array parray = pd.Series([1, 2, 3], dtype="datetime64[ns]").array @@ -1552,6 +1575,10 @@ def test_numpy_cupy_flatiter(series): assert type(arr.flat._fsproxy_slow) == np.flatiter +@pytest.mark.xfail( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="pyarrow_numpy storage type was not supported in pandas-2.0.0", +) def test_arrow_string_arrays(): cu_s = xpd.Series(["a", "b", "c"]) pd_s = pd.Series(["a", "b", "c"]) @@ -1632,3 +1659,82 @@ def test_change_index_name(index): assert s.index.name == name assert df.index.name == name + + +def test_notebook_slow_repr(): + notebook_filename = ( + os.path.dirname(os.path.abspath(__file__)) + + "/data/repr_slow_down_test.ipynb" + ) + with open(notebook_filename, "r", encoding="utf-8") as f: + nb = nbformat.read(f, as_version=4) + + ep = ExecutePreprocessor( + timeout=30, kernel_name=jupyter_client.KernelManager().kernel_name + ) + + try: + ep.preprocess(nb, {"metadata": {"path": "./"}}) + except Exception as e: + assert False, f"Error executing the notebook: {e}" + + # Collect the outputs + html_result = nb.cells[2]["outputs"][0]["data"]["text/html"] + for string in { + "div", + "Column_1", + "Column_2", + "Column_3", + "Column_4", + "tbody", + "
", + }: + assert ( + string in html_result + ), f"Expected string {string} not found in the output" + + +def test_numpy_ndarray_isinstancecheck(array): + arr1, arr2 = array + assert isinstance(arr1, np.ndarray) + assert isinstance(arr2, np.ndarray) + + +def test_numpy_ndarray_np_ufunc(array): + arr1, arr2 = array + + @np.vectorize + def add_one_ufunc(arr): + return arr + 1 + + assert_eq(add_one_ufunc(arr1), add_one_ufunc(arr2)) + + +def test_numpy_ndarray_cp_ufunc(array): + arr1, arr2 = array + + @cp.vectorize + def add_one_ufunc(arr): + return arr + 1 + + assert_eq(add_one_ufunc(cp.asarray(arr1)), add_one_ufunc(arr2)) + + +def test_numpy_ndarray_numba_ufunc(array): + arr1, arr2 = array + + @vectorize + def add_one_ufunc(arr): + return arr + 1 + + assert_eq(add_one_ufunc(arr1), add_one_ufunc(arr2)) + + +def test_numpy_ndarray_numba_cuda_ufunc(array): + arr1, arr2 = array + + @vectorize(["int64(int64)"], target="cuda") + def add_one_ufunc(a): + return a + 1 + + assert_eq(cp.asarray(add_one_ufunc(arr1)), cp.asarray(add_one_ufunc(arr2))) diff --git a/python/cudf/cudf_pandas_tests/test_profiler.py b/python/cudf/cudf_pandas_tests/test_profiler.py index 588398265f2..5b7bde06d1d 100644 --- a/python/cudf/cudf_pandas_tests/test_profiler.py +++ b/python/cudf/cudf_pandas_tests/test_profiler.py @@ -5,6 +5,8 @@ import os import subprocess +import pytest + from cudf.pandas import LOADED, Profiler if not LOADED: @@ -13,7 +15,13 @@ import numpy as np import pandas as pd +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="function names change across versions of pandas, so making sure it only runs on latest version of pandas", +) def test_profiler(): np.random.seed(42) with Profiler() as profiler: diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml new file mode 100644 index 00000000000..05e1d8178d5 --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml @@ -0,0 +1,276 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+# Dependency list for https://github.com/rapidsai/dependency-file-generator +files: + checks: + output: none + includes: + - develop + - py_version + test_dask: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_dask + test_matplotlib: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_matplotlib + test_numpy: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_numpy + test_pytorch: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_pytorch + test_seaborn: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_seaborn + test_scipy: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_scipy + test_sklearn: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_sklearn + test_stumpy: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_stumpy + test_tensorflow: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_tensorflow + test_xgboost: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_xgboost + test_cuml: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_cuml + test_cugraph: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_cugraph + test_ibis: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_ibis + test_hvplot: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_hvplot + test_holoviews: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_holoviews + test_plotly: + output: none + includes: + - cuda_version + - py_version + - test_base + - test_plotly + +channels: + - rapidsai-nightly + - rapidsai + - conda-forge + - nvidia + +dependencies: + develop: + common: + - output_types: conda + packages: + - pre-commit + cuda_version: + specific: + - output_types: conda + matrices: + - matrix: + cuda: "11" + packages: + - cuda-version=11.8 + - matrix: + cuda: "11.8" + packages: + - cuda-version=11.8 + - matrix: + cuda: "12.0" + packages: + - cuda-version=12.0 + - matrix: + cuda: "12.2" + packages: + - cuda-version=12.2 + - matrix: + cuda: "12.5" + packages: + - cuda-version=12.5 + - matrix: + cuda: "12" + packages: + - cuda-version=12.5 + py_version: + specific: + - output_types: conda + matrices: + - matrix: + py: "3.10" + packages: + - python=3.10 + - matrix: + py: "3.11" + packages: + - python=3.11 + - matrix: + packages: + - python>=3.10,<3.12 + test_base: + common: + - output_types: conda + packages: + - cudf==24.10.*,>=0.0.0a0 + - pandas + - pytest + - pytest-xdist + test_dask: + common: + - output_types: conda + packages: + - dask + test_matplotlib: + common: + - output_types: conda + packages: + - matplotlib-base + test_numpy: + common: + - output_types: conda + packages: + - numpy + test_pytorch: + common: + - output_types: conda + packages: + - numpy + - pytorch>=2.1.0 + test_seaborn: + common: + - output_types: conda + packages: + - seaborn + test_scipy: + common: + - output_types: conda + packages: + - scipy + test_sklearn: + common: + - output_types: conda + packages: + - scikit-learn + test_stumpy: + common: + - output_types: conda + packages: + - dask + - stumpy + test_tensorflow: + common: + - output_types: conda + packages: + - tensorflow + test_xgboost: + common: + - output_types: conda + packages: + - hypothesis + - numpy + 
- scipy + - scikit-learn + - pip + - pip: + - xgboost>=2.0.1 + test_cuml: + common: + - output_types: conda + packages: + - cuml==24.10.*,>=0.0.0a0 + - scikit-learn + test_cugraph: + common: + - output_types: conda + packages: + - cugraph==24.10.*,>=0.0.0a0 + - networkx + test_ibis: + common: + - output_types: conda + packages: + - pip + - pip: + - ibis-framework[pandas] + test_hvplot: + common: + - output_types: conda + packages: + - hvplot + test_holoviews: + common: + - output_types: conda + packages: + - holoviews + test_plotly: + common: + - output_types: conda + packages: + - plotly diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/conftest.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/conftest.py new file mode 100644 index 00000000000..33b6ffdbd5c --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/conftest.py @@ -0,0 +1,173 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. + +from __future__ import annotations + +import os +import pickle +from typing import TYPE_CHECKING, BinaryIO + +import _pytest +import _pytest.config +import _pytest.nodes +import pytest + +if TYPE_CHECKING: + import _pytest.python + +from _pytest.stash import StashKey + +from cudf.pandas.module_accelerator import disable_module_accelerator + +file_handle_key = StashKey[BinaryIO]() +basename_key = StashKey[str]() +test_folder_key = StashKey[str]() +results = StashKey[tuple[dict, dict]]() + + +def pytest_addoption(parser): + parser.addoption( + "--compare", + action="store_true", + default=False, + help="Run comparison step?", + ) + + +def read_results(f): + while True: + try: + yield pickle.load(f) + except EOFError: + return + + +def pytest_collection_modifyitems( + session, config: _pytest.config.Config, items: list[_pytest.nodes.Item] +): + if config.getoption("--compare"): + current_pass = "compare" + elif "cudf.pandas" in config.option.plugins: + current_pass = "cudf_pandas" + else: + current_pass = "gold" + + def swap_xfail(item: _pytest.nodes.Item, name: str): + """Replace custom `xfail_**` mark with a `xfail` mark having the same kwargs.""" + + old_mark = item.keywords[name] + new_mark = pytest.mark.xfail(**old_mark.kwargs) + + # Replace all "xfail_**" mark in the node chain with the "xfail" mark + # if not found, the node chain is not modified. 
+ for node, mark in item.iter_markers_with_node(name): + idx = node.own_markers.index(mark) + node.own_markers[idx] = new_mark + + for item in items: + if current_pass == "gold" and "xfail_gold" in item.keywords: + swap_xfail(item, "xfail_gold") + elif ( + current_pass == "cudf_pandas" + and "xfail_cudf_pandas" in item.keywords + ): + swap_xfail(item, "xfail_cudf_pandas") + elif current_pass == "compare" and "xfail_compare" in item.keywords: + swap_xfail(item, "xfail_compare") + + +def pytest_configure(config: _pytest.config.Config): + gold_basename = "results-gold" + cudf_basename = "results-cudf-pandas" + test_folder = os.path.join(os.path.dirname(__file__)) + + if config.getoption("--compare"): + # Everyone reads everything + gold_path = os.path.join(test_folder, f"{gold_basename}.pickle") + cudf_path = os.path.join(test_folder, f"{cudf_basename}.pickle") + with disable_module_accelerator(): + with open(gold_path, "rb") as f: + gold_results = dict(read_results(f)) + with open(cudf_path, "rb") as f: + cudf_results = dict(read_results(f)) + config.stash[results] = (gold_results, cudf_results) + else: + if "cudf.pandas" in config.option.plugins: + basename = cudf_basename + else: + basename = gold_basename + + if hasattr(config, "workerinput"): + # If we're on an xdist worker, open a worker-unique pickle file. + worker = config.workerinput["workerid"] + filename = f"{basename}-{worker}.pickle" + else: + filename = f"{basename}.pickle" + + pickle_path = os.path.join(test_folder, filename) + config.stash[file_handle_key] = open(pickle_path, "wb") + config.stash[test_folder_key] = test_folder + config.stash[basename_key] = basename + + +def pytest_pyfunc_call(pyfuncitem: _pytest.python.Function): + if pyfuncitem.config.getoption("--compare"): + gold_results, cudf_results = pyfuncitem.config.stash[results] + key = pyfuncitem.nodeid + try: + gold = gold_results[key] + except KeyError: + assert False, "pickled gold result is not available" + try: + cudf = cudf_results[key] + except KeyError: + assert False, "pickled cudf result is not available" + if gold is None and cudf is None: + raise ValueError(f"Integration test {key} did not return a value") + asserter = pyfuncitem.get_closest_marker("assert_eq") + if asserter is None: + assert gold == cudf, "Test failed" + else: + asserter.kwargs["fn"](gold, cudf) + else: + # Replace default call of test function with one that captures the + # result + testfunction = pyfuncitem.obj + funcargs = pyfuncitem.funcargs + testargs = { + arg: funcargs[arg] for arg in pyfuncitem._fixtureinfo.argnames + } + result = testfunction(**testargs) + # Tuple-based key-value pairs, key is the node-id + try: + pickle.dump( + (pyfuncitem.nodeid, result), + pyfuncitem.config.stash[file_handle_key], + ) + except pickle.PicklingError: + pass + return True + + +def pytest_unconfigure(config): + if config.getoption("--compare"): + return + if file_handle_key not in config.stash: + # We didn't open a pickle file + return + if not hasattr(config, "workerinput"): + # If we're the controlling process + if ( + hasattr(config.option, "numprocesses") + and config.option.numprocesses is not None + ): + # Concat the worker partial pickle results and remove them + for i in range(config.option.numprocesses): + worker_result = os.path.join( + config.stash[test_folder_key], + f"{config.stash[basename_key]}-gw{i}.pickle", + ) + with open(worker_result, "rb") as f: + config.stash[file_handle_key].write(f.read()) + os.remove(worker_result) + # Close our file + del config.stash[file_handle_key] 
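+
+
+# Illustrative three-pass invocation (assumed commands; the CI wrapper
+# scripts may differ):
+#
+#   pytest tests/                  # "gold" pass: pickle plain-pandas results
+#   pytest -p cudf.pandas tests/   # "cudf_pandas" pass: pickle accelerated results
+#   pytest --compare tests/        # reload both pickles and compare
+#
+# Tests return values instead of asserting; the comparison pass checks
+# gold vs. cudf results with each test's `assert_eq` marker, falling
+# back to `==` when no marker is set.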
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/pytest.ini b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/pytest.ini
new file mode 100644
index 00000000000..817d98e6ba2
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/pytest.ini
@@ -0,0 +1,7 @@
+[pytest]
+xfail_strict=true
+markers=
+    assert_eq: custom binary asserter for a test
+    xfail_gold: this test is expected to fail in the gold pass
+    xfail_cudf_pandas: this test is expected to fail in the cudf_pandas pass
+    xfail_compare: this test is expected to fail in the comparison pass
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cugraph.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cugraph.py
new file mode 100644
index 00000000000..7acc8672063
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cugraph.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import cugraph
+import cupy as cp
+import networkx as nx
+import numpy as np
+import pandas as pd
+import pytest
+
+cugraph_algos = [
+    "betweenness_centrality",
+    "degree_centrality",
+    "katz_centrality",
+    "sorensen_coefficient",
+    "jaccard_coefficient",
+]
+
+nx_algos = [
+    "betweenness_centrality",
+    "degree_centrality",
+    "katz_centrality",
+]
+
+
+def assert_cugraph_equal(expect, got):
+    if isinstance(expect, cp.ndarray):
+        expect = expect.get()
+    if isinstance(got, cp.ndarray):
+        got = got.get()
+    if isinstance(expect, np.ndarray) and isinstance(got, np.ndarray):
+        assert np.array_equal(expect, got)
+    else:
+        assert expect == got
+
+
+pytestmark = pytest.mark.assert_eq(fn=assert_cugraph_equal)
+
+
+@pytest.fixture(scope="session")
+def df():
+    return pd.DataFrame({"source": [0, 1, 2], "destination": [1, 2, 3]})
+
+
+@pytest.fixture(scope="session")
+def adjacency_matrix():
+    data = {
+        "A": [0, 1, 1, 0],
+        "B": [1, 0, 0, 1],
+        "C": [1, 0, 0, 1],
+        "D": [0, 1, 1, 0],
+    }
+    df = pd.DataFrame(data, index=["A", "B", "C", "D"])
+    return df
+
+
+@pytest.mark.parametrize("algo", cugraph_algos)
+def test_cugraph_from_pandas_edgelist(df, algo):
+    G = cugraph.Graph()
+    G.from_pandas_edgelist(df)
+    return getattr(cugraph, algo)(G).to_pandas().values
+
+
+@pytest.mark.parametrize("algo", cugraph_algos)
+def test_cugraph_from_pandas_adjacency(adjacency_matrix, algo):
+    G = cugraph.Graph()
+    G.from_pandas_adjacency(adjacency_matrix)
+    res = getattr(cugraph, algo)(G).to_pandas()
+    return res.sort_values(list(res.columns)).values
+
+
+@pytest.mark.parametrize("algo", cugraph_algos)
+def test_cugraph_from_numpy_array(df, algo):
+    G = cugraph.Graph()
+    G.from_numpy_array(df.values)
+    return getattr(cugraph, algo)(G).to_pandas().values
+
+
+@pytest.mark.parametrize("algo", nx_algos)
+def test_networkx_from_pandas_edgelist(df, algo):
+    G = nx.from_pandas_edgelist(
+        df, "source", "destination", ["source", "destination"]
+    )
+    return getattr(nx, algo)(G)
+
+
+@pytest.mark.parametrize("algo", nx_algos)
+def test_networkx_from_pandas_adjacency(adjacency_matrix, algo):
+    G = nx.from_pandas_adjacency(adjacency_matrix)
+    return getattr(nx, algo)(G)
+
+
+@pytest.mark.parametrize("algo", nx_algos)
+def test_networkx_from_numpy_array(adjacency_matrix, algo):
+    G = nx.from_numpy_array(adjacency_matrix.values)
+    return getattr(nx, algo)(G)
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py
new file mode 100644
index 00000000000..892d0886596
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py
@@ -0,0 +1,152 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import cupy as cp
+import numpy as np
+import pandas as pd
+import pytest
+from cuml.cluster import KMeans
+from cuml.decomposition import PCA
+from cuml.ensemble import RandomForestClassifier
+from cuml.linear_model import LinearRegression, LogisticRegression
+from cuml.metrics import accuracy_score
+from cuml.model_selection import train_test_split
+from cuml.pipeline import Pipeline
+from cuml.preprocessing import StandardScaler
+
+
+def assert_cuml_equal(expect, got):
+    # Coerce GPU arrays to CPU
+    if isinstance(expect, cp.ndarray):
+        expect = expect.get()
+    if isinstance(got, cp.ndarray):
+        got = got.get()
+
+    # Handle equality
+    if isinstance(expect, KMeans) and isinstance(got, KMeans):
+        # same clusters
+        np.testing.assert_allclose(
+            expect.cluster_centers_, got.cluster_centers_
+        )
+    elif isinstance(expect, np.ndarray) and isinstance(got, np.ndarray):
+        np.testing.assert_allclose(expect, got)
+    elif isinstance(expect, tuple) and isinstance(got, tuple):
+        assert len(expect) == len(got)
+        for e, g in zip(expect, got):
+            assert_cuml_equal(e, g)
+    elif isinstance(expect, pd.DataFrame):
+        pd.testing.assert_frame_equal(expect, got)
+    elif isinstance(expect, pd.Series):
+        pd.testing.assert_series_equal(expect, got)
+    else:
+        assert expect == got
+
+
+pytestmark = pytest.mark.assert_eq(fn=assert_cuml_equal)
+
+
+@pytest.fixture
+def binary_classification_data():
+    data = {
+        "feature1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
+        "feature2": [2.0, 4.0, 1.0, 3.0, 5.0, 7.0, 6.0, 8.0, 10.0, 9.0],
+        "target": [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    }
+    df = pd.DataFrame(data)
+    return df
+
+
+def test_linear_regression():
+    lr = LinearRegression(fit_intercept=True, normalize=False, algorithm="eig")
+    X = pd.DataFrame()
+    X["col1"] = np.array([1, 1, 2, 2], dtype=np.float32)
+    X["col2"] = np.array([1, 2, 2, 3], dtype=np.float32)
+    y = pd.Series(np.array([6.0, 8.0, 9.0, 11.0], dtype=np.float32))
+    lr.fit(X, y)
+
+    X_new = pd.DataFrame()
+    X_new["col1"] = np.array([3, 2], dtype=np.float32)
+    X_new["col2"] = np.array([5, 5], dtype=np.float32)
+    preds = lr.predict(X_new)
+    return preds.values
+
+
+def test_logistic_regression(binary_classification_data):
+    X = binary_classification_data[["feature1", "feature2"]]
+    y = binary_classification_data["target"]
+
+    (X_train, X_test, y_train, y_test) = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    model = LogisticRegression()
+    model.fit(X_train, y_train)
+
+    y_pred = model.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+
+    return accuracy
+
+
+def test_random_forest(binary_classification_data):
+    X = binary_classification_data[["feature1", "feature2"]]
+    y = binary_classification_data["target"]
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+    model = RandomForestClassifier(n_estimators=100)
+    model.fit(X_train, y_train)
+    preds = model.predict(X_test)
+    return preds.values
+
+
+def test_clustering():
+    rng = np.random.default_rng(42)
+    nsamps = 300
+    X = rng.random((nsamps, 2))
+    data = pd.DataFrame(X, columns=["x", "y"])
+
+    kmeans = KMeans(n_clusters=3, random_state=42)
+    kmeans.fit(data)
+    return kmeans
+
+
+def test_data_scaling():
+
data = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]) + scaler = StandardScaler() + + scaled_data = scaler.fit_transform(data.values.reshape(-1, 1)) + return scaled_data + + +def test_pipeline(binary_classification_data): + X = binary_classification_data[["feature1", "feature2"]] + y = binary_classification_data["target"] + + pipe = Pipeline( + [ + ("scaler", StandardScaler()), + ("pca", PCA()), + ("random_forest", LogisticRegression()), + ] + ) + + pipe.fit(X, y) + results = pipe.predict(X) + return results.values + + +@pytest.mark.parametrize( + "X, y", + [ + (pd.DataFrame({"a": range(10), "b": range(10)}), pd.Series(range(10))), + ( + pd.DataFrame({"a": range(10), "b": range(10)}).values, + pd.Series(range(10)).values, + ), # cudf.pandas wrapped numpy arrays + ], +) +def test_train_test_split(X, y): + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + + # Compare only the size of the data splits + return len(X_train), len(X_test), len(y_train), len(y_test) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_dask.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_dask.py new file mode 100644 index 00000000000..c34778dfded --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_dask.py @@ -0,0 +1,10 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +import pandas as pd + +import dask.dataframe as dd + + +def test_sum(): + data = {"x": range(1, 11)} + ddf = dd.from_pandas(pd.DataFrame(data), npartitions=2) + return ddf["x"].sum().compute() diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_featureengine.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_featureengine.py new file mode 100644 index 00000000000..3e247291fad --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_featureengine.py @@ -0,0 +1,47 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +import numpy as np +import pandas as pd +from feature_engine.imputation import DropMissingData +from feature_engine.preprocessing import MatchVariables + + +def test_drop_missing_data(): + data = { + "x": [np.nan, 1, 1, 0, np.nan], + "y": ["a", np.nan, "b", np.nan, "a"], + } + df = pd.DataFrame(data) + + dmd = DropMissingData() + dmd.fit(df) + dmd.transform(df) + + return dmd + + +def test_match_variables(): + train = pd.DataFrame( + { + "Name": ["tom", "nick", "krish", "jack"], + "City": ["London", "Manchester", "Liverpool", "Bristol"], + "Age": [20, 21, 19, 18], + "Marks": [0.9, 0.8, 0.7, 0.6], + } + ) + + test = pd.DataFrame( + { + "Name": ["tom", "sam", "nick"], + "Age": [20, 22, 23], + "Marks": [0.9, 0.7, 0.6], + "Hobbies": ["tennis", "rugby", "football"], + } + ) + + match_columns = MatchVariables() + + match_columns.fit(train) + + df_transformed = match_columns.transform(test) + + return df_transformed diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py new file mode 100644 index 00000000000..bef02c86355 --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py @@ -0,0 +1,79 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
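+#
+# Each test below returns a tuple of picklable plot attributes
+# (data, ndims, kdims, vdims, shape) via get_plot_info, rather than a
+# live plot object, so results can be serialized for cross-pass comparison.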
+import holoviews as hv +import numpy as np +import pandas as pd +import pytest + +nsamps = 1000 +hv.extension("bokeh") # load holoviews extension + + +def assert_holoviews_equal(expect, got): + expect_data, expect_ndims, expect_kdims, expect_vdims, expect_shape = ( + expect + ) + got_data, got_ndims, got_kdims, got_vdims, got_shape = got + + if isinstance(expect_data, dict): + np.testing.assert_allclose(expect_data["x"], got_data["x"]) + np.testing.assert_allclose( + expect_data["Frequency"], got_data["Frequency"] + ) + else: + pd._testing.assert_frame_equal(expect_data, got_data) + assert expect_ndims == got_ndims + assert expect_kdims == got_kdims + assert expect_vdims == got_vdims + assert expect_shape == got_shape + + +pytestmark = pytest.mark.assert_eq(fn=assert_holoviews_equal) + + +@pytest.fixture(scope="module") +def df(): + rng = np.random.default_rng(42) + return pd.DataFrame( + { + "x": rng.random(nsamps), + "y": rng.random(nsamps), + "category": rng.integers(0, 10, nsamps), + "category2": rng.integers(0, 10, nsamps), + } + ) + + +def get_plot_info(plot): + return ( + plot.data, + plot.ndims, + plot.kdims, + plot.vdims, + plot.shape, + ) + + +def test_holoviews_barplot(df): + return get_plot_info(hv.Bars(df, kdims="category", vdims="y")) + + +def test_holoviews_scatterplot(df): + return get_plot_info(hv.Scatter(df, kdims="x", vdims="y")) + + +def test_holoviews_curve(df): + return get_plot_info(hv.Curve(df, kdims="category", vdims="y")) + + +def test_holoviews_heatmap(df): + return get_plot_info( + hv.HeatMap(df, kdims=["category", "category2"], vdims="y") + ) + + +def test_holoviews_histogram(df): + return get_plot_info(hv.Histogram(df.values)) + + +def test_holoviews_hexbin(df): + return get_plot_info(hv.HexTiles(df, kdims=["x", "y"], vdims="y")) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_hvplot.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_hvplot.py new file mode 100644 index 00000000000..0f0d2f8bcbd --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_hvplot.py @@ -0,0 +1,72 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +import hvplot.pandas # noqa: F401, needs to monkey patch pandas with this. 
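+# Importing hvplot.pandas registers the `.hvplot` plotting accessor on
+# pandas objects, e.g. `df.hvplot.scatter(x="x", y="y")` as used below.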
+import numpy as np +import pandas as pd +import pytest + +nsamps = 1000 + + +def assert_hvplot_equal(expect, got): + expect_data, expect_ndims, expect_kdims, expect_vdims, expect_shape = ( + expect + ) + got_data, got_ndims, got_kdims, got_vdims, got_shape = got + + if isinstance(expect_data, dict): + np.testing.assert_allclose(expect_data["x"], got_data["x"]) + np.testing.assert_allclose( + expect_data["Frequency"], got_data["Frequency"] + ) + else: + pd._testing.assert_frame_equal(expect_data, got_data) + assert expect_ndims == got_ndims + assert expect_kdims == got_kdims + assert expect_vdims == got_vdims + assert expect_shape == got_shape + + +pytestmark = pytest.mark.assert_eq(fn=assert_hvplot_equal) + + +@pytest.fixture(scope="module") +def df(): + rng = np.random.default_rng(42) + return pd.DataFrame( + { + "x": rng.random(nsamps), + "y": rng.random(nsamps), + "category": rng.integers(0, 10, nsamps), + "category2": rng.integers(0, 10, nsamps), + } + ) + + +def get_plot_info(plot): + return ( + plot.data, + plot.ndims, + plot.kdims, + plot.vdims, + plot.shape, + ) + + +def test_hvplot_barplot(df): + return get_plot_info(df.hvplot.bar(x="category", y="y")) + + +def test_hvplot_scatterplot(df): + return get_plot_info(df.hvplot.scatter(x="x", y="y")) + + +def test_hvplot_lineplot(df): + return get_plot_info(df.hvplot.line(x="x", y="y")) + + +def test_hvplot_heatmap(df): + return get_plot_info(df.hvplot.heatmap(x="x", y="y", C="y")) + + +def test_hvplot_hexbin(df): + return get_plot_info(df.hvplot.hexbin(x="x", y="y", C="y")) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_ibis.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_ibis.py new file mode 100644 index 00000000000..2a8cf7c6ac2 --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_ibis.py @@ -0,0 +1,169 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
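+#
+# These tests build ibis expressions against the pandas backend
+# (selected below) and return materialized `.to_pandas()` results for
+# the cross-pass comparison machinery in conftest.py.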
+ +import ibis +import numpy as np +import pandas as pd +import pytest + +ibis.set_backend("pandas") +ibis.options.interactive = False + + +def ibis_assert_equal(expect, got, rtol: float = 1e-7, atol: float = 0.0): + pd._testing.assert_almost_equal(expect, got, rtol=rtol, atol=atol) + + +pytestmark = pytest.mark.assert_eq(fn=ibis_assert_equal) + + +COLUMN_REDUCTIONS = ["sum", "min", "max", "mean", "var", "std"] +ELEMENTWISE_UFUNCS = [ + "sin", + "cos", + "atan", + "exp", + "log", + "abs", +] +STRING_UNARY_FUNCS = [ + "lower", + "upper", + "capitalize", + "reverse", +] + + +@pytest.fixture +def ibis_table_num_str(): + N = 1000 + K = 5 + rng = np.random.default_rng(42) + + df = pd.DataFrame( + rng.integers(0, 100, (N, K)), columns=[f"col{x}" for x in np.arange(K)] + ) + df["key"] = rng.choice(np.arange(10), N) + df["str_col"] = rng.choice(["Hello", "World", "It's", "Me", "Again"], N) + table = ibis.memtable(df, name="t") + return table + + +@pytest.fixture +def ibis_table_num(): + N = 100 + K = 2 + rng = np.random.default_rng(42) + + df = pd.DataFrame( + rng.integers(0, 100, (N, K)), columns=[f"val{x}" for x in np.arange(K)] + ) + df["key"] = rng.choice(np.arange(10), N) + table = ibis.memtable(df, name="t") + return table + + +@pytest.mark.parametrize("op", COLUMN_REDUCTIONS) +def test_column_reductions(ibis_table_num_str, op): + t = ibis_table_num_str + return getattr(t.col1, op)().to_pandas() + + +@pytest.mark.parametrize("op", ["mean", "sum", "min", "max"]) +def test_groupby_reductions(ibis_table_num_str, op): + t = ibis_table_num_str + return getattr(t.group_by("key").col1, op)().to_pandas() + + +@pytest.mark.parametrize("op", ELEMENTWISE_UFUNCS) +def test_mutate_ufunc(ibis_table_num_str, op): + t = ibis_table_num_str + expr = getattr(t.col1, op)() + return t.mutate(col1_sin=expr).to_pandas() + + +@pytest.mark.parametrize("op", STRING_UNARY_FUNCS) +def test_string_unary(ibis_table_num_str, op): + t = ibis_table_num_str + return getattr(t.str_col, op)().to_pandas() + + +def test_nunique(ibis_table_num_str): + t = ibis_table_num_str + return t.col1.nunique().to_pandas() + + +def test_count(ibis_table_num_str): + t = ibis_table_num_str + return t.col1.count().to_pandas() + + +def test_select(ibis_table_num_str): + t = ibis_table_num_str + return t.select("col0", "col1").to_pandas() + + +def test_between(ibis_table_num_str): + t = ibis_table_num_str + return t.key.between(4, 8).to_pandas() + + +def test_notin(ibis_table_num_str): + t = ibis_table_num_str + return t.key.notin([0, 1, 8, 3]).to_pandas() + + +def test_window(ibis_table_num_str): + t = ibis_table_num_str + return ( + t.group_by("key").mutate(demeaned=t.col1 - t.col1.mean()).to_pandas() + ) + + +def test_limit(ibis_table_num_str): + t = ibis_table_num_str + return t.limit(5).to_pandas() + + +def test_filter(ibis_table_num_str): + t = ibis_table_num_str + return t.filter([t.key == 4, t.col0 > 15]).to_pandas() + + +@pytest.mark.skip(reason="Join ordering not currently guaranteed, i.e., flaky") +@pytest.mark.parametrize("join_type", ["inner", "left", "right"]) +def test_join_exact_ordering(ibis_table_num_str, ibis_table_num, join_type): + t1 = ibis_table_num_str + t2 = ibis_table_num + res = t1.join(t2, "key", how=join_type).to_pandas() + return res + + +@pytest.mark.parametrize("join_type", ["inner", "left", "right"]) +def test_join_sort_correctness(ibis_table_num_str, ibis_table_num, join_type): + """ + While we don't currently guarantee exact row ordering + we can still test join correctness with ex-post sorting. 
+ """ + t1 = ibis_table_num_str + t2 = ibis_table_num + res = t1.join(t2, "key", how=join_type).to_pandas() + + res_sorted = res.sort_values(by=res.columns.tolist()).reset_index( + drop=True + ) + return res_sorted + + +def test_order_by(ibis_table_num_str): + t = ibis_table_num_str + return t.order_by(ibis.desc("col1")).to_pandas() + + +def test_aggregate_having(ibis_table_num_str): + t = ibis_table_num_str + return t.aggregate( + by=["key"], + sum_c0=t.col0.sum(), + avg_c0=t.col0.mean(), + having=t.col1.mean() > 50, + ).to_pandas() diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py new file mode 100644 index 00000000000..665b9d6fb08 --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py @@ -0,0 +1,70 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import pytest +from matplotlib.axes import Axes +from matplotlib.collections import PathCollection +from matplotlib.lines import Line2D +from matplotlib.patches import Rectangle +from pandas._testing import assert_equal + + +def assert_plots_equal(expect, got): + if isinstance(expect, Axes) and isinstance(got, Axes): + for expect_ch, got_ch in zip( + expect.get_children(), got.get_children() + ): + assert type(expect_ch) == type(got_ch) + if isinstance(expect_ch, Line2D): + assert_equal(expect_ch.get_xdata(), got_ch.get_xdata()) + assert_equal(expect_ch.get_ydata(), got_ch.get_ydata()) + elif isinstance(expect_ch, Rectangle): + assert expect_ch.get_height() == got_ch.get_height() + elif isinstance(expect, PathCollection) and isinstance( + got, PathCollection + ): + assert_equal(expect.get_offsets()[:, 0], got.get_offsets()[:, 0]) + assert_equal(expect.get_offsets()[:, 1], got.get_offsets()[:, 1]) + else: + assert_equal(expect, got) + + +pytestmark = pytest.mark.assert_eq(fn=assert_plots_equal) + + +def test_line(): + df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]}) + (data,) = plt.plot(df["x"], df["y"], marker="o", linestyle="-") + + return plt.gca() + + +def test_bar(): + data = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"]) + ax = data.plot(kind="bar") + return ax + + +def test_scatter(): + df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [5, 4, 3, 2, 1]}) + + fig, ax = plt.subplots(figsize=(8, 6)) + ax.scatter(df["x"], df["y"]) + + return plt.gca() + + +def test_dataframe_plot(): + rng = np.random.default_rng(42) + df = pd.DataFrame(rng.random((10, 5)), columns=["a", "b", "c", "d", "e"]) + ax = df.plot() + + return ax + + +def test_series_plot(): + sr = pd.Series([1, 2, 3, 4, 5]) + ax = sr.plot() + + return ax diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py new file mode 100644 index 00000000000..472f1889354 --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py @@ -0,0 +1,59 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pytest + +nsamps = 1000 +reductions = ["sum", "min", "max", "mean", "var", "std"] + + +pytestmark = pytest.mark.assert_eq(fn=np.testing.assert_allclose) + + +@pytest.fixture(scope="module") +def sr(): + rng = np.random.default_rng(42) + return pd.Series(rng.random(nsamps)) + + +@pytest.mark.parametrize("op", reductions) +def test_numpy_series_reductions(sr, op): + return getattr(np, op)(sr) + + +@pytest.fixture(scope="module") +def df(): + rng = np.random.default_rng(42) + return pd.DataFrame({"A": rng.random(nsamps), "B": rng.random(nsamps)}) + + +@pytest.mark.parametrize("op", reductions) +def test_numpy_dataframe_reductions(df, op): + return getattr(np, op)(df) + + +def test_numpy_dot(df): + return np.dot(df, df.T) + + +def test_numpy_fft(sr): + fft = np.fft.fft(sr) + return fft + + +def test_numpy_sort(df): + return np.sort(df) + + +@pytest.mark.parametrize("percentile", [0, 25, 50, 75, 100]) +def test_numpy_percentile(df, percentile): + return np.percentile(df, percentile) + + +def test_numpy_unique(df): + return np.unique(df) + + +def test_numpy_transpose(df): + return np.transpose(df) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_plotly.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_plotly.py new file mode 100644 index 00000000000..27d9df83476 --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_plotly.py @@ -0,0 +1,67 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +import numpy as np +import pandas as pd +import plotly.express as px +import pytest + +nsamps = 100 + + +def assert_plotly_equal(expect, got): + assert type(expect) == type(got) + if isinstance(expect, dict): + assert expect.keys() == got.keys() + for k in expect.keys(): + assert_plotly_equal(expect[k], got[k]) + elif isinstance(got, list): + assert len(expect) == len(got) + for i in range(len(expect)): + assert_plotly_equal(expect[i], got[i]) + elif isinstance(expect, np.ndarray): + np.testing.assert_allclose(expect, got) + else: + assert expect == got + + +pytestmark = pytest.mark.assert_eq(fn=assert_plotly_equal) + + +@pytest.fixture(scope="module") +def df(): + rng = np.random.default_rng(42) + return pd.DataFrame( + { + "x": rng.random(nsamps), + "y": rng.random(nsamps), + "category": rng.integers(0, 10, nsamps), + "category2": rng.integers(0, 10, nsamps), + } + ) + + +def test_plotly_scatterplot(df): + return px.scatter(df, x="x", y="y").to_plotly_json() + + +def test_plotly_lineplot(df): + return px.line(df, x="category", y="y").to_plotly_json() + + +def test_plotly_barplot(df): + return px.bar(df, x="category", y="y").to_plotly_json() + + +def test_plotly_histogram(df): + return px.histogram(df, x="category").to_plotly_json() + + +def test_plotly_pie(df): + return px.pie(df, values="category", names="category2").to_plotly_json() + + +def test_plotly_heatmap(df): + return px.density_heatmap(df, x="category", y="category2").to_plotly_json() + + +def test_plotly_boxplot(df): + return px.box(df, x="category", y="y").to_plotly_json() diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py new file mode 100644 index 00000000000..ae9db3836a6 --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py @@ -0,0 +1,128 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
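+#
+# The Dataset below indexes pandas Series directly in __getitem__, so
+# the DataLoader exercises element access on (possibly proxied) pandas
+# objects; returned tensors are compared with torch.testing.assert_close.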
+ +import numpy as np +import pandas as pd +import pytest +import torch + +pytestmark = pytest.mark.assert_eq(fn=torch.testing.assert_close) + + +@pytest.fixture +def data(): + rng = np.random.default_rng(0) + x1 = rng.random(100, dtype=np.float32) + x2 = rng.random(100, dtype=np.float32) + y = np.zeros(100).astype(np.int64) + + y[(x1 > x2) & (x1 > 0)] = 0 + y[(x1 < x2) & (x1 > 0)] = 1 + y[(x1 > x2) & (x1 < 0)] = 2 + y[(x1 < x2) & (x1 < 0)] = 3 + + return x1, x2, y + + +class Dataset(torch.utils.data.Dataset): + def __init__(self, x1, x2, y): + self.x1 = x1 + self.x2 = x2 + self.y = y + + def __getitem__(self, idx): + x1 = self.x1[idx] + x2 = self.x2[idx] + y = self.y[idx] + return (x1, x2), y + + def __len__(self): + return len(self.x1) + + +def test_dataloader_auto_batching(data): + x1, x2, y = (pd.Series(i) for i in data) + + dataset = Dataset(x1, x2, y) + + # default collate_fn + dataloader = torch.utils.data.DataLoader(dataset, batch_size=10) + + (x1, x2), y = next(iter(dataloader)) + return x1, x2, y + + +def test_dataloader_manual_batching(data): + x1, x2, y = (pd.Series(i) for i in data) + + dataset = Dataset(x1, x2, y) + + # default collate_fn + dataloader = torch.utils.data.DataLoader(dataset, batch_size=None) + + (x1, x2), y = next(iter(dataloader)) + return x1, x2, y + + +class Model(torch.nn.Module): + def __init__(self): + super().__init__() + self.fc1 = torch.nn.Linear(2, 10) + self.relu1 = torch.nn.ReLU() + self.fc2 = torch.nn.Linear(10, 10) + self.relu2 = torch.nn.ReLU() + self.output = torch.nn.Linear(10, 4) + + def forward(self, x1, x2): + x = torch.stack([x1, x2], dim=0).T + x = self.fc1(x) + x = self.relu1(x) + x = self.fc2(x) + x = self.relu2(x) + return torch.nn.functional.softmax(x, dim=1) + + +def train(model, dataloader, optimizer, criterion): + model.train() + for (x1, x2), y in dataloader: + x1 = x1.to("cuda") + x2 = x2.to("cuda") + y = y.to("cuda") + + optimizer.zero_grad() + y_pred = model(x1, x2) + loss = criterion(y_pred, y) + loss.backward() + optimizer.step() + + +def test_torch_train(data): + torch.manual_seed(0) + + x1, x2, y = (pd.Series(i) for i in data) + dataset = Dataset(x1, x2, y) + # default collate_fn + dataloader = torch.utils.data.DataLoader(dataset, batch_size=10) + + model = Model().to("cuda") + optimizer = torch.optim.SGD(model.parameters(), lr=0.001) + criterion = torch.nn.CrossEntropyLoss() + + train(model, dataloader, optimizer, criterion) + + test_x1, test_x2 = next(iter(dataloader))[0] + test_x1 = test_x1.to("cuda") + test_x2 = test_x2.to("cuda") + + return model(test_x1, test_x2) + + +def test_torch_tensor_ctor(): + s = pd.Series(range(5)) + return torch.tensor(s.values) + + +@pytest.mark.xfail_cudf_pandas(reason="Known failure, see xdf/#210") +@pytest.mark.xfail_compare +def test_torch_tensor_from_numpy(): + s = pd.Series(range(5)) + return torch.from_numpy(s.values) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_scipy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_scipy.py new file mode 100644 index 00000000000..963a8549000 --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_scipy.py @@ -0,0 +1,65 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
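+#
+# Each test feeds pandas objects straight into scipy routines and
+# returns the numeric result; passes are compared with
+# pd._testing.assert_almost_equal (see pytestmark below).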
+import numpy as np
+import pandas as pd
+import pytest
+import scipy
+
+
+@pytest.mark.parametrize("func", ["hmean", "tvar", "gstd"])
+def test_scipy_stats(func):
+    rng = np.random.default_rng(42)
+    data = pd.Series(rng.random(1000))
+    return getattr(scipy.stats, func)(data)
+
+
+@pytest.mark.parametrize("func", ["norm"])
+def test_scipy_linalg(func):
+    rng = np.random.default_rng(42)
+    data = pd.Series(rng.random(1000))
+    return getattr(scipy.linalg, func)(data)
+
+
+pytestmark = pytest.mark.assert_eq(fn=pd._testing.assert_almost_equal)
+
+
+def test_compute_pi():
+    def circle(x):
+        return (1 - x**2) ** 0.5
+
+    x = pd.Series(np.linspace(0, 1, 100))
+    y = pd.Series(circle(np.linspace(0, 1, 100)))
+
+    result = scipy.integrate.trapezoid(y, x)
+    return result * 4
+
+
+def test_matrix_solve():
+    A = pd.DataFrame([[2, 3], [1, 2]])
+    b = pd.Series([1, 2])
+
+    return scipy.linalg.solve(A, b)
+
+
+def test_correlation():
+    data = pd.DataFrame({"A": [1, 2, 3, 4, 5], "B": [5, 4, 3, 2, 1]})
+
+    return scipy.stats.pearsonr(data["A"], data["B"])
+
+
+def test_optimization():
+    x = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
+
+    def rosen(x):  # banana function from scipy tutorial
+        return sum(
+            100.0 * (x[1:] - x[:-1] ** 2.0) ** 2.0 + (1 - x[:-1]) ** 2.0
+        )
+
+    result = scipy.optimize.fmin(rosen, x)
+    return result
+
+
+def test_regression():
+    data = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 5, 4, 5]})
+    result = scipy.stats.linregress(data["x"], data["y"])
+    return result.slope, result.intercept
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py
new file mode 100644
index 00000000000..4b272900acd
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py
@@ -0,0 +1,60 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
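+#
+# Plot-returning tests are compared artist-by-artist by
+# assert_plots_equal below (line data, bar heights, scatter offsets).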
+import pandas as pd +import pytest +import seaborn as sns +from matplotlib.axes import Axes +from matplotlib.collections import PathCollection +from matplotlib.lines import Line2D +from matplotlib.patches import Rectangle +from pandas._testing import assert_equal + + +def assert_plots_equal(expect, got): + if isinstance(expect, Axes) and isinstance(got, Axes): + for expect_ch, got_ch in zip( + expect.get_children(), got.get_children() + ): + assert type(expect_ch) == type(got_ch) + if isinstance(expect_ch, Line2D): + assert_equal(expect_ch.get_xdata(), got_ch.get_xdata()) + assert_equal(expect_ch.get_ydata(), got_ch.get_ydata()) + elif isinstance(expect_ch, Rectangle): + assert expect_ch.get_height() == got_ch.get_height() + elif isinstance(expect, PathCollection) and isinstance( + got, PathCollection + ): + assert_equal(expect.get_offsets()[:, 0], got.get_offsets()[:, 0]) + assert_equal(expect.get_offsets()[:, 1], got.get_offsets()[:, 1]) + else: + assert_equal(expect, got) + + +pytestmark = pytest.mark.assert_eq(fn=assert_plots_equal) + + +@pytest.fixture(scope="module") +def df(): + df = pd.DataFrame( + { + "x": [2, 3, 4, 5, 11], + "y": [4, 3, 2, 1, 15], + "hue": ["c", "a", "b", "b", "a"], + } + ) + return df + + +def test_bar(df): + ax = sns.barplot(data=df, x="x", y="y") + return ax + + +def test_scatter(df): + ax = sns.scatterplot(data=df, x="x", y="y", hue="hue") + return ax + + +def test_lineplot_with_sns_data(): + df = sns.load_dataset("flights") + ax = sns.lineplot(data=df, x="month", y="passengers") + return ax diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_sklearn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_sklearn.py new file mode 100644 index 00000000000..1635fd3dcda --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_sklearn.py @@ -0,0 +1,82 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
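+#
+# Estimators here are fit on pandas inputs with fixed random_state
+# values so that the gold and cudf.pandas passes stay comparable.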
+import numpy as np
+import pandas as pd
+import pytest
+from sklearn.cluster import KMeans
+from sklearn.feature_selection import SelectKBest, f_classif
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+
+
+def test_regression():
+    data = {
+        "feature1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+        "feature2": [2, 4, 1, 3, 5, 7, 6, 8, 10, 9],
+        "target": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
+    }
+    df = pd.DataFrame(data)
+
+    X = df[["feature1", "feature2"]]
+    y = df["target"]
+
+    # Data splitting
+    (X_train, X_test, y_train, y_test) = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    # Basic deterministic LR model
+    model = LogisticRegression()
+    model.fit(X_train, y_train)
+
+    # Prediction phase
+    y_pred = model.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+
+    return accuracy
+
+
+@pytest.mark.assert_eq(fn=np.testing.assert_allclose)
+def test_clustering():
+    rng = np.random.default_rng(42)
+    nsamps = 300
+    X = rng.random((nsamps, 2))
+    data = pd.DataFrame(X, columns=["x", "y"])
+
+    # Create and fit a KMeans clustering model
+    kmeans = KMeans(n_clusters=3, random_state=42)
+    kmeans.fit(data)
+    return kmeans.cluster_centers_
+
+
+def test_feature_selection():
+    rng = np.random.default_rng(42)
+    n_samples = 100
+    n_features = 10
+
+    X = rng.random((n_samples, n_features))
+    y = rng.integers(0, 2, size=n_samples)
+
+    data = pd.DataFrame(
+        X, columns=[f"feature{i}" for i in range(1, n_features + 1)]
+    )
+    data["target"] = y
+
+    # Select the top k features
+    k_best = SelectKBest(score_func=f_classif, k=5)
+    k_best.fit_transform(X, y)
+
+    feat_inds = k_best.get_support(indices=True)
+    features = data.iloc[:, feat_inds]
+
+    return sorted(features.columns.tolist())
+
+
+@pytest.mark.assert_eq(fn=np.testing.assert_allclose)
+def test_data_scaling():
+    data = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
+    scaler = StandardScaler()
+
+    scaled_data = scaler.fit_transform(data.values.reshape(-1, 1))
+    return scaled_data
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy.py
new file mode 100644
index 00000000000..69248002a58
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
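+#
+# stumpy consumes pandas Series / NumPy arrays for matrix-profile
+# computation; stumpy_assert_equal normalizes results to float64
+# before comparison.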
+
+import numpy as np
+import pandas as pd
+import pytest
+import stumpy
+from numba import cuda
+from pandas._testing import assert_equal
+
+
+def stumpy_assert_equal(expected, got):
+    def as_float64(x):
+        if isinstance(x, (tuple, list)):
+            return [as_float64(y) for y in x]
+        else:
+            return x.astype(np.float64)
+
+    assert_equal(as_float64(expected), as_float64(got))
+
+
+pytestmark = pytest.mark.assert_eq(fn=stumpy_assert_equal)
+
+
+def test_1d_time_series():
+    rng = np.random.default_rng(42)
+    ts = pd.Series(rng.random(10))
+    m = 3
+
+    return stumpy.stump(ts, m)
+
+
+def test_1d_gpu():
+    rng = np.random.default_rng(42)
+    your_time_series = rng.random(10000)
+    window_size = (
+        50  # Approximately, how many data points might be found in a pattern
+    )
+    all_gpu_devices = [
+        device.id for device in cuda.list_devices()
+    ]  # Get a list of all available GPU devices
+
+    return stumpy.gpu_stump(
+        your_time_series, m=window_size, device_id=all_gpu_devices
+    )
+
+
+def test_multidimensional_timeseries():
+    rng = np.random.default_rng(42)
+    # Each row represents data from a different dimension while each column represents
+    # data from the same dimension
+    your_time_series = rng.random((3, 1000))
+    # Approximately, how many data points might be found in a pattern
+    window_size = 50
+
+    return stumpy.mstump(your_time_series, m=window_size)
+
+
+def test_anchored_time_series_chains():
+    rng = np.random.default_rng(42)
+    your_time_series = rng.random(10000)
+    window_size = (
+        50  # Approximately, how many data points might be found in a pattern
+    )
+
+    matrix_profile = stumpy.stump(your_time_series, m=window_size)
+
+    left_matrix_profile_index = matrix_profile[:, 2]
+    right_matrix_profile_index = matrix_profile[:, 3]
+    idx = 10  # Subsequence index for which to retrieve the anchored time series chain
+
+    anchored_chain = stumpy.atsc(
+        left_matrix_profile_index, right_matrix_profile_index, idx
+    )
+
+    all_chain_set, longest_unanchored_chain = stumpy.allc(
+        left_matrix_profile_index, right_matrix_profile_index
+    )
+
+    return anchored_chain, all_chain_set, longest_unanchored_chain
+
+
+def test_semantic_segmentation():
+    rng = np.random.default_rng(42)
+    your_time_series = rng.random(10000)
+    window_size = (
+        50  # Approximately, how many data points might be found in a pattern
+    )
+
+    matrix_profile = stumpy.stump(your_time_series, m=window_size)
+
+    subseq_len = 50
+    return stumpy.fluss(
+        matrix_profile[:, 1], L=subseq_len, n_regimes=2, excl_factor=1
+    )
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py
new file mode 100644
index 00000000000..37e3cc34856
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
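+
+# NOTE: `stumpy.stumped` and `stumpy.mstumped` are the distributed variants of
+# `stump`/`mstump`; they take a Dask `Client` as their first argument and farm
+# the matrix profile computation out to the cluster workers.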
+ +import numpy as np +import pandas as pd +import pytest +import stumpy +from pandas._testing import assert_equal + +from dask.distributed import Client, LocalCluster + + +def stumpy_assert_equal(expected, got): + def as_float64(x): + if isinstance(x, (tuple, list)): + return [as_float64(y) for y in x] + else: + return x.astype(np.float64) + + assert_equal(as_float64(expected), as_float64(got)) + + +pytestmark = pytest.mark.assert_eq(fn=stumpy_assert_equal) + + +# Shared dask client for all tests in this module +@pytest.fixture(scope="module") +def dask_client(): + with LocalCluster(n_workers=4, threads_per_worker=1) as cluster: + with Client(cluster) as dask_client: + yield dask_client + + +def test_1d_distributed(dask_client): + np.random.seed(42) + ts = pd.Series(np.random.rand(100)) + m = 10 + return stumpy.stumped(dask_client, ts, m) + + +def test_multidimensional_distributed_timeseries(dask_client): + np.random.seed(42) + # Each row represents data from a different dimension while each column represents + # data from the same dimension + your_time_series = np.random.rand(3, 1000) + # Approximately, how many data points might be found in a pattern + window_size = 50 + + return stumpy.mstumped(dask_client, your_time_series, m=window_size) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py new file mode 100644 index 00000000000..ba1f518cbfd --- /dev/null +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py @@ -0,0 +1,367 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest +import tensorflow as tf + +SHUFFLE_BUFFER = 500 +BATCH_SIZE = 2 + +pytestmark = pytest.mark.assert_eq(fn=pd._testing.assert_equal) + + +@pytest.fixture(scope="module") +def df(): + rng = np.random.RandomState(42) + + nrows = 303 + columns = { + "age": rng.randint(29, 78, size=(nrows,), dtype="int64"), + "sex": rng.randint(0, 2, size=(nrows,), dtype="int64"), + "cp": rng.randint(0, 5, size=(nrows,), dtype="int64"), + "trestbps": rng.randint(94, 201, size=(nrows,), dtype="int64"), + "chol": rng.randint(126, 565, size=(nrows,), dtype="int64"), + "fbs": rng.randint(0, 2, size=(nrows,), dtype="int64"), + "restecg": rng.randint(0, 3, size=(nrows,), dtype="int64"), + "thalach": rng.randint(71, 203, size=(nrows,), dtype="int64"), + "exang": rng.randint(0, 2, size=(nrows,), dtype="int64"), + "oldpeak": rng.uniform(0.0, 6.2, size=(nrows,)), + "slope": rng.randint(1, 4, size=(nrows,), dtype="int64"), + "ca": rng.randint(0, 4, size=(nrows,), dtype="int64"), + "thal": rng.choice( + ["fixed", "normal", "reversible", "1", "2"], size=(nrows,) + ), + "target": rng.randint(0, 2, size=(nrows,), dtype="int64"), + } + + return pd.DataFrame(columns) + + +@pytest.fixture(scope="module") +def target(df): + return df.pop("target") + + +@pytest.fixture +def model_gen(): + def make_model(numeric_features): + normalizer = tf.keras.layers.Normalization(axis=-1) + normalizer.adapt(numeric_features) + model = tf.keras.Sequential( + [ + normalizer, + tf.keras.layers.Dense(10, activation="relu"), + tf.keras.layers.Dense(1), + ] + ) + + model.compile( + optimizer="adam", + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=["accuracy"], + ) + return model + + return make_model + + +def test_dataframe_as_array(model_gen, df, target): + tf.keras.utils.set_random_seed(42) + + numeric_feature_names = ["age", 
"thalach", "trestbps", "chol", "oldpeak"] + numeric_features = df[numeric_feature_names] + + numeric_features = tf.convert_to_tensor( + numeric_features.values, dtype=tf.float32 + ) + + model = model_gen(numeric_features) + model.fit(numeric_features, target, epochs=1, batch_size=BATCH_SIZE) + + test_data = numeric_features[:BATCH_SIZE] + return model.predict(test_data) + + +def test_dataframe_as_dataset(model_gen, df, target): + tf.keras.utils.set_random_seed(42) + + numeric_feature_names = ["age", "thalach", "trestbps", "chol", "oldpeak"] + numeric_features = df[numeric_feature_names] + + numeric_features = tf.convert_to_tensor( + numeric_features.values, dtype=tf.float32 + ) + + dataset = tf.data.Dataset.from_tensor_slices((numeric_features, target)) + dataset = dataset.shuffle(SHUFFLE_BUFFER).batch(BATCH_SIZE) + + model = model_gen(numeric_features) + model.fit(dataset, epochs=1) + + test_data = dataset.take(1) + return model.predict(test_data) + + +def stack_dict(inputs, func=tf.stack): + values = [] + for key in sorted(inputs.keys()): + values.append(CastLayer()(inputs[key])) + + class MyLayer(tf.keras.layers.Layer): + def call(self, val): + return func(val, axis=-1) + + return MyLayer()(values) + + +def test_dataframe_as_dictionary_with_keras_input_layer(df, target): + # ensure deterministic results + tf.keras.utils.set_random_seed(42) + + numeric_feature_names = ["age", "thalach", "trestbps", "chol", "oldpeak"] + numeric_features = df[numeric_feature_names] + + inputs = {} + for name in numeric_features: + inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=tf.float32) + + x = stack_dict(inputs, func=tf.concat) + + normalizer = tf.keras.layers.Normalization(axis=-1) + normalizer.adapt(stack_dict(dict(numeric_features))) + + x = normalizer(x) + x = tf.keras.layers.Dense(10, activation="relu")(x) + x = tf.keras.layers.Dense(1)(x) + + model = tf.keras.Model(inputs, x) + + model.compile( + optimizer="adam", + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=["accuracy"], + run_eagerly=True, + ) + + # Train with dictionary of columns as input: + model.fit(dict(numeric_features), target, epochs=1, batch_size=BATCH_SIZE) + + # Train with a dataset of dictionary-elements + numeric_dict_ds = tf.data.Dataset.from_tensor_slices( + (dict(numeric_features), target) + ) + numeric_dict_batches = numeric_dict_ds.shuffle(SHUFFLE_BUFFER).batch( + BATCH_SIZE + ) + model.fit(numeric_dict_batches, epochs=1) + + # Predict + return model.predict(numeric_dict_batches.take(1)) + + +def test_full_example_train_with_ds(df, target): + # https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#full_example + # Inputs are converted to tf.dataset and then batched + + # ensure deterministic results + tf.keras.utils.set_random_seed(42) + + numeric_feature_names = ["age", "thalach", "trestbps", "chol", "oldpeak"] + binary_feature_names = ["sex", "fbs", "exang"] + categorical_feature_names = ["cp", "restecg", "slope", "thal", "ca"] + + numeric_features = df[numeric_feature_names] + + inputs = {} + for name, column in df.items(): + if isinstance(column[0], str): + dtype = tf.string + elif name in categorical_feature_names or name in binary_feature_names: + dtype = tf.int64 + else: + dtype = tf.float32 + + inputs[name] = tf.keras.Input(shape=(), name=name, dtype=dtype) + + preprocessed = [] + + # Process binary features + for name in binary_feature_names: + inp = inputs[name] + inp = inp[:, tf.newaxis] + float_value = CastLayer()(inp) + preprocessed.append(float_value) + + normalizer 
= tf.keras.layers.Normalization(axis=-1)
+    normalizer.adapt(stack_dict(dict(numeric_features)))
+
+    # Process numeric features
+    numeric_inputs = {}
+    for name in numeric_feature_names:
+        numeric_inputs[name] = inputs[name]
+
+    numeric_inputs = stack_dict(numeric_inputs)
+    numeric_normalized = normalizer(numeric_inputs)
+
+    preprocessed.append(numeric_normalized)
+
+    # Process categorical features
+    for name in categorical_feature_names:
+        vocab = sorted(set(df[name]))
+        print(f"name: {name}")
+        print(f"vocab: {vocab}\n")
+
+        if isinstance(vocab[0], str):
+            lookup = tf.keras.layers.StringLookup(
+                vocabulary=vocab, output_mode="one_hot"
+            )
+        else:
+            lookup = tf.keras.layers.IntegerLookup(
+                vocabulary=vocab, output_mode="one_hot"
+            )
+
+        x = inputs[name][:, tf.newaxis]
+        x = lookup(x)
+        preprocessed.append(x)
+
+    # Concatenate all tensors
+    preprocessed_result = MyConcatLayer()(preprocessed)
+
+    preprocessor = tf.keras.Model(inputs, preprocessed_result)
+
+    # Create the model
+    body = tf.keras.Sequential(
+        [
+            tf.keras.layers.Dense(10, activation="relu"),
+            tf.keras.layers.Dense(10, activation="relu"),
+            tf.keras.layers.Dense(1),
+        ]
+    )
+
+    x = preprocessor(inputs)
+    result = body(x)
+
+    model = tf.keras.Model(inputs, result)
+
+    model.compile(
+        optimizer="adam",
+        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+        metrics=["accuracy"],
+    )
+
+    ds = tf.data.Dataset.from_tensor_slices((dict(df), target))
+    ds = ds.batch(BATCH_SIZE)
+    model.fit(ds, epochs=1)
+
+    return model.predict(ds.take(1))
+
+
+class CastLayer(tf.keras.layers.Layer):
+    def __init__(self, **kwargs):
+        super(CastLayer, self).__init__(**kwargs)
+
+    def call(self, inp):
+        return tf.cast(inp, tf.float32)
+
+
+class MyConcatLayer(tf.keras.layers.Layer):
+    def call(self, values):
+        values = [tf.cast(v, tf.float32) for v in values]
+        return tf.concat(values, axis=-1)
+
+
+def test_full_example_train_with_df(df, target):
+    # https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#full_example
+    # Inputs are directly passed as dictionary of series
+
+    # ensure deterministic results
+    tf.keras.utils.set_random_seed(42)
+
+    numeric_feature_names = ["age", "thalach", "trestbps", "chol", "oldpeak"]
+    binary_feature_names = ["sex", "fbs", "exang"]
+    categorical_feature_names = ["cp", "restecg", "slope", "thal", "ca"]
+
+    numeric_features = df[numeric_feature_names]
+
+    inputs = {}
+
+    for name, column in df.items():
+        if isinstance(column[0], str):
+            dtype = tf.string
+        elif name in categorical_feature_names or name in binary_feature_names:
+            dtype = tf.int64
+        else:
+            dtype = tf.float32
+
+        inputs[name] = tf.keras.Input(shape=(), name=name, dtype=dtype)
+
+    preprocessed = []
+
+    # Process binary features
+    for name in binary_feature_names:
+        inp = inputs[name]
+        inp = inp[:, tf.newaxis]
+        float_value = CastLayer()(inp)
+        preprocessed.append(float_value)
+
+    normalizer = tf.keras.layers.Normalization(axis=-1)
+    normalizer.adapt(stack_dict(dict(numeric_features)))
+
+    # Process numeric features
+    numeric_inputs = {}
+    for name in numeric_feature_names:
+        numeric_inputs[name] = inputs[name]
+
+    numeric_inputs = stack_dict(numeric_inputs)
+    numeric_normalized = normalizer(numeric_inputs)
+
+    preprocessed.append(numeric_normalized)
+
+    # Process categorical features
+    for name in categorical_feature_names:
+        vocab = sorted(set(df[name]))
+        print(f"name: {name}")
+        print(f"vocab: {vocab}\n")
+
+        if isinstance(vocab[0], str):
+            lookup = tf.keras.layers.StringLookup(
+                vocabulary=vocab, output_mode="one_hot"
+            )
+        else:
+            lookup = tf.keras.layers.IntegerLookup(
+                vocabulary=vocab, output_mode="one_hot"
+            )
+
+        x = inputs[name][:, tf.newaxis]
+        x = lookup(x)
+        preprocessed.append(x)
+
+    # Concatenate all tensors
+    preprocessed_result = MyConcatLayer()(preprocessed)
+
+    preprocessor = tf.keras.Model(inputs, preprocessed_result)
+
+    # Create the model
+    body = tf.keras.Sequential(
+        [
+            tf.keras.layers.Dense(10, activation="relu"),
+            tf.keras.layers.Dense(10, activation="relu"),
+            tf.keras.layers.Dense(1),
+        ]
+    )
+
+    x = preprocessor(inputs)
+    result = body(x)
+
+    model = tf.keras.Model(inputs, result)
+
+    model.compile(
+        optimizer="adam",
+        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+        metrics=["accuracy"],
+    )
+
+    model.fit(dict(df), target, epochs=1, batch_size=BATCH_SIZE)
+
+    return model.predict(dict(df[:BATCH_SIZE]))
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py
new file mode 100644
index 00000000000..70f1e6a4250
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py
@@ -0,0 +1,135 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+import pytest
+import scipy.sparse
+import xgboost as xgb
+from sklearn.datasets import make_regression
+from xgboost.testing import IteratorForTest, make_categorical
+
+n_samples = 128
+n_features = 16
+
+
+def xgboost_assert_equal(expect, got, rtol: float = 1e-7, atol: float = 0.0):
+    if isinstance(expect, (tuple, list)):
+        assert len(expect) == len(got)
+        for e, g in zip(expect, got):
+            xgboost_assert_equal(e, g, rtol, atol)
+    elif isinstance(expect, scipy.sparse.csr_matrix):
+        np.testing.assert_allclose(expect.data, got.data, rtol=rtol, atol=atol)
+        np.testing.assert_equal(expect.indices, got.indices)
+        np.testing.assert_equal(expect.indptr, got.indptr)
+    else:
+        pd._testing.assert_almost_equal(expect, got, rtol=rtol, atol=atol)
+
+
+pytestmark = pytest.mark.assert_eq(fn=xgboost_assert_equal)
+
+
+@pytest.fixture
+def reg_data() -> tuple[np.ndarray, np.ndarray]:
+    X, y = make_regression(n_samples, n_features, random_state=11)
+    return X, y
+
+
+@pytest.fixture
+def reg_batches_data() -> tuple[list[pd.DataFrame], list[pd.Series]]:
+    cov = []
+    res = []
+    for i in range(3):
+        X, y = make_regression(n_samples, n_features, random_state=i + 1)
+        cov.append(pd.DataFrame(X))
+        res.append(pd.Series(y))
+    return cov, res
+
+
+def test_with_dmatrix(
+    reg_data: tuple[np.ndarray, np.ndarray],
+) -> tuple[scipy.sparse.csr_matrix, scipy.sparse.csr_matrix]:
+    """DMatrix is the primary interface for XGBoost."""
+    X, y = reg_data
+    X_df = pd.DataFrame(X)
+    y_ser = pd.Series(y)
+    Xy = xgb.DMatrix(X_df, y_ser)
+    assert Xy.feature_names == list(map(str, X_df.columns))
+    csr_0 = Xy.get_data()
+
+    Xc, yc = make_categorical(
+        n_samples, n_features, n_categories=13, onehot=False
+    )
+    Xy = xgb.DMatrix(Xc, yc, enable_categorical=True)
+    csr_1 = Xy.get_data()
+    return csr_0, csr_1
+
+
+def test_with_quantile_dmatrix(
+    reg_data: tuple[np.ndarray, np.ndarray],
+) -> tuple[scipy.sparse.csr_matrix, scipy.sparse.csr_matrix]:
+    """QuantileDMatrix is an optimization for the `hist` tree method for XGBoost."""
+    from xgboost.testing.data import memory
+
+    memory.clear(warn=False)
+
+    X, y = reg_data
+    X_df = pd.DataFrame(X)
+    y_ser = pd.Series(y)
+    Xy = xgb.QuantileDMatrix(X_df, y_ser)
+    assert Xy.feature_names == list(map(str, X_df.columns))
+    csr_0 = Xy.get_data()
+
+    Xc, yc = make_categorical(
+        n_samples, n_features, n_categories=13, onehot=False
+    )
+    Xy = xgb.QuantileDMatrix(Xc, yc, enable_categorical=True)
+    csr_1 = Xy.get_data()
+    return csr_0, csr_1
+
+
+def test_with_iter_quantile_dmatrix(
+    reg_batches_data: tuple[list[pd.DataFrame], list[pd.Series]],
+) -> scipy.sparse.csr_matrix:
+    """Using iterator to initialize QuantileDMatrix."""
+    cov, res = reg_batches_data
+    it = IteratorForTest(cov, res, w=None, cache=None)
+    Xy = xgb.QuantileDMatrix(it)
+    csr = Xy.get_data()
+    return csr
+
+
+@pytest.mark.parametrize("device", ["cpu", "cuda"])
+def test_with_external_memory(
+    device: str,
+    reg_batches_data: tuple[list[pd.DataFrame], list[pd.Series]],
+) -> np.ndarray:
+    """Test with iterator-based external memory."""
+    cov, res = reg_batches_data
+    it = IteratorForTest(cov, res, w=None, cache="cache")
+    Xy = xgb.DMatrix(it)
+    predt = xgb.train({"device": device}, Xy, num_boost_round=1).predict(Xy)
+    return predt
+
+
+@pytest.mark.parametrize("device", ["cpu", "cuda"])
+def test_predict(device: str) -> np.ndarray:
+    reg = xgb.XGBRegressor(n_estimators=2, device=device)
+    X, y = make_regression(n_samples, n_features, random_state=11)
+    X_df = pd.DataFrame(X)
+    reg.fit(X_df, y)
+    booster = reg.get_booster()
+
+    predt0 = reg.predict(X_df)
+
+    predt1 = booster.inplace_predict(X_df)
+    np.testing.assert_allclose(predt0, predt1)
+
+    predt2 = booster.predict(xgb.DMatrix(X_df))
+    np.testing.assert_allclose(predt0, predt2)
+
+    predt3 = booster.inplace_predict(X)
+    np.testing.assert_allclose(predt0, predt3)
+
+    return predt0
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index b2ddb06d8c9..17d1292980b 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "rapids_build_backend.build"
 requires = [
     "rapids-build-backend>=0.3.0,<0.4.0.dev0",
-    "scikit-build-core[pyproject]>=0.7.0",
+    "scikit-build-core[pyproject]>=0.10.0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project]
@@ -16,20 +16,22 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "cachetools",
     "cubinlinker",
     "cuda-python>=11.7.1,<12.0a0",
     "cupy-cuda11x>=12.0.0",
     "fsspec>=0.6.0",
+    "libcudf==24.10.*,>=0.0.0a0",
     "numba>=0.57",
-    "numpy>=1.23,<2.0a0",
+    "numpy>=1.23,<3.0a0",
     "nvtx>=0.2.1",
     "packaging",
     "pandas>=2.0,<2.2.3dev0",
     "ptxcompiler",
-    "pyarrow>=16.1.0,<16.2.0a0",
+    "pyarrow>=14.0.0,<18.0.0a0",
+    "pylibcudf==24.10.*,>=0.0.0a0",
     "rich",
     "rmm==24.10.*,>=0.0.0a0",
     "typing_extensions>=4.0.0",
@@ -40,7 +42,6 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
@@ -62,11 +63,15 @@ test = [
     "tzdata",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 pandas-tests = [
+    "ipython",
     "pandas[test, pyarrow, performance, computation, fss, excel, parquet, feather, hdf5, spss, html, xml, plot, output-formatting, clipboard, compression]",
     "pytest-reportlog",
 ] # This list was generated by `rapids-dependency-file-generator`.
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. cudf-pandas-tests = [ "ipython", + "jupyter_client", + "nbconvert", + "nbformat", "openpyxl", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. @@ -88,6 +93,7 @@ known_dask = [ ] known_rapids = [ "rmm", + "pylibcudf" ] known_first_party = [ "cudf", @@ -124,16 +130,18 @@ matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", + "libcudf==24.10.*,>=0.0.0a0", + "librmm==24.10.*,>=0.0.0a0", "ninja", - "numpy==1.23.*", - "pyarrow==16.1.0.*", + "pylibcudf==24.10.*,>=0.0.0a0", "rmm==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.scikit-build] build-dir = "build/{wheel_tag}" cmake.build-type = "Release" -cmake.minimum-version = "3.26.4" +cmake.version = "CMakeLists.txt" +minimum-version = "build-system.requires" ninja.make-fallback = true sdist.exclude = ["*tests*"] sdist.reproducible = true diff --git a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt index 4f3b9220a4f..4490c41c7a9 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt +++ b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -20,5 +20,3 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ) -include(../../../cudf/cmake/Modules/LinkPyarrowHeaders.cmake) -link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd index 2de0bf39785..e65b0d233b9 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd @@ -6,9 +6,8 @@ from libcpp.map cimport map from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.io.datasource cimport Datasource -from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource +from pylibcudf.io.datasource cimport Datasource +from pylibcudf.libcudf.io.datasource cimport datasource cdef extern from "cudf_kafka/kafka_callback.hpp" \ diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index 2927dc0aa9a..20aa43b0134 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -6,8 +6,7 @@ from libcpp.map cimport map from libcpp.memory cimport make_unique, unique_ptr from libcpp.string cimport string from libcpp.utility cimport move - -from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource +from pylibcudf.libcudf.io.datasource cimport datasource from cudf_kafka._lib.kafka cimport kafka_consumer diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index a9b60133f42..6ca798bb11c 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "rapids_build_backend.build" 
requires = [ "rapids-build-backend>=0.3.0,<0.4.0.dev0", - "scikit-build-core[pyproject]>=0.7.0", + "scikit-build-core[pyproject]>=0.10.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project] @@ -16,7 +16,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "cudf==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. @@ -86,7 +86,8 @@ filterwarnings = [ [tool.scikit-build] build-dir = "build/{wheel_tag}" cmake.build-type = "Release" -cmake.minimum-version = "3.26.4" +cmake.version = "CMakeLists.txt" +minimum-version = "build-system.requires" ninja.make-fallback = true sdist.exclude = ["*tests*"] sdist.reproducible = true @@ -105,6 +106,4 @@ requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", "ninja", - "numpy==1.23.*", - "pyarrow==16.1.0.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py index 02018548b2c..dd3b771e305 100644 --- a/python/cudf_polars/cudf_polars/containers/column.py +++ b/python/cudf_polars/cudf_polars/containers/column.py @@ -8,7 +8,7 @@ import functools from typing import TYPE_CHECKING -import cudf._lib.pylibcudf as plc +import pylibcudf as plc if TYPE_CHECKING: from typing_extensions import Self diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index dba76855329..a5c99e2bc11 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -10,11 +10,10 @@ from typing import TYPE_CHECKING, cast import pyarrow as pa +import pylibcudf as plc import polars as pl -import cudf._lib.pylibcudf as plc - from cudf_polars.containers.column import NamedColumn from cudf_polars.utils import dtypes @@ -106,7 +105,9 @@ def from_polars(cls, df: pl.DataFrame) -> Self: return cls( [ NamedColumn(column, h_col.name).copy_metadata(h_col) - for column, h_col in zip(d_table.columns(), df.iter_columns()) + for column, h_col in zip( + d_table.columns(), df.iter_columns(), strict=True + ) ] ) @@ -135,8 +136,10 @@ def from_table(cls, table: plc.Table, names: Sequence[str]) -> Self: if table.num_columns() != len(names): raise ValueError("Mismatching name and table length.") return cls( - # TODO: strict=True when we drop py39 - [NamedColumn(c, name) for c, name in zip(table.columns(), names)] + [ + NamedColumn(c, name) + for c, name in zip(table.columns(), names, strict=True) + ] ) def sorted_like( @@ -166,8 +169,7 @@ def sorted_like( subset = self.column_names_set if subset is None else subset self.columns = [ c.sorted_like(other) if c.name in subset else c - # TODO: strict=True when we drop py39 - for c, other in zip(self.columns, like.columns) + for c, other in zip(self.columns, like.columns, strict=True) ] return self diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index 9e0fca3f52f..e1b4d30b76b 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -21,11 +21,10 @@ from typing import 
TYPE_CHECKING, Any, ClassVar, NamedTuple import pyarrow as pa +import pylibcudf as plc from polars.polars import _expr_nodes as pl_expr -import cudf._lib.pylibcudf as plc - from cudf_polars.containers import Column, NamedColumn from cudf_polars.utils import dtypes, sorting diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 7f62dff4389..e334e6f5cc5 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -18,21 +18,20 @@ import types from functools import cache from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar import pyarrow as pa +import pylibcudf as plc from typing_extensions import assert_never import polars as pl -import cudf._lib.pylibcudf as plc - import cudf_polars.dsl.expr as expr from cudf_polars.containers import DataFrame, NamedColumn from cudf_polars.utils import sorting if TYPE_CHECKING: - from collections.abc import MutableMapping + from collections.abc import Callable, MutableMapping from typing import Literal from cudf_polars.typing import Schema @@ -311,7 +310,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: *( (piece.tbl, piece.column_names(include_children=False)) for piece in pieces - ) + ), + strict=True, ) df = DataFrame.from_table( plc.concatenate.concatenate(list(tables)), @@ -321,7 +321,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: tbl_w_meta = plc.io.parquet.read_parquet( plc.io.SourceInfo(self.paths), columns=with_columns, - num_rows=nrows, + nrows=nrows, ) df = DataFrame.from_table( tbl_w_meta.tbl, @@ -427,7 +427,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: pdf = pdf.select(self.projection) df = DataFrame.from_polars(pdf) assert all( - c.obj.type() == dtype for c, dtype in zip(df.columns, self.schema.values()) + c.obj.type() == dtype + for c, dtype in zip(df.columns, self.schema.values(), strict=True) ) if self.predicate is not None: (mask,) = broadcast(self.predicate.evaluate(df), target_length=df.num_rows) @@ -601,9 +602,10 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: for i, table in enumerate(raw_tables): (column,) = table.columns() raw_columns.append(NamedColumn(column, f"tmp{i}")) - mapping = dict(zip(replacements, raw_columns)) + mapping = dict(zip(replacements, raw_columns, strict=True)) result_keys = [ - NamedColumn(gk, k.name) for gk, k in zip(group_keys.columns(), keys) + NamedColumn(gk, k.name) + for gk, k in zip(group_keys.columns(), keys, strict=True) ] result_subs = DataFrame(raw_columns) results = [ @@ -753,7 +755,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: columns = plc.join.cross_join(left.table, right.table).columns() left_cols = [ NamedColumn(new, old.name).sorted_like(old) - for new, old in zip(columns[: left.num_columns], left.columns) + for new, old in zip( + columns[: left.num_columns], left.columns, strict=True + ) ] right_cols = [ NamedColumn( @@ -762,7 +766,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: if old.name not in left.column_names_set else f"{old.name}{suffix}", ) - for new, old in zip(columns[left.num_columns :], right.columns) + for new, old in zip( + columns[left.num_columns :], right.columns, strict=True + ) ] return DataFrame([*left_cols, *right_cols]) # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184 @@ 
-804,6 +810,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: for left_col, right_col in zip( left.select_columns(left_on.column_names_set), right.select_columns(right_on.column_names_set), + strict=True, ) ) ) @@ -910,7 +917,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: result = DataFrame( [ NamedColumn(c, old.name).sorted_like(old) - for c, old in zip(table.columns(), df.columns) + for c, old in zip(table.columns(), df.columns, strict=True) ] ) if keys_sorted or self.stable: @@ -975,7 +982,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: self.null_order, ) columns = [ - NamedColumn(c, old.name) for c, old in zip(table.columns(), df.columns) + NamedColumn(c, old.name) + for c, old in zip(table.columns(), df.columns, strict=True) ] # If a sort key is in the result table, set the sortedness property for k, i in enumerate(keys_in_result): @@ -1090,7 +1098,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: # final tag is "swapping" which is useful for the # optimiser (it blocks some pushdown operations) old, new, _ = self.options - return df.rename_columns(dict(zip(old, new))) + return df.rename_columns(dict(zip(old, new, strict=True))) elif self.name == "explode": df = self.df.evaluate(cache=cache) ((to_explode,),) = self.options diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index dec45679c75..6dc97c7cb51 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -11,14 +11,13 @@ from typing import Any import pyarrow as pa +import pylibcudf as plc from typing_extensions import assert_never import polars as pl import polars.polars as plrs from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir -import cudf._lib.pylibcudf as plc - from cudf_polars.dsl import expr, ir from cudf_polars.typing import NodeTraverser from cudf_polars.utils import dtypes diff --git a/python/cudf_polars/cudf_polars/typing/__init__.py b/python/cudf_polars/cudf_polars/typing/__init__.py index c04eac41bb7..adab10bdded 100644 --- a/python/cudf_polars/cudf_polars/typing/__init__.py +++ b/python/cudf_polars/cudf_polars/typing/__init__.py @@ -8,14 +8,13 @@ from collections.abc import Mapping from typing import TYPE_CHECKING, Literal, Protocol, Union -from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir +import pylibcudf as plc -import cudf._lib.pylibcudf as plc +from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir if TYPE_CHECKING: - from typing import Callable - - from typing_extensions import TypeAlias + from collections.abc import Callable + from typing import TypeAlias import polars as pl diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py index cd68d021286..7f6ea1edfd9 100644 --- a/python/cudf_polars/cudf_polars/utils/dtypes.py +++ b/python/cudf_polars/cudf_polars/utils/dtypes.py @@ -8,12 +8,11 @@ from functools import cache import pyarrow as pa +import pylibcudf as plc from typing_extensions import assert_never import polars as pl -import cudf._lib.pylibcudf as plc - __all__ = ["from_polars", "downcast_arrow_lists"] diff --git a/python/cudf_polars/cudf_polars/utils/sorting.py b/python/cudf_polars/cudf_polars/utils/sorting.py index 57f94c4ec4c..6ce216cbf8f 100644 --- a/python/cudf_polars/cudf_polars/utils/sorting.py +++ b/python/cudf_polars/cudf_polars/utils/sorting.py @@ -7,7 
+7,7 @@ from typing import TYPE_CHECKING -import cudf._lib.pylibcudf as plc +import pylibcudf as plc if TYPE_CHECKING: from collections.abc import Sequence @@ -45,7 +45,7 @@ def sort_order( null_precedence = [] if len(descending) != len(nulls_last) or len(descending) != num_keys: raise ValueError("Mismatching length of arguments in sort_order") - for asc, null_last in zip(column_order, nulls_last): + for asc, null_last in zip(column_order, nulls_last, strict=True): if (asc == plc.types.Order.ASCENDING) ^ (not null_last): null_precedence.append(plc.types.NullOrder.AFTER) elif (asc == plc.types.Order.ASCENDING) ^ null_last: diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index def1d086cc1..f2bab9e6623 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -17,10 +17,10 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ - "cudf==24.8.*,>=0.0.0a0", - "polars>=1.0", + "polars>=1.0,<1.3", + "pylibcudf==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -28,7 +28,6 @@ classifiers = [ "Topic :: Scientific/Engineering", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] @@ -62,7 +61,7 @@ exclude_also = [ [tool.ruff] line-length = 88 indent-width = 4 -target-version = "py39" +target-version = "py310" fix = true [tool.ruff.lint] @@ -115,6 +114,8 @@ ignore = [ "TD003", # Missing issue link on the line following this TODO # tryceratops "TRY003", # Avoid specifying long messages outside the exception class + # pyupgrade + "UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` # Lints below are turned off because of conflicts with the ruff # formatter # See https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules @@ -137,6 +138,10 @@ fixable = ["ALL"] [tool.ruff.lint.per-file-ignores] "**/tests/**/*.py" = ["D"] +"**/cudf_polars/typing/__init__.py" = [ + # pyupgrade + "UP007", # Use `X | Y` for type annotations +] [tool.ruff.lint.flake8-pytest-style] # https://docs.astral.sh/ruff/settings/#lintflake8-pytest-style diff --git a/python/cudf_polars/tests/containers/test_column.py b/python/cudf_polars/tests/containers/test_column.py index 4f3c0de5975..19919877f84 100644 --- a/python/cudf_polars/tests/containers/test_column.py +++ b/python/cudf_polars/tests/containers/test_column.py @@ -6,10 +6,9 @@ from functools import partial import pyarrow +import pylibcudf as plc import pytest -import cudf._lib.pylibcudf as plc - from cudf_polars.containers import Column, NamedColumn diff --git a/python/cudf_polars/tests/containers/test_dataframe.py b/python/cudf_polars/tests/containers/test_dataframe.py index 87508e17407..6b470268084 100644 --- a/python/cudf_polars/tests/containers/test_dataframe.py +++ b/python/cudf_polars/tests/containers/test_dataframe.py @@ -3,12 +3,11 @@ from __future__ import annotations +import pylibcudf as plc import pytest import polars as pl -import cudf._lib.pylibcudf as plc - from cudf_polars.containers import DataFrame, NamedColumn diff --git a/python/cudf_polars/tests/dsl/test_expr.py b/python/cudf_polars/tests/dsl/test_expr.py index ddc3ca66d86..b7d4672daca 100644 
--- a/python/cudf_polars/tests/dsl/test_expr.py +++ b/python/cudf_polars/tests/dsl/test_expr.py @@ -3,10 +3,9 @@ from __future__ import annotations +import pylibcudf as plc import pytest -import cudf._lib.pylibcudf as plc - from cudf_polars.dsl import expr diff --git a/python/cudf_polars/tests/expressions/test_literal.py b/python/cudf_polars/tests/expressions/test_literal.py index 5bd3131d1d7..ced49bdc254 100644 --- a/python/cudf_polars/tests/expressions/test_literal.py +++ b/python/cudf_polars/tests/expressions/test_literal.py @@ -2,12 +2,11 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import pylibcudf as plc import pytest import polars as pl -import cudf._lib.pylibcudf as plc - from cudf_polars.testing.asserts import ( assert_gpu_result_equal, assert_ir_translation_raises, diff --git a/python/cudf_polars/tests/expressions/test_sort.py b/python/cudf_polars/tests/expressions/test_sort.py index d46df92db94..76c7648813a 100644 --- a/python/cudf_polars/tests/expressions/test_sort.py +++ b/python/cudf_polars/tests/expressions/test_sort.py @@ -4,12 +4,11 @@ import itertools +import pylibcudf as plc import pytest import polars as pl -import cudf._lib.pylibcudf as plc - from cudf_polars import translate_ir from cudf_polars.testing.asserts import assert_gpu_result_equal diff --git a/python/cudf_polars/tests/utils/test_broadcast.py b/python/cudf_polars/tests/utils/test_broadcast.py index 69ad1e519e2..35aaef44e1f 100644 --- a/python/cudf_polars/tests/utils/test_broadcast.py +++ b/python/cudf_polars/tests/utils/test_broadcast.py @@ -3,10 +3,9 @@ from __future__ import annotations +import pylibcudf as plc import pytest -import cudf._lib.pylibcudf as plc - from cudf_polars.containers import NamedColumn from cudf_polars.dsl.ir import broadcast diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index d6b88167262..be5331236a5 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -17,7 +17,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "confluent-kafka>=1.9.0,<1.10.0a0", "cudf==24.10.*,>=0.0.0a0", @@ -31,7 +31,6 @@ classifiers = [ "Topic :: Apache Kafka", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] diff --git a/python/dask_cudf/README.md b/python/dask_cudf/README.md deleted file mode 120000 index fe840054137..00000000000 --- a/python/dask_cudf/README.md +++ /dev/null @@ -1 +0,0 @@ -../../README.md \ No newline at end of file diff --git a/python/dask_cudf/README.md b/python/dask_cudf/README.md new file mode 100644 index 00000000000..6edb9f87d48 --- /dev/null +++ b/python/dask_cudf/README.md @@ -0,0 +1,135 @@ +#
 Dask cuDF - A GPU Backend for Dask DataFrame
+
+Dask cuDF (a.k.a. dask-cudf or `dask_cudf`) is an extension library for [Dask DataFrame](https://docs.dask.org/en/stable/dataframe.html). When installed, Dask cuDF is automatically registered as the `"cudf"` [dataframe backend](https://docs.dask.org/en/stable/how-to/selecting-the-collection-backend.html) for Dask DataFrame.
+
+## Using Dask cuDF
+
+### The Dask DataFrame API (Recommended)
+
+Simply set the `"dataframe.backend"` [configuration](https://docs.dask.org/en/stable/configuration.html) to `"cudf"` in Dask, and the public Dask DataFrame API will leverage `cudf` automatically:
+
+```python
+import dask
+dask.config.set({"dataframe.backend": "cudf"})
+
+import dask.dataframe as dd
+# This gives us a cuDF-backed dataframe
+df = dd.read_parquet("data.parquet", ...)
+```
+
+> [!IMPORTANT]
+> The `"dataframe.backend"` configuration will only be used for collection creation when the following APIs are used: `read_parquet`, `read_json`, `read_csv`, `read_orc`, `read_hdf`, and `from_dict`. For example, if `from_map`, `from_pandas`, `from_delayed`, or `from_array` are used, the backend of the new collection will depend on the input to the function:
+
+```python
+import pandas as pd
+import cudf
+
+# This gives us a Pandas-backed dataframe
+dd.from_pandas(pd.DataFrame({"a": range(10)}))
+
+# This gives us a cuDF-backed dataframe
+dd.from_pandas(cudf.DataFrame({"a": range(10)}))
+```
+
+A cuDF-backed DataFrame collection can be moved to the `"pandas"` backend:
+
+```python
+df = df.to_backend("pandas")
+```
+
+Similarly, a Pandas-backed DataFrame collection can be moved to the `"cudf"` backend:
+
+```python
+df = df.to_backend("cudf")
+```
+
+### The Explicit Dask cuDF API
+
+In addition to providing the `"cudf"` backend for Dask DataFrame, Dask cuDF also provides an explicit `dask_cudf` API:
+
+```python
+import dask_cudf
+
+# This always gives us a cuDF-backed dataframe
+df = dask_cudf.read_parquet("data.parquet", ...)
+```
+
+> [!NOTE]
+> This API is used implicitly by the Dask DataFrame API when the `"cudf"` backend is enabled. Therefore, using it directly will not provide any performance benefit over the CPU/GPU-portable `dask.dataframe` API. Also, some parts of the explicit API are incompatible with automatic query planning (see the next section).
+
+See [Dask cuDF's API documentation](https://docs.rapids.ai/api/dask-cudf/stable/) for further information.
+
+## Query Planning
+
+Dask cuDF now provides automatic query planning by default (RAPIDS 24.06+). As long as the `"dataframe.query-planning"` configuration is set to `True` (the default) when `dask.dataframe` is first imported, [Dask Expressions](https://github.com/dask/dask-expr) will be used under the hood.
+
+For example, the following user code will automatically benefit from projection pushdown when the result is computed.
+
+```python
+df = dd.read_parquet("/my/parquet/dataset/")
+result = df.sort_values('B')['A']
+```
+
+Unoptimized expression graph (`df.pprint()`):
+```
+Projection: columns='A'
+  SortValues: by=['B'] shuffle_method='tasks' options={}
+    ReadParquetFSSpec: path='/my/parquet/dataset/' ...
+```
+
+Simplified expression graph (`df.simplify().pprint()`):
+```
+Projection: columns='A'
+  SortValues: by=['B'] shuffle_method='tasks' options={}
+    ReadParquetFSSpec: path='/my/parquet/dataset/' columns=['A', 'B'] ...
+```
+
+> [!NOTE]
+> Dask will automatically simplify the expression graph (within `optimize`) when the result is converted to a task graph (via `compute` or `persist`).
+> Users do not need to call `simplify` themselves.
+
+
+## Using Multiple GPUs and Multiple Nodes
+
+Whenever possible, Dask cuDF (i.e. Dask DataFrame) will automatically try to partition your data into small-enough tasks to fit comfortably in the memory of a single GPU. This means the compute tasks needed to answer a query can often be streamed to a single GPU process for out-of-core computing. It also means that those tasks can be executed in parallel over a multi-GPU cluster.
+
+> [!IMPORTANT]
+> Neither Dask cuDF nor Dask DataFrame provides support for multi-GPU or multi-node execution on its own. You must deploy a distributed cluster (ideally with [Dask CUDA](https://docs.rapids.ai/api/dask-cuda/stable/)) to leverage multiple GPUs.
+
+In order to execute your Dask workflow on multiple GPUs, you will typically need to use [Dask CUDA](https://docs.rapids.ai/api/dask-cuda/stable/) to deploy a distributed Dask cluster, and [Distributed](https://distributed.dask.org/en/stable/client.html) to define a `client` object. For example:
+
+```python
+import dask.dataframe as dd
+from dask_cuda import LocalCUDACluster
+from distributed import Client
+
+client = Client(
+    LocalCUDACluster(
+        CUDA_VISIBLE_DEVICES="0,1",  # Use two workers (on devices 0 and 1)
+        rmm_pool_size=0.9,  # Use 90% of GPU memory as a pool for faster allocations
+        enable_cudf_spill=True,  # Improve device memory stability
+        local_directory="/fast/scratch/",  # Use fast local storage for spilling
+    )
+)
+
+df = dd.read_parquet("/my/parquet/dataset/")
+agg = df.groupby('B').sum()
+agg.compute()  # This will use the cluster defined above
+```
+
+> [!NOTE]
+> This example uses `compute` to materialize a concrete `cudf.DataFrame` object in local memory. Never call `compute` on a large collection that cannot fit comfortably in the memory of a single GPU! See Dask's [documentation on managing computation](https://distributed.dask.org/en/stable/manage-computation.html) for more details.
+
+Please see the [Dask CUDA](https://docs.rapids.ai/api/dask-cuda/stable/) documentation for more information about deploying GPU-aware clusters (including [best practices](https://docs.rapids.ai/api/dask-cuda/stable/examples/best-practices/)).
+
+## Install
+
+See the [RAPIDS install page](https://docs.rapids.ai/install) for the most up-to-date information and commands for installing Dask cuDF and other RAPIDS packages.
+
+## Resources
+
+- [Dask cuDF API documentation](https://docs.rapids.ai/api/dask-cudf/stable/)
+- [cuDF API documentation](https://docs.rapids.ai/api/cudf/stable/)
+- [10 Minutes to cuDF and Dask cuDF](https://docs.rapids.ai/api/cudf/stable/user_guide/10min/)
+- [Dask CUDA documentation](https://docs.rapids.ai/api/dask-cuda/stable/)
+- [Deployment](https://docs.rapids.ai/deployment/stable/)
+- [RAPIDS Community](https://rapids.ai/learn-more/#get-involved): Get help, contribute, and collaborate.
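+
+As a quick sanity check that the `"cudf"` backend is active, you can inspect the metadata of a new collection. This is a minimal sketch that relies on the internal `_meta` attribute, which is not a stable public API:
+
+```python
+import dask
+
+dask.config.set({"dataframe.backend": "cudf"})
+
+import dask.dataframe as dd
+
+# With the "cudf" backend active, the collection metadata is a cudf.DataFrame
+df = dd.from_dict({"a": range(10)}, npartitions=2)
+print(type(df._meta))  # <class 'cudf.core.dataframe.DataFrame'>
+```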
diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 4bdb5d921ec..9347ebba5de 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -55,37 +55,34 @@ @meta_nonempty.register(cudf.BaseIndex) @_dask_cudf_performance_tracking def _nonempty_index(idx): - if isinstance(idx, cudf.core.index.RangeIndex): - return cudf.core.index.RangeIndex(2, name=idx.name) - elif isinstance(idx, cudf.core.index.DatetimeIndex): - start = "1970-01-01" - data = np.array([start, "1970-01-02"], dtype=idx.dtype) + """Return a non-empty cudf.Index as metadata.""" + # TODO: IntervalIndex, TimedeltaIndex? + if isinstance(idx, cudf.RangeIndex): + return cudf.RangeIndex(2, name=idx.name) + elif isinstance(idx, cudf.DatetimeIndex): + data = np.array(["1970-01-01", "1970-01-02"], dtype=idx.dtype) values = cudf.core.column.as_column(data) - return cudf.core.index.DatetimeIndex(values, name=idx.name) - elif isinstance(idx, cudf.core.index.CategoricalIndex): - key = tuple(idx._data.keys()) - assert len(key) == 1 - categories = idx._data[key[0]].categories - codes = [0, 0] - ordered = idx._data[key[0]].ordered - values = cudf.core.column.build_categorical_column( - categories=categories, codes=codes, ordered=ordered + return cudf.DatetimeIndex(values, name=idx.name) + elif isinstance(idx, cudf.CategoricalIndex): + values = cudf.core.column.CategoricalColumn( + data=None, + size=None, + dtype=idx.dtype, + children=(cudf.core.column.as_column([0, 0], dtype=np.uint8),), ) - return cudf.core.index.CategoricalIndex(values, name=idx.name) - elif isinstance(idx, cudf.core.multiindex.MultiIndex): + return cudf.CategoricalIndex(values, name=idx.name) + elif isinstance(idx, cudf.MultiIndex): levels = [meta_nonempty(lev) for lev in idx.levels] - codes = [[0, 0] for i in idx.levels] - return cudf.core.multiindex.MultiIndex( - levels=levels, codes=codes, names=idx.names - ) - elif isinstance(idx._column, cudf.core.column.StringColumn): + codes = [[0, 0]] * idx.nlevels + return cudf.MultiIndex(levels=levels, codes=codes, names=idx.names) + elif is_string_dtype(idx.dtype): return cudf.Index(["cat", "dog"], name=idx.name) - elif isinstance(idx, cudf.core.index.Index): - return cudf.core.index.Index( - np.arange(2, dtype=idx.dtype), name=idx.name - ) + elif isinstance(idx, cudf.Index): + return cudf.Index(np.arange(2, dtype=idx.dtype), name=idx.name) - raise TypeError(f"Don't know how to handle index of type {type(idx)}") + raise TypeError( + f"Don't know how to handle index of type {type(idx).__name__}" + ) def _nest_list_data(data, leaf_type): @@ -101,49 +98,55 @@ def _nest_list_data(data, leaf_type): @_dask_cudf_performance_tracking -def _get_non_empty_data(s): - if isinstance(s, cudf.core.column.CategoricalColumn): +def _get_non_empty_data( + s: cudf.core.column.ColumnBase, +) -> cudf.core.column.ColumnBase: + """Return a non-empty column as metadata from a column.""" + if isinstance(s.dtype, cudf.CategoricalDtype): categories = ( - s.categories if len(s.categories) else [UNKNOWN_CATEGORIES] + s.categories if len(s.categories) else [UNKNOWN_CATEGORIES] # type: ignore[attr-defined] ) codes = cudf.core.column.as_column( 0, - dtype=cudf._lib.types.size_type_dtype, + dtype=np.uint8, length=2, ) - ordered = s.ordered - data = cudf.core.column.build_categorical_column( - categories=categories, codes=codes, ordered=ordered + return cudf.core.column.CategoricalColumn( + data=None, + size=codes.size, + dtype=cudf.CategoricalDtype( + categories=categories, 
ordered=s.dtype.ordered + ), + children=(codes,), # type: ignore[arg-type] ) - elif isinstance(s, cudf.core.column.ListColumn): + elif isinstance(s.dtype, cudf.ListDtype): leaf_type = s.dtype.leaf_type if is_string_dtype(leaf_type): data = ["cat", "dog"] else: data = np.array([0, 1], dtype=leaf_type).tolist() data = _nest_list_data(data, s.dtype) * 2 - data = cudf.core.column.as_column(data, dtype=s.dtype) - elif isinstance(s, cudf.core.column.StructColumn): + return cudf.core.column.as_column(data, dtype=s.dtype) + elif isinstance(s.dtype, cudf.StructDtype): + # Handles IntervalColumn struct_dtype = s.dtype - data = [{key: None for key in struct_dtype.fields.keys()}] * 2 - data = cudf.core.column.as_column(data, dtype=s.dtype) + struct_data = [{key: None for key in struct_dtype.fields.keys()}] * 2 + return cudf.core.column.as_column(struct_data, dtype=s.dtype) elif is_string_dtype(s.dtype): - data = pa.array(["cat", "dog"]) + return cudf.core.column.as_column(pa.array(["cat", "dog"])) elif isinstance(s.dtype, pd.DatetimeTZDtype): - from cudf.utils.dtypes import get_time_unit - - data = cudf.date_range("2001-01-01", periods=2, freq=get_time_unit(s)) - data = data.tz_localize(str(s.dtype.tz))._column + date_data = cudf.date_range("2001-01-01", periods=2, freq=s.time_unit) # type: ignore[attr-defined] + return date_data.tz_localize(str(s.dtype.tz))._column + elif s.dtype.kind in "fiubmM": + return cudf.core.column.as_column( + np.arange(start=0, stop=2, dtype=s.dtype) + ) + elif isinstance(s.dtype, cudf.core.dtypes.DecimalDtype): + return cudf.core.column.as_column(range(2), dtype=s.dtype) else: - if pd.api.types.is_numeric_dtype(s.dtype): - data = cudf.core.column.as_column( - cp.arange(start=0, stop=2, dtype=s.dtype) - ) - else: - data = cudf.core.column.as_column( - cp.arange(start=0, stop=2, dtype="int64") - ).astype(s.dtype) - return data + raise TypeError( + f"Don't know how to handle column of type {type(s).__name__}" + ) @meta_nonempty.register(cudf.Series) @@ -153,7 +156,7 @@ def _nonempty_series(s, idx=None): idx = _nonempty_index(s.index) data = _get_non_empty_data(s._column) - return cudf.Series(data, name=s.name, index=idx) + return cudf.Series._from_column(data, name=s.name, index=idx) @meta_nonempty.register(cudf.DataFrame) @@ -161,24 +164,25 @@ def _nonempty_series(s, idx=None): def meta_nonempty_cudf(x): idx = meta_nonempty(x.index) columns_with_dtype = dict() - res = cudf.DataFrame(index=idx) - for col in x._data.names: - dtype = str(x._data[col].dtype) - if dtype in ("list", "struct", "category"): + res = {} + for col_label, col in x._data.items(): + dtype = col.dtype + if isinstance( + dtype, + (cudf.ListDtype, cudf.StructDtype, cudf.CategoricalDtype), + ): # 1. Not possible to hash and store list & struct types # as they can contain different levels of nesting or # fields. - # 2. Not possible to has `category` types as + # 2. Not possible to hash `category` types as # they often contain an underlying types to them. 
- res._data[col] = _get_non_empty_data(x._data[col]) + res[col_label] = _get_non_empty_data(col) else: if dtype not in columns_with_dtype: - columns_with_dtype[dtype] = cudf.core.column.as_column( - _get_non_empty_data(x._data[col]) - ) - res._data[col] = columns_with_dtype[dtype] + columns_with_dtype[dtype] = _get_non_empty_data(col) + res[col_label] = columns_with_dtype[dtype] - return res + return cudf.DataFrame._from_data(res, index=idx) @make_meta_dispatch.register((cudf.Series, cudf.DataFrame)) @@ -196,9 +200,7 @@ def make_meta_cudf_index(x, index=None): @_dask_cudf_performance_tracking def _empty_series(name, dtype, index=None): if isinstance(dtype, str) and dtype == "category": - return cudf.Series( - [UNKNOWN_CATEGORIES], dtype=dtype, name=name, index=index - ).iloc[:0] + dtype = cudf.CategoricalDtype(categories=[UNKNOWN_CATEGORIES]) return cudf.Series([], dtype=dtype, name=name, index=index) @@ -336,7 +338,7 @@ def percentile_cudf(a, q, interpolation="linear"): if isinstance(q, Iterator): q = list(q) - if cudf.api.types._is_categorical_dtype(a.dtype): + if isinstance(a.dtype, cudf.CategoricalDtype): result = cp.percentile(a.cat.codes, q, interpolation=interpolation) return ( @@ -345,7 +347,7 @@ def percentile_cudf(a, q, interpolation="linear"): ), n, ) - if np.issubdtype(a.dtype, np.datetime64): + if a.dtype.kind == "M": result = a.quantile( [i / 100.0 for i in q], interpolation=interpolation ) @@ -424,7 +426,7 @@ def hash_object_cudf_index(ind, index=None): return ind.to_frame(index=False).hash_values() col = cudf.core.column.as_column(ind) - return cudf.Series(col).hash_values() + return cudf.Series._from_column(col).hash_values() @group_split_dispatch.register((cudf.Series, cudf.DataFrame)) @@ -505,6 +507,25 @@ def _unsupported_kwargs(old, new, kwargs): ) +def _raise_unsupported_parquet_kwargs( + open_file_options=None, filesystem=None, **kwargs +): + import fsspec + + if open_file_options is not None: + raise ValueError( + "The open_file_options argument is no longer supported " + "by the 'cudf' backend." + ) + + if filesystem not in ("fsspec", None) and not isinstance( + filesystem, fsspec.AbstractFileSystem + ): + raise ValueError( + f"filesystem={filesystem} is not supported by the 'cudf' backend." + ) + + # Register cudf->pandas to_pandas_dispatch = PandasBackendEntrypoint.to_backend_dispatch() @@ -525,6 +546,12 @@ def to_cudf_dispatch_from_pandas(data, nan_as_null=None, **kwargs): return cudf.from_pandas(data, nan_as_null=nan_as_null) +@to_cudf_dispatch.register((cudf.DataFrame, cudf.Series, cudf.Index)) +def to_cudf_dispatch_from_cudf(data, **kwargs): + _unsupported_kwargs("cudf", "cudf", kwargs) + return data + + # Define "cudf" backend engine to be registered with Dask class CudfBackendEntrypoint(DataFrameBackendEntrypoint): """Backend-entrypoint class for Dask-DataFrame @@ -580,6 +607,7 @@ def from_dict( def read_parquet(*args, engine=None, **kwargs): from dask_cudf.io.parquet import CudfEngine + _raise_unsupported_parquet_kwargs(**kwargs) return _default_backend( dd.read_parquet, *args, @@ -630,20 +658,20 @@ class CudfDXBackendEntrypoint(DataFrameBackendEntrypoint): Examples -------- >>> import dask - >>> import dask_expr + >>> import dask_expr as dx >>> with dask.config.set({"dataframe.backend": "cudf"}): ... 
ddf = dx.from_dict({"a": range(10)})
     >>> type(ddf._meta)
     <class 'cudf.core.dataframe.DataFrame'>
     """
 
-    @classmethod
-    def to_backend_dispatch(cls):
-        return CudfBackendEntrypoint.to_backend_dispatch()
+    @staticmethod
+    def to_backend(data, **kwargs):
+        import dask_expr as dx
 
-    @classmethod
-    def to_backend(cls, *args, **kwargs):
-        return CudfBackendEntrypoint.to_backend(*args, **kwargs)
+        from dask_cudf.expr._expr import ToCudfBackend
+
+        return dx.new_collection(ToCudfBackend(data, kwargs))
 
     @staticmethod
     def from_dict(
@@ -666,6 +694,42 @@ def from_dict(
             constructor=constructor,
         )
 
+    @staticmethod
+    def read_parquet(*args, engine=None, **kwargs):
+        import dask_expr as dx
+
+        from dask_cudf.io.parquet import CudfEngine
+
+        _raise_unsupported_parquet_kwargs(**kwargs)
+        return _default_backend(
+            dx.read_parquet, *args, engine=CudfEngine, **kwargs
+        )
+
+    @staticmethod
+    def read_csv(
+        path,
+        *args,
+        header="infer",
+        dtype_backend=None,
+        storage_options=None,
+        **kwargs,
+    ):
+        import dask_expr as dx
+        from fsspec.utils import stringify_path
+
+        if not isinstance(path, str):
+            path = stringify_path(path)
+        return dx.new_collection(
+            dx.io.csv.ReadCSV(
+                path,
+                dtype_backend=dtype_backend,
+                storage_options=storage_options,
+                kwargs=kwargs,
+                header=header,
+                dataframe_backend="cudf",
+            )
+        )
+
     @staticmethod
     def read_json(*args, **kwargs):
         from dask_cudf.io.json import read_json as read_json_impl
diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py
index aab56e3a1b0..3181c8d69ec 100644
--- a/python/dask_cudf/dask_cudf/core.py
+++ b/python/dask_cudf/dask_cudf/core.py
@@ -342,7 +342,7 @@ def groupby(self, by=None, **kwargs):
 def sum_of_squares(x):
     x = x.astype("f8")._column
     outcol = libcudf.reduce.reduce("sum_of_squares", x)
-    return cudf.Series(outcol)
+    return cudf.Series._from_column(outcol)
 
 
 @_dask_cudf_performance_tracking
diff --git a/python/dask_cudf/dask_cudf/expr/_expr.py b/python/dask_cudf/dask_cudf/expr/_expr.py
index 8fccaccb695..8a2c50d3fe7 100644
--- a/python/dask_cudf/dask_cudf/expr/_expr.py
+++ b/python/dask_cudf/dask_cudf/expr/_expr.py
@@ -4,12 +4,41 @@
 import dask_expr._shuffle as _shuffle_module
 from dask_expr import new_collection
 from dask_expr._cumulative import CumulativeBlockwise
-from dask_expr._expr import Expr, VarColumns
+from dask_expr._expr import Elemwise, Expr, VarColumns
 from dask_expr._reductions import Reduction, Var
 
 from dask.dataframe.core import is_dataframe_like, make_meta, meta_nonempty
 from dask.dataframe.dispatch import is_categorical_dtype
 
+import cudf
+
+##
+## Custom expressions
+##
+
+
+class ToCudfBackend(Elemwise):
+    # TODO: Inherit from ToBackend when rapids-dask-dependency
+    # is pinned to dask>=2024.8.1
+    _parameters = ["frame", "options"]
+    _projection_passthrough = True
+    _filter_passthrough = True
+    _preserves_partitioning_information = True
+
+    @staticmethod
+    def operation(df, options):
+        from dask_cudf.backends import to_cudf_dispatch
+
+        return to_cudf_dispatch(df, **options)
+
+    def _simplify_down(self):
+        if isinstance(
+            self.frame._meta, (cudf.DataFrame, cudf.Series, cudf.Index)
+        ):
+            # We already have cudf data
+            return self.frame
+
+
 ##
 ## Custom expression patching
 ##
diff --git a/python/dask_cudf/dask_cudf/io/json.py b/python/dask_cudf/dask_cudf/io/json.py
index 8705d98e9d6..98c5ceedb76 100644
--- a/python/dask_cudf/dask_cudf/io/json.py
+++ b/python/dask_cudf/dask_cudf/io/json.py
@@ -81,7 +81,7 @@ def read_json(
         If str, this value will be used as the ``engine`` argument
         when :func:`cudf.read_json` is used to create each partition.
- If a :obj:`~typing.Callable`, this value will be used as the + If a :obj:`~collections.abc.Callable`, this value will be used as the underlying function used to create each partition from JSON data. The default value is "auto", so that ``engine=partial(cudf.read_json, engine="auto")`` will be diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index f0cab953458..e793d4381d1 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -1,7 +1,6 @@ # Copyright (c) 2019-2024, NVIDIA CORPORATION. import itertools import warnings -from contextlib import ExitStack from functools import partial from io import BufferedWriter, BytesIO, IOBase @@ -20,20 +19,11 @@ create_metadata_file_dd = None import cudf -from cudf.core.column import as_column, build_categorical_column +from cudf.core.column import CategoricalColumn, as_column from cudf.io import write_to_dataset -from cudf.io.parquet import ( - _apply_post_filters, - _default_open_file_options, - _normalize_filters, -) +from cudf.io.parquet import _apply_post_filters, _normalize_filters from cudf.utils.dtypes import cudf_dtype_from_pa_type -from cudf.utils.ioutils import ( - _ROW_GROUP_SIZE_BYTES_DEFAULT, - _is_local_filesystem, - _open_remote_files, -) -from cudf.utils.utils import maybe_filter_deprecation +from cudf.utils.ioutils import _ROW_GROUP_SIZE_BYTES_DEFAULT class CudfEngine(ArrowDatasetEngine): @@ -98,63 +88,40 @@ def _read_paths( dataset_kwargs = dataset_kwargs or {} dataset_kwargs["partitioning"] = partitioning or "hive" - with ExitStack() as stack: - # Non-local filesystem handling - paths_or_fobs = paths - if not _is_local_filesystem(fs): - paths_or_fobs = _open_remote_files( - paths_or_fobs, - fs, - context_stack=stack, - **_default_open_file_options( - open_file_options, columns, row_groups - ), - ) - # Filter out deprecation warning unless the user - # specifies open_file_options and/or use_python_file_object. - # Otherwise, the FutureWarning is out of their control. 
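# Illustrative sketch of the new filesystem handling replacing the removed
# logic here: the cudf backend now hands an fsspec filesystem straight to
# cudf.read_parquet instead of pre-opening remote files. The path and
# storage options below are hypothetical; filesystem="fsspec" is grounded
# in the S3 tests later in this diff.

import dask_cudf

ddf = dask_cudf.read_parquet(
    "s3://bucket/data/*.parquet",   # hypothetical location
    filesystem="fsspec",            # "arrow" is rejected by the new check
    storage_options={"anon": True},
)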
- with maybe_filter_deprecation( - ( - not open_file_options - and "use_python_file_object" not in kwargs - ), - message="Support for reading pyarrow's NativeFile is deprecated", - category=FutureWarning, - ): - # Use cudf to read in data - try: - df = cudf.read_parquet( - paths_or_fobs, - engine="cudf", - columns=columns, - row_groups=row_groups if row_groups else None, - dataset_kwargs=dataset_kwargs, - categorical_partitions=False, - **kwargs, - ) - except RuntimeError as err: - # TODO: Remove try/except after null-schema issue is resolved - # (See: https://github.com/rapidsai/cudf/issues/12702) - if len(paths_or_fobs) > 1: - df = cudf.concat( - [ - cudf.read_parquet( - pof, - engine="cudf", - columns=columns, - row_groups=row_groups[i] - if row_groups - else None, - dataset_kwargs=dataset_kwargs, - categorical_partitions=False, - **kwargs, - ) - for i, pof in enumerate(paths_or_fobs) - ] + # Use cudf to read in data + try: + df = cudf.read_parquet( + paths, + engine="cudf", + columns=columns, + row_groups=row_groups if row_groups else None, + dataset_kwargs=dataset_kwargs, + categorical_partitions=False, + filesystem=fs, + **kwargs, + ) + except RuntimeError as err: + # TODO: Remove try/except after null-schema issue is resolved + # (See: https://github.com/rapidsai/cudf/issues/12702) + if len(paths) > 1: + df = cudf.concat( + [ + cudf.read_parquet( + path, + engine="cudf", + columns=columns, + row_groups=row_groups[i] if row_groups else None, + dataset_kwargs=dataset_kwargs, + categorical_partitions=False, + filesystem=fs, + **kwargs, ) - else: - raise err + for i, path in enumerate(paths) + ] + ) + else: + raise err # Apply filters (if any are defined) df = _apply_post_filters(df, filters) @@ -196,12 +163,14 @@ def _read_paths( partitions[i].keys.get_loc(index2), length=len(df), ) - df[name] = build_categorical_column( - categories=partitions[i].keys, - codes=codes, + df[name] = CategoricalColumn( + data=None, size=codes.size, + dtype=cudf.CategoricalDtype( + categories=partitions[i].keys, ordered=False + ), offset=codes.offset, - ordered=False, + children=(codes,), ) elif name not in df.columns: # Add non-categorical partition column diff --git a/python/dask_cudf/dask_cudf/io/tests/test_s3.py b/python/dask_cudf/dask_cudf/io/tests/test_s3.py index ac3245b3748..a14ffbc37dc 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_s3.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_s3.py @@ -5,8 +5,8 @@ from contextlib import contextmanager from io import BytesIO +import fsspec import pandas as pd -import pyarrow.fs as pa_fs import pytest from dask.dataframe import assert_eq @@ -119,81 +119,67 @@ def test_read_csv(s3_base, s3so): assert df.a.sum().compute() == 4 -def test_read_csv_warns(s3_base, s3so): - with s3_context( - s3_base=s3_base, - bucket="daskcsv_warns", - files={"a.csv": b"a,b\n1,2\n3,4\n"}, - ): - with pytest.warns(FutureWarning): - df = dask_cudf.read_csv( - "s3://daskcsv_warns/*.csv", - blocksize="50 B", - storage_options=s3so, - use_python_file_object=True, - ) - assert df.a.sum().compute() == 4 - - -@pytest.mark.parametrize( - "open_file_options", - [ - {"precache_options": {"method": None}}, - {"precache_options": {"method": "parquet"}}, - {"open_file_func": None}, - ], -) -def test_read_parquet_open_file_options(s3_base, s3so, open_file_options, pdf): +def test_read_parquet_open_file_options_raises(): + with pytest.raises(ValueError): + dask_cudf.read_parquet( + "s3://my/path", + open_file_options={"precache_options": {"method": "parquet"}}, + ) + + +def 
test_read_parquet_filesystem(s3_base, s3so, pdf): + fname = "test_parquet_filesystem.parquet" + bucket = "parquet" buffer = BytesIO() pdf.to_parquet(path=buffer) buffer.seek(0) - with s3_context( - s3_base=s3_base, bucket="daskparquet", files={"file.parq": buffer} - ): - if "open_file_func" in open_file_options: - fs = pa_fs.S3FileSystem( - endpoint_override=s3so["client_kwargs"]["endpoint_url"], + with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): + path = f"s3://{bucket}/{fname}" + + # Cannot pass filesystem="arrow" + with pytest.raises(ValueError): + dask_cudf.read_parquet( + path, + storage_options=s3so, + filesystem="arrow", ) - open_file_options["open_file_func"] = fs.open_input_file + + # Can pass filesystem="fsspec" df = dask_cudf.read_parquet( - "s3://daskparquet/*.parq", + path, storage_options=s3so, - open_file_options=open_file_options, + filesystem="fsspec", ) - with pytest.warns(FutureWarning): - assert df.a.sum().compute() == 10 - with pytest.warns(FutureWarning): - assert df.b.sum().compute() == 9 + assert df.b.sum().compute() == 9 -def test_read_parquet(s3_base, s3so, pdf): - fname = "test_parquet_reader_dask.parquet" +def test_read_parquet_filesystem_explicit(s3_base, s3so, pdf): + fname = "test_parquet_filesystem_explicit.parquet" bucket = "parquet" buffer = BytesIO() pdf.to_parquet(path=buffer) buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = dask_cudf.read_parquet( - f"s3://{bucket}/{fname}", - storage_options=s3so, - ) - assert_eq(pdf, got) + path = f"s3://{bucket}/{fname}" + fs = fsspec.core.get_fs_token_paths( + path, mode="rb", storage_options=s3so + )[0] + df = dask_cudf.read_parquet(path, filesystem=fs) + assert df.b.sum().compute() == 9 -def test_read_parquet_use_python_file_object(s3_base, s3so, pdf): - fname = "test_parquet_use_python_file_object.parquet" +def test_read_parquet(s3_base, s3so, pdf): + fname = "test_parquet_reader_dask.parquet" bucket = "parquet" buffer = BytesIO() pdf.to_parquet(path=buffer) buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - with pytest.warns(FutureWarning): - got = dask_cudf.read_parquet( - f"s3://{bucket}/{fname}", - storage_options=s3so, - read={"use_python_file_object": True}, - ).head() - assert_eq(pdf, got) + got = dask_cudf.read_parquet( + f"s3://{bucket}/{fname}", + storage_options=s3so, + ) + assert_eq(pdf, got) def test_read_orc(s3_base, s3so, pdf): @@ -208,19 +194,3 @@ def test_read_orc(s3_base, s3so, pdf): storage_options=s3so, ) assert_eq(pdf, got) - - -def test_read_orc_use_python_file_object(s3_base, s3so, pdf): - fname = "test_orc_use_python_file_object.orc" - bucket = "orc" - buffer = BytesIO() - pdf.to_orc(path=buffer) - buffer.seek(0) - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - with pytest.warns(FutureWarning): - got = dask_cudf.read_orc( - f"s3://{bucket}/{fname}", - storage_options=s3so, - use_python_file_object=True, - ).head() - assert_eq(pdf, got) diff --git a/python/dask_cudf/dask_cudf/tests/test_applymap.py b/python/dask_cudf/dask_cudf/tests/test_applymap.py index d84235481c3..e4e79b7b8cf 100644 --- a/python/dask_cudf/dask_cudf/tests/test_applymap.py +++ b/python/dask_cudf/dask_cudf/tests/test_applymap.py @@ -5,6 +5,8 @@ from dask import dataframe as dd +from cudf.core._compat import PANDAS_GE_210 + from dask_cudf.tests.utils import _make_random_frame @@ -18,6 +20,10 @@ ], ) @pytest.mark.parametrize("has_na", [True, False]) +@pytest.mark.skipif( + not PANDAS_GE_210, + 
reason="DataFrame.map requires pandas>=2.1.0", +) def test_applymap_basic(func, has_na): size = 2000 pdf, dgdf = _make_random_frame(size, include_na=False) diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index 174923c2c7e..905d8c08135 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -15,7 +15,11 @@ import cudf import dask_cudf -from dask_cudf.tests.utils import skip_dask_expr, xfail_dask_expr +from dask_cudf.tests.utils import ( + require_dask_expr, + skip_dask_expr, + xfail_dask_expr, +) def test_from_dict_backend_dispatch(): @@ -993,3 +997,13 @@ def test_series_isin_error(): ser.isin([1, 5, "a"]) with pytest.raises(TypeError): ddf.isin([1, 5, "a"]).compute() + + +@require_dask_expr() +def test_to_backend_simplify(): + # Check that column projection is not blocked by to_backend + with dask.config.set({"dataframe.backend": "pandas"}): + df = dd.from_dict({"x": [1, 2, 3], "y": [4, 5, 6]}, npartitions=2) + df2 = df.to_backend("cudf")[["y"]].simplify() + df3 = df[["y"]].to_backend("cudf").to_backend("cudf").simplify() + assert df2._name == df3._name diff --git a/python/dask_cudf/dask_cudf/tests/test_distributed.py b/python/dask_cudf/dask_cudf/tests/test_distributed.py index be10b0d4843..d03180852eb 100644 --- a/python/dask_cudf/dask_cudf/tests/test_distributed.py +++ b/python/dask_cudf/dask_cudf/tests/test_distributed.py @@ -80,6 +80,11 @@ def test_str_series_roundtrip(): def test_p2p_shuffle(): + pytest.importorskip( + "pyarrow", + minversion="14.0.1", + reason="P2P shuffling requires pyarrow>=14.0.1", + ) # Check that we can use `shuffle_method="p2p"` with dask_cuda.LocalCUDACluster(n_workers=1) as cluster: with Client(cluster): diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index cf916b713b2..7b9f0ca328a 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -9,6 +9,7 @@ from dask.utils_test import hlg_layer import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing._utils import expect_warning_if import dask_cudf @@ -316,6 +317,10 @@ def test_groupby_dropna_cudf(dropna, by): (None, ["a", "d"]), ], ) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) def test_groupby_dropna_dask(dropna, by): # NOTE: This test is borrowed from upstream dask # (dask/dask/dataframe/tests/test_groupby.py) diff --git a/python/dask_cudf/dask_cudf/tests/test_join.py b/python/dask_cudf/dask_cudf/tests/test_join.py index ed291ef31a7..3e078c47cdd 100644 --- a/python/dask_cudf/dask_cudf/tests/test_join.py +++ b/python/dask_cudf/dask_cudf/tests/test_join.py @@ -386,3 +386,14 @@ def test_issue_12773(): expected.to_pandas(), check_index=False, ) + + +@pytest.mark.parametrize( + "typ", [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype] +) +def test_merge_on_decimal(typ): + df = cudf.DataFrame({"a": [1], "b": [2]}, dtype=typ(1)) + ddf = dask_cudf.from_cudf(df, npartitions=1) + result = ddf.merge(ddf, left_on="a", right_on="a") + expected = df.merge(df, left_on="a", right_on="a") + dd.assert_eq(result, expected) diff --git a/python/dask_cudf/dask_cudf/tests/utils.py b/python/dask_cudf/dask_cudf/tests/utils.py index c7dedbb6b4a..cc0c6899804 100644 --- a/python/dask_cudf/dask_cudf/tests/utils.py +++ 
b/python/dask_cudf/dask_cudf/tests/utils.py @@ -48,3 +48,7 @@ def xfail_dask_expr(reason=_default_reason, lt_version=None): else: xfail = QUERY_PLANNING_ON return pytest.mark.xfail(xfail, reason=reason) + + +def require_dask_expr(reason="requires dask-expr"): + return pytest.mark.skipif(not QUERY_PLANNING_ON, reason=reason) diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 872ecd35c28..93bf532d67f 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -17,12 +17,12 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "cudf==24.10.*,>=0.0.0a0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", - "numpy>=1.23,<2.0a0", + "numpy>=1.23,<3.0a0", "pandas>=2.0,<2.2.3dev0", "rapids-dask-dependency==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. @@ -32,7 +32,6 @@ classifiers = [ "Topic :: Scientific/Engineering", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt new file mode 100644 index 00000000000..96eb6c3bb30 --- /dev/null +++ b/python/libcudf/CMakeLists.txt @@ -0,0 +1,53 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../../rapids_config.cmake) + +project( + libcudf-python + VERSION "${RAPIDS_VERSION}" + LANGUAGES CXX +) + +# Check if cudf is already available. If so, it is the user's responsibility to ensure that the +# CMake package is also available at build time of the Python cudf package. +find_package(cudf "${RAPIDS_VERSION}") + +if(cudf_FOUND) + return() +endif() + +unset(cudf_FOUND) + +# Find Python early so that later commands can use it +find_package(Python 3.10 REQUIRED COMPONENTS Interpreter) + +set(BUILD_TESTS OFF) +set(BUILD_BENCHMARKS OFF) +set(CUDF_BUILD_TESTUTIL OFF) +set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) +set(CUDA_STATIC_RUNTIME ON) + +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) + +add_subdirectory(../../cpp cudf-cpp) + +# Ensure other libraries needed by libcudf.so get installed alongside it. 
+include(cmake/Modules/WheelHelpers.cmake) +install_aliased_imported_targets( + TARGETS cudf nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION + ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} +) diff --git a/python/libcudf/LICENSE b/python/libcudf/LICENSE new file mode 120000 index 00000000000..30cff7403da --- /dev/null +++ b/python/libcudf/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/libcudf/README.md b/python/libcudf/README.md new file mode 120000 index 00000000000..fe840054137 --- /dev/null +++ b/python/libcudf/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/python/cudf/cmake/Modules/WheelHelpers.cmake b/python/libcudf/cmake/Modules/WheelHelpers.cmake similarity index 100% rename from python/cudf/cmake/Modules/WheelHelpers.cmake rename to python/libcudf/cmake/Modules/WheelHelpers.cmake diff --git a/python/libcudf/libcudf/VERSION b/python/libcudf/libcudf/VERSION new file mode 120000 index 00000000000..d62dc733efd --- /dev/null +++ b/python/libcudf/libcudf/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/libcudf/libcudf/__init__.py b/python/libcudf/libcudf/__init__.py new file mode 100644 index 00000000000..10c476cbe89 --- /dev/null +++ b/python/libcudf/libcudf/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcudf._version import __git_commit__, __version__ +from libcudf.load import load_library diff --git a/python/libcudf/libcudf/_version.py b/python/libcudf/libcudf/_version.py new file mode 100644 index 00000000000..7dd732b4905 --- /dev/null +++ b/python/libcudf/libcudf/_version.py @@ -0,0 +1,33 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.resources + +__version__ = ( + importlib.resources.files(__package__) + .joinpath("VERSION") + .read_text() + .strip() +) +try: + __git_commit__ = ( + importlib.resources.files(__package__) + .joinpath("GIT_COMMIT") + .read_text() + .strip() + ) +except FileNotFoundError: + __git_commit__ = "" + +__all__ = ["__git_commit__", "__version__"] diff --git a/python/libcudf/libcudf/load.py b/python/libcudf/libcudf/load.py new file mode 100644 index 00000000000..ba134710868 --- /dev/null +++ b/python/libcudf/libcudf/load.py @@ -0,0 +1,47 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import ctypes +import os + + +def load_library(): + # Dynamically load libcudf.so. Prefer a system library if one is present to + # avoid clobbering symbols that other packages might expect, but if no + # other library is present use the one in the wheel. + libcudf_lib = None + try: + libcudf_lib = ctypes.CDLL("libcudf.so", ctypes.RTLD_GLOBAL) + except OSError: + # If neither of these directories contain the library, we assume we are in an + # environment where the C++ library is already installed somewhere else and the + # CMake build of the libcudf Python package was a no-op. + # + # Note that this approach won't work for real editable installs of the libcudf package. + # scikit-build-core has limited support for importlib.resources so there isn't a clean + # way to support that case yet. + for lib_dir in ("lib", "lib64"): + if os.path.isfile( + lib := os.path.join( + os.path.dirname(__file__), lib_dir, "libcudf.so" + ) + ): + libcudf_lib = ctypes.CDLL(lib, ctypes.RTLD_GLOBAL) + break + + # The caller almost never needs to do anything with this library, but no + # harm in offering the option since this object at least provides a handle + # to inspect where libcudf was loaded from. + return libcudf_lib diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml new file mode 100644 index 00000000000..5f4b9957fd0 --- /dev/null +++ b/python/libcudf/pyproject.toml @@ -0,0 +1,71 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +build-backend = "rapids_build_backend.build" +requires = [ + "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "scikit-build-core[pyproject]>=0.10.0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
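# Illustrative sketch of the loading order implemented above: a system
# libcudf.so is preferred, with the wheel's bundled copy as the fallback.
# Inspecting the returned handle is optional and relies on a ctypes detail.

import libcudf

handle = libcudf.load_library()
if handle is not None:
    # CDLL._name holds whatever string was passed to dlopen
    print("libcudf loaded via:", handle._name)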
+ +[project] +name = "libcudf" +dynamic = ["version"] +description = "cuDF - GPU Dataframe (C++)" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.10" +classifiers = [ + "Intended Audience :: Developers", + "Topic :: Database", + "Topic :: Scientific/Engineering", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: C++", + "Environment :: GPU :: NVIDIA CUDA", +] + +[project.urls] +Homepage = "https://github.com/rapidsai/cudf" + +[project.entry-points."cmake.prefix"] +libcudf = "libcudf" + +[tool.scikit-build] +build-dir = "build/{wheel_tag}" +cmake.build-type = "Release" +cmake.version = "CMakeLists.txt" +minimum-version = "build-system.requires" +ninja.make-fallback = true +sdist.reproducible = true +wheel.packages = ["libcudf"] +wheel.install-dir = "libcudf" +wheel.py-api = "py3" + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "libcudf/VERSION" +regex = "(?P.*)" + +[tool.rapids-build-backend] +build-backend = "scikit_build_core.build" +dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true" +requires = [ + "cmake>=3.26.4,!=3.30.0", + "librmm==24.10.*,>=0.0.0a0", + "ninja", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/pylibcudf/CMakeLists.txt b/python/pylibcudf/CMakeLists.txt new file mode 100644 index 00000000000..a4b831790fb --- /dev/null +++ b/python/pylibcudf/CMakeLists.txt @@ -0,0 +1,43 @@ +# ============================================================================= +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../../rapids_config.cmake) +include(rapids-cuda) +rapids_cuda_init_architectures(pylibcudf) + +project( + pylibcudf + VERSION "${RAPIDS_VERSION}" + LANGUAGES CXX CUDA +) + +find_package(cudf "${RAPIDS_VERSION}" REQUIRED) + +# an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack +# for the interop.pyx +include(rapids-cpm) +rapids_cpm_init() +include(../../cpp/cmake/thirdparty/get_dlpack.cmake) + +include(rapids-cython-core) + +rapids_cython_init() + +add_subdirectory(pylibcudf) + +if(DEFINED cython_lib_dir) + rapids_cython_add_rpath_entries(TARGET cudf PATHS "${cython_lib_dir}") +endif() diff --git a/python/pylibcudf/README.md b/python/pylibcudf/README.md new file mode 120000 index 00000000000..fe840054137 --- /dev/null +++ b/python/pylibcudf/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/CMakeLists.txt similarity index 84% rename from python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt rename to python/pylibcudf/pylibcudf/CMakeLists.txt index df4591baa71..a4f17344cb0 100644 --- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/CMakeLists.txt @@ -29,6 +29,7 @@ set(cython_sources join.pyx lists.pyx merge.pyx + null_mask.pyx quantiles.pyx reduce.pyx replace.pyx @@ -52,7 +53,12 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf ) -link_to_pyarrow_headers(pylibcudf_interop) + +target_include_directories(pylibcudf_interop PUBLIC "$") + +include(${rapids-cmake-dir}/export/find_package_root.cmake) +include(../../../cpp/cmake/thirdparty/get_nanoarrow.cmake) +target_link_libraries(pylibcudf_interop PUBLIC nanoarrow) add_subdirectory(libcudf) add_subdirectory(strings) diff --git a/python/pylibcudf/pylibcudf/VERSION b/python/pylibcudf/pylibcudf/VERSION new file mode 120000 index 00000000000..d62dc733efd --- /dev/null +++ b/python/pylibcudf/pylibcudf/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/pylibcudf/pylibcudf/__init__.pxd similarity index 97% rename from python/cudf/cudf/_lib/pylibcudf/__init__.pxd rename to python/pylibcudf/pylibcudf/__init__.pxd index 71f523fc3cd..841efa59bda 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd +++ b/python/pylibcudf/pylibcudf/__init__.pxd @@ -15,6 +15,7 @@ from . cimport ( join, lists, merge, + null_mask, quantiles, reduce, replace, @@ -57,6 +58,7 @@ __all__ = [ "join", "lists", "merge", + "null_mask", "quantiles", "reduce", "replace", diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py similarity index 78% rename from python/cudf/cudf/_lib/pylibcudf/__init__.py rename to python/pylibcudf/pylibcudf/__init__.py index 9705eba84b1..d3878a89a6a 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.py +++ b/python/pylibcudf/pylibcudf/__init__.py @@ -1,5 +1,15 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. +# If libcudf was installed as a wheel, we must request it to load the library symbols. +# Otherwise, we assume that the library was installed in a system path that ld can find. +try: + import libcudf +except ModuleNotFoundError: + pass +else: + libcudf.load_library() + del libcudf + from . 
import ( aggregation, binaryop, @@ -12,9 +22,11 @@ filling, groupby, interop, + io, join, lists, merge, + null_mask, quantiles, reduce, replace, @@ -58,6 +70,7 @@ "join", "lists", "merge", + "null_mask", "quantiles", "reduce", "replace", diff --git a/python/pylibcudf/pylibcudf/_version.py b/python/pylibcudf/pylibcudf/_version.py new file mode 100644 index 00000000000..d2765e5d53c --- /dev/null +++ b/python/pylibcudf/pylibcudf/_version.py @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import importlib.resources + +__version__ = ( + importlib.resources.files(__package__) + .joinpath("VERSION") + .read_text() + .strip() +) +try: + __git_commit__ = ( + importlib.resources.files(__package__) + .joinpath("GIT_COMMIT") + .read_text() + .strip() + ) +except FileNotFoundError: + __git_commit__ = "" + +__all__ = ["__git_commit__", "__version__"] diff --git a/python/cudf/cudf/_lib/pylibcudf/aggregation.pxd b/python/pylibcudf/pylibcudf/aggregation.pxd similarity index 96% rename from python/cudf/cudf/_lib/pylibcudf/aggregation.pxd rename to python/pylibcudf/pylibcudf/aggregation.pxd index 0981d0e855a..c9ab1eab21c 100644 --- a/python/cudf/cudf/_lib/pylibcudf/aggregation.pxd +++ b/python/pylibcudf/pylibcudf/aggregation.pxd @@ -1,8 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr - -from cudf._lib.pylibcudf.libcudf.aggregation cimport ( +from pylibcudf.libcudf.aggregation cimport ( Kind as kind_t, aggregation, correlation_type, @@ -15,7 +14,7 @@ from cudf._lib.pylibcudf.libcudf.aggregation cimport ( rolling_aggregation, scan_aggregation, ) -from cudf._lib.pylibcudf.libcudf.types cimport ( +from pylibcudf.libcudf.types cimport ( interpolation, nan_equality, null_equality, diff --git a/python/cudf/cudf/_lib/pylibcudf/aggregation.pyx b/python/pylibcudf/pylibcudf/aggregation.pyx similarity index 96% rename from python/cudf/cudf/_lib/pylibcudf/aggregation.pyx rename to python/pylibcudf/pylibcudf/aggregation.pyx index eed2f6de585..e510b738f70 100644 --- a/python/cudf/cudf/_lib/pylibcudf/aggregation.pyx +++ b/python/pylibcudf/pylibcudf/aggregation.pyx @@ -4,8 +4,7 @@ from cython.operator cimport dereference from libcpp.cast cimport dynamic_cast from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.libcudf.aggregation cimport ( +from pylibcudf.libcudf.aggregation cimport ( aggregation, correlation_type, ewm_history, @@ -41,7 +40,7 @@ from cudf._lib.pylibcudf.libcudf.aggregation cimport ( rolling_aggregation, scan_aggregation, ) -from cudf._lib.pylibcudf.libcudf.types cimport ( +from pylibcudf.libcudf.types cimport ( interpolation, nan_equality, null_equality, @@ -51,18 +50,16 @@ from cudf._lib.pylibcudf.libcudf.types cimport ( size_type, ) -from cudf._lib.pylibcudf.libcudf.aggregation import Kind # no-cython-lint -from cudf._lib.pylibcudf.libcudf.aggregation import \ +from pylibcudf.libcudf.aggregation import Kind # no-cython-lint +from pylibcudf.libcudf.aggregation import \ correlation_type as CorrelationType # no-cython-lint -from cudf._lib.pylibcudf.libcudf.aggregation import \ +from pylibcudf.libcudf.aggregation import \ ewm_history as EWMHistory # no-cython-lint -from cudf._lib.pylibcudf.libcudf.aggregation import \ +from pylibcudf.libcudf.aggregation import \ rank_method as RankMethod # no-cython-lint -from cudf._lib.pylibcudf.libcudf.aggregation import \ +from pylibcudf.libcudf.aggregation import 
\ rank_percentage as RankPercentage # no-cython-lint -from cudf._lib.pylibcudf.libcudf.aggregation import ( # no-cython-lint - udf_type as UdfType, -) +from pylibcudf.libcudf.aggregation import udf_type as UdfType # no-cython-lint from .types cimport DataType @@ -71,7 +68,7 @@ cdef class Aggregation: """A type of aggregation to perform. Aggregations are passed to APIs like - :py:func:`~cudf._lib.pylibcudf.groupby.GroupBy.aggregate` to indicate what + :py:func:`~pylibcudf.groupby.GroupBy.aggregate` to indicate what operations to perform. Using a class for aggregations provides a unified API for handling parametrizable aggregations. This class should never be instantiated directly, only via one of the factory functions. diff --git a/python/cudf/cudf/_lib/pylibcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/binaryop.pxd similarity index 90% rename from python/cudf/cudf/_lib/pylibcudf/binaryop.pxd rename to python/pylibcudf/pylibcudf/binaryop.pxd index 2411e28ac66..06625e9e2db 100644 --- a/python/cudf/cudf/_lib/pylibcudf/binaryop.pxd +++ b/python/pylibcudf/pylibcudf/binaryop.pxd @@ -1,8 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from libcpp cimport bool - -from cudf._lib.pylibcudf.libcudf.binaryop cimport binary_operator +from pylibcudf.libcudf.binaryop cimport binary_operator from .column cimport Column from .scalar cimport Scalar diff --git a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx similarity index 86% rename from python/cudf/cudf/_lib/pylibcudf/binaryop.pyx rename to python/pylibcudf/pylibcudf/binaryop.pyx index 44d9f4ad04a..5a67f4d6cdb 100644 --- a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx +++ b/python/pylibcudf/pylibcudf/binaryop.pyx @@ -5,12 +5,11 @@ from cython.operator import dereference from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport move +from pylibcudf.libcudf cimport binaryop as cpp_binaryop +from pylibcudf.libcudf.binaryop cimport binary_operator +from pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf cimport binaryop as cpp_binaryop -from cudf._lib.pylibcudf.libcudf.binaryop cimport binary_operator -from cudf._lib.pylibcudf.libcudf.column.column cimport column - -from cudf._lib.pylibcudf.libcudf.binaryop import \ +from pylibcudf.libcudf.binaryop import \ binary_operator as BinaryOperator # no-cython-lint from .column cimport Column @@ -27,9 +26,9 @@ cpdef Column binary_operation( """Perform a binary operation between a column and another column or scalar. ``lhs`` and ``rhs`` may be a - :py:class:`~cudf._lib.pylibcudf.column.Column` or a - :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`, but at least one must be a - :py:class:`~cudf._lib.pylibcudf.column.Column`. + :py:class:`~pylibcudf.column.Column` or a + :py:class:`~pylibcudf.scalar.Scalar`, but at least one must be a + :py:class:`~pylibcudf.column.Column`. For details, see :cpp:func:`binary_operation`. 
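# A hedged usage sketch for the relocated module: the same binaryop call that
# previously lived under cudf._lib.pylibcudf is now imported as pylibcudf.
# The sample data is arbitrary.

import pyarrow as pa
import pylibcudf as plc

lhs = plc.interop.from_arrow(pa.array([1, 2, 3]))
rhs = plc.interop.from_arrow(pa.array([10, 20, 30]))
out = plc.binaryop.binary_operation(
    lhs,
    rhs,
    plc.binaryop.BinaryOperator.ADD,
    plc.types.DataType(plc.types.TypeId.INT64),  # requested output type
)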
diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pxd b/python/pylibcudf/pylibcudf/column.pxd similarity index 84% rename from python/cudf/cudf/_lib/pylibcudf/column.pxd rename to python/pylibcudf/pylibcudf/column.pxd index 13ee0a70681..92d63e4e495 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pxd +++ b/python/pylibcudf/pylibcudf/column.pxd @@ -2,16 +2,13 @@ from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport ( column_view, mutable_column_view, ) -from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport ( - lists_column_view, -) -from cudf._lib.pylibcudf.libcudf.types cimport bitmask_type, size_type +from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view +from pylibcudf.libcudf.types cimport bitmask_type, size_type from .gpumemoryview cimport gpumemoryview from .types cimport DataType diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx similarity index 97% rename from python/cudf/cudf/_lib/pylibcudf/column.pyx rename to python/pylibcudf/pylibcudf/column.pyx index a61e0629292..a37a12fc7e1 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pyx +++ b/python/pylibcudf/pylibcudf/column.pyx @@ -3,25 +3,20 @@ from cython.operator cimport dereference from libcpp.memory cimport make_unique, unique_ptr from libcpp.utility cimport move +from pylibcudf.libcudf.column.column cimport column, column_contents +from pylibcudf.libcudf.column.column_factories cimport make_column_from_scalar +from pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf.types cimport size_type from rmm._lib.device_buffer cimport DeviceBuffer -from cudf._lib.pylibcudf.libcudf.column.column cimport column, column_contents -from cudf._lib.pylibcudf.libcudf.column.column_factories cimport ( - make_column_from_scalar, -) -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar -from cudf._lib.pylibcudf.libcudf.types cimport size_type - from .gpumemoryview cimport gpumemoryview from .scalar cimport Scalar -from .types cimport DataType, type_id +from .types cimport DataType, size_of, type_id from .utils cimport int_to_bitmask_ptr, int_to_void_ptr import functools -import numpy as np - cdef class Column: """A container of nullable device data as a column of elements. @@ -303,14 +298,15 @@ cdef class Column: raise ValueError("mask not yet supported.") typestr = iface['typestr'][1:] + data_type = _datatype_from_dtype_desc(typestr) + if not is_c_contiguous( iface['shape'], iface['strides'], - np.dtype(typestr).itemsize + size_of(data_type) ): raise ValueError("Data must be C-contiguous") - data_type = _datatype_from_dtype_desc(typestr) size = iface['shape'][0] return Column( data_type, diff --git a/python/cudf/cudf/_lib/pylibcudf/column_factories.pxd b/python/pylibcudf/pylibcudf/column_factories.pxd similarity index 92% rename from python/cudf/cudf/_lib/pylibcudf/column_factories.pxd rename to python/pylibcudf/pylibcudf/column_factories.pxd index 9dbd74ab16c..fef02359240 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column_factories.pxd +++ b/python/pylibcudf/pylibcudf/column_factories.pxd @@ -1,8 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
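# Sketch of the change to the contiguity check above: element width now comes
# from pylibcudf's own size_of rather than numpy's itemsize. This assumes
# size_of is exposed at the Python level of pylibcudf.types, as the cimport
# above suggests.

import pylibcudf as plc

dt = plc.types.DataType(plc.types.TypeId.FLOAT64)
assert plc.types.size_of(dt) == 8  # bytes per element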
from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.libcudf.types cimport mask_state, size_type +from pylibcudf.libcudf.types cimport mask_state, size_type from .column cimport Column from .types cimport DataType, size_type, type_id diff --git a/python/cudf/cudf/_lib/pylibcudf/column_factories.pyx b/python/pylibcudf/pylibcudf/column_factories.pyx similarity index 96% rename from python/cudf/cudf/_lib/pylibcudf/column_factories.pyx rename to python/pylibcudf/pylibcudf/column_factories.pyx index ef7f512f0e5..4601cba515a 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column_factories.pyx +++ b/python/pylibcudf/pylibcudf/column_factories.pyx @@ -1,9 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_factories cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_factories cimport ( make_duration_column as cpp_make_duration_column, make_empty_column as cpp_make_empty_column, make_fixed_point_column as cpp_make_fixed_point_column, @@ -11,7 +10,7 @@ from cudf._lib.pylibcudf.libcudf.column.column_factories cimport ( make_numeric_column as cpp_make_numeric_column, make_timestamp_column as cpp_make_timestamp_column, ) -from cudf._lib.pylibcudf.libcudf.types cimport mask_state, size_type +from pylibcudf.libcudf.types cimport mask_state, size_type from .types cimport DataType, type_id diff --git a/python/cudf/cudf/_lib/pylibcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/concatenate.pxd similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/concatenate.pxd rename to python/pylibcudf/pylibcudf/concatenate.pxd diff --git a/python/cudf/cudf/_lib/pylibcudf/concatenate.pyx b/python/pylibcudf/pylibcudf/concatenate.pyx similarity index 80% rename from python/cudf/cudf/_lib/pylibcudf/concatenate.pyx rename to python/pylibcudf/pylibcudf/concatenate.pyx index 5e40f921b2c..8bdcc086e0f 100644 --- a/python/cudf/cudf/_lib/pylibcudf/concatenate.pyx +++ b/python/pylibcudf/pylibcudf/concatenate.pyx @@ -3,12 +3,11 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.libcudf cimport concatenate as cpp_concatenate -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf cimport concatenate as cpp_concatenate +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.table.table_view cimport table_view from .column cimport Column from .table cimport Table diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/pylibcudf/pylibcudf/copying.pxd similarity index 94% rename from python/cudf/cudf/_lib/pylibcudf/copying.pxd rename to python/pylibcudf/pylibcudf/copying.pxd index 06543d3ca92..7dfed437673 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/pylibcudf/pylibcudf/copying.pxd @@ -1,12 +1,11 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. 
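# A small usage sketch for the relocated concatenate module; the inputs are
# arbitrary pyarrow-backed columns.

import pyarrow as pa
import pylibcudf as plc

parts = [
    plc.interop.from_arrow(pa.array([1, 2])),
    plc.interop.from_arrow(pa.array([3, 4])),
]
joined = plc.concatenate.concatenate(parts)  # one Column of length 4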
from libcpp cimport bool as cbool - -from cudf._lib.pylibcudf.libcudf.copying cimport ( +from pylibcudf.libcudf.copying cimport ( mask_allocation_policy, out_of_bounds_policy, ) -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.types cimport size_type from .column cimport Column from .scalar cimport Scalar diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/pylibcudf/pylibcudf/copying.pyx similarity index 96% rename from python/cudf/cudf/_lib/pylibcudf/copying.pyx rename to python/pylibcudf/pylibcudf/copying.pyx index 2d59deb3864..9743119d92a 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/pylibcudf/pylibcudf/copying.pyx @@ -6,29 +6,28 @@ from libcpp.functional cimport reference_wrapper from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector - # TODO: We want to make cpp a more full-featured package so that we can access # directly from that. It will make namespacing much cleaner in pylibcudf. What # we really want here would be # cimport libcudf... libcudf.copying.algo(...) -from cudf._lib.pylibcudf.libcudf cimport copying as cpp_copying -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport ( +from pylibcudf.libcudf cimport copying as cpp_copying +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport ( column_view, mutable_column_view, ) -from cudf._lib.pylibcudf.libcudf.copying cimport ( +from pylibcudf.libcudf.copying cimport ( mask_allocation_policy, out_of_bounds_policy, ) -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.types cimport size_type -from cudf._lib.pylibcudf.libcudf.copying import \ +from pylibcudf.libcudf.copying import \ mask_allocation_policy as MaskAllocationPolicy # no-cython-lint -from cudf._lib.pylibcudf.libcudf.copying import \ +from pylibcudf.libcudf.copying import \ out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint from .column cimport Column diff --git a/python/cudf/cudf/_lib/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/datetime.pxd rename to python/pylibcudf/pylibcudf/datetime.pxd diff --git a/python/cudf/cudf/_lib/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx similarity index 78% rename from python/cudf/cudf/_lib/pylibcudf/datetime.pyx rename to python/pylibcudf/pylibcudf/datetime.pyx index 82351327de6..0ddc68bcb9d 100644 --- a/python/cudf/cudf/_lib/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -1,11 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
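# A hedged sketch of the gather API whose imports are rewritten above. The
# gather map uses the int32 size_type, and the bounds policy is one of
# NULLIFY or DONT_CHECK.

import pyarrow as pa
import pylibcudf as plc

source = plc.Table([plc.interop.from_arrow(pa.array([10, 20, 30]))])
gather_map = plc.interop.from_arrow(pa.array([2, 0], type=pa.int32()))
result = plc.copying.gather(
    source, gather_map, plc.copying.OutOfBoundsPolicy.DONT_CHECK
)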
from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.datetime cimport ( - extract_year as cpp_extract_year, -) +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.datetime cimport extract_year as cpp_extract_year from .column cimport Column diff --git a/python/cudf/cudf/_lib/exception_handler.pxd b/python/pylibcudf/pylibcudf/exception_handler.pxd similarity index 95% rename from python/cudf/cudf/_lib/exception_handler.pxd rename to python/pylibcudf/pylibcudf/exception_handler.pxd index 4337d8db285..6abcd0a1c0f 100644 --- a/python/cudf/cudf/_lib/exception_handler.pxd +++ b/python/pylibcudf/pylibcudf/exception_handler.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # See @@ -24,7 +24,7 @@ cdef extern from *: * Since this function interoperates with Python's exception state, it * does not throw any C++ exceptions. */ - void cudf_exception_handler() + void libcudf_exception_handler() { // Catch a handful of different errors here and turn them into the // equivalent Python errors. @@ -66,4 +66,4 @@ cdef extern from *: } // anonymous namespace """ - cdef void cudf_exception_handler() + cdef void libcudf_exception_handler() diff --git a/python/cudf/cudf/_lib/pylibcudf/experimental.pxd b/python/pylibcudf/pylibcudf/experimental.pxd similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/experimental.pxd rename to python/pylibcudf/pylibcudf/experimental.pxd diff --git a/python/cudf/cudf/_lib/pylibcudf/experimental.pyx b/python/pylibcudf/pylibcudf/experimental.pyx similarity index 92% rename from python/cudf/cudf/_lib/pylibcudf/experimental.pyx rename to python/pylibcudf/pylibcudf/experimental.pyx index 1e2a682d879..b25a53e13b2 100644 --- a/python/cudf/cudf/_lib/pylibcudf/experimental.pyx +++ b/python/pylibcudf/pylibcudf/experimental.pyx @@ -2,8 +2,7 @@ from libcpp cimport bool from libcpp.string cimport string - -from cudf._lib.pylibcudf.libcudf cimport experimental as cpp_experimental +from pylibcudf.libcudf cimport experimental as cpp_experimental cpdef enable_prefetching(str key): diff --git a/python/cudf/cudf/_lib/pylibcudf/expressions.pxd b/python/pylibcudf/pylibcudf/expressions.pxd similarity index 91% rename from python/cudf/cudf/_lib/pylibcudf/expressions.pxd rename to python/pylibcudf/pylibcudf/expressions.pxd index 64825b89d9f..65660b7c449 100644 --- a/python/cudf/cudf/_lib/pylibcudf/expressions.pxd +++ b/python/pylibcudf/pylibcudf/expressions.pxd @@ -1,8 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.string cimport string - -from cudf._lib.pylibcudf.libcudf.expressions cimport ( +from pylibcudf.libcudf.expressions cimport ( ast_operator, expression, table_reference, diff --git a/python/cudf/cudf/_lib/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx similarity index 94% rename from python/cudf/cudf/_lib/pylibcudf/expressions.pyx rename to python/pylibcudf/pylibcudf/expressions.pyx index b983a617533..a44c9e25987 100644 --- a/python/cudf/cudf/_lib/pylibcudf/expressions.pyx +++ b/python/pylibcudf/pylibcudf/expressions.pyx @@ -1,7 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
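# Usage sketch for the relocated datetime module shown above; the timestamp
# values are arbitrary.

import datetime

import pyarrow as pa
import pylibcudf as plc

ts = plc.interop.from_arrow(
    pa.array([datetime.datetime(2001, 1, 1), datetime.datetime(2024, 8, 1)])
)
years = plc.datetime.extract_year(ts)  # int16 column: [2001, 2024]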
-from cudf._lib.pylibcudf.libcudf.expressions import \ +from pylibcudf.libcudf.expressions import \ ast_operator as ASTOperator # no-cython-lint -from cudf._lib.pylibcudf.libcudf.expressions import \ +from pylibcudf.libcudf.expressions import \ table_reference as TableReference # no-cython-lint from cython.operator cimport dereference @@ -9,22 +9,21 @@ from libc.stdint cimport int32_t, int64_t from libcpp.memory cimport make_unique, unique_ptr from libcpp.string cimport string from libcpp.utility cimport move - -from cudf._lib.pylibcudf.libcudf cimport expressions as libcudf_exp -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport ( +from pylibcudf.libcudf cimport expressions as libcudf_exp +from pylibcudf.libcudf.scalar.scalar cimport ( duration_scalar, numeric_scalar, string_scalar, timestamp_scalar, ) -from cudf._lib.pylibcudf.libcudf.types cimport size_type, type_id -from cudf._lib.pylibcudf.libcudf.wrappers.durations cimport ( +from pylibcudf.libcudf.types cimport size_type, type_id +from pylibcudf.libcudf.wrappers.durations cimport ( duration_ms, duration_ns, duration_s, duration_us, ) -from cudf._lib.pylibcudf.libcudf.wrappers.timestamps cimport ( +from pylibcudf.libcudf.wrappers.timestamps cimport ( timestamp_ms, timestamp_ns, timestamp_s, diff --git a/python/cudf/cudf/_lib/pylibcudf/filling.pxd b/python/pylibcudf/pylibcudf/filling.pxd similarity index 90% rename from python/cudf/cudf/_lib/pylibcudf/filling.pxd rename to python/pylibcudf/pylibcudf/filling.pxd index 3560ebf2ea2..b9345f8cd42 100644 --- a/python/cudf/cudf/_lib/pylibcudf/filling.pxd +++ b/python/pylibcudf/pylibcudf/filling.pxd @@ -1,5 +1,5 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.types cimport size_type from .column cimport Column from .scalar cimport Scalar diff --git a/python/cudf/cudf/_lib/pylibcudf/filling.pyx b/python/pylibcudf/pylibcudf/filling.pyx similarity index 94% rename from python/cudf/cudf/_lib/pylibcudf/filling.pyx rename to python/pylibcudf/pylibcudf/filling.pyx index 05f67681428..61b430e64aa 100644 --- a/python/cudf/cudf/_lib/pylibcudf/filling.pyx +++ b/python/pylibcudf/pylibcudf/filling.pyx @@ -3,16 +3,15 @@ from cython.operator cimport dereference from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.filling cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.filling cimport ( fill as cpp_fill, fill_in_place as cpp_fill_in_place, repeat as cpp_repeat, sequence as cpp_sequence, ) -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.types cimport size_type from .column cimport Column from .scalar cimport Scalar diff --git a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd b/python/pylibcudf/pylibcudf/gpumemoryview.pxd similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd rename to python/pylibcudf/pylibcudf/gpumemoryview.pxd diff --git a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx b/python/pylibcudf/pylibcudf/gpumemoryview.pyx similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx rename to python/pylibcudf/pylibcudf/gpumemoryview.pyx diff --git a/python/cudf/cudf/_lib/pylibcudf/groupby.pxd b/python/pylibcudf/pylibcudf/groupby.pxd similarity index 87% rename from 
python/cudf/cudf/_lib/pylibcudf/groupby.pxd rename to python/pylibcudf/pylibcudf/groupby.pxd index eaa05c26986..79af2f1b746 100644 --- a/python/cudf/cudf/_lib/pylibcudf/groupby.pxd +++ b/python/pylibcudf/pylibcudf/groupby.pxd @@ -3,20 +3,19 @@ from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.libcudf.aggregation cimport ( +from pylibcudf.libcudf.aggregation cimport ( aggregation, groupby_aggregation, groupby_scan_aggregation, ) -from cudf._lib.pylibcudf.libcudf.groupby cimport ( +from pylibcudf.libcudf.groupby cimport ( aggregation_request, aggregation_result, groupby, scan_request, ) -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.types cimport null_order, order +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.types cimport null_order, order from .column cimport Column from .table cimport Table diff --git a/python/cudf/cudf/_lib/pylibcudf/groupby.pyx b/python/pylibcudf/pylibcudf/groupby.pyx similarity index 96% rename from python/cudf/cudf/_lib/pylibcudf/groupby.pyx rename to python/pylibcudf/pylibcudf/groupby.pyx index f5bb46ca6a2..ae5d33aaa46 100644 --- a/python/cudf/cudf/_lib/pylibcudf/groupby.pyx +++ b/python/pylibcudf/pylibcudf/groupby.pyx @@ -6,18 +6,17 @@ from libcpp.memory cimport make_unique, unique_ptr from libcpp.pair cimport pair from libcpp.utility cimport move from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.libcudf.groupby cimport ( +from pylibcudf.libcudf.groupby cimport ( aggregation_request, aggregation_result, groupby, groups, scan_request, ) -from cudf._lib.pylibcudf.libcudf.replace cimport replace_policy -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.replace cimport replace_policy +from pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.types cimport size_type from .aggregation cimport Aggregation from .column cimport Column @@ -156,7 +155,7 @@ cdef class GroupBy: Parameters ---------- requests : List[GroupByRequest] - The list of `~.cudf._lib.pylibcudf.groupby.GroupByRequest` , each + The list of `~.pylibcudf.groupby.GroupByRequest` , each representing a set of aggregations to perform on a given column of values. Returns @@ -188,7 +187,7 @@ cdef class GroupBy: Parameters ---------- requests : List[GroupByRequest] - The list of `~.cudf._lib.pylibcudf.groupby.GroupByRequest` , each + The list of `~.pylibcudf.groupby.GroupByRequest` , each representing a set of aggregations to perform on a given column of values. Returns diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pyx b/python/pylibcudf/pylibcudf/interop.pyx similarity index 67% rename from python/cudf/cudf/_lib/pylibcudf/interop.pyx rename to python/pylibcudf/pylibcudf/interop.pyx index adf7e1fd7e8..1a03fa5b45b 100644 --- a/python/cudf/cudf/_lib/pylibcudf/interop.pyx +++ b/python/pylibcudf/pylibcudf/interop.pyx @@ -1,40 +1,30 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. 
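# A hedged sketch of the GroupByRequest flow documented above; the data and
# the chosen aggregation are arbitrary.

import pyarrow as pa
import pylibcudf as plc

keys = plc.Table([plc.interop.from_arrow(pa.array([1, 1, 2]))])
values = plc.interop.from_arrow(pa.array([10, 20, 30]))
request = plc.groupby.GroupByRequest(values, [plc.aggregation.sum()])
group_keys, results = plc.groupby.GroupBy(keys).aggregate([request])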
diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pyx b/python/pylibcudf/pylibcudf/interop.pyx
similarity index 67%
rename from python/cudf/cudf/_lib/pylibcudf/interop.pyx
rename to python/pylibcudf/pylibcudf/interop.pyx
index adf7e1fd7e8..1a03fa5b45b 100644
--- a/python/cudf/cudf/_lib/pylibcudf/interop.pyx
+++ b/python/pylibcudf/pylibcudf/interop.pyx
@@ -1,40 +1,30 @@
 # Copyright (c) 2023-2024, NVIDIA CORPORATION.
-from cpython cimport pycapsule
-from cython.operator cimport dereference
-from libcpp.memory cimport shared_ptr, unique_ptr
+from cpython.pycapsule cimport PyCapsule_GetPointer, PyCapsule_New
+from libc.stdlib cimport free
+from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
-from pyarrow cimport lib as pa

 from dataclasses import dataclass, field
 from functools import singledispatch

 from pyarrow import lib as pa

-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.interop cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.interop cimport (
     ArrowArray,
     ArrowArrayStream,
     ArrowSchema,
     column_metadata,
-    from_arrow as cpp_from_arrow,
     from_arrow_column as cpp_from_arrow_column,
     from_arrow_stream as cpp_from_arrow_stream,
-    to_arrow as cpp_to_arrow,
-)
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport (
-    fixed_point_scalar,
-    scalar,
-)
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.wrappers.decimals cimport (
-    decimal32,
-    decimal64,
-    decimal128,
-    scale_type,
+    to_arrow_host_raw,
+    to_arrow_schema_raw,
 )
+from pylibcudf.libcudf.table.table cimport table

+from . cimport copying
 from .column cimport Column
 from .scalar cimport Scalar
 from .table cimport Table
@@ -109,7 +99,9 @@ def from_arrow(pyarrow_object, *, DataType data_type=None):
     Union[Table, Scalar]
         The converted object of type corresponding to the input type in
         cudf.
     """
-    raise TypeError("from_arrow only accepts Table and Scalar objects")
+    raise TypeError(
+        f"Unsupported type {type(pyarrow_object)} for conversion from arrow"
+    )


 @from_arrow.register(pa.DataType)
@@ -133,7 +125,7 @@ def _from_arrow_table(pyarrow_object, *, DataType data_type=None):
         raise ValueError("data_type may not be passed for tables")
     stream = pyarrow_object.__arrow_c_stream__()
     cdef ArrowArrayStream* c_stream = (
-        <ArrowArrayStream*>pycapsule.PyCapsule_GetPointer(stream, "arrow_array_stream")
+        <ArrowArrayStream*>PyCapsule_GetPointer(stream, "arrow_array_stream")
     )

     cdef unique_ptr[table] c_result
@@ -146,68 +138,30 @@ def _from_arrow_table(pyarrow_object, *, DataType data_type=None):

 @from_arrow.register(pa.Scalar)
 def _from_arrow_scalar(pyarrow_object, *, DataType data_type=None):
-    cdef shared_ptr[pa.CScalar] arrow_scalar = pa.pyarrow_unwrap_scalar(pyarrow_object)
-
-    cdef unique_ptr[scalar] c_result
-    with nogil:
-        c_result = move(cpp_from_arrow(dereference(arrow_scalar)))
-
-    cdef Scalar result = Scalar.from_libcudf(move(c_result))
-
-    if result.type().id() != type_id.DECIMAL128:
-        if data_type is not None:
-            raise ValueError(
-                "dtype may not be passed for non-decimal types"
-            )
-        return result
-
-    if data_type is None:
-        raise ValueError(
-            "Decimal scalars must be constructed with a dtype"
-        )
-
-    cdef type_id tid = data_type.id()
-
-    if tid == type_id.DECIMAL32:
-        result.c_obj.reset(
-            new fixed_point_scalar[decimal32](
-                (
-                    <fixed_point_scalar[decimal128]*> result.c_obj.get()
-                ).value(),
-                scale_type(-pyarrow_object.type.scale),
-                result.c_obj.get().is_valid()
-            )
-        )
-    elif tid == type_id.DECIMAL64:
-        result.c_obj.reset(
-            new fixed_point_scalar[decimal64](
-                (
-                    <fixed_point_scalar[decimal128]*> result.c_obj.get()
-                ).value(),
-                scale_type(-pyarrow_object.type.scale),
-                result.c_obj.get().is_valid()
-            )
-        )
-    elif tid != type_id.DECIMAL128:
-        raise ValueError(
-            "Decimal scalars may only be cast to decimals"
-        )
-
-    return result
+    if isinstance(pyarrow_object.type, pa.ListType) and pyarrow_object.as_py() is None:
+        # pyarrow doesn't correctly handle None values for list types, so
+        # we have to create this one manually.
+        # https://github.com/apache/arrow/issues/40319
+        pa_array = pa.array([None], type=pyarrow_object.type)
+    else:
+        pa_array = pa.array([pyarrow_object])
+    return copying.get_element(
+        from_arrow(pa_array, data_type=data_type),
+        0,
+    )


 @from_arrow.register(pa.Array)
-@from_arrow.register(pa.ChunkedArray)
 def _from_arrow_column(pyarrow_object, *, DataType data_type=None):
     if data_type is not None:
         raise ValueError("data_type may not be passed for arrays")

     schema, array = pyarrow_object.__arrow_c_array__()
     cdef ArrowSchema* c_schema = (
-        <ArrowSchema*>pycapsule.PyCapsule_GetPointer(schema, "arrow_schema")
+        <ArrowSchema*>PyCapsule_GetPointer(schema, "arrow_schema")
     )
     cdef ArrowArray* c_array = (
-        <ArrowArray*>pycapsule.PyCapsule_GetPointer(array, "arrow_array")
+        <ArrowArray*>PyCapsule_GetPointer(array, "arrow_array")
     )

     cdef unique_ptr[column] c_result
@@ -238,7 +192,7 @@ def to_arrow(cudf_object, metadata=None):
     Union[pyarrow.Array, pyarrow.Table, pyarrow.Scalar]
         The converted object of type corresponding to the input type in
         PyArrow.
     """
-    raise TypeError("to_arrow only accepts Table and Scalar objects")
+    raise TypeError(f"Unsupported type {type(cudf_object)} for conversion to arrow")


 @to_arrow.register(DataType)
@@ -281,46 +235,83 @@ def _to_arrow_datatype(cudf_object, **kwargs):
         )


-@to_arrow.register(Table)
-def _to_arrow_table(cudf_object, metadata=None):
+cdef void _release_schema(object schema_capsule) noexcept:
+    """Release the ArrowSchema object stored in a PyCapsule."""
+    cdef ArrowSchema* schema = <ArrowSchema*>PyCapsule_GetPointer(
+        schema_capsule, 'arrow_schema'
+    )
+    if schema.release != NULL:
+        schema.release(schema)
+
+    free(schema)
+
+
+cdef void _release_array(object array_capsule) noexcept:
+    """Release the ArrowArray object stored in a PyCapsule."""
+    cdef ArrowArray* array = <ArrowArray*>PyCapsule_GetPointer(
+        array_capsule, 'arrow_array'
+    )
+    if array.release != NULL:
+        array.release(array)
+
+    free(array)
+
+
+def _table_to_schema(Table tbl, metadata):
     if metadata is None:
-        metadata = [ColumnMetadata() for _ in range(len(cudf_object.columns()))]
+        metadata = [ColumnMetadata() for _ in range(len(tbl.columns()))]
     metadata = [ColumnMetadata(m) if isinstance(m, str) else m for m in metadata]
-    cdef vector[column_metadata] c_table_metadata
-    cdef shared_ptr[pa.CTable] c_table_result
+
+    cdef vector[column_metadata] c_metadata
+    c_metadata.reserve(len(metadata))
     for meta in metadata:
-        c_table_metadata.push_back(_metadata_to_libcudf(meta))
+        c_metadata.push_back(_metadata_to_libcudf(meta))
+
+    cdef ArrowSchema* raw_schema_ptr
     with nogil:
-        c_table_result = move(
-            cpp_to_arrow((<Table> cudf_object).view(), c_table_metadata)
-        )
+        raw_schema_ptr = to_arrow_schema_raw(tbl.view(), c_metadata)

-    return pa.pyarrow_wrap_table(c_table_result)
+    return PyCapsule_New(raw_schema_ptr, 'arrow_schema', _release_schema)


-@to_arrow.register(Scalar)
-def _to_arrow_scalar(cudf_object, metadata=None):
-    # Note that metadata for scalars is primarily important for preserving
-    # information on nested types since names are otherwise irrelevant.
-    if metadata is None:
-        metadata = ColumnMetadata()
-    metadata = ColumnMetadata(metadata) if isinstance(metadata, str) else metadata
-    cdef column_metadata c_scalar_metadata = _metadata_to_libcudf(metadata)
-    cdef shared_ptr[pa.CScalar] c_scalar_result
+def _table_to_host_array(Table tbl):
+    cdef ArrowArray* raw_host_array_ptr
     with nogil:
-        c_scalar_result = move(
-            cpp_to_arrow(
-                dereference((<Scalar> cudf_object).c_obj), c_scalar_metadata
-            )
-        )
+        raw_host_array_ptr = to_arrow_host_raw(tbl.view())
+
+    return PyCapsule_New(raw_host_array_ptr, "arrow_array", _release_array)
+
+
+class _TableWithArrowMetadata:
+    def __init__(self, tbl, metadata=None):
+        self.tbl = tbl
+        self.metadata = metadata

-    return pa.pyarrow_wrap_scalar(c_scalar_result)
+    def __arrow_c_array__(self, requested_schema=None):
+        return _table_to_schema(self.tbl, self.metadata), _table_to_host_array(self.tbl)
+
+
+# TODO: In the long run we should get rid of the `to_arrow` functions in favor of using
+# the protocols directly via `pa.table(cudf_object, schema=...)` directly. We can do the
+# same for columns. We cannot do this for scalars since there is no corresponding
+# protocol. Since this will require broader changes throughout the codebase, the current
+# approach is to leverage the protocol internally but to continue exposing `to_arrow`.
+@to_arrow.register(Table)
+def _to_arrow_table(cudf_object, metadata=None):
+    test_table = _TableWithArrowMetadata(cudf_object, metadata)
+    return pa.table(test_table)


 @to_arrow.register(Column)
 def _to_arrow_array(cudf_object, metadata=None):
     """Create a PyArrow array from a pylibcudf column."""
-    if metadata is None:
-        metadata = ColumnMetadata()
-    metadata = ColumnMetadata(metadata) if isinstance(metadata, str) else metadata
-    return to_arrow(Table([cudf_object]), [metadata])[0]
+    if metadata is not None:
+        metadata = [metadata]
+    return to_arrow(Table([cudf_object]), metadata)[0]
+
+
+@to_arrow.register(Scalar)
+def _to_arrow_scalar(cudf_object, metadata=None):
+    # Note that metadata for scalars is primarily important for preserving
+    # information on nested types since names are otherwise irrelevant.
+    return to_arrow(Column.from_scalar(cudf_object, 1), metadata=metadata)[0]
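The rewritten interop module drops the pyarrow Cython bindings entirely: `from_arrow` consumes the Arrow C stream/array PyCapsules, `to_arrow` exports host data through `__arrow_c_array__`, and scalars round-trip through a one-row column. A hedged sketch of the resulting round trip; column names are supplied via `metadata` because the C stream path does not carry them back out, and the exact `metadata` spelling is assumed from the code above.

    import pyarrow as pa
    import pylibcudf as plc

    pa_tbl = pa.table({"a": [1, 2, None]})
    plc_tbl = plc.interop.from_arrow(pa_tbl)    # consumes __arrow_c_stream__
    pa_back = plc.interop.to_arrow(plc_tbl, metadata=["a"])  # routed via pa.table()
    assert pa_back.num_rows == pa_tbl.num_rows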
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt b/python/pylibcudf/pylibcudf/io/CMakeLists.txt
similarity index 80%
rename from python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt
rename to python/pylibcudf/pylibcudf/io/CMakeLists.txt
index 55bea4fc262..bcc2151f5b6 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/io/CMakeLists.txt
@@ -20,8 +20,3 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}"
   MODULE_PREFIX pylibcudf_io_ ASSOCIATED_TARGETS cudf
 )
-
-set(targets_using_arrow_headers pylibcudf_io_avro pylibcudf_io_csv pylibcudf_io_datasource
-    pylibcudf_io_json pylibcudf_io_parquet pylibcudf_io_types
-)
-link_to_pyarrow_headers("${targets_using_arrow_headers}")
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/__init__.pxd b/python/pylibcudf/pylibcudf/io/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/io/__init__.pxd
rename to python/pylibcudf/pylibcudf/io/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/__init__.py b/python/pylibcudf/pylibcudf/io/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/io/__init__.py
rename to python/pylibcudf/pylibcudf/io/__init__.py
diff --git a/python/pylibcudf/pylibcudf/io/avro.pxd b/python/pylibcudf/pylibcudf/io/avro.pxd
new file mode 100644
index 00000000000..8696fcb3c15
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/avro.pxd
@@ -0,0 +1,12 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from pylibcudf.io.types cimport SourceInfo, TableWithMetadata
+from pylibcudf.libcudf.io.avro cimport avro_reader_options
+from pylibcudf.libcudf.types cimport size_type
+
+
+cpdef TableWithMetadata read_avro(
+    SourceInfo source_info,
+    list columns = *,
+    size_type skip_rows = *,
+    size_type num_rows = *
+)
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/avro.pyx b/python/pylibcudf/pylibcudf/io/avro.pyx
similarity index 89%
rename from python/cudf/cudf/_lib/pylibcudf/io/avro.pyx
rename to python/pylibcudf/pylibcudf/io/avro.pyx
index 538bd8aa322..667c67f4c36 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/avro.pyx
+++ b/python/pylibcudf/pylibcudf/io/avro.pyx
@@ -3,13 +3,12 @@
 from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.io.types cimport SourceInfo, TableWithMetadata
-from cudf._lib.pylibcudf.libcudf.io.avro cimport (
+from pylibcudf.io.types cimport SourceInfo, TableWithMetadata
+from pylibcudf.libcudf.io.avro cimport (
     avro_reader_options,
     read_avro as cpp_read_avro,
 )
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.types cimport size_type


 cpdef TableWithMetadata read_avro(
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/csv.pyx b/python/pylibcudf/pylibcudf/io/csv.pyx
similarity index 97%
rename from python/cudf/cudf/_lib/pylibcudf/io/csv.pyx
rename to python/pylibcudf/pylibcudf/io/csv.pyx
index e9efb5befee..b53d6771cd6 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/csv.pyx
+++ b/python/pylibcudf/pylibcudf/io/csv.pyx
@@ -5,19 +5,18 @@ from libcpp.map cimport map
 from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.io.types cimport SourceInfo, TableWithMetadata
-from cudf._lib.pylibcudf.libcudf.io.csv cimport (
+from pylibcudf.io.types cimport SourceInfo, TableWithMetadata
+from pylibcudf.libcudf.io.csv cimport (
     csv_reader_options,
     read_csv as cpp_read_csv,
 )
-from cudf._lib.pylibcudf.libcudf.io.types cimport (
+from pylibcudf.libcudf.io.types cimport (
     compression_type,
     quote_style,
     table_with_metadata,
 )
-from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type
-from cudf._lib.pylibcudf.types cimport DataType
+from pylibcudf.libcudf.types cimport data_type, size_type
+from pylibcudf.types cimport DataType


 cdef tuple _process_parse_dates_hex(list cols):
diff --git a/python/pylibcudf/pylibcudf/io/datasource.pxd b/python/pylibcudf/pylibcudf/io/datasource.pxd
new file mode 100644
index 00000000000..c08f36693c7
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/datasource.pxd
@@ -0,0 +1,7 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+
+from pylibcudf.libcudf.io.datasource cimport datasource
+
+
+cdef class Datasource:
+    cdef datasource* get_datasource(self) except * nogil
diff --git a/python/pylibcudf/pylibcudf/io/datasource.pyx b/python/pylibcudf/pylibcudf/io/datasource.pyx
new file mode 100644
index 00000000000..02418444caa
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/datasource.pyx
@@ -0,0 +1,10 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+
+from pylibcudf.libcudf.io.datasource cimport datasource
+
+
+cdef class Datasource:
+    cdef datasource* get_datasource(self) except * nogil:
+        with gil:
+            raise NotImplementedError("get_datasource() should not "
+                                      + "be directly invoked here")
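`read_avro` gains a declared cpdef signature in the new `avro.pxd`, and `Datasource` becomes the extension-type hook for custom inputs (its base `get_datasource()` deliberately raises). A hedged usage sketch; `events.avro` is a hypothetical file, and it assumes `SourceInfo` accepts a list of paths and that `TableWithMetadata` exposes the parsed table as `.tbl`.

    import pylibcudf as plc

    src = plc.io.SourceInfo(["events.avro"])  # hypothetical input file
    result = plc.io.avro.read_avro(src, columns=["x"], skip_rows=0, num_rows=100)
    tbl = result.tbl  # assumes the public TableWithMetadata.tbl attribute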
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/json.pxd b/python/pylibcudf/pylibcudf/io/json.pxd
similarity index 79%
rename from python/cudf/cudf/_lib/pylibcudf/io/json.pxd
rename to python/pylibcudf/pylibcudf/io/json.pxd
index 2e0e92a054f..f65c1034598 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/json.pxd
+++ b/python/pylibcudf/pylibcudf/io/json.pxd
@@ -1,14 +1,13 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 from libcpp cimport bool
-
-from cudf._lib.pylibcudf.io.types cimport (
+from pylibcudf.io.types cimport (
     SinkInfo,
     SourceInfo,
     TableWithMetadata,
     compression_type,
 )
-from cudf._lib.pylibcudf.libcudf.io.json cimport json_recovery_mode_t
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.io.json cimport json_recovery_mode_t
+from pylibcudf.libcudf.types cimport size_type


 cpdef TableWithMetadata read_json(
@@ -16,8 +15,8 @@ cpdef TableWithMetadata read_json(
     list dtypes = *,
     compression_type compression = *,
     bool lines = *,
-    size_type byte_range_offset = *,
-    size_type byte_range_size = *,
+    size_t byte_range_offset = *,
+    size_t byte_range_size = *,
     bool keep_quotes = *,
     bool mixed_types_as_string = *,
     bool prune_columns = *,
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/json.pyx b/python/pylibcudf/pylibcudf/io/json.pyx
similarity index 93%
rename from python/cudf/cudf/_lib/pylibcudf/io/json.pyx
rename to python/pylibcudf/pylibcudf/io/json.pyx
index 2710ee60075..29e49083bc6 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/json.pyx
+++ b/python/pylibcudf/pylibcudf/io/json.pyx
@@ -5,14 +5,9 @@ from libcpp.map cimport map
 from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.concatenate cimport concatenate
-from cudf._lib.pylibcudf.io.types cimport (
-    SinkInfo,
-    SourceInfo,
-    TableWithMetadata,
-)
-from cudf._lib.pylibcudf.libcudf.io.json cimport (
+from pylibcudf.concatenate cimport concatenate
+from pylibcudf.io.types cimport SinkInfo, SourceInfo, TableWithMetadata
+from pylibcudf.libcudf.io.json cimport (
     json_reader_options,
     json_recovery_mode_t,
     json_writer_options,
@@ -20,13 +15,13 @@ from cudf._lib.pylibcudf.libcudf.io.json cimport (
     schema_element,
     write_json as cpp_write_json,
 )
-from cudf._lib.pylibcudf.libcudf.io.types cimport (
+from pylibcudf.libcudf.io.types cimport (
     compression_type,
     table_metadata,
     table_with_metadata,
 )
-from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type
-from cudf._lib.pylibcudf.types cimport DataType
+from pylibcudf.libcudf.types cimport data_type, size_type
+from pylibcudf.types cimport DataType


 cdef map[string, schema_element] _generate_schema_map(list dtypes):
@@ -56,8 +51,8 @@ cdef json_reader_options _setup_json_reader_options(
     list dtypes,
     compression_type compression,
     bool lines,
-    size_type byte_range_offset,
-    size_type byte_range_size,
+    size_t byte_range_offset,
+    size_t byte_range_size,
     bool keep_quotes,
     bool mixed_types_as_string,
     bool prune_columns,
@@ -194,8 +189,8 @@ cpdef TableWithMetadata read_json(
     list dtypes = None,
     compression_type compression = compression_type.AUTO,
     bool lines = False,
-    size_type byte_range_offset = 0,
-    size_type byte_range_size = 0,
+    size_t byte_range_offset = 0,
+    size_t byte_range_size = 0,
     bool keep_quotes = False,
     bool mixed_types_as_string = False,
     bool prune_columns = False,
@@ -217,9 +212,9 @@ cpdef TableWithMetadata read_json(
         (column_child_name, column_child_type, list of grandchild dtypes).
     compression: CompressionType, default CompressionType.AUTO
         The compression format of the JSON source.
-    byte_range_offset : size_type, default 0
+    byte_range_offset : size_t, default 0
         Number of bytes to skip from source start.
-    byte_range_size : size_type, default 0
+    byte_range_size : size_t, default 0
         Number of bytes to read. By default, will read all bytes.
     keep_quotes : bool, default False
         Whether the reader should keep quotes of string values.
@@ -270,7 +265,7 @@ cpdef void write_json(
     str false_value = "false"
 ):
     """
-    Writes a :py:class:`~cudf._lib.pylibcudf.table.Table` to JSON format.
+    Writes a :py:class:`~pylibcudf.table.Table` to JSON format.

     Parameters
     ----------
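The `byte_range_offset`/`byte_range_size` parameters move from `size_type` (libcudf's 32-bit row-count type) to `size_t` because they measure bytes and must be able to address offsets past 2 GiB. A hedged sketch of chunked JSON-lines reading under the new types; `records.jsonl` is a hypothetical file.

    import pylibcudf as plc

    src = plc.io.SourceInfo(["records.jsonl"])  # hypothetical file
    # size_t parameters: byte offsets beyond 2**31 - 1 are now representable.
    chunk = plc.io.json.read_json(
        src,
        lines=True,
        byte_range_offset=3 * 1024**3,  # start 3 GiB into the source
        byte_range_size=256 * 1024**2,  # read a 256 MiB window
    )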
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/io/parquet.pxd
similarity index 69%
rename from python/cudf/cudf/_lib/pylibcudf/io/parquet.pxd
rename to python/pylibcudf/pylibcudf/io/parquet.pxd
index 027f215fb91..9c476030ded 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/parquet.pxd
+++ b/python/pylibcudf/pylibcudf/io/parquet.pxd
@@ -3,14 +3,13 @@
 from libc.stdint cimport int64_t
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.expressions cimport Expression
-from cudf._lib.pylibcudf.io.types cimport SourceInfo, TableWithMetadata
-from cudf._lib.pylibcudf.libcudf.io.parquet cimport (
+from pylibcudf.expressions cimport Expression
+from pylibcudf.io.types cimport SourceInfo, TableWithMetadata
+from pylibcudf.libcudf.io.parquet cimport (
     chunked_parquet_reader as cpp_chunked_parquet_reader,
 )
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
-from cudf._lib.pylibcudf.types cimport DataType
+from pylibcudf.libcudf.types cimport size_type
+from pylibcudf.types cimport DataType


 cdef class ChunkedParquetReader:
@@ -28,7 +27,8 @@ cpdef read_parquet(
     bool convert_strings_to_categories = *,
     bool use_pandas_metadata = *,
     int64_t skip_rows = *,
-    size_type num_rows = *,
+    size_type nrows = *,
+    bool allow_mismatched_pq_schemas = *,
     # disabled see comment in parquet.pyx for more
     # ReaderColumnSchema reader_column_schema = *,
     # DataType timestamp_type = *
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx
similarity index 82%
rename from python/cudf/cudf/_lib/pylibcudf/io/parquet.pyx
rename to python/pylibcudf/pylibcudf/io/parquet.pyx
index 96119e1b714..df1f1b14247 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/parquet.pyx
+++ b/python/pylibcudf/pylibcudf/io/parquet.pyx
@@ -5,17 +5,16 @@ from libcpp cimport bool
 from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.expressions cimport Expression
-from cudf._lib.pylibcudf.io.types cimport SourceInfo, TableWithMetadata
-from cudf._lib.pylibcudf.libcudf.expressions cimport expression
-from cudf._lib.pylibcudf.libcudf.io.parquet cimport (
+from pylibcudf.expressions cimport Expression
+from pylibcudf.io.types cimport SourceInfo, TableWithMetadata
+from pylibcudf.libcudf.expressions cimport expression
+from pylibcudf.libcudf.io.parquet cimport (
     chunked_parquet_reader as cpp_chunked_parquet_reader,
     parquet_reader_options,
     read_parquet as cpp_read_parquet,
 )
-from cudf._lib.pylibcudf.libcudf.io.types cimport table_with_metadata
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.io.types cimport table_with_metadata
+from pylibcudf.libcudf.types cimport size_type


 cdef parquet_reader_options _setup_parquet_reader_options(
@@ -26,7 +25,8 @@ cdef parquet_reader_options _setup_parquet_reader_options(
     bool convert_strings_to_categories = False,
     bool use_pandas_metadata = True,
     int64_t skip_rows = 0,
-    size_type num_rows = -1,
+    size_type nrows = -1,
+    bool allow_mismatched_pq_schemas=False,
     # ReaderColumnSchema reader_column_schema = None,
     # DataType timestamp_type = DataType(type_id.EMPTY)
 ):
@@ -35,13 +35,14 @@
         parquet_reader_options.builder(source_info.c_obj)
         .convert_strings_to_categories(convert_strings_to_categories)
         .use_pandas_metadata(use_pandas_metadata)
+        .allow_mismatched_pq_schemas(allow_mismatched_pq_schemas)
         .use_arrow_schema(True)
         .build()
     )
     if row_groups is not None:
         opts.set_row_groups(row_groups)
-    if num_rows != -1:
-        opts.set_num_rows(num_rows)
+    if nrows != -1:
+        opts.set_num_rows(nrows)
     if skip_rows != 0:
         opts.set_skip_rows(skip_rows)
     if columns is not None:
@@ -73,7 +74,7 @@ cdef class ChunkedParquetReader:
         Whether to convert string columns to the category type
     skip_rows : int64_t, default 0
         The number of rows to skip from the start of the file.
-    num_rows : size_type, default -1
+    nrows : size_type, default -1
         The number of rows to read. By default, read the entire file.
     chunk_read_limit : size_t, default 0
         Limit on total number of bytes to be returned per read,
@@ -81,6 +82,9 @@ cdef class ChunkedParquetReader:
     pass_read_limit : size_t, default 1024000000
         Limit on the amount of memory used for reading and decompressing data
        or 0 if there is no limit.
+    allow_mismatched_pq_schemas : bool, default False
+        Whether to read (matching) columns specified in `columns` from
+        the input files with otherwise mismatched schemas.
     """
     def __init__(
         self,
@@ -90,9 +94,10 @@ cdef class ChunkedParquetReader:
         bool use_pandas_metadata=True,
         bool convert_strings_to_categories=False,
         int64_t skip_rows = 0,
-        size_type num_rows = -1,
+        size_type nrows = -1,
         size_t chunk_read_limit=0,
-        size_t pass_read_limit=1024000000
+        size_t pass_read_limit=1024000000,
+        bool allow_mismatched_pq_schemas=False
     ):

         cdef parquet_reader_options opts = _setup_parquet_reader_options(
@@ -103,7 +108,8 @@ cdef class ChunkedParquetReader:
             convert_strings_to_categories=convert_strings_to_categories,
             use_pandas_metadata=use_pandas_metadata,
             skip_rows=skip_rows,
-            num_rows=num_rows,
+            nrows=nrows,
+            allow_mismatched_pq_schemas=allow_mismatched_pq_schemas,
         )

         with nogil:
@@ -152,7 +158,8 @@ cpdef read_parquet(
     bool convert_strings_to_categories = False,
     bool use_pandas_metadata = True,
     int64_t skip_rows = 0,
-    size_type num_rows = -1,
+    size_type nrows = -1,
+    bool allow_mismatched_pq_schemas = False,
     # Disabled, these aren't used by cudf-python
     # we should only add them back in if there's user demand
     # ReaderColumnSchema reader_column_schema = None,
@@ -169,7 +176,7 @@ cpdef read_parquet(
     row_groups : list[list[size_type]], default None
         List of row groups to be read.
     filters : Expression, default None
-        An AST :py:class:`cudf._lib.pylibcudf.expressions.Expression`
+        An AST :py:class:`pylibcudf.expressions.Expression`
         to use for predicate pushdown.
     convert_strings_to_categories : bool, default False
         Whether to convert string columns to the category type
@@ -178,8 +185,11 @@ cpdef read_parquet(
         the per-file user metadata of the ``TableWithMetadata``
     skip_rows : int64_t, default 0
         The number of rows to skip from the start of the file.
-    num_rows : size_type, default -1
+    nrows : size_type, default -1
         The number of rows to read. By default, read the entire file.
+    allow_mismatched_pq_schemas : bool, default False
+        If True, enable reading (matching) columns specified in `columns`
+        from the input files with otherwise mismatched schemas.

     Returns
     -------
@@ -195,7 +205,8 @@
         convert_strings_to_categories,
         use_pandas_metadata,
         skip_rows,
-        num_rows,
+        nrows,
+        allow_mismatched_pq_schemas,
     )

     with nogil:
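Two reader-facing changes land here: `num_rows` is renamed `nrows` (matching the cudf/pandas keyword), and `allow_mismatched_pq_schemas` opts into reading a selected column set from files whose remaining schemas disagree. A hedged sketch of the new signature; the part files are hypothetical.

    import pylibcudf as plc

    src = plc.io.SourceInfo(["part-0.parquet", "part-1.parquet"])  # hypothetical
    result = plc.io.parquet.read_parquet(
        src,
        columns=["id", "amount"],
        nrows=1_000,                       # was num_rows
        allow_mismatched_pq_schemas=True,  # tolerate drift in non-selected columns
    )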
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/types.pxd b/python/pylibcudf/pylibcudf/io/types.pxd
similarity index 87%
rename from python/cudf/cudf/_lib/pylibcudf/io/types.pxd
rename to python/pylibcudf/pylibcudf/io/types.pxd
index 0094bf6032c..0ab28cb0973 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/types.pxd
+++ b/python/pylibcudf/pylibcudf/io/types.pxd
@@ -1,9 +1,8 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink
-from cudf._lib.pylibcudf.libcudf.io.types cimport (
+from pylibcudf.libcudf.io.data_sink cimport data_sink
+from pylibcudf.libcudf.io.types cimport (
     column_encoding,
     column_in_metadata,
     column_name_info,
@@ -19,7 +18,7 @@ from cudf._lib.pylibcudf.libcudf.io.types cimport (
     table_metadata,
     table_with_metadata,
 )
-from cudf._lib.pylibcudf.table cimport Table
+from pylibcudf.table cimport Table


 cdef class TableWithMetadata:
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx
similarity index 96%
rename from python/cudf/cudf/_lib/pylibcudf/io/types.pyx
rename to python/pylibcudf/pylibcudf/io/types.pyx
index 95fa7d4c2ee..1600a805b37 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/types.pyx
+++ b/python/pylibcudf/pylibcudf/io/types.pyx
@@ -6,11 +6,10 @@ from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.io.datasource cimport Datasource
-from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink
-from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource
-from cudf._lib.pylibcudf.libcudf.io.types cimport (
+from pylibcudf.io.datasource cimport Datasource
+from pylibcudf.libcudf.io.data_sink cimport data_sink
+from pylibcudf.libcudf.io.datasource cimport datasource
+from pylibcudf.libcudf.io.types cimport (
     column_name_info,
     host_buffer,
     source_info,
@@ -22,9 +21,9 @@ import errno
 import io
 import os

-from cudf._lib.pylibcudf.libcudf.io.json import \
+from pylibcudf.libcudf.io.json import \
     json_recovery_mode_t as JSONRecoveryMode  # no-cython-lint
-from cudf._lib.pylibcudf.libcudf.io.types import \
+from pylibcudf.libcudf.io.types import \
     compression_type as CompressionType  # no-cython-lint
diff --git a/python/cudf/cudf/_lib/pylibcudf/join.pxd b/python/pylibcudf/pylibcudf/join.pxd
similarity index 91%
rename from python/cudf/cudf/_lib/pylibcudf/join.pxd
rename to python/pylibcudf/pylibcudf/join.pxd
index 83b4776c16e..06969b4a2db 100644
--- a/python/cudf/cudf/_lib/pylibcudf/join.pxd
+++ b/python/pylibcudf/pylibcudf/join.pxd
@@ -1,6 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.

-from cudf._lib.pylibcudf.libcudf.types cimport null_equality
+from pylibcudf.libcudf.types cimport null_equality

 from .column cimport Column
 from .table cimport Table
diff --git a/python/cudf/cudf/_lib/pylibcudf/join.pyx b/python/pylibcudf/pylibcudf/join.pyx
similarity index 95%
rename from python/cudf/cudf/_lib/pylibcudf/join.pyx
rename to python/pylibcudf/pylibcudf/join.pyx
index 2ded84d84d1..25664286f19 100644
--- a/python/cudf/cudf/_lib/pylibcudf/join.pyx
+++ b/python/pylibcudf/pylibcudf/join.pyx
@@ -4,14 +4,13 @@ from cython.operator import dereference
 from libcpp.memory cimport make_unique, unique_ptr
 from libcpp.utility cimport move
+from pylibcudf.libcudf cimport join as cpp_join
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.types cimport null_equality

 from rmm._lib.device_buffer cimport device_buffer

-from cudf._lib.pylibcudf.libcudf cimport join as cpp_join
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.types cimport null_equality
-
 from .column cimport Column
 from .table cimport Table
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt
rename to python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/__init__.pxd b/python/pylibcudf/pylibcudf/libcudf/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/__init__.pxd
rename to python/pylibcudf/pylibcudf/libcudf/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/__init__.py b/python/pylibcudf/pylibcudf/libcudf/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/__init__.py
rename to python/pylibcudf/pylibcudf/libcudf/__init__.py
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/aggregation.pxd b/python/pylibcudf/pylibcudf/libcudf/aggregation.pxd
similarity index 98%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/aggregation.pxd
rename to python/pylibcudf/pylibcudf/libcudf/aggregation.pxd
index fe04db52094..58c579b86de 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/aggregation.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/aggregation.pxd
@@ -5,8 +5,7 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.types cimport (
+from pylibcudf.libcudf.types cimport (
     data_type,
     interpolation,
     nan_equality,
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/aggregation.pyx b/python/pylibcudf/pylibcudf/libcudf/aggregation.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/aggregation.pyx
rename to python/pylibcudf/pylibcudf/libcudf/aggregation.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd
similarity index 78%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd
rename to python/pylibcudf/pylibcudf/libcudf/binaryop.pxd
index b34fea6a775..d39767b4aa8 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd
@@ -4,12 +4,11 @@ from libc.stdint cimport int32_t
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.exception_handler cimport cudf_exception_handler
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
-from cudf._lib.pylibcudf.libcudf.types cimport data_type
+from pylibcudf.exception_handler cimport libcudf_exception_handler
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.libcudf.types cimport data_type


 cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil:
@@ -55,28 +54,28 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil:
         const column_view& rhs,
         binary_operator op,
         data_type output_type
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] binary_operation (
         const column_view& lhs,
         const scalar& rhs,
         binary_operator op,
         data_type output_type
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] binary_operation (
         const column_view& lhs,
         const column_view& rhs,
         binary_operator op,
         data_type output_type
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] binary_operation (
         const column_view& lhs,
         const column_view& rhs,
         const string& op,
         data_type output_type
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

 cdef extern from "cudf/binaryop.hpp" namespace "cudf::binops" nogil:
     cdef bool is_supported_operation(
@@ -84,4 +83,4 @@
         data_type lhs_type,
         data_type rhs_type,
         binary_operator op
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/libcudf/binaryop.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pyx
rename to python/pylibcudf/pylibcudf/libcudf/binaryop.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/column/__init__.pxd b/python/pylibcudf/pylibcudf/libcudf/column/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/column/__init__.pxd
rename to python/pylibcudf/pylibcudf/libcudf/column/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/column/__init__.py b/python/pylibcudf/pylibcudf/libcudf/column/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/column/__init__.py
rename to python/pylibcudf/pylibcudf/libcudf/column/__init__.py
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/column/column.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd
similarity index 87%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/column/column.pxd
rename to python/pylibcudf/pylibcudf/libcudf/column/column.pxd
index dd184d31cc6..7a369701bbd 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/column/column.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd
@@ -3,14 +3,13 @@
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
-
-from rmm._lib.device_buffer cimport device_buffer
-
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport (
+from pylibcudf.libcudf.column.column_view cimport (
     column_view,
     mutable_column_view,
 )
-from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type
+from pylibcudf.libcudf.types cimport data_type, size_type
+
+from rmm._lib.device_buffer cimport device_buffer


 cdef extern from "cudf/column/column.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/column/column_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd
similarity index 93%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/column/column_factories.pxd
rename to python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd
index 2faff21a77b..f1a326bcd40 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/column/column_factories.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd
@@ -1,12 +1,9 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from rmm._lib.device_buffer cimport device_buffer
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
-from cudf._lib.pylibcudf.libcudf.types cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.libcudf.types cimport (
     bitmask_type,
     data_type,
     mask_state,
@@ -14,6 +11,8 @@ from cudf._lib.pylibcudf.libcudf.types cimport (
     type_id,
 )

+from rmm._lib.device_buffer cimport device_buffer
+

 cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil:
     cdef unique_ptr[column] make_numeric_column(data_type type,
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/column/column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column_view.pxd
similarity index 97%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/column/column_view.pxd
rename to python/pylibcudf/pylibcudf/libcudf/column/column_view.pxd
index c6403babe89..c0e971eb5bd 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/column/column_view.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/column/column_view.pxd
@@ -2,12 +2,7 @@
 from libcpp cimport bool
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.types cimport (
-    bitmask_type,
-    data_type,
-    size_type,
-)
+from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type


 cdef extern from "cudf/column/column_view.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd
similarity index 77%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/concatenate.pxd
rename to python/pylibcudf/pylibcudf/libcudf/concatenate.pxd
index 0c362390ff2..92f5a185a54 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/concatenate.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd
@@ -2,13 +2,12 @@
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
+from pylibcudf.libcudf.column.column cimport column, column_view
+from pylibcudf.libcudf.table.table cimport table, table_view
+from pylibcudf.libcudf.utilities.host_span cimport host_span

 from rmm._lib.device_buffer cimport device_buffer

-from cudf._lib.pylibcudf.libcudf.column.column cimport column, column_view
-from cudf._lib.pylibcudf.libcudf.table.table cimport table, table_view
-from cudf._lib.pylibcudf.libcudf.utilities.host_span cimport host_span
-

 cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil:
     # The versions of concatenate taking vectors don't exist in libcudf
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd
similarity index 85%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/contiguous_split.pxd
rename to python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd
index b06feacb016..cadac6a0022 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/contiguous_split.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd
@@ -3,12 +3,11 @@
 from libc.stdint cimport uint8_t
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport size_type

 from rmm._lib.device_buffer cimport device_buffer

-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
-

 cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" nogil:
     cdef cppclass packed_columns:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/copying.pxd b/python/pylibcudf/pylibcudf/libcudf/copying.pxd
similarity index 74%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/copying.pxd
rename to python/pylibcudf/pylibcudf/libcudf/copying.pxd
index 001489d69bf..4d4a4ba9b89 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/copying.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/copying.pxd
@@ -5,19 +5,18 @@
 from libcpp cimport bool
 from libcpp.functional cimport reference_wrapper
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
-
-from rmm._lib.device_buffer cimport device_buffer
-
-from cudf._lib.exception_handler cimport cudf_exception_handler
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport (
+from pylibcudf.exception_handler cimport libcudf_exception_handler
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport (
     column_view,
     mutable_column_view,
 )
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport size_type
+
+from rmm._lib.device_buffer cimport device_buffer

 ctypedef const scalar constscalar
@@ -30,25 +29,25 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         const table_view& source_table,
         const column_view& gather_map,
         out_of_bounds_policy policy
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] shift(
         const column_view& input,
         size_type offset,
         const scalar& fill_values
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[table] scatter (
         const table_view& source_table,
         const column_view& scatter_map,
         const table_view& target_table,
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[table] scatter (
         const vector[reference_wrapper[constscalar]]& source_scalars,
         const column_view& indices,
         const table_view& target,
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cpdef enum class mask_allocation_policy(int32_t):
         NEVER
@@ -57,22 +56,22 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:

     cdef unique_ptr[column] empty_like (
         const column_view& input_column
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] allocate_like (
         const column_view& input_column,
         mask_allocation_policy policy
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] allocate_like (
         const column_view& input_column,
         size_type size,
         mask_allocation_policy policy
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[table] empty_like (
         const table_view& input_table
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef void copy_range_in_place (
         const column_view& input_column,
@@ -80,7 +79,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         size_type input_begin,
         size_type input_end,
         size_type target_begin
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] copy_range (
         const column_view& input_column,
@@ -88,68 +87,68 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         size_type input_begin,
         size_type input_end,
         size_type target_begin
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef vector[column_view] slice (
         const column_view& input_column,
         vector[size_type] indices
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef vector[table_view] slice (
         const table_view& input_table,
         vector[size_type] indices
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef vector[column_view] split (
         const column_view& input_column,
         vector[size_type] splits
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef vector[table_view] split (
         const table_view& input_table,
         vector[size_type] splits
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] copy_if_else (
         const column_view& lhs,
         const column_view& rhs,
         const column_view& boolean_mask
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] copy_if_else (
         const scalar& lhs,
         const column_view& rhs,
         const column_view& boolean_mask
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] copy_if_else (
         const column_view& lhs,
         const scalar& rhs,
         const column_view boolean_mask
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] copy_if_else (
         const scalar& lhs,
         const scalar& rhs,
         const column_view boolean_mask
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[table] boolean_mask_scatter (
         const table_view& input,
         const table_view& target,
         const column_view& boolean_mask
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[table] boolean_mask_scatter (
         const vector[reference_wrapper[constscalar]]& input,
         const table_view& target,
         const column_view& boolean_mask
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[scalar] get_element (
         const column_view& input,
         size_type index
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cpdef enum class sample_with_replacement(bool):
         FALSE
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/copying.pyx b/python/pylibcudf/pylibcudf/libcudf/copying.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/copying.pyx
rename to python/pylibcudf/pylibcudf/libcudf/copying.pyx
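Every `except +cudf_exception_handler` above becomes `except +libcudf_exception_handler`, re-pointing the declarations at the handler's new home in `pylibcudf/exception_handler.pxd`; the mechanism itself is Cython's custom C++ exception translation. A condensed, self-contained Cython sketch of that mechanism (compiled in C++ mode), not pylibcudf's actual handler: the real catch list covers many more libcudf exception types, and `may_throw`/`demo_exception_handler` are illustrative names only.

    # demo.pyx -- compile with language=c++; a sketch, not pylibcudf's handler.
    cdef extern from *:
        """
        #include <Python.h>
        #include <stdexcept>

        // Called from the generated catch block; rethrow and translate the
        // in-flight C++ exception into a Python one.
        void demo_exception_handler() {
          try {
            throw;
          } catch (const std::invalid_argument& e) {
            PyErr_SetString(PyExc_ValueError, e.what());
          } catch (const std::exception& e) {
            PyErr_SetString(PyExc_RuntimeError, e.what());
          }
        }

        inline void may_throw(bool fail) {
          if (fail) { throw std::invalid_argument("boom"); }
        }
        """
        cdef void demo_exception_handler()
        cdef void may_throw(bint fail) except +demo_exception_handler

    def call(bint fail):
        may_throw(fail)  # raises ValueError("boom") when fail is True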
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
similarity index 92%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/datetime.pxd
rename to python/pylibcudf/pylibcudf/libcudf/datetime.pxd
index 7db77b9c7c5..a4465343197 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/datetime.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
@@ -1,10 +1,9 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport scalar


 cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/experimental.pxd b/python/pylibcudf/pylibcudf/libcudf/experimental.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/experimental.pxd
rename to python/pylibcudf/pylibcudf/libcudf/experimental.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/expressions.pxd b/python/pylibcudf/pylibcudf/libcudf/expressions.pxd
similarity index 90%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/expressions.pxd
rename to python/pylibcudf/pylibcudf/libcudf/expressions.pxd
index 427e16d4ff8..5ba2dff6074 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/expressions.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/expressions.pxd
@@ -3,15 +3,14 @@
 from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.scalar.scalar cimport (
     duration_scalar,
     numeric_scalar,
     timestamp_scalar,
 )
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "cudf/ast/expressions.hpp" namespace "cudf::ast" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/expressions.pyx b/python/pylibcudf/pylibcudf/libcudf/expressions.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/expressions.pyx
rename to python/pylibcudf/pylibcudf/libcudf/expressions.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/filling.pxd
similarity index 74%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/filling.pxd
rename to python/pylibcudf/pylibcudf/libcudf/filling.pxd
index 16ed682f930..7bed80050d2 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/filling.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/filling.pxd
@@ -2,16 +2,15 @@
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport (
     column_view,
     mutable_column_view,
 )
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "cudf/filling.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/groupby.pxd b/python/pylibcudf/pylibcudf/libcudf/groupby.pxd
similarity index 83%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/groupby.pxd
rename to python/pylibcudf/pylibcudf/libcudf/groupby.pxd
index 16607cc3711..848462131fe 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/groupby.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/groupby.pxd
@@ -5,25 +5,24 @@
 from libcpp.functional cimport reference_wrapper
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.aggregation cimport (
+from pylibcudf.libcudf.aggregation cimport (
     groupby_aggregation,
     groupby_scan_aggregation,
 )
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.replace cimport replace_policy
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.replace cimport replace_policy
+from pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport (
     null_order,
     null_policy,
     order,
     size_type,
     sorted,
 )
-from cudf._lib.pylibcudf.libcudf.utilities.host_span cimport host_span
+from pylibcudf.libcudf.utilities.host_span cimport host_span

 # workaround for https://github.com/cython/cython/issues/3885
 ctypedef const scalar constscalar
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/hash.pxd b/python/pylibcudf/pylibcudf/libcudf/hash.pxd
similarity index 86%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/hash.pxd
rename to python/pylibcudf/pylibcudf/libcudf/hash.pxd
index 5346252df69..51678ba69d8 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/hash.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/hash.pxd
@@ -3,10 +3,9 @@
 from libc.stdint cimport uint32_t, uint64_t
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view


 cdef extern from "cudf/hashing.hpp" namespace "cudf::hashing" nogil:
diff --git a/python/pylibcudf/pylibcudf/libcudf/interop.pxd b/python/pylibcudf/pylibcudf/libcudf/interop.pxd
new file mode 100644
index 00000000000..9228c017d93
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/libcudf/interop.pxd
@@ -0,0 +1,86 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport shared_ptr, unique_ptr
+from libcpp.string cimport string
+from libcpp.vector cimport vector
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
+
+
+cdef extern from "dlpack/dlpack.h" nogil:
+    ctypedef struct DLManagedTensor:
+        void(*deleter)(DLManagedTensor*) except +
+
+
+# The Arrow structs are not namespaced.
+cdef extern from "cudf/interop.hpp" nogil:
+    cdef struct ArrowSchema:
+        void (*release)(ArrowSchema*) noexcept nogil
+
+    cdef struct ArrowArray:
+        void (*release)(ArrowArray*) noexcept nogil
+
+    cdef struct ArrowArrayStream:
+        void (*release)(ArrowArrayStream*) noexcept nogil
+
+    cdef struct ArrowDeviceArray:
+        ArrowArray array
+
+
+cdef extern from "cudf/interop.hpp" namespace "cudf" \
+        nogil:
+    cdef unique_ptr[table] from_dlpack(const DLManagedTensor* tensor
+                                       ) except +
+
+    DLManagedTensor* to_dlpack(table_view input_table
+                               ) except +
+
+    cdef cppclass column_metadata:
+        column_metadata() except +
+        column_metadata(string name_) except +
+        string name
+        vector[column_metadata] children_meta
+
+    cdef unique_ptr[table] from_arrow_stream(ArrowArrayStream* input) except +
+    cdef unique_ptr[column] from_arrow_column(
+        const ArrowSchema* schema,
+        const ArrowArray* input
+    ) except +
+
+
+cdef extern from *:
+    # Rather than exporting the underlying functions directly to Cython, we expose
+    # these wrappers that handle the release to avoid needing to teach Cython how
+    # to handle unique_ptrs with custom deleters that aren't default constructible.
+    # This will go away once we introduce cudf::arrow_column (need a
+    # cudf::arrow_schema as well), see
+    # https://github.com/rapidsai/cudf/issues/16104.
+    """
+    #include <cudf/interop.hpp>
+    #include <nanoarrow/nanoarrow.h>
+
+    ArrowSchema* to_arrow_schema_raw(
+      cudf::table_view const& input,
+      cudf::host_span<cudf::column_metadata const> metadata) {
+      return to_arrow_schema(input, metadata).release();
+    }
+
+    ArrowArray* to_arrow_host_raw(
+      cudf::table_view const& tbl,
+      rmm::cuda_stream_view stream = cudf::get_default_stream(),
+      rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) {
+      // Assumes the sync event is null and the data is already on the host.
+      ArrowArray *arr = new ArrowArray();
+      auto device_arr = cudf::to_arrow_host(tbl, stream, mr);
+      ArrowArrayMove(&device_arr->array, arr);
+      return arr;
+    }
+    """
+    cdef ArrowSchema *to_arrow_schema_raw(
+        const table_view& tbl,
+        const vector[column_metadata]& metadata,
+    ) except + nogil
+    cdef ArrowArray* to_arrow_host_raw(const table_view& tbl) except + nogil
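The verbatim wrappers return owning raw pointers on purpose: the Cython layer in interop.pyx hands them to `PyCapsule_New`, and the capsule destructors (`_release_schema`/`_release_array`) invoke the struct's Arrow `release` callback and then free it, so no `unique_ptr` with a custom deleter ever crosses the language boundary. A hedged consumer-side sketch built only on what the patch itself establishes (`pa.table()` accepts objects exposing `__arrow_c_array__`); `_TableWithArrowMetadata` is a private helper, not a stable API.

    import pyarrow as pa
    import pylibcudf as plc
    from pylibcudf.interop import _TableWithArrowMetadata  # private helper

    plc_tbl = plc.interop.from_arrow(pa.table({"x": [1, 2, 3]}))
    # pa.table() imports the schema/array capsules produced by the raw
    # wrappers; the capsule destructors release the Arrow structs afterwards.
    pa_tbl = pa.table(_TableWithArrowMetadata(plc_tbl, metadata=["x"]))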
+cimport pylibcudf.libcudf.io.types as cudf_io_types
+cimport pylibcudf.libcudf.table.table_view as cudf_table_view
 from libc.stdint cimport uint8_t
 from libcpp cimport bool
 from libcpp.map cimport map
 from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.string cimport string
 from libcpp.vector cimport vector
-
-cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
-cimport cudf._lib.pylibcudf.libcudf.table.table_view as cudf_table_view
-from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type
+from pylibcudf.libcudf.types cimport data_type, size_type


 cdef extern from "cudf/io/csv.hpp" \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/data_sink.pxd b/python/pylibcudf/pylibcudf/libcudf/io/data_sink.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/data_sink.pxd
rename to python/pylibcudf/pylibcudf/libcudf/io/data_sink.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/datasource.pxd b/python/pylibcudf/pylibcudf/libcudf/io/datasource.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/datasource.pxd
rename to python/pylibcudf/pylibcudf/libcudf/io/datasource.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd
similarity index 91%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd
rename to python/pylibcudf/pylibcudf/libcudf/io/json.pxd
index 86621ae184f..1c74f8ca3ac 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd
@@ -1,15 +1,14 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+cimport pylibcudf.libcudf.io.types as cudf_io_types
+cimport pylibcudf.libcudf.table.table_view as cudf_table_view
 from libc.stdint cimport int32_t, uint8_t
 from libcpp cimport bool
 from libcpp.map cimport map
 from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.string cimport string
 from libcpp.vector cimport vector
-
-cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
-cimport cudf._lib.pylibcudf.libcudf.table.table_view as cudf_table_view
-from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type
+from pylibcudf.libcudf.types cimport data_type, size_type


 cdef extern from "cudf/io/json.hpp" \
@@ -28,8 +27,8 @@ cdef extern from "cudf/io/json.hpp" \
         cudf_io_types.source_info get_source() except +
         vector[string] get_dtypes() except +
         cudf_io_types.compression_type get_compression() except +
-        size_type get_byte_range_offset() except +
-        size_type get_byte_range_size() except +
+        size_t get_byte_range_offset() except +
+        size_t get_byte_range_size() except +
         bool is_enabled_lines() except +
         bool is_enabled_mixed_types_as_string() except +
         bool is_enabled_prune_columns() except +
@@ -42,8 +41,8 @@ cdef extern from "cudf/io/json.hpp" \
         void set_compression(
             cudf_io_types.compression_type compression
         ) except +
-        void set_byte_range_offset(size_type offset) except +
-        void set_byte_range_size(size_type size) except +
+        void set_byte_range_offset(size_t offset) except +
+        void set_byte_range_size(size_t size) except +
         void enable_lines(bool val) except +
         void enable_mixed_types_as_string(bool val) except +
         void enable_prune_columns(bool val) except +
@@ -74,10 +73,10 @@ cdef extern from "cudf/io/json.hpp" \
             cudf_io_types.compression_type compression
         ) except +
         json_reader_options_builder& byte_range_offset(
-            size_type offset
+            size_t offset
         ) except +
         json_reader_options_builder& byte_range_size(
-            size_type size
+            size_t size
         ) except +
         json_reader_options_builder& lines(
             bool val
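The `size_type` to `size_t` change running through the json.pxd diff above is a width fix: `cudf::size_type` is a 32-bit row-count type, while byte-range offsets and sizes are byte counts that can exceed `INT32_MAX` on large files. A hedged builder sketch (the `make_opts` helper and the 3 GB figure are illustrative, and the `builder()` factory is assumed from the rest of this file rather than shown in the hunks):

```cython
# Sketch only: a byte range starting past INT32_MAX is representable now
# that the byte-range setters take size_t instead of 32-bit size_type.
cdef json_reader_options make_opts(cudf_io_types.source_info src):
    return (
        json_reader_options.builder(src)
        .byte_range_offset(3_000_000_000)  # ~3 GB, would overflow size_type
        .byte_range_size(500_000_000)
        .lines(True)
        .build()
    )
```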
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pyx b/python/pylibcudf/pylibcudf/libcudf/io/json.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pyx
rename to python/pylibcudf/pylibcudf/libcudf/io/json.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/orc.pxd b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd
similarity index 97%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/orc.pxd
rename to python/pylibcudf/pylibcudf/libcudf/io/orc.pxd
index 25f91849dea..e4a09b8feb2 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/orc.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+cimport pylibcudf.libcudf.io.types as cudf_io_types
+cimport pylibcudf.libcudf.table.table_view as cudf_table_view
 from libc.stdint cimport int64_t, uint8_t
 from libcpp cimport bool
 from libcpp.map cimport map
@@ -7,10 +9,7 @@ from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.optional cimport optional
 from libcpp.string cimport string
 from libcpp.vector cimport vector
-
-cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
-cimport cudf._lib.pylibcudf.libcudf.table.table_view as cudf_table_view
-from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type
+from pylibcudf.libcudf.types cimport data_type, size_type


 cdef extern from "cudf/io/orc.hpp" \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/orc_metadata.pxd b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd
similarity index 94%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/orc_metadata.pxd
rename to python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd
index a23655b06f8..db6cb0cdfa5 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/orc_metadata.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd
@@ -1,13 +1,12 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+cimport pylibcudf.libcudf.io.types as cudf_io_types
 from libc.stdint cimport int32_t, int64_t, uint32_t, uint64_t
 from libcpp cimport bool
 from libcpp.optional cimport optional
 from libcpp.string cimport string
 from libcpp.vector cimport vector
-
-cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
-from cudf._lib.variant cimport monostate, variant
+from pylibcudf.variant cimport monostate, variant


 cdef extern from "cudf/io/orc_metadata.hpp" \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd
similarity index 80%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/parquet.pxd
rename to python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd
index d86915c7da9..de6a6c1e82d 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/parquet.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd
@@ -8,23 +8,31 @@ from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.optional cimport optional
 from libcpp.string cimport string
 from libcpp.vector cimport vector
-
-cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
-cimport cudf._lib.pylibcudf.libcudf.table.table_view as cudf_table_view
-from cudf._lib.pylibcudf.libcudf.expressions cimport expression
-from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type
+from pylibcudf.libcudf.expressions cimport expression
+from pylibcudf.libcudf.io.types cimport (
+    compression_type,
+    dictionary_policy,
+    partition_info,
+    sink_info,
+    source_info,
+    statistics_freq,
+    table_input_metadata,
+    table_with_metadata,
+)
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport data_type, size_type


 cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
     cdef cppclass parquet_reader_options:
         parquet_reader_options() except +
-        cudf_io_types.source_info get_source_info() except +
+        source_info get_source_info() except +
         vector[vector[size_type]] get_row_groups() except +
         const optional[reference_wrapper[expression]]& get_filter() except +
         data_type get_timestamp_type() except +
         bool is_enabled_use_pandas_metadata() except +
         bool is_enabled_arrow_schema() except +
-
+        bool is_enabled_allow_mismatched_pq_schemas() except +
         # setter

         void set_filter(expression &filter) except +
@@ -33,18 +41,19 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         void set_row_groups(vector[vector[size_type]] row_grp) except +
         void set_skip_rows(int64_t val) except +
         void enable_use_arrow_schema(bool val) except +
+        void enable_allow_mismatched_pq_schemas(bool val) except +
         void enable_use_pandas_metadata(bool val) except +
         void set_timestamp_type(data_type type) except +

         @staticmethod
         parquet_reader_options_builder builder(
-            cudf_io_types.source_info src
+            source_info src
         ) except +

     cdef cppclass parquet_reader_options_builder:
         parquet_reader_options_builder() except +
         parquet_reader_options_builder(
-            cudf_io_types.source_info src
+            source_info src
         ) except +
         parquet_reader_options_builder& columns(
             vector[string] col_names
@@ -61,6 +70,9 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         parquet_reader_options_builder& use_arrow_schema(
             bool val
         ) except +
+        parquet_reader_options_builder& allow_mismatched_pq_schemas(
+            bool val
+        ) except +
         parquet_reader_options_builder& timestamp_type(
             data_type type
         ) except +
@@ -69,15 +81,15 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         ) except +
         parquet_reader_options build() except +

-    cdef cudf_io_types.table_with_metadata read_parquet(
+    cdef table_with_metadata read_parquet(
         parquet_reader_options args) except +

     cdef cppclass parquet_writer_options_base:
         parquet_writer_options_base() except +
-        cudf_io_types.sink_info get_sink_info() except +
-        cudf_io_types.compression_type get_compression() except +
-        cudf_io_types.statistics_freq get_stats_level() except +
-        const optional[cudf_io_types.table_input_metadata]& get_metadata(
+        sink_info get_sink_info() except +
+        compression_type get_compression() except +
+        statistics_freq get_stats_level() except +
+        const optional[table_input_metadata]& get_metadata(
         ) except +
         size_t get_row_group_size_bytes() except +
         size_type get_row_group_size_rows() except +
@@ -87,16 +99,16 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         bool is_enabled_write_arrow_schema() except +

         void set_metadata(
-            cudf_io_types.table_input_metadata m
+            table_input_metadata m
         ) except +
         void set_key_value_metadata(
             vector[map[string, string]] kvm
         ) except +
         void set_stats_level(
-            cudf_io_types.statistics_freq sf
+            statistics_freq sf
         ) except +
         void set_compression(
-            cudf_io_types.compression_type compression
+            compression_type compression
         ) except +
         void set_int96_timestamps(
             bool enabled
@@ -111,14 +123,14 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         void set_max_dictionary_size(size_t val) except +
         void enable_write_v2_headers(bool val) except +
         void enable_write_arrow_schema(bool val) except +
-        void set_dictionary_policy(cudf_io_types.dictionary_policy policy) except +
+        void set_dictionary_policy(dictionary_policy policy) except +

     cdef cppclass parquet_writer_options(parquet_writer_options_base):
         parquet_writer_options() except +
-        cudf_table_view.table_view get_table() except +
+        table_view get_table() except +
         string get_column_chunks_file_paths() except +
         void set_partitions(
-            vector[cudf_io_types.partition_info] partitions
+            vector[partition_info] partitions
         ) except +
         void set_column_chunks_file_paths(
             vector[string] column_chunks_file_paths
@@ -126,24 +138,24 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:

         @staticmethod
         parquet_writer_options_builder builder(
-            cudf_io_types.sink_info sink_,
-            cudf_table_view.table_view table_
+            sink_info sink_,
+            table_view table_
         ) except +

     cdef cppclass parquet_writer_options_builder_base[BuilderT, OptionsT]:
         parquet_writer_options_builder_base() except +
         BuilderT& metadata(
-            cudf_io_types.table_input_metadata m
+            table_input_metadata m
         ) except +
         BuilderT& key_value_metadata(
             vector[map[string, string]] kvm
         ) except +
         BuilderT& stats_level(
-            cudf_io_types.statistics_freq sf
+            statistics_freq sf
         ) except +
         BuilderT& compression(
-            cudf_io_types.compression_type compression
+            compression_type compression
         ) except +
         BuilderT& int96_timestamps(
             bool enabled
@@ -173,7 +185,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
             bool val
         ) except +
         BuilderT& dictionary_policy(
-            cudf_io_types.dictionary_policy val
+            dictionary_policy val
         ) except +
         OptionsT build() except +

@@ -182,11 +194,11 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
                                          parquet_writer_options]):
         parquet_writer_options_builder() except +
         parquet_writer_options_builder(
-            cudf_io_types.sink_info sink_,
-            cudf_table_view.table_view table_
+            sink_info sink_,
+            table_view table_
         ) except +
         parquet_writer_options_builder& partitions(
-            vector[cudf_io_types.partition_info] partitions
+            vector[partition_info] partitions
         ) except +
         parquet_writer_options_builder& column_chunks_file_paths(
             vector[string] column_chunks_file_paths
@@ -201,7 +213,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:

         @staticmethod
         chunked_parquet_writer_options_builder builder(
-            cudf_io_types.sink_info sink_,
+            sink_info sink_,
         ) except +

     cdef cppclass chunked_parquet_writer_options_builder(
@@ -210,18 +222,18 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
     ):
         chunked_parquet_writer_options_builder() except +
         chunked_parquet_writer_options_builder(
-            cudf_io_types.sink_info sink_,
+            sink_info sink_,
         ) except +

     cdef cppclass parquet_chunked_writer:
         parquet_chunked_writer() except +
         parquet_chunked_writer(chunked_parquet_writer_options args) except +
         parquet_chunked_writer& write(
-            cudf_table_view.table_view table_,
+            table_view table_,
         ) except +
         parquet_chunked_writer& write(
-            const cudf_table_view.table_view& table_,
-            const vector[cudf_io_types.partition_info]& partitions,
+            const table_view& table_,
+            const vector[partition_info]& partitions,
         ) except +
         unique_ptr[vector[uint8_t]] close(
             vector[string] column_chunks_file_paths,
@@ -237,7 +249,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
             size_t pass_read_limit,
             const parquet_reader_options& options) except +
         bool has_next() except +
-        cudf_io_types.table_with_metadata read_chunk() except +
+        table_with_metadata read_chunk() except +

     cdef unique_ptr[vector[uint8_t]] merge_row_group_metadata(
         const vector[unique_ptr[vector[uint8_t]]]& metadata_list
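The tail of the parquet.pxd diff declares the chunked-reader interface. For orientation, a sketch of the drain loop it implies (the three-argument constructor is inferred from the partially visible declaration; `drain` is an illustrative helper and error handling is omitted):

```cython
from cython.operator cimport dereference as deref
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move


cdef size_t drain(size_t chunk_read_limit, size_t pass_read_limit,
                  const parquet_reader_options& opts) except *:
    # Read the file in bounded-size chunks: once the reader is constructed
    # with its byte limits, has_next()/read_chunk() is the whole protocol.
    cdef unique_ptr[chunked_parquet_reader] reader
    cdef table_with_metadata chunk
    cdef size_t n_chunks = 0
    reader.reset(new chunked_parquet_reader(
        chunk_read_limit, pass_read_limit, opts))
    while deref(reader).has_next():
        chunk = move(deref(reader).read_chunk())  # one bounded table
        n_chunks += 1
    return n_chunks
```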
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/parquet_metadata.pxd b/python/pylibcudf/pylibcudf/libcudf/io/parquet_metadata.pxd
similarity index 89%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/parquet_metadata.pxd
rename to python/pylibcudf/pylibcudf/libcudf/io/parquet_metadata.pxd
index 34a299b73ab..8e6da56c9a6 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/parquet_metadata.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/parquet_metadata.pxd
@@ -1,12 +1,11 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
+cimport pylibcudf.libcudf.io.types as cudf_io_types
 from libc.stdint cimport int64_t
 from libcpp.string cimport string
 from libcpp.unordered_map cimport unordered_map
 from libcpp.vector cimport vector
-
-cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "cudf/io/parquet_metadata.hpp" namespace "cudf::io" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/text.pxd b/python/pylibcudf/pylibcudf/libcudf/io/text.pxd
similarity index 96%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/text.pxd
rename to python/pylibcudf/pylibcudf/libcudf/io/text.pxd
index bec223d4079..14397ef970d 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/text.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/text.pxd
@@ -4,8 +4,7 @@ from libc.stdint cimport uint64_t
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column cimport column


 cdef extern from "cudf/io/text/byte_range_info.hpp" \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/timezone.pxd b/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd
similarity index 86%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/timezone.pxd
rename to python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd
index 88cb5544dc1..676901efcec 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/timezone.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd
@@ -4,8 +4,7 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.optional cimport optional
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table cimport table


 cdef extern from "cudf/timezone.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/types.pxd b/python/pylibcudf/pylibcudf/libcudf/io/types.pxd
similarity index 72%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/types.pxd
rename to python/pylibcudf/pylibcudf/libcudf/io/types.pxd
index 0a6bddcd907..5f3be2f0727 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/types.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/types.pxd
@@ -1,20 +1,17 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+cimport pylibcudf.libcudf.io.data_sink as cudf_io_data_sink
+cimport pylibcudf.libcudf.io.datasource as cudf_io_datasource
+cimport pylibcudf.libcudf.table.table_view as cudf_table_view
 from libc.stdint cimport int32_t, uint8_t
 from libcpp cimport bool
 from libcpp.map cimport map
-from libcpp.memory cimport shared_ptr, unique_ptr
-from libcpp.pair cimport pair
+from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.unordered_map cimport unordered_map
 from libcpp.vector cimport vector
-from pyarrow.includes.libarrow cimport CRandomAccessFile
-
-cimport cudf._lib.pylibcudf.libcudf.io.data_sink as cudf_io_data_sink
-cimport cudf._lib.pylibcudf.libcudf.io.datasource as cudf_io_datasource
-cimport cudf._lib.pylibcudf.libcudf.table.table_view as cudf_table_view
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "cudf/io/types.hpp" \
@@ -43,32 +40,32 @@ cdef extern from "cudf/io/types.hpp" \
     cpdef enum class io_type(int32_t):
         FILEPATH
         HOST_BUFFER
+        DEVICE_BUFFER
         VOID
         USER_IMPLEMENTED

     cpdef enum class statistics_freq(int32_t):
-        STATISTICS_NONE = 0,
-        STATISTICS_ROWGROUP = 1,
-        STATISTICS_PAGE = 2,
-        STATISTICS_COLUMN = 3,
+        STATISTICS_NONE,
+        STATISTICS_ROWGROUP,
+        STATISTICS_PAGE,
+        STATISTICS_COLUMN,

     cpdef enum class dictionary_policy(int32_t):
-        NEVER = 0,
-        ADAPTIVE = 1,
-        ALWAYS = 2,
-
-    cdef extern from "cudf/io/types.hpp" namespace "cudf::io" nogil:
-        cpdef enum class column_encoding(int32_t):
-            USE_DEFAULT = -1
-            DICTIONARY = 0
-            PLAIN = 1
-            DELTA_BINARY_PACKED = 2
-            DELTA_LENGTH_BYTE_ARRAY =3
-            DELTA_BYTE_ARRAY = 4
-            BYTE_STREAM_SPLIT = 5
-            DIRECT = 6
-            DIRECT_V2 = 7
-            DICTIONARY_V2 = 8
+        NEVER,
+        ADAPTIVE,
+        ALWAYS,
+
+    cpdef enum class column_encoding(int32_t):
+        USE_DEFAULT
+        DICTIONARY
+        PLAIN
+        DELTA_BINARY_PACKED
+        DELTA_LENGTH_BYTE_ARRAY
+        DELTA_BYTE_ARRAY
+        BYTE_STREAM_SPLIT
+        DIRECT
+        DIRECT_V2
+        DICTIONARY_V2

     cdef cppclass column_name_info:
         string name
@@ -77,7 +74,6 @@ cdef extern from "cudf/io/types.hpp" \

     cdef cppclass table_metadata:
         table_metadata() except +
-        vector[string] column_names
         map[string, string] user_data
         vector[unordered_map[string, string]] per_file_user_data
         vector[column_name_info] schema_info
@@ -121,10 +117,7 @@ cdef extern from "cudf/io/types.hpp" \
         host_buffer(const char* data, size_t size)

     cdef cppclass source_info:
-        io_type type
         const vector[string]& filepaths() except +
-        const vector[host_buffer]& buffers() except +
-        vector[shared_ptr[CRandomAccessFile]] files

         source_info() except +
         source_info(const vector[string] &filepaths) except +
@@ -133,9 +126,7 @@ cdef extern from "cudf/io/types.hpp" \
         source_info(const vector[cudf_io_datasource.datasource*] &datasources) except +

     cdef cppclass sink_info:
-        io_type type
         const vector[string]& filepaths()
-        const vector[vector[char] *]& buffers()
         const vector[cudf_io_data_sink.data_sink *]& user_sinks()

         sink_info() except +
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/types.pyx b/python/pylibcudf/pylibcudf/libcudf/io/types.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/io/types.pyx
rename to python/pylibcudf/pylibcudf/libcudf/io/types.pyx
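Two fixes ride along in the io/types.pxd diff: `column_encoding` moves out of a redundantly nested `cdef extern` block, and the enum members drop their hard-coded values so the C++ definitions stay the single source of truth. As a reminder of what the `cpdef enum class` pattern buys (header and names below are illustrative, not from the patch):

```cython
from libc.stdint cimport int32_t


# Sketch: `cpdef` (rather than `cdef`) exposes the scoped C++ enum to Python
# as well as to Cython, so values round-trip without manual int casts.
cdef extern from "demo/types.hpp" namespace "demo" nogil:
    cpdef enum class compression(int32_t):
        NONE
        GZIP
        SNAPPY
```

Python callers can then import `compression` from the compiled module and pass e.g. `compression.SNAPPY` straight through to the C++ API.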
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/join.pxd b/python/pylibcudf/pylibcudf/libcudf/join.pxd
similarity index 88%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/join.pxd
rename to python/pylibcudf/pylibcudf/libcudf/join.pxd
index 32cd17f7c11..6f6c145b23c 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/join.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/join.pxd
@@ -4,14 +4,13 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
 from libcpp.vector cimport vector
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport null_equality, size_type
 from rmm._lib.device_uvector cimport device_uvector

-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport null_equality, size_type
-
 ctypedef unique_ptr[device_uvector[size_type]] gather_map_type
 ctypedef pair[gather_map_type, gather_map_type] gather_map_pair_type
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/labeling.pxd b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd
similarity index 78%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/labeling.pxd
rename to python/pylibcudf/pylibcudf/libcudf/labeling.pxd
index 54731bf29af..ec6ef6b2a41 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/labeling.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd
@@ -1,9 +1,8 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view


 cdef extern from "cudf/labeling/label_bins.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/__init__.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/__init__.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/__init__.py b/python/pylibcudf/pylibcudf/libcudf/lists/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/__init__.py
rename to python/pylibcudf/pylibcudf/libcudf/lists/__init__.py
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
similarity index 78%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/combine.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
index 728bd840f71..d077958ce03 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/combine.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
@@ -1,10 +1,9 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.table.table_view cimport table_view


 cdef extern from "cudf/lists/combine.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd
similarity index 63%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd
index 82aed7d70a0..81a5ad46389 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd
@@ -2,14 +2,11 @@
 from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.exception_handler cimport cudf_exception_handler
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
-    lists_column_view,
-)
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.exception_handler cimport libcudf_exception_handler
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
+from pylibcudf.libcudf.scalar.scalar cimport scalar


 cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:
@@ -21,25 +18,25 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:
     cdef unique_ptr[column] contains(
         const lists_column_view& lists,
         const scalar& search_key,
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] contains(
         const lists_column_view& lists,
         const column_view& search_keys,
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] contains_nulls(
         const lists_column_view& lists,
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] index_of(
         const lists_column_view& lists,
         const scalar& search_key,
         duplicate_find_option find_option,
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler

     cdef unique_ptr[column] index_of(
         const lists_column_view& lists,
         const column_view& search_keys,
         duplicate_find_option find_option,
-    ) except +cudf_exception_handler
+    ) except +libcudf_exception_handler
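The `except +cudf_exception_handler` to `except +libcudf_exception_handler` rename above relies on Cython's custom C++ exception-handler hook. A self-contained sketch of the mechanism with demo names (this is not the pylibcudf handler itself):

```cython
# Sketch: `except +handler` routes any C++ exception thrown by the declared
# function through `handler`, which re-raises the in-flight exception and
# maps chosen types to Python errors; the catch-all keeps anything derived
# from std::exception from escaping the handler.
cdef extern from *:
    """
    #include <Python.h>
    #include <stdexcept>

    void demo_handler()
    {
        try {
            throw;  // rethrow the active C++ exception
        } catch (std::overflow_error const& e) {
            PyErr_SetString(PyExc_OverflowError, e.what());
        } catch (std::exception const& e) {
            PyErr_SetString(PyExc_RuntimeError, e.what());
        }
    }
    """
    cdef void demo_handler()

cdef extern from "demo/ops.hpp" nogil:  # illustrative header
    cdef int might_throw() except +demo_handler
```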
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/count_elements.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd
similarity index 61%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/count_elements.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd
index ba57a839fbc..e283551ed0c 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/count_elements.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd
@@ -1,11 +1,8 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
-    lists_column_view,
-)
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view


 cdef extern from "cudf/lists/count_elements.hpp" namespace "cudf::lists" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/explode.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd
similarity index 59%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/explode.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd
index 622a866f593..c64b2715cca 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/explode.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd
@@ -1,10 +1,9 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "cudf/lists/explode.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/extract.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd
similarity index 64%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/extract.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd
index 53609ba8830..2ea060d87de 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/extract.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd
@@ -1,12 +1,9 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column, column_view
-from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
-    lists_column_view,
-)
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column, column_view
+from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "cudf/lists/extract.hpp" namespace "cudf::lists" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd
similarity index 76%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/filling.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd
index 8403fd179f7..54f5a8409b6 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/filling.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd
@@ -1,9 +1,8 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view


 cdef extern from "cudf/lists/filling.hpp" namespace "cudf::lists" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/gather.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd
similarity index 67%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/gather.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd
index ab7ed141365..a762c6aa333 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/gather.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd
@@ -1,11 +1,8 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
-    lists_column_view,
-)
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view


 cdef extern from "cudf/lists/gather.hpp" namespace "cudf::lists" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd
similarity index 86%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd
index 8917a6ac899..f43340a78b0 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd
@@ -1,10 +1,10 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport (
+from pylibcudf.libcudf.column.column_view cimport (
     column_view,
     mutable_column_view,
 )
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "cudf/lists/lists_column_view.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/reverse.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd
similarity index 62%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/reverse.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd
index 0382a5d42c3..43b671ebfa0 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/reverse.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd
@@ -1,11 +1,8 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
-    lists_column_view,
-)
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view


 cdef extern from "cudf/lists/reverse.hpp" namespace "cudf::lists" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/set_operations.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd
similarity index 81%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/set_operations.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd
index eb796897f87..266f04ef6b3 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/set_operations.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd
@@ -1,12 +1,9 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
-    lists_column_view,
-)
-from cudf._lib.pylibcudf.libcudf.types cimport nan_equality, null_equality
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
+from pylibcudf.libcudf.types cimport nan_equality, null_equality


 cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd
similarity index 69%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/sorting.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd
index 337ac73908b..ea45f999c47 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/sorting.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd
@@ -1,12 +1,9 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
-    lists_column_view,
-)
-from cudf._lib.pylibcudf.libcudf.types cimport null_order, order
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
+from pylibcudf.libcudf.types cimport null_order, order


 cdef extern from "cudf/lists/sorting.hpp" namespace "cudf::lists" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd
similarity index 68%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/lists/stream_compaction.pxd
rename to python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd
index b1fcf7800b0..d9df7c3ca2e 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/stream_compaction.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd
@@ -1,12 +1,9 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
-    lists_column_view,
-)
-from cudf._lib.pylibcudf.libcudf.types cimport nan_equality, null_equality
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
+from pylibcudf.libcudf.types cimport nan_equality, null_equality


 cdef extern from "cudf/lists/stream_compaction.hpp" \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/merge.pxd b/python/pylibcudf/pylibcudf/libcudf/merge.pxd
similarity index 69%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/merge.pxd
rename to python/pylibcudf/pylibcudf/libcudf/merge.pxd
index dacb3dc2d74..6930b7a0d06 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/merge.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/merge.pxd
@@ -1,11 +1,10 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+cimport pylibcudf.libcudf.types as libcudf_types
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
-
-cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view


 cdef extern from "cudf/merge.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd
similarity index 76%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/null_mask.pxd
rename to python/pylibcudf/pylibcudf/libcudf/null_mask.pxd
index 0cab404c05f..5f582091b06 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/null_mask.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd
@@ -2,19 +2,12 @@
 from libc.stdint cimport int32_t
 from libcpp.pair cimport pair
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport bitmask_type, mask_state, size_type
 from rmm._lib.device_buffer cimport device_buffer

-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport (
-    bitmask_type,
-    mask_state,
-    size_type,
-)
-
-ctypedef int32_t underlying_type_t_mask_state
-
 cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
     cdef device_buffer copy_bitmask "cudf::copy_bitmask" (
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/__init__.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/libcudf/nvtext/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/__init__.py
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/__init__.py
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd
similarity index 73%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd
index 033a820d2ef..fd768d22704 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd
@@ -2,10 +2,9 @@
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar


 cdef extern from "nvtext/byte_pair_encoding.hpp" namespace "nvtext" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd
similarity index 75%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/edit_distance.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd
index ca1f6650a5a..d459372fb8f 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/edit_distance.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd
@@ -2,9 +2,8 @@
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view


 cdef extern from "nvtext/edit_distance.hpp" namespace "nvtext" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/generate_ngrams.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd
similarity index 69%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/generate_ngrams.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd
index 2034b1c1ee5..eefae746662 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/generate_ngrams.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd
@@ -1,11 +1,10 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "nvtext/generate_ngrams.hpp" namespace "nvtext" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/jaccard.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd
similarity index 61%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/jaccard.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd
index 789a1a2c35a..16c5f7f575e 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/jaccard.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd
@@ -1,10 +1,9 @@
 # Copyright (c) 2023-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "nvtext/jaccard.hpp" namespace "nvtext" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/minhash.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd
similarity index 70%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/minhash.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd
index fc5577bf3f9..0c352a5068b 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/minhash.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd
@@ -1,10 +1,9 @@
 # Copyright (c) 2023-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd
similarity index 58%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd
index 229f4d8f5a3..89f6e5edfc4 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd
@@ -1,11 +1,10 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "nvtext/ngrams_tokenize.hpp" namespace "nvtext" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/normalize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd
similarity index 75%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/normalize.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd
index 65c63b089df..cbf121920e1 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/normalize.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd
@@ -2,9 +2,8 @@
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view


 cdef extern from "nvtext/normalize.hpp" namespace "nvtext" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd
similarity index 69%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/replace.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd
index aaad28d2684..6bcfa1d9380 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/replace.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd
@@ -1,11 +1,10 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "nvtext/replace.hpp" namespace "nvtext" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd
similarity index 79%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/stemmer.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd
index 040d4c9de63..673bffa28ae 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/stemmer.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd
@@ -2,10 +2,9 @@
 from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "nvtext/stemmer.hpp" namespace "nvtext" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/subword_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd
similarity index 92%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/subword_tokenize.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd
index cce40bcd3f6..aabac0a617b 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/subword_tokenize.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd
@@ -4,9 +4,8 @@ from libc.stdint cimport uint16_t, uint32_t
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view


 cdef extern from "nvtext/subword_tokenize.hpp" namespace "nvtext" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd
similarity index 84%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/tokenize.pxd
rename to python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd
index 721a6cabd01..34c054cf36f 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/nvtext/tokenize.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd
@@ -1,11 +1,10 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
similarity index 69%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/partitioning.pxd
rename to python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
index babb167d2a0..1ea10e8a194 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/partitioning.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
@@ -1,15 +1,14 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+cimport pylibcudf.libcudf.types as libcudf_types
 from libc.stdint cimport uint32_t
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
 from libcpp.vector cimport vector
-
-cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view


 cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/quantiles.pxd b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd
similarity index 70%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/quantiles.pxd
rename to python/pylibcudf/pylibcudf/libcudf/quantiles.pxd
index 32cfec2d4fc..cf2350fc36c 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/quantiles.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd
@@ -3,12 +3,11 @@
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport (
     interpolation,
     null_order,
     order,
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/reduce.pxd b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd
similarity index 69%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/reduce.pxd
rename to python/pylibcudf/pylibcudf/libcudf/reduce.pxd
index 3ae1f1a2906..6d2f4bd23d1 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/reduce.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd
@@ -3,15 +3,11 @@
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport pair
-
-from cudf._lib.pylibcudf.libcudf.aggregation cimport (
-    reduce_aggregation,
-    scan_aggregation,
-)
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
-from cudf._lib.pylibcudf.libcudf.types cimport data_type
+from pylibcudf.libcudf.aggregation cimport reduce_aggregation, scan_aggregation
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.libcudf.types cimport data_type


 cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/reduce.pyx b/python/pylibcudf/pylibcudf/libcudf/reduce.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/reduce.pyx
rename to python/pylibcudf/pylibcudf/libcudf/reduce.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/replace.pxd
similarity index 83%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/replace.pxd
rename to python/pylibcudf/pylibcudf/libcudf/replace.pxd
index e67efbdaba0..4ac44fc946e 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/replace.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/replace.pxd
@@ -2,15 +2,12 @@
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.types import cudf_to_np_types, np_to_cudf_types
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport (
     column_view,
     mutable_column_view,
 )
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.libcudf.scalar.scalar cimport scalar


 cdef extern from "cudf/replace.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/replace.pyx b/python/pylibcudf/pylibcudf/libcudf/replace.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/replace.pyx
rename to python/pylibcudf/pylibcudf/libcudf/replace.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/reshape.pxd b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd
similarity index 57%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/reshape.pxd
rename to python/pylibcudf/pylibcudf/libcudf/reshape.pxd
index dfd9a71c3d3..446a082ab1b 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/reshape.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd
@@ -1,11 +1,10 @@
 # Copyright (c) 2019-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/rolling.pxd b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd
similarity index 64%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/rolling.pxd
rename to python/pylibcudf/pylibcudf/libcudf/rolling.pxd
index d7844f99a73..9e76faa0eba 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/rolling.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd
@@ -1,13 +1,10 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.types import cudf_to_np_types, np_to_cudf_types
-
-from cudf._lib.pylibcudf.libcudf.aggregation cimport rolling_aggregation
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.aggregation cimport rolling_aggregation
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport size_type


 cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/round.pxd b/python/pylibcudf/pylibcudf/libcudf/round.pxd
similarity index 75%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/round.pxd
rename to python/pylibcudf/pylibcudf/libcudf/round.pxd
index 027c4634c9f..1b65133f275 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/round.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/round.pxd
@@ -2,9 +2,8 @@
 from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view


 cdef extern from "cudf/round.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/round.pyx b/python/pylibcudf/pylibcudf/libcudf/round.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/round.pyx
rename to python/pylibcudf/pylibcudf/libcudf/round.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/__init__.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/__init__.pxd
rename to python/pylibcudf/pylibcudf/libcudf/scalar/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/__init__.py b/python/pylibcudf/pylibcudf/libcudf/scalar/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/__init__.py
rename to python/pylibcudf/pylibcudf/libcudf/scalar/__init__.py
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/scalar.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd
similarity index 91%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/scalar.pxd
rename to python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd
python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd index 662eb90096e..4b40a8a26f6 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/scalar.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd @@ -3,11 +3,10 @@ from libc.stdint cimport int32_t, int64_t from libcpp cimport bool from libcpp.string cimport string - -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view -from cudf._lib.pylibcudf.libcudf.types cimport data_type -from cudf._lib.pylibcudf.libcudf.wrappers.decimals cimport scale_type +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.types cimport data_type +from pylibcudf.libcudf.wrappers.decimals cimport scale_type cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/scalar_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd similarity index 76% rename from python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/scalar_factories.pxd rename to python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd index 8092c3d637d..ee4b47935b2 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/scalar/scalar_factories.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd @@ -2,9 +2,8 @@ from libcpp.memory cimport unique_ptr from libcpp.string cimport string - -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.scalar.scalar cimport scalar cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil: diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/search.pxd b/python/pylibcudf/pylibcudf/libcudf/search.pxd similarity index 73% rename from python/cudf/cudf/_lib/pylibcudf/libcudf/search.pxd rename to python/pylibcudf/pylibcudf/libcudf/search.pxd index e2247a1366f..5a6ad5384c9 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/search.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/search.pxd @@ -1,12 +1,11 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. +cimport pylibcudf.libcudf.types as libcudf_types from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector - -cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.table.table_view cimport table_view cdef extern from "cudf/search.hpp" namespace "cudf" nogil: diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd similarity index 84% rename from python/cudf/cudf/_lib/pylibcudf/libcudf/sorting.pxd rename to python/pylibcudf/pylibcudf/libcudf/sorting.pxd index 3d7d3aa2790..9e899855486 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/sorting.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd @@ -1,17 +1,14 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
 
+cimport pylibcudf.libcudf.types as libcudf_types
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
-
-from cudf._lib.types import cudf_to_np_types, np_to_cudf_types
-
-cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
-from cudf._lib.pylibcudf.libcudf.aggregation cimport rank_method
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.aggregation cimport rank_method
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
 
 
 cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd
similarity index 85%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/stream_compaction.pxd
rename to python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd
index 11d803e5b76..7830c9478c2 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/stream_compaction.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd
@@ -3,14 +3,11 @@
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
-
-from cudf._lib.types import cudf_to_np_types, np_to_cudf_types
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport (
     nan_equality,
     nan_policy,
     null_equality,
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/stream_compaction.pyx
rename to python/pylibcudf/pylibcudf/libcudf/stream_compaction.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/strings/CMakeLists.txt
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/CMakeLists.txt
rename to python/pylibcudf/pylibcudf/libcudf/strings/CMakeLists.txt
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/__init__.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/__init__.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/__init__.py b/python/pylibcudf/pylibcudf/libcudf/strings/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/__init__.py
rename to python/pylibcudf/pylibcudf/libcudf/strings/__init__.py
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/attributes.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd
similarity index 76%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/attributes.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd
index c4d52c83663..5e510339834 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/attributes.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd
@@ -1,9 +1,8 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
 
 
 cdef extern from "cudf/strings/attributes.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/capitalize.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd
similarity index 63%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/capitalize.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd
index b0771e16680..77e3f46d7ee 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/capitalize.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd
@@ -1,12 +1,9 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.strings.char_types cimport (
-    string_character_types,
-)
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.strings.char_types cimport string_character_types
 
 cdef extern from "cudf/strings/capitalize.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/case.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd
similarity index 81%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/case.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/case.pxd
index 82c146b0023..7869e90f387 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/case.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd
@@ -1,8 +1,7 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
 
 cdef extern from "cudf/strings/case.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/char_types.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd
similarity index 82%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/char_types.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd
index f63e1a93f91..5d54c1c3593 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/char_types.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd
@@ -2,10 +2,9 @@
 
 from libc.stdint cimport uint32_t
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 
 
 cdef extern from "cudf/strings/char_types/char_types.hpp" \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/char_types.pyx b/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/char_types.pyx
rename to python/pylibcudf/pylibcudf/libcudf/strings/char_types.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd
similarity index 83%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/combine.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd
index b05e46af0d6..e4c9fa5817a 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/combine.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd
@@ -1,11 +1,10 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.table.table_view cimport table_view
 
 
 cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd
similarity index 69%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/contains.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd
index f8ed253ff3c..c2fb5f0dce4 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/contains.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd
@@ -1,11 +1,10 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.strings.regex_program cimport regex_program
 
 
 cdef extern from "cudf/strings/contains.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/__init__.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/__init__.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/convert/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/libcudf/strings/convert/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/__init__.py
rename to python/pylibcudf/pylibcudf/libcudf/strings/convert/__init__.py
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd
similarity index 69%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_booleans.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd
index daac2b5be28..83a9573baad 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_booleans.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd
@@ -1,9 +1,8 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 
 cdef extern from "cudf/strings/convert/convert_booleans.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd
similarity index 76%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_datetime.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd
index 263cee4fe1e..fa8975c4df9 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_datetime.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd
@@ -2,10 +2,9 @@
 
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport data_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport data_type
 
 
 cdef extern from "cudf/strings/convert/convert_datetime.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
similarity index 72%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_durations.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
index af357b9bde4..ebe10574353 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_durations.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
@@ -2,10 +2,9 @@
 
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport data_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport data_type
 
 
 cdef extern from "cudf/strings/convert/convert_durations.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd
similarity index 73%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd
index 91c1abdb5e4..6f820f3c9a4 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd
@@ -1,10 +1,9 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport data_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport data_type
 
 
 cdef extern from "cudf/strings/convert/convert_fixed_point.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_floats.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd
similarity index 71%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_floats.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd
index 5fbf2be0244..f4fc4674506 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_floats.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd
@@ -1,10 +1,9 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport data_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport data_type
 
 
 cdef extern from "cudf/strings/convert/convert_floats.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_integers.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd
similarity index 80%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_integers.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd
index 3d6c59cbfcf..f12aab0a2e4 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_integers.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd
@@ -1,10 +1,9 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport data_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport data_type
 
 
 cdef extern from "cudf/strings/convert/convert_integers.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd
similarity index 76%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd
index 86de956b6b6..fe571cfced6 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd
@@ -1,9 +1,8 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
 
 
 cdef extern from "cudf/strings/convert/convert_ipv4.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_lists.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd
similarity index 62%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_lists.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd
index aba2dbcca64..109111568d8 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_lists.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd
@@ -1,9 +1,8 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 
 cdef extern from "cudf/strings/convert/convert_lists.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_urls.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd
similarity index 72%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_urls.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd
index fb7e0cae6de..5c07b698454 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/convert/convert_urls.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd
@@ -1,9 +1,8 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
 
 
 cdef extern from "cudf/strings/convert/convert_urls.hpp" namespace \
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd
new file mode 100644
index 00000000000..12cd628fc1f
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd
@@ -0,0 +1,14 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.strings.regex_program cimport regex_program
+from pylibcudf.libcudf.table.table cimport table
+
+
+cdef extern from "cudf/strings/extract.hpp" namespace "cudf::strings" nogil:
+
+    cdef unique_ptr[table] extract(
+        column_view source_strings,
+        regex_program) except +
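A note on the new extract.pxd above: the second parameter is declared by type only, which Cython accepts for extern functions; the underlying C++ API takes the compiled program by const reference. Below is a hedged Cython sketch of how such a binding is typically driven. It is not part of this patch: extract_groups is a hypothetical helper, and it assumes the regex_program.create factory and regex_flags.DEFAULT declared in the neighboring regex_program.pxd and regex_flags.pxd.

    # Hedged sketch (not part of this patch): calling the extract binding.
    from cython.operator cimport dereference
    from libcpp.memory cimport unique_ptr
    from libcpp.string cimport string
    from libcpp.utility cimport move
    from pylibcudf.libcudf.column.column_view cimport column_view
    from pylibcudf.libcudf.strings.extract cimport extract
    from pylibcudf.libcudf.strings.regex_flags cimport regex_flags
    from pylibcudf.libcudf.strings.regex_program cimport regex_program
    from pylibcudf.libcudf.table.table cimport table

    cdef unique_ptr[table] extract_groups(column_view source, string pattern):
        # Compile the pattern, then produce one strings column per capture group.
        cdef unique_ptr[regex_program] prog = move(
            regex_program.create(pattern, regex_flags.DEFAULT)
        )
        cdef unique_ptr[table] result
        with nogil:
            result = move(extract(source, dereference(prog)))
        return move(result)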
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/find.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd
similarity index 83%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/find.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/find.pxd
index 04e2ed554ee..1d1df1b8b8e 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/find.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd
@@ -2,11 +2,10 @@
 
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport size_type
 
 
 cdef extern from "cudf/strings/find.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/find_multiple.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd
similarity index 68%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/find_multiple.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd
index 1f1adc8e99f..0491644a10a 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/find_multiple.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd
@@ -1,9 +1,8 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
 
 
 cdef extern from "cudf/strings/find_multiple.hpp" namespace "cudf::strings" \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd
similarity index 56%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/findall.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd
index 4bc450b8911..b25724586e1 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/findall.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd
@@ -1,10 +1,9 @@
 # Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.strings.regex_program cimport regex_program
 
 
 cdef extern from "cudf/strings/findall.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/json.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/json.pxd
similarity index 79%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/json.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/json.pxd
index 5926fa1d29f..571ba7be7af 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/json.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/json.pxd
@@ -3,10 +3,9 @@
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar, string_scalar
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport scalar, string_scalar
 
 
 cdef extern from "cudf/json/json.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/padding.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd
similarity index 59%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/padding.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd
index 26681a1aa00..657fe61eb14 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/padding.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd
@@ -2,12 +2,11 @@
 
 from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.strings.side_type cimport side_type
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.strings.side_type cimport side_type
+from pylibcudf.libcudf.types cimport size_type
 
 cdef extern from "cudf/strings/padding.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/regex_flags.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/regex_flags.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/regex_flags.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/regex_flags.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/regex_flags.pyx b/python/pylibcudf/pylibcudf/libcudf/strings/regex_flags.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/regex_flags.pyx
rename to python/pylibcudf/pylibcudf/libcudf/strings/regex_flags.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/regex_program.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/regex_program.pxd
similarity index 84%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/regex_program.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/regex_program.pxd
index e92c8bd7737..5d1d9e583d5 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/regex_program.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/regex_program.pxd
@@ -2,8 +2,7 @@
 
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.strings.regex_flags cimport regex_flags
+from pylibcudf.libcudf.strings.regex_flags cimport regex_flags
 
 
 cdef extern from "cudf/strings/regex/regex_program.hpp" \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/repeat.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd
similarity index 67%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/repeat.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd
index 9e128529406..410ff58f299 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/repeat.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd
@@ -1,10 +1,9 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport size_type
 
 
 cdef extern from "cudf/strings/repeat_strings.hpp" namespace "cudf::strings" \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd
similarity index 73%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/replace.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd
index 34e03eec638..fd5f4fc4751 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/replace.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd
@@ -3,11 +3,10 @@
 from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport size_type
 
 
 cdef extern from "cudf/strings/replace.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd
similarity index 63%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/replace_re.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd
index 739505cd51d..40f0e2fa50c 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/replace_re.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd
@@ -3,13 +3,12 @@
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.strings.regex_program cimport regex_program
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.types cimport size_type
 
 
 cdef extern from "cudf/strings/replace_re.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/side_type.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/side_type.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/side_type.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/side_type.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/split/__init__.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/split/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/split/__init__.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/split/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/split/__init__.py b/python/pylibcudf/pylibcudf/libcudf/strings/split/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/split/__init__.py
rename to python/pylibcudf/pylibcudf/libcudf/strings/split/__init__.py
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/split/partition.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd
similarity index 63%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/split/partition.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd
index 5119124b3e3..4162e886a7d 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/split/partition.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd
@@ -2,11 +2,10 @@
 
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.table.table cimport table
 
 
 cdef extern from "cudf/strings/split/partition.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/split/split.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd
similarity index 78%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/split/split.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd
index 4f75664e47a..3046149aebb 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/split/split.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd
@@ -2,13 +2,12 @@
 
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.strings.regex_program cimport regex_program
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.types cimport size_type
 
 
 cdef extern from "cudf/strings/split/split.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/strip.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd
similarity index 52%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/strip.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd
index 2d6fd6a9e89..b0ca771762d 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/strip.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd
@@ -1,11 +1,10 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.strings.side_type cimport side_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.strings.side_type cimport side_type
 
 
 cdef extern from "cudf/strings/strip.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/substring.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd
similarity index 66%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/substring.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd
index 02123cc0807..576dae9387f 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/substring.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd
@@ -1,11 +1,10 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport numeric_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport numeric_scalar
+from pylibcudf.libcudf.types cimport size_type
 
 
 cdef extern from "cudf/strings/slice.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/translate.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd
similarity index 73%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/translate.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd
index b23ac277216..85fa719128a 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/translate.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd
@@ -4,11 +4,10 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from cudf._lib.pylibcudf.libcudf.types cimport char_utf8
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport char_utf8
 
 
 cdef extern from "cudf/strings/translate.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/wrap.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd
similarity index 58%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings/wrap.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd
index 1d92d445634..c0053391328 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings/wrap.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd
@@ -1,10 +1,9 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport size_type
 
 
 cdef extern from "cudf/strings/wrap.hpp" namespace "cudf::strings" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings_udf.pxd b/python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd
similarity index 85%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/strings_udf.pxd
rename to python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd
index 804ad30dfb1..0c8fe1060ac 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/strings_udf.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd
@@ -4,13 +4,12 @@
 from libc.stdint cimport uint8_t, uint16_t
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.vector cimport vector
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport size_type
 
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
-
 
 cdef extern from "cudf/strings/udf/udf_string.hpp" namespace \
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/table/__init__.pxd b/python/pylibcudf/pylibcudf/libcudf/table/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/table/__init__.pxd
rename to python/pylibcudf/pylibcudf/libcudf/table/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/table/__init__.py b/python/pylibcudf/pylibcudf/libcudf/table/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/table/__init__.py
rename to python/pylibcudf/pylibcudf/libcudf/table/__init__.py
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/table/table.pxd b/python/pylibcudf/pylibcudf/libcudf/table/table.pxd
similarity index 69%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/table/table.pxd
rename to python/pylibcudf/pylibcudf/libcudf/table/table.pxd
index 737a1327d45..654c29b083a 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/table/table.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/table/table.pxd
@@ -2,13 +2,9 @@
 
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport (
-    mutable_table_view,
-    table_view,
-)
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.table.table_view cimport mutable_table_view, table_view
+from pylibcudf.libcudf.types cimport size_type
 
 
 cdef extern from "cudf/table/table.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/table/table_view.pxd b/python/pylibcudf/pylibcudf/libcudf/table/table_view.pxd
similarity index 87%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/table/table_view.pxd
rename to python/pylibcudf/pylibcudf/libcudf/table/table_view.pxd
index 00e1a89c025..3af2f6a6c2c 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/table/table_view.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/table/table_view.pxd
@@ -1,12 +1,11 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport (
+from pylibcudf.libcudf.column.column_view cimport (
     column_view,
     mutable_column_view,
 )
-from cudf._lib.pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.types cimport size_type
 
 
 cdef extern from "cudf/table/table_view.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd
similarity index 73%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/transform.pxd
rename to python/pylibcudf/pylibcudf/libcudf/transform.pxd
index b0a978fe5c5..38298a7c1f1 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/transform.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd
@@ -4,20 +4,15 @@
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
 from libcpp.string cimport string
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.expressions cimport expression
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type
 
 from rmm._lib.device_buffer cimport device_buffer
 
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.expressions cimport expression
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
-from cudf._lib.pylibcudf.libcudf.types cimport (
-    bitmask_type,
-    data_type,
-    size_type,
-)
-
 
 cdef extern from "cudf/transform.hpp" namespace "cudf" nogil:
 
     cdef pair[unique_ptr[device_buffer], size_type] bools_to_mask (
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/transpose.pxd b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd
similarity index 69%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/transpose.pxd
rename to python/pylibcudf/pylibcudf/libcudf/transpose.pxd
index 5dcb9c165ad..9c0e3c073b0 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/transpose.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd
@@ -2,9 +2,8 @@
 
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.table.table_view cimport table_view
 
 
 cdef extern from "cudf/transpose.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/types.pxd b/python/pylibcudf/pylibcudf/libcudf/types.pxd
similarity index 97%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/types.pxd
rename to python/pylibcudf/pylibcudf/libcudf/types.pxd
index 8e94ec296cf..eabae68bc90 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/types.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/types.pxd
@@ -98,3 +98,5 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
         HIGHER
         MIDPOINT
         NEAREST
+
+    cdef size_type size_of(data_type t) except +
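The size_of declaration just added to types.pxd is the only functional change in that file. A minimal, hedged sketch (not part of this patch) of what it enables from Cython code built against pylibcudf, assuming the data_type(type_id) constructor already declared in the same file:

    # Hedged sketch (not part of this patch): using the newly exposed cudf::size_of.
    from pylibcudf.libcudf.types cimport data_type, size_of, size_type, type_id

    def int64_width():
        # Returns the fixed width in bytes; size_of raises for non-fixed-width
        # types, which the `except +` on the declaration maps to a Python exception.
        cdef size_type nbytes = size_of(data_type(type_id.INT64))
        return nbytes  # expected: 8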
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/types.pyx b/python/pylibcudf/pylibcudf/libcudf/types.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/types.pyx
rename to python/pylibcudf/pylibcudf/libcudf/types.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/unary.pxd b/python/pylibcudf/pylibcudf/libcudf/unary.pxd
similarity index 85%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/unary.pxd
rename to python/pylibcudf/pylibcudf/libcudf/unary.pxd
index 2a1b189af51..887f8c7fca4 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/unary.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/unary.pxd
@@ -3,10 +3,9 @@
 from libc.stdint cimport int32_t
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
-from cudf._lib.pylibcudf.libcudf.types cimport data_type
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport data_type
 
 
 cdef extern from "cudf/unary.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/unary.pyx b/python/pylibcudf/pylibcudf/libcudf/unary.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/unary.pyx
rename to python/pylibcudf/pylibcudf/libcudf/unary.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/__init__.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/__init__.pxd
rename to python/pylibcudf/pylibcudf/libcudf/utilities/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/__init__.py b/python/pylibcudf/pylibcudf/libcudf/utilities/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/__init__.py
rename to python/pylibcudf/pylibcudf/libcudf/utilities/__init__.py
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/host_span.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/host_span.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/host_span.pxd
rename to python/pylibcudf/pylibcudf/libcudf/utilities/host_span.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/traits.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
similarity index 93%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/traits.pxd
rename to python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
index 0cc58af735b..69765e44274 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/traits.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
@@ -2,8 +2,7 @@
 
 from libcpp cimport bool
 from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.types cimport data_type
+from pylibcudf.libcudf.types cimport data_type
 
 
 cdef extern from "cudf/utilities/traits.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/type_dispatcher.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/type_dispatcher.pxd
similarity index 73%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/type_dispatcher.pxd
rename to python/pylibcudf/pylibcudf/libcudf/utilities/type_dispatcher.pxd
index 890fca3a662..fbeb6e9db90 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/type_dispatcher.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/utilities/type_dispatcher.pxd
@@ -1,6 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from cudf._lib.pylibcudf.libcudf.types cimport type_id
+from pylibcudf.libcudf.types cimport type_id
 
 
 cdef extern from "cudf/utilities/type_dispatcher.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/wrappers/__init__.pxd b/python/pylibcudf/pylibcudf/libcudf/wrappers/__init__.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/wrappers/__init__.pxd
rename to python/pylibcudf/pylibcudf/libcudf/wrappers/__init__.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/wrappers/__init__.py b/python/pylibcudf/pylibcudf/libcudf/wrappers/__init__.py
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/wrappers/__init__.py
rename to python/pylibcudf/pylibcudf/libcudf/wrappers/__init__.py
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/wrappers/decimals.pxd b/python/pylibcudf/pylibcudf/libcudf/wrappers/decimals.pxd
similarity index 90%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/wrappers/decimals.pxd
rename to python/pylibcudf/pylibcudf/libcudf/wrappers/decimals.pxd
index 09b0c87e4b8..558299501d6 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/wrappers/decimals.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/wrappers/decimals.pxd
@@ -1,8 +1,7 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
 from libc.stdint cimport int32_t, int64_t
-
-from cudf._lib.pylibcudf.libcudf.types cimport int128
+from pylibcudf.libcudf.types cimport int128
 
 
 cdef extern from "cudf/fixed_point/fixed_point.hpp" namespace "numeric" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/wrappers/durations.pxd b/python/pylibcudf/pylibcudf/libcudf/wrappers/durations.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/wrappers/durations.pxd
rename to python/pylibcudf/pylibcudf/libcudf/wrappers/durations.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/wrappers/timestamps.pxd b/python/pylibcudf/pylibcudf/libcudf/wrappers/timestamps.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/pylibcudf/libcudf/wrappers/timestamps.pxd
rename to python/pylibcudf/pylibcudf/libcudf/wrappers/timestamps.pxd
diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pxd b/python/pylibcudf/pylibcudf/lists.pxd
similarity index 94%
rename from python/cudf/cudf/_lib/pylibcudf/lists.pxd
rename to python/pylibcudf/pylibcudf/lists.pxd
index 17619b489d2..e7d006e6e2e 100644
--- a/python/cudf/cudf/_lib/pylibcudf/lists.pxd
+++ b/python/pylibcudf/pylibcudf/lists.pxd
@@ -1,8 +1,7 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
 from libcpp cimport bool
-
-from cudf._lib.pylibcudf.libcudf.types cimport null_order, size_type
+from pylibcudf.libcudf.types cimport null_order, size_type
 
 from .column cimport Column
 from .scalar cimport Scalar
diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx
similarity index 95%
rename from python/cudf/cudf/_lib/pylibcudf/lists.pyx
rename to python/pylibcudf/pylibcudf/lists.pyx
index c944fc35800..947caddc485 100644
--- a/python/cudf/cudf/_lib/pylibcudf/lists.pyx
+++ b/python/pylibcudf/pylibcudf/lists.pyx
@@ -4,9 +4,8 @@ from cython.operator cimport dereference
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
-
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.lists cimport (
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.lists cimport (
     contains as cpp_contains,
     explode as cpp_explode,
     filling as cpp_filling,
@@ -14,34 +13,34 @@ from cudf._lib.pylibcudf.libcudf.lists cimport (
     reverse as cpp_reverse,
     set_operations as cpp_set_operations,
 )
-from cudf._lib.pylibcudf.libcudf.lists.combine cimport (
+from pylibcudf.libcudf.lists.combine cimport (
     concatenate_list_elements as cpp_concatenate_list_elements,
     concatenate_null_policy,
     concatenate_rows as cpp_concatenate_rows,
 )
-from cudf._lib.pylibcudf.libcudf.lists.count_elements cimport (
+from pylibcudf.libcudf.lists.count_elements cimport (
     count_elements as cpp_count_elements,
 )
-from cudf._lib.pylibcudf.libcudf.lists.extract cimport (
+from pylibcudf.libcudf.lists.extract cimport (
     extract_list_element as cpp_extract_list_element,
 )
-from cudf._lib.pylibcudf.libcudf.lists.sorting cimport (
+from pylibcudf.libcudf.lists.sorting cimport (
     sort_lists as cpp_sort_lists,
     stable_sort_lists as cpp_stable_sort_lists,
 )
-from cudf._lib.pylibcudf.libcudf.lists.stream_compaction cimport (
+from pylibcudf.libcudf.lists.stream_compaction cimport (
     apply_boolean_mask as cpp_apply_boolean_mask,
     distinct as cpp_distinct,
 )
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.types cimport (
+from pylibcudf.libcudf.table.table cimport table
+from pylibcudf.libcudf.types cimport (
     nan_equality,
     null_equality,
     null_order,
     order,
     size_type,
 )
-from cudf._lib.pylibcudf.lists cimport ColumnOrScalar, ColumnOrSizeType
+from pylibcudf.lists cimport ColumnOrScalar, ColumnOrSizeType
 
 from .column cimport Column, ListColumnView
 from .scalar cimport Scalar
@@ -131,8 +130,8 @@ cpdef Column contains(Column input, ColumnOrScalar search_key):
     the search_key is contained in the input.
 
     ``search_key`` may be a
-    :py:class:`~cudf._lib.pylibcudf.column.Column` or a
-    :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`.
+    :py:class:`~pylibcudf.column.Column` or a
+    :py:class:`~pylibcudf.scalar.Scalar`.
 
     For details, see :cpp:func:`contains`.
 
@@ -192,8 +191,8 @@ cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_o
     key row within the corresponding list row in the lists column.
 
     ``search_key`` may be a
-    :py:class:`~cudf._lib.pylibcudf.column.Column` or a
-    :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`.
+    :py:class:`~pylibcudf.column.Column` or a
+    :py:class:`~pylibcudf.scalar.Scalar`.
 
     For details, see :cpp:func:`index_of`.
 
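Since the docstrings above now point at the top-level pylibcudf names, here is a hedged Python-level sketch (not part of this patch) of contains/index_of after the rename. It assumes pyarrow interop via pylibcudf.interop.from_arrow, and that the third argument of index_of selects find-first semantics, per the signature shown above:

    # Hedged sketch (not part of this patch): the renamed package in action.
    import pyarrow as pa
    import pylibcudf as plc

    lists = plc.interop.from_arrow(pa.array([[1, 2, 3], [4, 5], []]))
    key = plc.interop.from_arrow(pa.scalar(4))

    hits = plc.lists.contains(lists, key)  # BOOL8 column: [False, True, False]
    positions = plc.lists.index_of(lists, key, True)  # [-1, 0, -1]; -1 = not found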
diff --git a/python/cudf/cudf/_lib/pylibcudf/merge.pxd b/python/pylibcudf/pylibcudf/merge.pxd similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/merge.pxd rename to python/pylibcudf/pylibcudf/merge.pxd diff --git a/python/cudf/cudf/_lib/pylibcudf/merge.pyx b/python/pylibcudf/pylibcudf/merge.pyx similarity index 83% rename from python/cudf/cudf/_lib/pylibcudf/merge.pyx rename to python/pylibcudf/pylibcudf/merge.pyx index 5aa46c142f6..a7d43c9d158 100644 --- a/python/cudf/cudf/_lib/pylibcudf/merge.pyx +++ b/python/pylibcudf/pylibcudf/merge.pyx @@ -3,11 +3,10 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.libcudf cimport merge as cpp_merge -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view -from cudf._lib.pylibcudf.libcudf.types cimport null_order, order, size_type +from pylibcudf.libcudf cimport merge as cpp_merge +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.types cimport null_order, order, size_type from .table cimport Table diff --git a/python/pylibcudf/pylibcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/null_mask.pxd new file mode 100644 index 00000000000..ab5c0080312 --- /dev/null +++ b/python/pylibcudf/pylibcudf/null_mask.pxd @@ -0,0 +1,18 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.libcudf.types cimport mask_state, size_type + +from rmm._lib.device_buffer cimport DeviceBuffer + +from .column cimport Column + + +cpdef DeviceBuffer copy_bitmask(Column col) + +cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits) + +cpdef DeviceBuffer create_null_mask(size_type size, mask_state state = *) + +cpdef tuple bitmask_and(list columns) + +cpdef tuple bitmask_or(list columns) diff --git a/python/pylibcudf/pylibcudf/null_mask.pyx b/python/pylibcudf/pylibcudf/null_mask.pyx new file mode 100644 index 00000000000..5bdde06f21f --- /dev/null +++ b/python/pylibcudf/pylibcudf/null_mask.pyx @@ -0,0 +1,142 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport make_unique +from libcpp.pair cimport pair +from libcpp.utility cimport move +from pylibcudf.libcudf cimport null_mask as cpp_null_mask +from pylibcudf.libcudf.types cimport mask_state, size_type + +from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer + +from pylibcudf.libcudf.types import mask_state as MaskState # no-cython-lint + +from .column cimport Column +from .table cimport Table + + +cdef DeviceBuffer buffer_to_python(device_buffer buf): + return DeviceBuffer.c_from_unique_ptr(make_unique[device_buffer](move(buf))) + + +cpdef DeviceBuffer copy_bitmask(Column col): + """Copies ``col``'s bitmask into a ``DeviceBuffer``. + + For details, see :cpp:func:`copy_bitmask`. + + Parameters + ---------- + col : Column + Column whose bitmask needs to be copied + + Returns + ------- + rmm.DeviceBuffer + A ``DeviceBuffer`` containing ``col``'s bitmask, or an empty ``DeviceBuffer`` + if ``col`` is not nullable + """ + cdef device_buffer db + + with nogil: + db = move(cpp_null_mask.copy_bitmask(col.view())) + + return buffer_to_python(move(db)) + +cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits): + """ + Computes the required bytes necessary to represent the specified number of bits + with a 64B padding boundary. + + For details, see :cpp:func:`bitmask_allocation_size_bytes`. 
+ + Parameters + ---------- + number_of_bits : size_type + The number of bits that need to be represented + + Returns + ------- + size_t + The necessary number of bytes + """ + with nogil: + return cpp_null_mask.bitmask_allocation_size_bytes(number_of_bits) + + +cpdef DeviceBuffer create_null_mask( + size_type size, + mask_state state = mask_state.UNINITIALIZED +): + """Creates a ``DeviceBuffer`` for use as a null value indicator bitmask of a + ``Column``. + + For details, see :cpp:func:`create_null_mask`. + + Parameters + ---------- + size : size_type + The number of elements to be represented by the mask + state : mask_state, optional + The desired state of the mask. Can be one of { MaskState.UNALLOCATED, + MaskState.UNINITIALIZED, MaskState.ALL_VALID, MaskState.ALL_NULL } + (default MaskState.UNINITIALIZED) + + Returns + ------- + rmm.DeviceBuffer + A ``DeviceBuffer`` for use as a null bitmask satisfying the desired size and + state + """ + cdef device_buffer db + + with nogil: + db = move(cpp_null_mask.create_null_mask(size, state)) + + return buffer_to_python(move(db)) + + +cpdef tuple bitmask_and(list columns): + """Performs bitwise AND of the bitmasks of a list of columns. + + For details, see :cpp:func:`bitmask_and`. + + Parameters + ---------- + columns : list + The list of columns + + Returns + ------- + tuple[DeviceBuffer, size_type] + A tuple of the resulting mask and count of unset bits + """ + cdef Table c_table = Table(columns) + cdef pair[device_buffer, size_type] c_result + + with nogil: + c_result = move(cpp_null_mask.bitmask_and(c_table.view())) + + return buffer_to_python(move(c_result.first)), c_result.second + + +cpdef tuple bitmask_or(list columns): + """Performs bitwise OR of the bitmasks of a list of columns. + + For details, see :cpp:func:`bitmask_or`. + + Parameters + ---------- + columns : list + The list of columns + + Returns + ------- + tuple[DeviceBuffer, size_type] + A tuple of the resulting mask and count of unset bits + """ + cdef Table c_table = Table(columns) + cdef pair[device_buffer, size_type] c_result + + with nogil: + c_result = move(cpp_null_mask.bitmask_or(c_table.view())) + + return buffer_to_python(move(c_result.first)), c_result.second diff --git a/python/cudf/cudf/_lib/pylibcudf/quantiles.pxd b/python/pylibcudf/pylibcudf/quantiles.pxd similarity index 86% rename from python/cudf/cudf/_lib/pylibcudf/quantiles.pxd rename to python/pylibcudf/pylibcudf/quantiles.pxd index 70ff135ca77..fbc1dfb30a6 100644 --- a/python/cudf/cudf/_lib/pylibcudf/quantiles.pxd +++ b/python/pylibcudf/pylibcudf/quantiles.pxd @@ -1,7 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.libcudf.types cimport interpolation, sorted +from pylibcudf.libcudf.types cimport interpolation, sorted from .column cimport Column from .table cimport Table diff --git a/python/cudf/cudf/_lib/pylibcudf/quantiles.pyx b/python/pylibcudf/pylibcudf/quantiles.pyx similarity index 93% rename from python/cudf/cudf/_lib/pylibcudf/quantiles.pyx rename to python/pylibcudf/pylibcudf/quantiles.pyx index c1f0e30ccd3..b847ade774d 100644 --- a/python/cudf/cudf/_lib/pylibcudf/quantiles.pyx +++ b/python/pylibcudf/pylibcudf/quantiles.pyx @@ -4,15 +4,14 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.quantiles cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.quantiles cimport ( quantile as cpp_quantile, quantiles as cpp_quantiles, ) -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.types cimport null_order, order, sorted +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.types cimport null_order, order, sorted from .column cimport Column from .table cimport Table diff --git a/python/cudf/cudf/_lib/pylibcudf/reduce.pxd b/python/pylibcudf/pylibcudf/reduce.pxd similarity index 85% rename from python/cudf/cudf/_lib/pylibcudf/reduce.pxd rename to python/pylibcudf/pylibcudf/reduce.pxd index 935efd4acf2..047f08297e4 100644 --- a/python/cudf/cudf/_lib/pylibcudf/reduce.pxd +++ b/python/pylibcudf/pylibcudf/reduce.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-from cudf._lib.pylibcudf.libcudf.reduce cimport scan_type +from pylibcudf.libcudf.reduce cimport scan_type from .aggregation cimport Aggregation from .column cimport Column diff --git a/python/cudf/cudf/_lib/pylibcudf/reduce.pyx b/python/pylibcudf/pylibcudf/reduce.pyx similarity index 85% rename from python/cudf/cudf/_lib/pylibcudf/reduce.pyx rename to python/pylibcudf/pylibcudf/reduce.pyx index c272f183007..b0212a5b9c1 100644 --- a/python/cudf/cudf/_lib/pylibcudf/reduce.pyx +++ b/python/pylibcudf/pylibcudf/reduce.pyx @@ -3,23 +3,18 @@ from cython.operator cimport dereference from libcpp.memory cimport unique_ptr from libcpp.utility cimport move, pair - -from cudf._lib.pylibcudf.libcudf cimport reduce as cpp_reduce -from cudf._lib.pylibcudf.libcudf.aggregation cimport ( - reduce_aggregation, - scan_aggregation, -) -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.reduce cimport scan_type -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf cimport reduce as cpp_reduce +from pylibcudf.libcudf.aggregation cimport reduce_aggregation, scan_aggregation +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.reduce cimport scan_type +from pylibcudf.libcudf.scalar.scalar cimport scalar from .aggregation cimport Aggregation from .column cimport Column from .scalar cimport Scalar from .types cimport DataType -from cudf._lib.pylibcudf.libcudf.reduce import \ - scan_type as ScanType # no-cython-lint +from pylibcudf.libcudf.reduce import scan_type as ScanType # no-cython-lint cpdef Scalar reduce(Column col, Aggregation agg, DataType data_type): diff --git a/python/cudf/cudf/_lib/pylibcudf/replace.pxd b/python/pylibcudf/pylibcudf/replace.pxd similarity index 92% rename from python/cudf/cudf/_lib/pylibcudf/replace.pxd rename to python/pylibcudf/pylibcudf/replace.pxd index 40484c728db..cb9fa8bf960 100644 --- a/python/cudf/cudf/_lib/pylibcudf/replace.pxd +++ b/python/pylibcudf/pylibcudf/replace.pxd @@ -1,8 +1,7 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. 
from libcpp cimport bool - -from cudf._lib.pylibcudf.libcudf.replace cimport replace_policy +from pylibcudf.libcudf.replace cimport replace_policy from .column cimport Column from .scalar cimport Scalar diff --git a/python/cudf/cudf/_lib/pylibcudf/replace.pyx b/python/pylibcudf/pylibcudf/replace.pyx similarity index 97% rename from python/cudf/cudf/_lib/pylibcudf/replace.pyx rename to python/pylibcudf/pylibcudf/replace.pyx index 6e08e8f64a9..115dee132fd 100644 --- a/python/cudf/cudf/_lib/pylibcudf/replace.pyx +++ b/python/pylibcudf/pylibcudf/replace.pyx @@ -6,11 +6,10 @@ from cython.operator import dereference from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport move +from pylibcudf.libcudf cimport replace as cpp_replace +from pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf cimport replace as cpp_replace -from cudf._lib.pylibcudf.libcudf.column.column cimport column - -from cudf._lib.pylibcudf.libcudf.replace import \ +from pylibcudf.libcudf.replace import \ replace_policy as ReplacePolicy # no-cython-lint from .column cimport Column diff --git a/python/cudf/cudf/_lib/pylibcudf/reshape.pxd b/python/pylibcudf/pylibcudf/reshape.pxd similarity index 80% rename from python/cudf/cudf/_lib/pylibcudf/reshape.pxd rename to python/pylibcudf/pylibcudf/reshape.pxd index a7cc45d7a08..c4d3d375f7a 100644 --- a/python/cudf/cudf/_lib/pylibcudf/reshape.pxd +++ b/python/pylibcudf/pylibcudf/reshape.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.types cimport size_type from .column cimport Column from .scalar cimport Scalar diff --git a/python/cudf/cudf/_lib/pylibcudf/reshape.pyx b/python/pylibcudf/pylibcudf/reshape.pyx similarity index 86% rename from python/cudf/cudf/_lib/pylibcudf/reshape.pyx rename to python/pylibcudf/pylibcudf/reshape.pyx index b68eba48cd6..a99145be900 100644 --- a/python/cudf/cudf/_lib/pylibcudf/reshape.pyx +++ b/python/pylibcudf/pylibcudf/reshape.pyx @@ -2,14 +2,13 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.reshape cimport ( +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.reshape cimport ( interleave_columns as cpp_interleave_columns, tile as cpp_tile, ) -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.types cimport size_type from .column cimport Column from .table cimport Table diff --git a/python/cudf/cudf/_lib/pylibcudf/rolling.pxd b/python/pylibcudf/pylibcudf/rolling.pxd similarity index 85% rename from python/cudf/cudf/_lib/pylibcudf/rolling.pxd rename to python/pylibcudf/pylibcudf/rolling.pxd index cdadee68d43..9fcda21a62f 100644 --- a/python/cudf/cudf/_lib/pylibcudf/rolling.pxd +++ b/python/pylibcudf/pylibcudf/rolling.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.types cimport size_type from .aggregation cimport Aggregation from .column cimport Column diff --git a/python/cudf/cudf/_lib/pylibcudf/rolling.pyx b/python/pylibcudf/pylibcudf/rolling.pyx similarity index 89% rename from python/cudf/cudf/_lib/pylibcudf/rolling.pyx rename to python/pylibcudf/pylibcudf/rolling.pyx index 7aa7828a5dd..a46540d7ffa 100644 --- a/python/cudf/cudf/_lib/pylibcudf/rolling.pyx +++ b/python/pylibcudf/pylibcudf/rolling.pyx @@ -3,11 +3,10 @@ from cython.operator cimport dereference from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.libcudf cimport rolling as cpp_rolling -from cudf._lib.pylibcudf.libcudf.aggregation cimport rolling_aggregation -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf cimport rolling as cpp_rolling +from pylibcudf.libcudf.aggregation cimport rolling_aggregation +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.types cimport size_type from .aggregation cimport Aggregation from .column cimport Column diff --git a/python/cudf/cudf/_lib/pylibcudf/round.pxd b/python/pylibcudf/pylibcudf/round.pxd similarity index 77% rename from python/cudf/cudf/_lib/pylibcudf/round.pxd rename to python/pylibcudf/pylibcudf/round.pxd index ccb64fc2847..c8501b03fad 100644 --- a/python/cudf/cudf/_lib/pylibcudf/round.pxd +++ b/python/pylibcudf/pylibcudf/round.pxd @@ -1,7 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from libc.stdint cimport int32_t - -from cudf._lib.pylibcudf.libcudf.round cimport rounding_method +from pylibcudf.libcudf.round cimport rounding_method from .column cimport Column diff --git a/python/cudf/cudf/_lib/pylibcudf/round.pyx b/python/pylibcudf/pylibcudf/round.pyx similarity index 85% rename from python/cudf/cudf/_lib/pylibcudf/round.pyx rename to python/pylibcudf/pylibcudf/round.pyx index cfcc2aafbb8..dc60d53b07e 100644 --- a/python/cudf/cudf/_lib/pylibcudf/round.pyx +++ b/python/pylibcudf/pylibcudf/round.pyx @@ -2,16 +2,12 @@ from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from libcpp.utility cimport move +from pylibcudf.libcudf.round cimport round as cpp_round, rounding_method -from cudf._lib.pylibcudf.libcudf.round cimport ( - round as cpp_round, - rounding_method, -) - -from cudf._lib.pylibcudf.libcudf.round import \ +from pylibcudf.libcudf.round import \ rounding_method as RoundingMethod # no-cython-lint -from cudf._lib.pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column cimport column from .column cimport Column diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd b/python/pylibcudf/pylibcudf/scalar.pxd similarity index 92% rename from python/cudf/cudf/_lib/pylibcudf/scalar.pxd rename to python/pylibcudf/pylibcudf/scalar.pxd index e6c9db2f1ac..8664dfa4b7e 100644 --- a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd +++ b/python/pylibcudf/pylibcudf/scalar.pxd @@ -2,11 +2,10 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr +from pylibcudf.libcudf.scalar.scalar cimport scalar from rmm._lib.memory_resource cimport DeviceMemoryResource -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar - from .column cimport Column from .types cimport DataType diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx similarity index 94% rename from python/cudf/cudf/_lib/pylibcudf/scalar.pyx 
rename to python/pylibcudf/pylibcudf/scalar.pyx index 67730be07d8..3e20938af0c 100644 --- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx +++ b/python/pylibcudf/pylibcudf/scalar.pyx @@ -3,14 +3,11 @@ from cython cimport no_gc_clear from libcpp.memory cimport unique_ptr from libcpp.utility cimport move +from pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport make_empty_scalar_like from rmm._lib.memory_resource cimport get_current_device_resource -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar -from cudf._lib.pylibcudf.libcudf.scalar.scalar_factories cimport ( - make_empty_scalar_like, -) - from .column cimport Column from .types cimport DataType diff --git a/python/cudf/cudf/_lib/pylibcudf/search.pxd b/python/pylibcudf/pylibcudf/search.pxd similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/search.pxd rename to python/pylibcudf/pylibcudf/search.pxd diff --git a/python/cudf/cudf/_lib/pylibcudf/search.pyx b/python/pylibcudf/pylibcudf/search.pyx similarity index 93% rename from python/cudf/cudf/_lib/pylibcudf/search.pyx rename to python/pylibcudf/pylibcudf/search.pyx index 151a39f204f..ff2468f3f9c 100644 --- a/python/cudf/cudf/_lib/pylibcudf/search.pyx +++ b/python/pylibcudf/pylibcudf/search.pyx @@ -3,10 +3,9 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.libcudf cimport search as cpp_search -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.types cimport null_order, order +from pylibcudf.libcudf cimport search as cpp_search +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.types cimport null_order, order from .column cimport Column from .table cimport Table diff --git a/python/cudf/cudf/_lib/pylibcudf/sorting.pxd b/python/pylibcudf/pylibcudf/sorting.pxd similarity index 87% rename from python/cudf/cudf/_lib/pylibcudf/sorting.pxd rename to python/pylibcudf/pylibcudf/sorting.pxd index a4ea541a03b..8127ab21ad1 100644 --- a/python/cudf/cudf/_lib/pylibcudf/sorting.pxd +++ b/python/pylibcudf/pylibcudf/sorting.pxd @@ -1,14 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
from libcpp cimport bool - -from cudf._lib.pylibcudf.libcudf.aggregation cimport rank_method -from cudf._lib.pylibcudf.libcudf.types cimport ( - null_order, - null_policy, - order, - size_type, -) +from pylibcudf.libcudf.aggregation cimport rank_method +from pylibcudf.libcudf.types cimport null_order, null_policy, order, size_type from .column cimport Column from .table cimport Table diff --git a/python/cudf/cudf/_lib/pylibcudf/sorting.pyx b/python/pylibcudf/pylibcudf/sorting.pyx similarity index 96% rename from python/cudf/cudf/_lib/pylibcudf/sorting.pyx rename to python/pylibcudf/pylibcudf/sorting.pyx index 8c5a8e26899..bd173eebacb 100644 --- a/python/cudf/cudf/_lib/pylibcudf/sorting.pyx +++ b/python/pylibcudf/pylibcudf/sorting.pyx @@ -3,12 +3,11 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.libcudf cimport sorting as cpp_sorting -from cudf._lib.pylibcudf.libcudf.aggregation cimport rank_method -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.types cimport null_order, null_policy, order +from pylibcudf.libcudf cimport sorting as cpp_sorting +from pylibcudf.libcudf.aggregation cimport rank_method +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.types cimport null_order, null_policy, order from .column cimport Column from .table cimport Table diff --git a/python/cudf/cudf/_lib/pylibcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/stream_compaction.pxd similarity index 89% rename from python/cudf/cudf/_lib/pylibcudf/stream_compaction.pxd rename to python/pylibcudf/pylibcudf/stream_compaction.pxd index 6f89aaf90e7..a4f39792f0c 100644 --- a/python/cudf/cudf/_lib/pylibcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/stream_compaction.pxd @@ -1,9 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-from cudf._lib.pylibcudf.libcudf.stream_compaction cimport ( - duplicate_keep_option, -) -from cudf._lib.pylibcudf.libcudf.types cimport ( +from pylibcudf.libcudf.stream_compaction cimport duplicate_keep_option +from pylibcudf.libcudf.types cimport ( nan_equality, nan_policy, null_equality, diff --git a/python/cudf/cudf/_lib/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx similarity index 95% rename from python/cudf/cudf/_lib/pylibcudf/stream_compaction.pyx rename to python/pylibcudf/pylibcudf/stream_compaction.pyx index 43449d3690a..b574bfa9fa2 100644 --- a/python/cudf/cudf/_lib/pylibcudf/stream_compaction.pyx +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx @@ -3,16 +3,11 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.libcudf cimport ( - stream_compaction as cpp_stream_compaction, -) -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.stream_compaction cimport ( - duplicate_keep_option, -) -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.types cimport ( +from pylibcudf.libcudf cimport stream_compaction as cpp_stream_compaction +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.stream_compaction cimport duplicate_keep_option +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.types cimport ( nan_equality, nan_policy, null_equality, @@ -20,7 +15,7 @@ from cudf._lib.pylibcudf.libcudf.types cimport ( size_type, ) -from cudf._lib.pylibcudf.libcudf.stream_compaction import \ +from pylibcudf.libcudf.stream_compaction import \ duplicate_keep_option as DuplicateKeepOption # no-cython-lint, isort:skip from .column cimport Column diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/CMakeLists.txt similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/strings/CMakeLists.txt rename to python/pylibcudf/pylibcudf/strings/CMakeLists.txt diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/__init__.pxd b/python/pylibcudf/pylibcudf/strings/__init__.pxd similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/strings/__init__.pxd rename to python/pylibcudf/pylibcudf/strings/__init__.pxd diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/__init__.py b/python/pylibcudf/pylibcudf/strings/__init__.py similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/strings/__init__.py rename to python/pylibcudf/pylibcudf/strings/__init__.py diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/capitalize.pxd b/python/pylibcudf/pylibcudf/strings/capitalize.pxd similarity index 64% rename from python/cudf/cudf/_lib/pylibcudf/strings/capitalize.pxd rename to python/pylibcudf/pylibcudf/strings/capitalize.pxd index 9acf189fc23..b45949d4eb4 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/capitalize.pxd +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-from cudf._lib.pylibcudf.column cimport Column -from cudf._lib.pylibcudf.scalar cimport Scalar +from pylibcudf.column cimport Column +from pylibcudf.scalar cimport Scalar cpdef Column capitalize(Column input, Scalar delimiters=*) diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyx b/python/pylibcudf/pylibcudf/strings/capitalize.pyx new file mode 100644 index 00000000000..06b991c3cf1 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyx @@ -0,0 +1,107 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport ( + make_string_scalar as cpp_make_string_scalar, +) +from pylibcudf.libcudf.strings cimport capitalize as cpp_capitalize +from pylibcudf.scalar cimport Scalar +from pylibcudf.strings.char_types cimport string_character_types + +from cython.operator import dereference + + +cpdef Column capitalize( + Column input, + Scalar delimiters=None + # TODO: default scalar values + # https://github.com/rapidsai/cudf/issues/15505 +): + """Returns a column of capitalized strings. + + For details, see :cpp:func:`cudf::strings::capitalize`. + + Parameters + ---------- + input : Column + String column + delimiters : Scalar, default None + Characters for identifying words to capitalize + + Returns + ------- + pylibcudf.Column + Column of strings capitalized from the input column + """ + cdef unique_ptr[column] c_result + + if delimiters is None: + delimiters = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + + cdef const string_scalar* cpp_delimiters = ( + delimiters.c_obj.get() + ) + + with nogil: + c_result = cpp_capitalize.capitalize( + input.view(), + dereference(cpp_delimiters) + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column title( + Column input, + string_character_types sequence_type=string_character_types.ALPHA +): + """Modifies first character of each word to upper-case and lower-cases + the rest. + + For details, see :cpp:func:`cudf::strings::title`. + + Parameters + ---------- + input : Column + String column + sequence_type : string_character_types, default string_character_types.ALPHA + The character type that is used when identifying words + + Returns + ------- + pylibcudf.Column + Column of titled strings + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_capitalize.title(input.view(), sequence_type) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column is_title(Column input): + """Checks if the strings in the input column are title formatted. + + For details, see :cpp:func:`cudf::strings::is_title`. + + Parameters + ---------- + input : Column + String column + + Returns + ------- + pylibcudf.Column + Column of type BOOL8 + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_capitalize.is_title(input.view()) + + return Column.from_libcudf(move(c_result)) diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/case.pxd b/python/pylibcudf/pylibcudf/strings/case.pxd similarity index 76% rename from python/cudf/cudf/_lib/pylibcudf/strings/case.pxd rename to python/pylibcudf/pylibcudf/strings/case.pxd index 225d566fe06..d3c98d5e3dc 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/case.pxd +++ b/python/pylibcudf/pylibcudf/strings/case.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-from cudf._lib.pylibcudf.column cimport Column +from pylibcudf.column cimport Column cpdef Column to_lower(Column input) diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/case.pyx b/python/pylibcudf/pylibcudf/strings/case.pyx similarity index 79% rename from python/cudf/cudf/_lib/pylibcudf/strings/case.pyx rename to python/pylibcudf/pylibcudf/strings/case.pyx index 3a360fd6b10..9e6cd7717d3 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/case.pyx +++ b/python/pylibcudf/pylibcudf/strings/case.pyx @@ -2,10 +2,9 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.strings cimport case as cpp_case +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.strings cimport case as cpp_case cpdef Column to_lower(Column input): diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pxd b/python/pylibcudf/pylibcudf/strings/char_types.pxd new file mode 100644 index 00000000000..ad4e4cf61d8 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/char_types.pxd @@ -0,0 +1,3 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.libcudf.strings.char_types cimport string_character_types diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/char_types.pyx b/python/pylibcudf/pylibcudf/strings/char_types.pyx similarity index 64% rename from python/cudf/cudf/_lib/pylibcudf/strings/char_types.pyx rename to python/pylibcudf/pylibcudf/strings/char_types.pyx index d96161951c6..e7621fb4d84 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/char_types.pyx +++ b/python/pylibcudf/pylibcudf/strings/char_types.pyx @@ -1,4 +1,4 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from cudf._lib.pylibcudf.libcudf.strings.char_types import \ +from pylibcudf.libcudf.strings.char_types import \ string_character_types as StringCharacterTypes # no-cython-lint diff --git a/python/pylibcudf/pylibcudf/strings/contains.pxd b/python/pylibcudf/pylibcudf/strings/contains.pxd new file mode 100644 index 00000000000..2cd4891a0ea --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/contains.pxd @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column cimport Column +from pylibcudf.strings.regex_program cimport RegexProgram + + +cpdef Column contains_re(Column input, RegexProgram prog) diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/contains.pyx b/python/pylibcudf/pylibcudf/strings/contains.pyx similarity index 75% rename from python/cudf/cudf/_lib/pylibcudf/strings/contains.pyx rename to python/pylibcudf/pylibcudf/strings/contains.pyx index 8c598b7c953..1a2446f6e2c 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/contains.pyx +++ b/python/pylibcudf/pylibcudf/strings/contains.pyx @@ -1,11 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.strings cimport contains as cpp_contains -from cudf._lib.pylibcudf.strings.regex_program cimport RegexProgram +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.strings cimport contains as cpp_contains +from pylibcudf.strings.regex_program cimport RegexProgram cpdef Column contains_re( diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/find.pxd b/python/pylibcudf/pylibcudf/strings/find.pxd similarity index 77% rename from python/cudf/cudf/_lib/pylibcudf/strings/find.pxd rename to python/pylibcudf/pylibcudf/strings/find.pxd index bb43069f190..e7524a9360b 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/find.pxd +++ b/python/pylibcudf/pylibcudf/strings/find.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from cudf._lib.pylibcudf.column cimport Column -from cudf._lib.pylibcudf.libcudf.types cimport size_type -from cudf._lib.pylibcudf.scalar cimport Scalar +from pylibcudf.column cimport Column +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar ctypedef fused ColumnOrScalar: Column diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/find.pyx b/python/pylibcudf/pylibcudf/strings/find.pyx similarity index 90% rename from python/cudf/cudf/_lib/pylibcudf/strings/find.pyx rename to python/pylibcudf/pylibcudf/strings/find.pyx index a0214efd0a1..22d370bf7e8 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/find.pyx +++ b/python/pylibcudf/pylibcudf/strings/find.pyx @@ -1,15 +1,14 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.strings cimport find as cpp_find -from cudf._lib.pylibcudf.scalar cimport Scalar +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.strings cimport find as cpp_find +from pylibcudf.scalar cimport Scalar from cython.operator import dereference -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.scalar.scalar cimport string_scalar cpdef Column find( @@ -22,8 +21,8 @@ cpdef Column find( first found in each string of the provided column. ``target`` may be a - :py:class:`~cudf._lib.pylibcudf.column.Column` or a - :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`. + :py:class:`~pylibcudf.column.Column` or a + :py:class:`~pylibcudf.scalar.Scalar`. If ``target`` is a scalar, the scalar will be searched for in each string. If ``target`` is a column, the corresponding string in the column will be @@ -126,8 +125,8 @@ cpdef Column contains( column. ``target`` may be a - :py:class:`~cudf._lib.pylibcudf.column.Column` or a - :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`. + :py:class:`~pylibcudf.column.Column` or a + :py:class:`~pylibcudf.scalar.Scalar`. If ``target`` is a scalar, the scalar will be searched for in each string. If ``target`` is a column, the corresponding string in the column will be @@ -180,8 +179,8 @@ cpdef Column starts_with( column. ``target`` may be a - :py:class:`~cudf._lib.pylibcudf.column.Column` or a - :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`. 
+ :py:class:`~pylibcudf.column.Column` or a + :py:class:`~pylibcudf.scalar.Scalar`. If ``target`` is a scalar, the scalar will be searched for in each string. If ``target`` is a column, the corresponding string in the column will be @@ -233,8 +232,8 @@ cpdef Column ends_with( target string was found at the end of the string in the provided column. ``target`` may be a - :py:class:`~cudf._lib.pylibcudf.column.Column` or a - :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`. + :py:class:`~pylibcudf.column.Column` or a + :py:class:`~pylibcudf.scalar.Scalar`. If ``target`` is a scalar, the scalar will be searched for in each string. If ``target`` is a column, the corresponding string in the column will be diff --git a/python/pylibcudf/pylibcudf/strings/regex_flags.pxd b/python/pylibcudf/pylibcudf/strings/regex_flags.pxd new file mode 100644 index 00000000000..1ce3cd07df8 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/regex_flags.pxd @@ -0,0 +1,2 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +from pylibcudf.libcudf.strings.regex_flags cimport regex_flags diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/regex_flags.pyx b/python/pylibcudf/pylibcudf/strings/regex_flags.pyx similarity index 59% rename from python/cudf/cudf/_lib/pylibcudf/strings/regex_flags.pyx rename to python/pylibcudf/pylibcudf/strings/regex_flags.pyx index 903c2ddd503..ce3b6b10a42 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/regex_flags.pyx +++ b/python/pylibcudf/pylibcudf/strings/regex_flags.pyx @@ -1,4 +1,4 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from cudf._lib.pylibcudf.libcudf.strings.regex_flags import \ +from pylibcudf.libcudf.strings.regex_flags import \ regex_flags as RegexFlags # no-cython-lint diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/regex_program.pxd b/python/pylibcudf/pylibcudf/strings/regex_program.pxd similarity index 70% rename from python/cudf/cudf/_lib/pylibcudf/strings/regex_program.pxd rename to python/pylibcudf/pylibcudf/strings/regex_program.pxd index 61ed268fb2d..045cc1e1c6b 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/regex_program.pxd +++ b/python/pylibcudf/pylibcudf/strings/regex_program.pxd @@ -2,8 +2,7 @@ from libcpp.memory cimport unique_ptr from libcpp.string cimport string - -from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program +from pylibcudf.libcudf.strings.regex_program cimport regex_program cdef class RegexProgram: diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/regex_program.pyx b/python/pylibcudf/pylibcudf/strings/regex_program.pyx similarity index 55% rename from python/cudf/cudf/_lib/pylibcudf/strings/regex_program.pyx rename to python/pylibcudf/pylibcudf/strings/regex_program.pyx index d605b0aba02..f426b6888ae 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/regex_program.pyx +++ b/python/pylibcudf/pylibcudf/strings/regex_program.pyx @@ -4,21 +4,40 @@ from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.utility cimport move +from pylibcudf.libcudf.strings.regex_flags cimport regex_flags +from pylibcudf.libcudf.strings.regex_program cimport regex_program -from cudf._lib.pylibcudf.libcudf.strings.regex_flags cimport regex_flags -from cudf._lib.pylibcudf.libcudf.strings.regex_program cimport regex_program +from pylibcudf.strings.regex_flags import RegexFlags -from cudf._lib.pylibcudf.strings.regex_flags import RegexFlags -from cudf._lib.pylibcudf.strings.regex_flags cimport regex_flags +from pylibcudf.strings.regex_flags cimport regex_flags cdef class RegexProgram: + """Regex program 
class. + This is the Cython representation of + :cpp:class:`cudf::strings::regex_program`. + + Do not instantiate this class directly, use the `create` method. + + """ def __init__(self, *args, **kwargs): raise ValueError("Do not instantiate RegexProgram directly, use create") @staticmethod def create(str pattern, int flags): + """Create a program from a pattern. + + For detils, see :cpp:func:`cudf::strings::regex_program::create`. + + Parameters + ---------- + pattern : str + Regex pattern + flags : Uniont[int, RegexFlags] + Regex flags for interpreting special characters in the pattern + + """ cdef unique_ptr[regex_program] c_prog cdef regex_flags c_flags cdef string c_pattern = pattern.encode() diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/replace.pxd b/python/pylibcudf/pylibcudf/strings/replace.pxd similarity index 71% rename from python/cudf/cudf/_lib/pylibcudf/strings/replace.pxd rename to python/pylibcudf/pylibcudf/strings/replace.pxd index 52e2dc3c738..26273b96c57 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/replace.pxd +++ b/python/pylibcudf/pylibcudf/strings/replace.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from cudf._lib.pylibcudf.column cimport Column -from cudf._lib.pylibcudf.libcudf.types cimport size_type -from cudf._lib.pylibcudf.scalar cimport Scalar +from pylibcudf.column cimport Column +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar cpdef Column replace( diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/replace.pyx b/python/pylibcudf/pylibcudf/strings/replace.pyx similarity index 90% rename from python/cudf/cudf/_lib/pylibcudf/strings/replace.pyx rename to python/pylibcudf/pylibcudf/strings/replace.pyx index c757150a600..9d0ebf4a814 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/replace.pyx +++ b/python/pylibcudf/pylibcudf/strings/replace.pyx @@ -2,20 +2,19 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar -from cudf._lib.pylibcudf.libcudf.scalar.scalar_factories cimport ( +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport ( make_string_scalar as cpp_make_string_scalar, ) -from cudf._lib.pylibcudf.libcudf.strings.replace cimport ( +from pylibcudf.libcudf.strings.replace cimport ( replace as cpp_replace, replace_multiple as cpp_replace_multiple, replace_slice as cpp_replace_slice, ) -from cudf._lib.pylibcudf.libcudf.types cimport size_type -from cudf._lib.pylibcudf.scalar cimport Scalar +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar cpdef Column replace( diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/slice.pxd b/python/pylibcudf/pylibcudf/strings/slice.pxd similarity index 69% rename from python/cudf/cudf/_lib/pylibcudf/strings/slice.pxd rename to python/pylibcudf/pylibcudf/strings/slice.pxd index 7d8d0006ef4..01e9f2b3c88 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/slice.pxd +++ b/python/pylibcudf/pylibcudf/strings/slice.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-from cudf._lib.pylibcudf.column cimport Column -from cudf._lib.pylibcudf.scalar cimport Scalar +from pylibcudf.column cimport Column +from pylibcudf.scalar cimport Scalar ctypedef fused ColumnOrScalar: Column diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/slice.pyx b/python/pylibcudf/pylibcudf/strings/slice.pyx similarity index 81% rename from python/cudf/cudf/_lib/pylibcudf/strings/slice.pyx rename to python/pylibcudf/pylibcudf/strings/slice.pyx index df75134fb71..70d10cab36c 100644 --- a/python/cudf/cudf/_lib/pylibcudf/strings/slice.pyx +++ b/python/pylibcudf/pylibcudf/strings/slice.pyx @@ -2,16 +2,15 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move - -from cudf._lib.pylibcudf.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport numeric_scalar -from cudf._lib.pylibcudf.libcudf.scalar.scalar_factories cimport ( +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.scalar.scalar cimport numeric_scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport ( make_fixed_width_scalar as cpp_make_fixed_width_scalar, ) -from cudf._lib.pylibcudf.libcudf.strings cimport substring as cpp_slice -from cudf._lib.pylibcudf.libcudf.types cimport size_type -from cudf._lib.pylibcudf.scalar cimport Scalar +from pylibcudf.libcudf.strings cimport substring as cpp_slice +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar from cython.operator import dereference @@ -25,9 +24,9 @@ cpdef Column slice_strings( """Perform a slice operation on a strings column. ``start`` and ``stop`` may be a - :py:class:`~cudf._lib.pylibcudf.column.Column` or a - :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`. But ``step`` must be a - :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`. + :py:class:`~pylibcudf.column.Column` or a + :py:class:`~pylibcudf.scalar.Scalar`. But ``step`` must be a + :py:class:`~pylibcudf.scalar.Scalar`. For details, see :cpp:func:`cudf::strings::slice_strings`. diff --git a/python/cudf/cudf/_lib/pylibcudf/table.pxd b/python/pylibcudf/pylibcudf/table.pxd similarity index 78% rename from python/cudf/cudf/_lib/pylibcudf/table.pxd rename to python/pylibcudf/pylibcudf/table.pxd index e476fc770e3..cf5c0aa80f2 100644 --- a/python/cudf/cudf/_lib/pylibcudf/table.pxd +++ b/python/pylibcudf/pylibcudf/table.pxd @@ -1,9 +1,8 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr - -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.table.table_view cimport table_view cdef class Table: diff --git a/python/cudf/cudf/_lib/pylibcudf/table.pyx b/python/pylibcudf/pylibcudf/table.pyx similarity index 93% rename from python/cudf/cudf/_lib/pylibcudf/table.pyx rename to python/pylibcudf/pylibcudf/table.pyx index d91fa0474b0..5f77b89a605 100644 --- a/python/cudf/cudf/_lib/pylibcudf/table.pyx +++ b/python/pylibcudf/pylibcudf/table.pyx @@ -4,10 +4,9 @@ from cython.operator cimport dereference from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector - -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.table.table cimport table +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.table.table cimport table from .column cimport Column diff --git a/python/cudf/cudf/pylibcudf_tests/common/utils.py b/python/pylibcudf/pylibcudf/tests/common/utils.py similarity index 95% rename from python/cudf/cudf/pylibcudf_tests/common/utils.py rename to python/pylibcudf/pylibcudf/tests/common/utils.py index e19ff58927f..babe6634318 100644 --- a/python/cudf/cudf/pylibcudf_tests/common/utils.py +++ b/python/pylibcudf/pylibcudf/tests/common/utils.py @@ -6,11 +6,11 @@ import numpy as np import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc import pytest from pyarrow.parquet import write_table as pq_write_table - -from cudf._lib import pylibcudf as plc -from cudf._lib.pylibcudf.io.types import CompressionType +from pylibcudf.io.types import CompressionType def metadata_from_arrow_type( @@ -44,7 +44,7 @@ def metadata_from_arrow_type( def assert_column_eq( lhs: pa.Array | plc.Column, rhs: pa.Array | plc.Column, - check_field_nullability=True, + check_field_nullability=False, ) -> None: """Verify that a pylibcudf array and PyArrow array are equal. @@ -59,7 +59,9 @@ def assert_column_eq( on child fields are equal. Useful for checking roundtripping of lossy formats like JSON that may not - preserve this information. + preserve this information. Also, our Arrow interop functions make different + choices by default than pyarrow field constructors since the interop functions + may make data-dependent choices. """ # Nested types require children metadata to be passed to the conversion function. 
if isinstance(lhs, (pa.Array, pa.ChunkedArray)) and isinstance( @@ -155,13 +157,13 @@ def _flatten_arrays(arr): for lh_arr, rh_arr in zip(lhs, rhs): # Check NaNs positions match # and then filter out nans - lhs_nans = pa.compute.is_nan(lh_arr) - rhs_nans = pa.compute.is_nan(rh_arr) + lhs_nans = pc.is_nan(lh_arr) + rhs_nans = pc.is_nan(rh_arr) assert lhs_nans.equals(rhs_nans) - if pa.compute.any(lhs_nans) or pa.compute.any(rhs_nans): + if pc.any(lhs_nans) or pc.any(rhs_nans): # masks must be equal at this point - mask = pa.compute.fill_null(pa.compute.invert(lhs_nans), True) + mask = pc.fill_null(pc.invert(lhs_nans), True) lh_arr = lh_arr.filter(mask) rh_arr = rh_arr.filter(mask) diff --git a/python/cudf/cudf/pylibcudf_tests/conftest.py b/python/pylibcudf/pylibcudf/tests/conftest.py similarity index 98% rename from python/cudf/cudf/pylibcudf_tests/conftest.py rename to python/pylibcudf/pylibcudf/tests/conftest.py index 945e1689229..fdce6f353ca 100644 --- a/python/cudf/cudf/pylibcudf_tests/conftest.py +++ b/python/pylibcudf/pylibcudf/tests/conftest.py @@ -8,10 +8,9 @@ import numpy as np import pyarrow as pa +import pylibcudf as plc import pytest - -import cudf._lib.pylibcudf as plc -from cudf._lib.pylibcudf.io.types import CompressionType +from pylibcudf.io.types import CompressionType sys.path.insert(0, os.path.join(os.path.dirname(__file__), "common")) diff --git a/python/cudf/cudf/pylibcudf_tests/io/test_avro.py b/python/pylibcudf/pylibcudf/tests/io/test_avro.py similarity index 98% rename from python/cudf/cudf/pylibcudf_tests/io/test_avro.py rename to python/pylibcudf/pylibcudf/tests/io/test_avro.py index 061d6792ce3..0cd5064a697 100644 --- a/python/cudf/cudf/pylibcudf_tests/io/test_avro.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_avro.py @@ -5,11 +5,10 @@ import fastavro import pyarrow as pa +import pylibcudf as plc import pytest from utils import assert_table_and_meta_eq -import cudf._lib.pylibcudf as plc - avro_dtype_pairs = [ ("boolean", pa.bool_()), ("int", pa.int32()), diff --git a/python/cudf/cudf/pylibcudf_tests/io/test_csv.py b/python/pylibcudf/pylibcudf/tests/io/test_csv.py similarity index 98% rename from python/cudf/cudf/pylibcudf_tests/io/test_csv.py rename to python/pylibcudf/pylibcudf/tests/io/test_csv.py index 95326a8b681..ccd7eef54f3 100644 --- a/python/cudf/cudf/pylibcudf_tests/io/test_csv.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_csv.py @@ -5,7 +5,9 @@ import pandas as pd import pyarrow as pa +import pylibcudf as plc import pytest +from pylibcudf.io.types import CompressionType from utils import ( _convert_numeric_types_to_floating, assert_table_and_meta_eq, @@ -13,9 +15,6 @@ write_source_str, ) -import cudf._lib.pylibcudf as plc -from cudf._lib.pylibcudf.io.types import CompressionType - # Shared kwargs to pass to make_source _COMMON_CSV_SOURCE_KWARGS = { "format": "csv", diff --git a/python/cudf/cudf/pylibcudf_tests/io/test_json.py b/python/pylibcudf/pylibcudf/tests/io/test_json.py similarity index 99% rename from python/cudf/cudf/pylibcudf_tests/io/test_json.py rename to python/pylibcudf/pylibcudf/tests/io/test_json.py index 4239f2438bb..9d976fedf00 100644 --- a/python/cudf/cudf/pylibcudf_tests/io/test_json.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_json.py @@ -3,7 +3,9 @@ import pandas as pd import pyarrow as pa +import pylibcudf as plc import pytest +from pylibcudf.io.types import CompressionType from utils import ( assert_table_and_meta_eq, make_source, @@ -11,9 +13,6 @@ write_source_str, ) -import cudf._lib.pylibcudf as plc -from 
cudf._lib.pylibcudf.io.types import CompressionType - # Shared kwargs to pass to make_source _COMMON_JSON_SOURCE_KWARGS = {"format": "json", "orient": "records"} diff --git a/python/cudf/cudf/pylibcudf_tests/io/test_parquet.py b/python/pylibcudf/pylibcudf/tests/io/test_parquet.py similarity index 96% rename from python/cudf/cudf/pylibcudf_tests/io/test_parquet.py rename to python/pylibcudf/pylibcudf/tests/io/test_parquet.py index 07d2ab3d69a..f6e843ccf66 100644 --- a/python/cudf/cudf/pylibcudf_tests/io/test_parquet.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_parquet.py @@ -1,18 +1,17 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa import pyarrow.compute as pc +import pylibcudf as plc import pytest from pyarrow.parquet import read_table -from utils import assert_table_and_meta_eq, make_source - -import cudf._lib.pylibcudf as plc -from cudf._lib.pylibcudf.expressions import ( +from pylibcudf.expressions import ( ASTOperator, ColumnNameReference, ColumnReference, Literal, Operation, ) +from utils import assert_table_and_meta_eq, make_source # Shared kwargs to pass to make_source _COMMON_PARQUET_SOURCE_KWARGS = {"format": "parquet"} @@ -31,7 +30,7 @@ def test_read_parquet_basic( res = plc.io.parquet.read_parquet( plc.io.SourceInfo([source]), - num_rows=nrows, + nrows=nrows, skip_rows=skiprows, columns=columns, ) diff --git a/python/cudf/cudf/pylibcudf_tests/io/test_source_sink_info.py b/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py similarity index 98% rename from python/cudf/cudf/pylibcudf_tests/io/test_source_sink_info.py rename to python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py index 438c482b77a..747f58ec8cf 100644 --- a/python/cudf/cudf/pylibcudf_tests/io/test_source_sink_info.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py @@ -2,10 +2,9 @@ import io +import pylibcudf as plc import pytest -import cudf._lib.pylibcudf as plc - @pytest.fixture(params=[plc.io.SourceInfo, plc.io.SinkInfo]) def io_class(request): diff --git a/python/cudf/cudf/pylibcudf_tests/pytest.ini b/python/pylibcudf/pylibcudf/tests/pytest.ini similarity index 100% rename from python/cudf/cudf/pylibcudf_tests/pytest.ini rename to python/pylibcudf/pylibcudf/tests/pytest.ini diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/pylibcudf/pylibcudf/tests/test_binaryops.py similarity index 99% rename from python/cudf/cudf/pylibcudf_tests/test_binaryops.py rename to python/pylibcudf/pylibcudf/tests/test_binaryops.py index a83caf39ead..f784cb3c191 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/pylibcudf/pylibcudf/tests/test_binaryops.py @@ -4,11 +4,10 @@ import numpy as np import pyarrow as pa +import pylibcudf as plc import pytest from utils import assert_column_eq -from cudf._lib import pylibcudf as plc - def idfn(param): ltype, rtype, outtype, plc_op, _ = param diff --git a/python/cudf/cudf/pylibcudf_tests/test_column_factories.py b/python/pylibcudf/pylibcudf/tests/test_column_factories.py similarity index 99% rename from python/cudf/cudf/pylibcudf_tests/test_column_factories.py rename to python/pylibcudf/pylibcudf/tests/test_column_factories.py index 4c05770a41f..8cedbc6d42f 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_column_factories.py +++ b/python/pylibcudf/pylibcudf/tests/test_column_factories.py @@ -1,11 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa +import pylibcudf as plc import pytest from utils import DEFAULT_STRUCT_TESTING_TYPE, assert_column_eq -from cudf._lib import pylibcudf as plc - EMPTY_COL_SIZE = 3 NUMERIC_TYPES = [ diff --git a/python/pylibcudf/pylibcudf/tests/test_column_from_device.py b/python/pylibcudf/pylibcudf/tests/test_column_from_device.py new file mode 100644 index 00000000000..0e129fdf0ef --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_column_from_device.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pylibcudf as plc +import pytest +from utils import assert_column_eq + +import rmm + +VALID_TYPES = [ + pa.int8(), + pa.int16(), + pa.int32(), + pa.int64(), + pa.uint8(), + pa.uint16(), + pa.uint32(), + pa.uint64(), + pa.float32(), + pa.float64(), + pa.bool_(), + pa.timestamp("s"), + pa.timestamp("ms"), + pa.timestamp("us"), + pa.timestamp("ns"), + pa.duration("s"), + pa.duration("ms"), + pa.duration("us"), + pa.duration("ns"), +] + + +@pytest.fixture(params=VALID_TYPES, ids=repr) +def valid_type(request): + return request.param + + +class DataBuffer: + def __init__(self, obj, dtype): + self.obj = rmm.DeviceBuffer.to_device(obj) + self.dtype = dtype + self.shape = (int(len(self.obj) / self.dtype.itemsize),) + self.strides = (self.dtype.itemsize,) + self.typestr = self.dtype.str + + @property + def __cuda_array_interface__(self): + return { + "data": self.obj.__cuda_array_interface__["data"], + "shape": self.shape, + "strides": self.strides, + "typestr": self.typestr, + "version": 0, + } + + +@pytest.fixture +def input_column(valid_type): + if valid_type == pa.bool_(): + return pa.array([True, False, True], type=valid_type) + return pa.array([1, 2, 3], type=valid_type) + + +@pytest.fixture +def iface_obj(input_column): + data = input_column.to_numpy(zero_copy_only=False) + return DataBuffer(data.view("uint8"), data.dtype) + + +def test_from_cuda_array_interface(input_column, iface_obj): + col = plc.column.Column.from_cuda_array_interface_obj(iface_obj) + + assert_column_eq(input_column, col) diff --git a/python/cudf/cudf/pylibcudf_tests/test_copying.py b/python/pylibcudf/pylibcudf/tests/test_copying.py similarity index 99% rename from python/cudf/cudf/pylibcudf_tests/test_copying.py rename to python/pylibcudf/pylibcudf/tests/test_copying.py index f27fe4e942e..628682d0a66 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_copying.py +++ b/python/pylibcudf/pylibcudf/tests/test_copying.py @@ -2,6 +2,7 @@ import pyarrow as pa import pyarrow.compute as pc +import pylibcudf as plc import pytest from utils import ( DEFAULT_STRUCT_TESTING_TYPE, @@ -15,8 +16,6 @@ metadata_from_arrow_type, ) -from cudf._lib import pylibcudf as plc - # TODO: consider moving this to conftest and "pairing" # it with pa_type, so that they don't get out of sync diff --git a/python/cudf/cudf/pylibcudf_tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py similarity index 83% rename from python/cudf/cudf/pylibcudf_tests/test_datetime.py rename to python/pylibcudf/pylibcudf/tests/test_datetime.py index 75af0fa6ca1..d3aa6101e2d 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_datetime.py +++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py @@ -3,11 +3,11 @@ import datetime import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc import pytest from utils import assert_column_eq -import cudf._lib.pylibcudf as plc - @pytest.fixture def column(has_nulls): @@ -25,6 +25,6 @@ def column(has_nulls): def test_extract_year(column): got = 
plc.datetime.extract_year(column) # libcudf produces an int16, arrow produces an int64 - expect = pa.compute.year(plc.interop.to_arrow(column)).cast(pa.int16()) + expect = pc.year(plc.interop.to_arrow(column)).cast(pa.int16()) assert_column_eq(expect, got) diff --git a/python/cudf/cudf/pylibcudf_tests/test_expressions.py b/python/pylibcudf/pylibcudf/tests/test_expressions.py similarity index 97% rename from python/cudf/cudf/pylibcudf_tests/test_expressions.py rename to python/pylibcudf/pylibcudf/tests/test_expressions.py index f661512caad..5894ef4624c 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_expressions.py +++ b/python/pylibcudf/pylibcudf/tests/test_expressions.py @@ -1,9 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa +import pylibcudf as plc import pytest -import cudf._lib.pylibcudf as plc - # We can't really evaluate these expressions, so just make sure # construction works properly diff --git a/python/cudf/cudf/pylibcudf_tests/test_interop.py b/python/pylibcudf/pylibcudf/tests/test_interop.py similarity index 98% rename from python/cudf/cudf/pylibcudf_tests/test_interop.py rename to python/pylibcudf/pylibcudf/tests/test_interop.py index 5c05f460e28..01c998f16d4 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_interop.py +++ b/python/pylibcudf/pylibcudf/tests/test_interop.py @@ -1,10 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa +import pylibcudf as plc import pytest -import cudf._lib.pylibcudf as plc - def test_list_dtype_roundtrip(): list_type = pa.list_(pa.int32()) diff --git a/python/cudf/cudf/pylibcudf_tests/test_join.py b/python/pylibcudf/pylibcudf/tests/test_join.py similarity index 94% rename from python/cudf/cudf/pylibcudf_tests/test_join.py rename to python/pylibcudf/pylibcudf/tests/test_join.py index eb25ed915b1..61e02f4d28d 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_join.py +++ b/python/pylibcudf/pylibcudf/tests/test_join.py @@ -2,10 +2,9 @@ import numpy as np import pyarrow as pa +import pylibcudf as plc from utils import assert_table_eq -from cudf._lib import pylibcudf as plc - def test_cross_join(): left = pa.Table.from_arrays([[0, 1, 2], [3, 4, 5]], names=["a", "b"]) diff --git a/python/cudf/cudf/pylibcudf_tests/test_lists.py b/python/pylibcudf/pylibcudf/tests/test_lists.py similarity index 99% rename from python/cudf/cudf/pylibcudf_tests/test_lists.py rename to python/pylibcudf/pylibcudf/tests/test_lists.py index 33f95a7d364..2353a6ff8f9 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_lists.py +++ b/python/pylibcudf/pylibcudf/tests/test_lists.py @@ -2,11 +2,11 @@ import numpy as np import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc import pytest from utils import assert_column_eq -from cudf._lib import pylibcudf as plc - @pytest.fixture def test_data(): @@ -184,7 +184,7 @@ def test_extract_list_element_scalar(list_column): plc_column = plc.interop.from_arrow(pa.array(list_column)) res = plc.lists.extract_list_element(plc_column, 0) - expect = pa.compute.list_element(list_column, 0) + expect = pc.list_element(list_column, 0) assert_column_eq(expect, res) diff --git a/python/pylibcudf/pylibcudf/tests/test_null_mask.py b/python/pylibcudf/pylibcudf/tests/test_null_mask.py new file mode 100644 index 00000000000..3edcae59edc --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_null_mask.py @@ -0,0 +1,59 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import pyarrow as pa +import pylibcudf as plc +import pytest +from pylibcudf.null_mask import MaskState + +import rmm + + +@pytest.fixture(params=[False, True]) +def nullable(request): + return request.param + + +@pytest.fixture(params=["float32", "float64"]) +def column(request, nullable): + values = [2.5, 2.49, 1.6, 8, -1.5, -1.7, -0.5, 0.5] + typ = {"float32": pa.float32(), "float64": pa.float64()}[request.param] + if nullable: + values[2] = None + return plc.interop.from_arrow(pa.array(values, type=typ)) + + +def test_copy_bitmask(column, nullable): + expected = column.null_mask().obj if nullable else rmm.DeviceBuffer() + got = plc.null_mask.copy_bitmask(column) + + assert expected.size == got.size + assert expected.tobytes() == got.tobytes() + + +def test_bitmask_allocation_size_bytes(): + assert plc.null_mask.bitmask_allocation_size_bytes(0) == 0 + assert plc.null_mask.bitmask_allocation_size_bytes(1) == 64 + assert plc.null_mask.bitmask_allocation_size_bytes(512) == 64 + assert plc.null_mask.bitmask_allocation_size_bytes(513) == 128 + assert plc.null_mask.bitmask_allocation_size_bytes(1024) == 128 + assert plc.null_mask.bitmask_allocation_size_bytes(1025) == 192 + + +@pytest.mark.parametrize("size", [0, 1, 512, 1024]) +@pytest.mark.parametrize( + "state", + [ + MaskState.UNALLOCATED, + MaskState.UNINITIALIZED, + MaskState.ALL_VALID, + MaskState.ALL_NULL, + ], +) +def test_create_null_mask(size, state): + mask = plc.null_mask.create_null_mask(size, state) + + assert mask.size == ( + 0 + if state == MaskState.UNALLOCATED + else plc.null_mask.bitmask_allocation_size_bytes(size) + ) diff --git a/python/cudf/cudf/pylibcudf_tests/test_quantiles.py b/python/pylibcudf/pylibcudf/tests/test_quantiles.py similarity index 99% rename from python/cudf/cudf/pylibcudf_tests/test_quantiles.py rename to python/pylibcudf/pylibcudf/tests/test_quantiles.py index 13f3b037606..bac56691306 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_quantiles.py +++ b/python/pylibcudf/pylibcudf/tests/test_quantiles.py @@ -3,11 +3,10 @@ import numpy as np import pyarrow as pa import pyarrow.compute as pc +import pylibcudf as plc import pytest from utils import assert_column_eq, assert_table_eq -import cudf._lib.pylibcudf as plc - # Map pylibcudf interpolation options to pyarrow options interp_mapping = { plc.types.Interpolation.LINEAR: "linear", diff --git a/python/cudf/cudf/pylibcudf_tests/test_regex_program.py b/python/pylibcudf/pylibcudf/tests/test_regex_program.py similarity index 89% rename from python/cudf/cudf/pylibcudf_tests/test_regex_program.py rename to python/pylibcudf/pylibcudf/tests/test_regex_program.py index 3a9bcec3616..777315df538 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_regex_program.py +++ b/python/pylibcudf/pylibcudf/tests/test_regex_program.py @@ -1,9 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +import pylibcudf as plc import pytest -import cudf._lib.pylibcudf as plc - @pytest.mark.parametrize("pat", ["(", "*", "\\"]) def test_regex_program_invalid(pat): diff --git a/python/cudf/cudf/pylibcudf_tests/test_reshape.py b/python/pylibcudf/pylibcudf/tests/test_reshape.py similarity index 96% rename from python/cudf/cudf/pylibcudf_tests/test_reshape.py rename to python/pylibcudf/pylibcudf/tests/test_reshape.py index da1157e5832..01115bc363a 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_reshape.py +++ b/python/pylibcudf/pylibcudf/tests/test_reshape.py @@ -1,11 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa +import pylibcudf as plc import pytest from utils import assert_column_eq, assert_table_eq -from cudf._lib import pylibcudf as plc - @pytest.fixture(scope="module") def reshape_data(): diff --git a/python/cudf/cudf/pylibcudf_tests/test_round.py b/python/pylibcudf/pylibcudf/tests/test_round.py similarity index 86% rename from python/cudf/cudf/pylibcudf_tests/test_round.py rename to python/pylibcudf/pylibcudf/tests/test_round.py index 991e6ed310d..0b30316b9a0 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_round.py +++ b/python/pylibcudf/pylibcudf/tests/test_round.py @@ -1,11 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc import pytest from utils import assert_column_eq -import cudf._lib.pylibcudf as plc - @pytest.fixture(params=["float32", "float64"]) def column(request, has_nulls): @@ -26,8 +26,6 @@ def test_round(column, round_mode, decimals): "half_to_even": plc.round.RoundingMethod.HALF_EVEN, }[round_mode] got = plc.round.round(column, decimals, method) - expect = pa.compute.round( - plc.interop.to_arrow(column), decimals, round_mode - ) + expect = pc.round(plc.interop.to_arrow(column), decimals, round_mode) assert_column_eq(expect, got) diff --git a/python/cudf/cudf/pylibcudf_tests/test_string_capitalize.py b/python/pylibcudf/pylibcudf/tests/test_string_capitalize.py similarity index 86% rename from python/cudf/cudf/pylibcudf_tests/test_string_capitalize.py rename to python/pylibcudf/pylibcudf/tests/test_string_capitalize.py index c4e437fe5d9..176ccc55b96 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_string_capitalize.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_capitalize.py @@ -1,11 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc import pytest from utils import assert_column_eq -import cudf._lib.pylibcudf as plc - @pytest.fixture(scope="module") def str_data(): @@ -34,7 +34,7 @@ def str_data(): def test_capitalize(str_data): pa_data, plc_data = str_data got = plc.strings.capitalize.capitalize(plc_data) - expected = pa.compute.utf8_capitalize(pa_data) + expected = pc.utf8_capitalize(pa_data) assert_column_eq(expected, got) @@ -43,12 +43,12 @@ def test_title(str_data): got = plc.strings.capitalize.title( plc_data, plc.strings.char_types.StringCharacterTypes.CASE_TYPES ) - expected = pa.compute.utf8_title(pa_data) + expected = pc.utf8_title(pa_data) assert_column_eq(expected, got) def test_is_title(str_data): pa_data, plc_data = str_data got = plc.strings.capitalize.is_title(plc_data) - expected = pa.compute.utf8_is_title(pa_data) + expected = pc.utf8_is_title(pa_data) assert_column_eq(expected, got) diff --git a/python/cudf/cudf/pylibcudf_tests/test_string_case.py b/python/pylibcudf/pylibcudf/tests/test_string_case.py similarity index 80% rename from python/cudf/cudf/pylibcudf_tests/test_string_case.py rename to python/pylibcudf/pylibcudf/tests/test_string_case.py index 1039859b2cf..233cc253b14 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_string_case.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_case.py @@ -1,11 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc import pytest from utils import assert_column_eq -import cudf._lib.pylibcudf as plc - @pytest.fixture(scope="module") def string_col(): @@ -17,19 +17,19 @@ def string_col(): def test_to_upper(string_col): plc_col = plc.interop.from_arrow(string_col) got = plc.strings.case.to_upper(plc_col) - expected = pa.compute.utf8_upper(string_col) + expected = pc.utf8_upper(string_col) assert_column_eq(expected, got) def test_to_lower(string_col): plc_col = plc.interop.from_arrow(string_col) got = plc.strings.case.to_lower(plc_col) - expected = pa.compute.utf8_lower(string_col) + expected = pc.utf8_lower(string_col) assert_column_eq(expected, got) def test_swapcase(string_col): plc_col = plc.interop.from_arrow(string_col) got = plc.strings.case.swapcase(plc_col) - expected = pa.compute.utf8_swapcase(string_col) + expected = pc.utf8_swapcase(string_col) assert_column_eq(expected, got) diff --git a/python/cudf/cudf/pylibcudf_tests/test_string_contains.py b/python/pylibcudf/pylibcudf/tests/test_string_contains.py similarity index 92% rename from python/cudf/cudf/pylibcudf_tests/test_string_contains.py rename to python/pylibcudf/pylibcudf/tests/test_string_contains.py index fc8c6656b5d..4f88e09183f 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_string_contains.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_contains.py @@ -1,11 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc import pytest from utils import assert_column_eq -import cudf._lib.pylibcudf as plc - @pytest.fixture(scope="module") def target_col(): @@ -44,7 +44,7 @@ def plc_target_pat(pa_target_scalar): def test_contains_re(target_col, pa_target_scalar, plc_target_pat): pa_target_col, plc_target_col = target_col got = plc.strings.contains.contains_re(plc_target_col, plc_target_pat) - expected = pa.compute.match_substring_regex( + expected = pc.match_substring_regex( pa_target_col, pa_target_scalar.as_py() ) assert_column_eq(got, expected) diff --git a/python/cudf/cudf/pylibcudf_tests/test_string_find.py b/python/pylibcudf/pylibcudf/tests/test_string_find.py similarity index 97% rename from python/cudf/cudf/pylibcudf_tests/test_string_find.py rename to python/pylibcudf/pylibcudf/tests/test_string_find.py index 95a1a3cf731..db3b13a5aae 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_string_find.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_find.py @@ -1,11 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc import pytest from utils import assert_column_eq -import cudf._lib.pylibcudf as plc - @pytest.fixture(scope="module") def data_col(): @@ -223,7 +223,7 @@ def test_starts_with(data_col, target_scalar): pa_target_scalar, plc_target_scalar = target_scalar py_target = pa_target_scalar.as_py() got = plc.strings.find.starts_with(plc_data_col, plc_target_scalar) - expected = pa.compute.starts_with(pa_data_col, py_target) + expected = pc.starts_with(pa_data_col, py_target) assert_column_eq(expected, got) @@ -242,7 +242,7 @@ def test_ends_with(data_col, target_scalar): pa_target_scalar, plc_target_scalar = target_scalar py_target = pa_target_scalar.as_py() got = plc.strings.find.ends_with(plc_data_col, plc_target_scalar) - expected = pa.compute.ends_with(pa_data_col, py_target) + expected = pc.ends_with(pa_data_col, py_target) assert_column_eq(expected, got) diff --git a/python/cudf/cudf/pylibcudf_tests/test_string_replace.py b/python/pylibcudf/pylibcudf/tests/test_string_replace.py similarity index 95% rename from python/cudf/cudf/pylibcudf_tests/test_string_replace.py rename to python/pylibcudf/pylibcudf/tests/test_string_replace.py index f20edf6a506..5a9c2007b73 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_string_replace.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_replace.py @@ -1,11 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc import pytest from utils import assert_column_eq -import cudf._lib.pylibcudf as plc - @pytest.fixture(scope="module") def data_col(): @@ -64,7 +64,7 @@ def test_replace(data_col, scalar_repl_target, scalar_repl, maxrepl): plc_data_col, plc_target, plc_repl, maxrepl ) - expected = pa.compute.replace_substring( + expected = pc.replace_substring( pa_data_col, pattern=pa_target, replacement=pa_repl, @@ -90,7 +90,7 @@ def test_replace_slice(data_col, scalar_repl, startstop): # count_characters on the input, take the max and set stop to that stop = 1000 - expected = pa.compute.utf8_replace_slice(pa_data_col, start, stop, pa_repl) + expected = pc.utf8_replace_slice(pa_data_col, start, stop, pa_repl) assert_column_eq(expected, got) diff --git a/python/cudf/cudf/pylibcudf_tests/test_string_slice.py b/python/pylibcudf/pylibcudf/tests/test_string_slice.py similarity index 98% rename from python/cudf/cudf/pylibcudf_tests/test_string_slice.py rename to python/pylibcudf/pylibcudf/tests/test_string_slice.py index bd63987b30f..d9ce5591b98 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_string_slice.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_slice.py @@ -1,11 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa +import pylibcudf as plc import pytest from utils import assert_column_eq -import cudf._lib.pylibcudf as plc - @pytest.fixture(scope="module") def pa_col(): diff --git a/python/cudf/cudf/pylibcudf_tests/test_table.py b/python/pylibcudf/pylibcudf/tests/test_table.py similarity index 93% rename from python/cudf/cudf/pylibcudf_tests/test_table.py rename to python/pylibcudf/pylibcudf/tests/test_table.py index cf1d51f6491..e822d6a97a8 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_table.py +++ b/python/pylibcudf/pylibcudf/tests/test_table.py @@ -1,10 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa +import pylibcudf as plc import pytest -import cudf._lib.pylibcudf as plc - @pytest.mark.parametrize( "arrow_tbl", diff --git a/python/cudf/cudf/pylibcudf_tests/test_traits.py b/python/pylibcudf/pylibcudf/tests/test_traits.py similarity index 98% rename from python/cudf/cudf/pylibcudf_tests/test_traits.py rename to python/pylibcudf/pylibcudf/tests/test_traits.py index 6c22cb02f21..2570e8abd51 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_traits.py +++ b/python/pylibcudf/pylibcudf/tests/test_traits.py @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from cudf._lib import pylibcudf as plc +import pylibcudf as plc def test_is_relationally_comparable(): diff --git a/python/cudf/cudf/pylibcudf_tests/test_transform.py b/python/pylibcudf/pylibcudf/tests/test_transform.py similarity index 95% rename from python/cudf/cudf/pylibcudf_tests/test_transform.py rename to python/pylibcudf/pylibcudf/tests/test_transform.py index 312939888dd..06fc35d8835 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_transform.py +++ b/python/pylibcudf/pylibcudf/tests/test_transform.py @@ -3,10 +3,9 @@ import math import pyarrow as pa +import pylibcudf as plc from utils import assert_column_eq -from cudf._lib import pylibcudf as plc - def test_nans_to_nulls(has_nans): if has_nans: diff --git a/python/cudf/cudf/pylibcudf_tests/test_unary.py b/python/pylibcudf/pylibcudf/tests/test_unary.py similarity index 93% rename from python/cudf/cudf/pylibcudf_tests/test_unary.py rename to python/pylibcudf/pylibcudf/tests/test_unary.py index b5e4f0cb0e8..9b8085d5c52 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_unary.py +++ b/python/pylibcudf/pylibcudf/tests/test_unary.py @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from cudf._lib import pylibcudf as plc +import pylibcudf as plc def test_is_supported_cast(): diff --git a/python/cudf/cudf/_lib/pylibcudf/traits.pxd b/python/pylibcudf/pylibcudf/traits.pxd similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/traits.pxd rename to python/pylibcudf/pylibcudf/traits.pxd diff --git a/python/cudf/cudf/_lib/pylibcudf/traits.pyx b/python/pylibcudf/pylibcudf/traits.pyx similarity index 98% rename from python/cudf/cudf/_lib/pylibcudf/traits.pyx rename to python/pylibcudf/pylibcudf/traits.pyx index d2370f8d641..5a1c67e1f6c 100644 --- a/python/cudf/cudf/_lib/pylibcudf/traits.pyx +++ b/python/pylibcudf/pylibcudf/traits.pyx @@ -1,8 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
from libcpp cimport bool - -from cudf._lib.pylibcudf.libcudf.utilities cimport traits +from pylibcudf.libcudf.utilities cimport traits from .types cimport DataType diff --git a/python/cudf/cudf/_lib/pylibcudf/transform.pxd b/python/pylibcudf/pylibcudf/transform.pxd similarity index 100% rename from python/cudf/cudf/_lib/pylibcudf/transform.pxd rename to python/pylibcudf/pylibcudf/transform.pxd diff --git a/python/cudf/cudf/_lib/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx similarity index 87% rename from python/cudf/cudf/_lib/pylibcudf/transform.pyx rename to python/pylibcudf/pylibcudf/transform.pyx index a734e71b820..100ccb580ce 100644 --- a/python/cudf/cudf/_lib/pylibcudf/transform.pyx +++ b/python/pylibcudf/pylibcudf/transform.pyx @@ -2,12 +2,11 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move, pair +from pylibcudf.libcudf cimport transform as cpp_transform +from pylibcudf.libcudf.types cimport size_type from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer -from cudf._lib.pylibcudf.libcudf cimport transform as cpp_transform -from cudf._lib.pylibcudf.libcudf.types cimport size_type - from .column cimport Column from .gpumemoryview cimport gpumemoryview diff --git a/python/cudf/cudf/_lib/pylibcudf/types.pxd b/python/pylibcudf/pylibcudf/types.pxd similarity index 86% rename from python/cudf/cudf/_lib/pylibcudf/types.pxd rename to python/pylibcudf/pylibcudf/types.pxd index 7d3ddca14a1..aa48979d961 100644 --- a/python/cudf/cudf/_lib/pylibcudf/types.pxd +++ b/python/pylibcudf/pylibcudf/types.pxd @@ -2,8 +2,7 @@ from libc.stdint cimport int32_t from libcpp cimport bool as cbool - -from cudf._lib.pylibcudf.libcudf.types cimport ( +from pylibcudf.libcudf.types cimport ( data_type, interpolation, mask_state, @@ -27,3 +26,5 @@ cdef class DataType: @staticmethod cdef DataType from_libcudf(data_type dt) + +cpdef size_type size_of(DataType t) diff --git a/python/cudf/cudf/_lib/pylibcudf/types.pyx b/python/pylibcudf/pylibcudf/types.pyx similarity index 58% rename from python/cudf/cudf/_lib/pylibcudf/types.pyx rename to python/pylibcudf/pylibcudf/types.pyx index c45c6071bb3..58c7d97e9bc 100644 --- a/python/cudf/cudf/_lib/pylibcudf/types.pyx +++ b/python/pylibcudf/pylibcudf/types.pyx @@ -1,20 +1,24 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. 
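Besides the import-path move, the types.pyx hunk continuing below adds a Python-level size_of binding over libcudf's size_of. A hedged usage sketch (assumes a built pylibcudf; enum spellings as exported by pylibcudf.types):

import pylibcudf as plc

dt = plc.types.DataType(plc.types.TypeId.INT32)
assert plc.types.size_of(dt) == 4  # int32_t is 4 bytes wide

# Only fixed-width types are supported; a variable-width type such as
# TypeId.STRING should raise rather than return a size.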
from libc.stdint cimport int32_t +from pylibcudf.libcudf.types cimport ( + data_type, + size_of as cpp_size_of, + size_type, + type_id, +) +from pylibcudf.libcudf.utilities.type_dispatcher cimport type_to_id -from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type, type_id -from cudf._lib.pylibcudf.libcudf.utilities.type_dispatcher cimport type_to_id - -from cudf._lib.pylibcudf.libcudf.types import type_id as TypeId # no-cython-lint, isort:skip -from cudf._lib.pylibcudf.libcudf.types import nan_policy as NanPolicy # no-cython-lint, isort:skip -from cudf._lib.pylibcudf.libcudf.types import null_policy as NullPolicy # no-cython-lint, isort:skip -from cudf._lib.pylibcudf.libcudf.types import interpolation as Interpolation # no-cython-lint, isort:skip -from cudf._lib.pylibcudf.libcudf.types import mask_state as MaskState # no-cython-lint, isort:skip -from cudf._lib.pylibcudf.libcudf.types import nan_equality as NanEquality # no-cython-lint, isort:skip -from cudf._lib.pylibcudf.libcudf.types import null_equality as NullEquality # no-cython-lint, isort:skip -from cudf._lib.pylibcudf.libcudf.types import null_order as NullOrder # no-cython-lint, isort:skip -from cudf._lib.pylibcudf.libcudf.types import order as Order # no-cython-lint, isort:skip -from cudf._lib.pylibcudf.libcudf.types import sorted as Sorted # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import type_id as TypeId # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import nan_policy as NanPolicy # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import null_policy as NullPolicy # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import interpolation as Interpolation # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import mask_state as MaskState # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import nan_equality as NanEquality # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import null_equality as NullEquality # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import null_order as NullOrder # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import order as Order # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import sorted as Sorted # no-cython-lint, isort:skip cdef class DataType: @@ -69,6 +73,15 @@ cdef class DataType: ret.c_obj = dt return ret +cpdef size_type size_of(DataType t): + """Returns the size in bytes of elements of the specified data_type. + + Only fixed-width types are supported. + + For details, see :cpp:func:`size_of`. + """ + with nogil: + return cpp_size_of(t.c_obj) SIZE_TYPE = DataType(type_to_id[size_type]()) SIZE_TYPE_ID = SIZE_TYPE.id() diff --git a/python/cudf/cudf/_lib/pylibcudf/unary.pxd b/python/pylibcudf/pylibcudf/unary.pxd similarity index 87% rename from python/cudf/cudf/_lib/pylibcudf/unary.pxd rename to python/pylibcudf/pylibcudf/unary.pxd index d07df838172..9ee08653599 100644 --- a/python/cudf/cudf/_lib/pylibcudf/unary.pxd +++ b/python/pylibcudf/pylibcudf/unary.pxd @@ -1,8 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
from libcpp cimport bool
-
-from cudf._lib.pylibcudf.libcudf.unary cimport unary_operator
+from pylibcudf.libcudf.unary cimport unary_operator

from .column cimport Column
from .types cimport DataType
diff --git a/python/cudf/cudf/_lib/pylibcudf/unary.pyx b/python/pylibcudf/pylibcudf/unary.pyx
similarity index 94%
rename from python/cudf/cudf/_lib/pylibcudf/unary.pyx
rename to python/pylibcudf/pylibcudf/unary.pyx
index 8da46f0a832..839360ef406 100644
--- a/python/cudf/cudf/_lib/pylibcudf/unary.pyx
+++ b/python/pylibcudf/pylibcudf/unary.pyx
@@ -3,12 +3,11 @@
from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
+from pylibcudf.libcudf cimport unary as cpp_unary
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.unary cimport unary_operator

-from cudf._lib.pylibcudf.libcudf cimport unary as cpp_unary
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.unary cimport unary_operator
-
-from cudf._lib.pylibcudf.libcudf.unary import \
+from pylibcudf.libcudf.unary import \
    unary_operator as UnaryOperator  # no-cython-lint

from .column cimport Column
diff --git a/python/cudf/cudf/_lib/pylibcudf/utils.pxd b/python/pylibcudf/pylibcudf/utils.pxd
similarity index 71%
rename from python/cudf/cudf/_lib/pylibcudf/utils.pxd
rename to python/pylibcudf/pylibcudf/utils.pxd
index 77c05086397..6b994f20b61 100644
--- a/python/cudf/cudf/_lib/pylibcudf/utils.pxd
+++ b/python/pylibcudf/pylibcudf/utils.pxd
@@ -2,9 +2,8 @@
from libcpp.functional cimport reference_wrapper
from libcpp.vector cimport vector
-
-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
-from cudf._lib.pylibcudf.libcudf.types cimport bitmask_type
+from pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.libcudf.types cimport bitmask_type

cdef void * int_to_void_ptr(Py_ssize_t ptr) nogil
diff --git a/python/cudf/cudf/_lib/pylibcudf/utils.pyx b/python/pylibcudf/pylibcudf/utils.pyx
similarity index 56%
rename from python/cudf/cudf/_lib/pylibcudf/utils.pyx
rename to python/pylibcudf/pylibcudf/utils.pyx
index b4427e8ecff..ee4421ddeaf 100644
--- a/python/cudf/cudf/_lib/pylibcudf/utils.pyx
+++ b/python/pylibcudf/pylibcudf/utils.pyx
@@ -5,9 +5,10 @@ from cython.operator import dereference
from libc.stdint cimport uintptr_t
from libcpp.functional cimport reference_wrapper
from libcpp.vector cimport vector
+from cuda import cudart

-from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
-from cudf._lib.pylibcudf.libcudf.types cimport bitmask_type
+from pylibcudf.libcudf.scalar.scalar cimport scalar
+from pylibcudf.libcudf.types cimport bitmask_type

from .scalar cimport Scalar

@@ -34,3 +35,23 @@ cdef vector[reference_wrapper[const scalar]] _as_vector(list source):
        c_scalars.push_back(
            reference_wrapper[constscalar](dereference((<Scalar?>slr).c_obj)))
    return c_scalars
+
+
+def _is_concurrent_managed_access_supported():
+    """Check the availability of concurrent managed access (UVM).
+
+    Note that WSL2 does not support managed memory.
+ """ + + # Ensure CUDA is initialized before checking cudaDevAttrConcurrentManagedAccess + cudart.cudaFree(0) + + device_id = 0 + err, supports_managed_access = cudart.cudaDeviceGetAttribute( + cudart.cudaDeviceAttr.cudaDevAttrConcurrentManagedAccess, device_id + ) + if err != cudart.cudaError_t.cudaSuccess: + raise RuntimeError( + f"Failed to check cudaDevAttrConcurrentManagedAccess with error {err}" + ) + return supports_managed_access != 0 diff --git a/python/cudf/cudf/_lib/variant.pxd b/python/pylibcudf/pylibcudf/variant.pxd similarity index 100% rename from python/cudf/cudf/_lib/variant.pxd rename to python/pylibcudf/pylibcudf/variant.pxd diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml new file mode 100644 index 00000000000..bfade41353c --- /dev/null +++ b/python/pylibcudf/pyproject.toml @@ -0,0 +1,123 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. + +[build-system] +build-backend = "rapids_build_backend.build" +requires = [ + "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "scikit-build-core[pyproject]>=0.10.0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. + +[project] +name = "pylibcudf" +dynamic = ["version"] +description = "pylibcudf - Python bindings for libcudf" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.10" +dependencies = [ + "cuda-python>=11.7.1,<12.0a0", + "libcudf==24.10.*,>=0.0.0a0", + "nvtx>=0.2.1", + "packaging", + "pyarrow>=14.0.0,<18.0.0a0", + "rmm==24.10.*,>=0.0.0a0", + "typing_extensions>=4.0.0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +classifiers = [ + "Intended Audience :: Developers", + "Topic :: Database", + "Topic :: Scientific/Engineering", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", +] + +[project.optional-dependencies] +test = [ + "fastavro>=0.22.9", + "hypothesis", + "numpy>=1.23,<3.0a0", + "pandas", + "pytest-cov", + "pytest-xdist", + "pytest<8", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+
+[project.urls]
+Homepage = "https://github.com/rapidsai/cudf"
+Documentation = "https://docs.rapids.ai/api/cudf/stable/"
+
+[tool.isort]
+line_length = 79
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+combine_as_imports = true
+order_by_type = true
+known_dask = [
+    "dask",
+    "distributed",
+    "dask_cuda",
+]
+known_rapids = [
+    "rmm",
+]
+known_first_party = [
+    "cudf",
+]
+default_section = "THIRDPARTY"
+sections = [
+    "FUTURE",
+    "STDLIB",
+    "THIRDPARTY",
+    "DASK",
+    "RAPIDS",
+    "FIRSTPARTY",
+    "LOCALFOLDER",
+]
+skip = [
+    "thirdparty",
+    ".eggs",
+    ".git",
+    ".hg",
+    ".mypy_cache",
+    ".tox",
+    ".venv",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "__init__.py",
+]
+
+[tool.rapids-build-backend]
+build-backend = "scikit_build_core.build"
+dependencies-file = "../../dependencies.yaml"
+matrix-entry = "cuda_suffixed=true"
+requires = [
+    "cmake>=3.26.4,!=3.30.0",
+    "cython>=3.0.3",
+    "libcudf==24.10.*,>=0.0.0a0",
+    "librmm==24.10.*,>=0.0.0a0",
+    "ninja",
+    "rmm==24.10.*,>=0.0.0a0",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+
+[tool.scikit-build]
+build-dir = "build/{wheel_tag}"
+cmake.build-type = "Release"
+cmake.version = "CMakeLists.txt"
+minimum-version = "build-system.requires"
+ninja.make-fallback = true
+sdist.exclude = ["*tests*"]
+sdist.reproducible = true
+wheel.packages = ["pylibcudf"]
+
+[tool.scikit-build.metadata.version]
+provider = "scikit_build_core.metadata.regex"
+input = "pylibcudf/VERSION"
+regex = "(?P<value>.*)"
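A note on the [tool.scikit-build.metadata.version] table above: scikit-build-core derives the wheel version by applying the regex to the contents of pylibcudf/VERSION, and the pattern must capture a named group called value. A minimal model of that extraction (the version string below is hypothetical):

import re

version_file = "24.10.00\n"  # hypothetical contents of pylibcudf/VERSION
match = re.search(r"(?P<value>.*)", version_file)
assert match is not None and match.group("value") == "24.10.00"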
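Likewise, the _is_concurrent_managed_access_supported helper added to utils.pyx earlier in this diff is a plain Python function once compiled, so test code could use it to gate managed-memory cases. A sketch under that assumption, with a hypothetical test name:

import pytest

from pylibcudf.utils import _is_concurrent_managed_access_supported

uvm_only = pytest.mark.skipif(
    not _is_concurrent_managed_access_supported(),
    reason="concurrent managed access (UVM) unavailable, e.g. under WSL2",
)

@uvm_only
def test_managed_memory_roundtrip():  # hypothetical managed-memory test
    ...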