diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 3366554db30..26d07515f70 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   cpp-build:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -37,7 +37,7 @@ jobs:
   python-build:
     needs: [cpp-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -46,7 +46,7 @@ jobs:
   upload-conda:
     needs: [cpp-build, python-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.04
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -55,7 +55,7 @@ jobs:
       skip_upload_pkgs: libcudf-example
   wheel-build-cudf:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -67,7 +67,7 @@ jobs:
   wheel-publish-cudf:
     needs: wheel-build-cudf
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.04
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -77,7 +77,7 @@ jobs:
   wheel-build-dask-cudf:
     needs: wheel-publish-cudf
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.04
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -88,7 +88,7 @@ jobs:
   wheel-publish-dask-cudf:
     needs: wheel-build-dask-cudf
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-publish.yml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-publish.yml@branch-23.04
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index cf20b0006a2..f33fc15c52f 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -25,32 +25,32 @@ jobs:
       - wheel-build-dask-cudf
       - wheel-tests-dask-cudf
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.04
   checks:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04
   conda-cpp-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04
     with:
       build_type: pull-request
   conda-cpp-tests:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04
     with:
       build_type: pull-request
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04
     with:
       build_type: pull-request
   conda-python-cudf-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
     with:
       build_type: pull-request
       test_script: "ci/test_python_cudf.sh"
@@ -58,14 +58,14 @@ jobs:
     # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
     with:
       build_type: pull-request
       test_script: "ci/test_python_other.sh"
   conda-java-tests:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
     with:
       build_type: pull-request
       node_type: "gpu-latest-1"
@@ -75,7 +75,7 @@ jobs:
   conda-notebook-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
     with:
       build_type: pull-request
       node_type: "gpu-latest-1"
@@ -85,7 +85,7 @@ jobs:
   wheel-build-cudf:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04
     with:
       build_type: pull-request
       package-name: cudf
@@ -94,7 +94,7 @@ jobs:
   wheel-tests-cudf:
     needs: wheel-build-cudf
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04
     with:
       build_type: pull-request
       package-name: cudf
@@ -106,7 +106,7 @@ jobs:
   wheel-build-dask-cudf:
     needs: wheel-tests-cudf
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.04
     with:
       build_type: pull-request
       package-name: dask_cudf
@@ -115,7 +115,7 @@ jobs:
   wheel-tests-dask-cudf:
     needs: wheel-build-dask-cudf
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04
     with:
       build_type: pull-request
       package-name: dask_cudf
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 1b117bb2f4f..ff19d51f8ef 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
 jobs:
   conda-cpp-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -24,7 +24,7 @@ jobs:
       sha: ${{ inputs.sha }}
   conda-python-cudf-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -34,7 +34,7 @@ jobs:
   conda-python-other-tests:
     # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -43,7 +43,7 @@ jobs:
       test_script: "ci/test_python_other.sh"
   conda-java-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -55,7 +55,7 @@ jobs:
       run_script: "ci/test_java.sh"
   conda-notebook-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -67,7 +67,7 @@ jobs:
       run_script: "ci/test_notebooks.sh"
   wheel-tests-cudf:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -78,7 +78,7 @@ jobs:
       test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests"
   wheel-tests-dask-cudf:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.02
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d653d503a1e..4acad48eabf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,244 @@
-# cuDF 23.02.00 (Date TBD)
+# cuDF 23.02.00 (9 Feb 2023)
 
-Please see https://github.com/rapidsai/cudf/releases/tag/v23.02.00a for the latest changes to this development branch.
+## 🚨 Breaking Changes
+
+- Pin `dask` and `distributed` for release ([#12695](https://github.com/rapidsai/cudf/pull/12695)) [@galipremsagar](https://github.com/galipremsagar)
+- Change ways to access `ptr` in `Buffer` ([#12587](https://github.com/rapidsai/cudf/pull/12587)) [@galipremsagar](https://github.com/galipremsagar)
+- Remove column names ([#12578](https://github.com/rapidsai/cudf/pull/12578)) [@vuule](https://github.com/vuule)
+- Default `cudf::io::read_json` to nested JSON parser ([#12544](https://github.com/rapidsai/cudf/pull/12544)) [@vuule](https://github.com/vuule)
+- Switch `engine=cudf` to the new `JSON` reader ([#12509](https://github.com/rapidsai/cudf/pull/12509)) [@galipremsagar](https://github.com/galipremsagar)
+- Add trailing comma support for nested JSON reader ([#12448](https://github.com/rapidsai/cudf/pull/12448)) [@karthikeyann](https://github.com/karthikeyann)
+- Upgrade to `arrow-10.0.1` ([#12327](https://github.com/rapidsai/cudf/pull/12327)) [@galipremsagar](https://github.com/galipremsagar)
+- Fail loudly to avoid data corruption with unsupported input in `read_orc` ([#12325](https://github.com/rapidsai/cudf/pull/12325)) [@vuule](https://github.com/vuule)
+- CSV, JSON reader to infer integer column with nulls as int64 instead of float64 ([#12309](https://github.com/rapidsai/cudf/pull/12309)) [@karthikeyann](https://github.com/karthikeyann)
+- Remove deprecated code for 23.02 ([#12281](https://github.com/rapidsai/cudf/pull/12281)) [@vyasr](https://github.com/vyasr)
+- Null element for parsing error in numeric types in JSON, CSV reader ([#12272](https://github.com/rapidsai/cudf/pull/12272)) [@karthikeyann](https://github.com/karthikeyann)
+- Purge non-empty nulls for `superimpose_nulls` and `push_down_nulls` ([#12239](https://github.com/rapidsai/cudf/pull/12239)) [@ttnghia](https://github.com/ttnghia)
+- Rename `cudf::structs::detail::superimpose_parent_nulls` APIs ([#12230](https://github.com/rapidsai/cudf/pull/12230)) [@ttnghia](https://github.com/ttnghia)
+- Remove JIT type names, refactor id_to_type. ([#12158](https://github.com/rapidsai/cudf/pull/12158)) [@bdice](https://github.com/bdice)
+- Floor division uses integer division for integral arguments ([#12131](https://github.com/rapidsai/cudf/pull/12131)) [@wence-](https://github.com/wence-)
+
+## 🐛 Bug Fixes
+
+- Fix a mask data corruption in UDF ([#12647](https://github.com/rapidsai/cudf/pull/12647)) [@galipremsagar](https://github.com/galipremsagar)
+- pre-commit: Update isort version to 5.12.0 ([#12645](https://github.com/rapidsai/cudf/pull/12645)) [@wence-](https://github.com/wence-)
+- tests: Skip cuInit tests if cuda-gdb is not found or not working ([#12644](https://github.com/rapidsai/cudf/pull/12644)) [@wence-](https://github.com/wence-)
+- Revert regex program java APIs and tests ([#12639](https://github.com/rapidsai/cudf/pull/12639)) [@cindyyuanjiang](https://github.com/cindyyuanjiang)
+- Fix leaks in ColumnVectorTest ([#12625](https://github.com/rapidsai/cudf/pull/12625)) [@jlowe](https://github.com/jlowe)
+- Handle when spillable buffers own each other ([#12607](https://github.com/rapidsai/cudf/pull/12607)) [@madsbk](https://github.com/madsbk)
+- Fix incorrect null counts for sliced columns in JCudfSerialization ([#12589](https://github.com/rapidsai/cudf/pull/12589)) [@jlowe](https://github.com/jlowe)
+- lists: Transfer dtypes correctly through list.get ([#12586](https://github.com/rapidsai/cudf/pull/12586)) [@wence-](https://github.com/wence-)
+- timedelta: Don't go via float intermediates for floordiv ([#12585](https://github.com/rapidsai/cudf/pull/12585)) [@wence-](https://github.com/wence-)
+- Fixing BUG, `get_next_chunk()` should use the blocking function `device_read()` ([#12584](https://github.com/rapidsai/cudf/pull/12584)) [@madsbk](https://github.com/madsbk)
+- Make JNI QuoteStyle accessible outside ai.rapids.cudf ([#12572](https://github.com/rapidsai/cudf/pull/12572)) [@mythrocks](https://github.com/mythrocks)
+- `partition_by_hash()`: support index ([#12554](https://github.com/rapidsai/cudf/pull/12554)) [@madsbk](https://github.com/madsbk)
+- Mixed Join benchmark bug due to wrong conditional column ([#12553](https://github.com/rapidsai/cudf/pull/12553)) [@divyegala](https://github.com/divyegala)
+- Update List Lexicographical Comparator ([#12538](https://github.com/rapidsai/cudf/pull/12538)) [@divyegala](https://github.com/divyegala)
+- Dynamically read PTX version ([#12534](https://github.com/rapidsai/cudf/pull/12534)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- build.sh switch to use `RAPIDS` magic value ([#12525](https://github.com/rapidsai/cudf/pull/12525)) [@robertmaynard](https://github.com/robertmaynard)
+- Loosen runtime arrow pinning ([#12522](https://github.com/rapidsai/cudf/pull/12522)) [@vyasr](https://github.com/vyasr)
+- Enable metadata transfer for complex types in transpose ([#12491](https://github.com/rapidsai/cudf/pull/12491)) [@galipremsagar](https://github.com/galipremsagar)
+- Fix issues with parquet chunked reader ([#12488](https://github.com/rapidsai/cudf/pull/12488)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Fix missing metadata transfer in concat for `ListColumn` ([#12487](https://github.com/rapidsai/cudf/pull/12487)) [@galipremsagar](https://github.com/galipremsagar)
+- Rename libcudf substring source files to slice ([#12484](https://github.com/rapidsai/cudf/pull/12484)) [@davidwendt](https://github.com/davidwendt)
+- Fix compile issue with arrow 10 ([#12465](https://github.com/rapidsai/cudf/pull/12465)) [@ttnghia](https://github.com/ttnghia)
+- Fix List offsets bug in mixed type list column in nested JSON reader ([#12447](https://github.com/rapidsai/cudf/pull/12447)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix xfail incompatibilities ([#12423](https://github.com/rapidsai/cudf/pull/12423)) [@vyasr](https://github.com/vyasr)
+- Fix bug in Parquet column index encoding ([#12404](https://github.com/rapidsai/cudf/pull/12404)) [@etseidl](https://github.com/etseidl)
+- When building Arrow shared look for a shared OpenSSL ([#12396](https://github.com/rapidsai/cudf/pull/12396)) [@robertmaynard](https://github.com/robertmaynard)
+- Fix get_json_object to return empty column on empty input ([#12384](https://github.com/rapidsai/cudf/pull/12384)) [@davidwendt](https://github.com/davidwendt)
+- Pin arrow 9 in testing dependencies to prevent conda solve issues ([#12377](https://github.com/rapidsai/cudf/pull/12377)) [@vyasr](https://github.com/vyasr)
+- Fix reductions any/all return value for empty input ([#12374](https://github.com/rapidsai/cudf/pull/12374)) [@davidwendt](https://github.com/davidwendt)
+- Fix debug compile errors in parquet.hpp ([#12372](https://github.com/rapidsai/cudf/pull/12372)) [@davidwendt](https://github.com/davidwendt)
+- Purge non-empty nulls in `cudf::make_lists_column` ([#12370](https://github.com/rapidsai/cudf/pull/12370)) [@ttnghia](https://github.com/ttnghia)
+- Use correct memory resource in io::make_column ([#12364](https://github.com/rapidsai/cudf/pull/12364)) [@vyasr](https://github.com/vyasr)
+- Add code to detect possible malformed page data in parquet files. ([#12360](https://github.com/rapidsai/cudf/pull/12360)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Fail loudly to avoid data corruption with unsupported input in `read_orc` ([#12325](https://github.com/rapidsai/cudf/pull/12325)) [@vuule](https://github.com/vuule)
+- Fix NumericPairIteratorTest for float values ([#12306](https://github.com/rapidsai/cudf/pull/12306)) [@davidwendt](https://github.com/davidwendt)
+- Fixes memory allocation in nested JSON tokenizer ([#12300](https://github.com/rapidsai/cudf/pull/12300)) [@elstehle](https://github.com/elstehle)
+- Reconstruct dtypes correctly for list aggs of struct columns ([#12290](https://github.com/rapidsai/cudf/pull/12290)) [@wence-](https://github.com/wence-)
+- Fix regex \A and \Z to strictly match string begin/end ([#12282](https://github.com/rapidsai/cudf/pull/12282)) [@davidwendt](https://github.com/davidwendt)
+- Fix compile issue in `json_chunked_reader.cpp` ([#12280](https://github.com/rapidsai/cudf/pull/12280)) [@ttnghia](https://github.com/ttnghia)
+- Change reductions any/all to return valid values for empty input ([#12279](https://github.com/rapidsai/cudf/pull/12279)) [@davidwendt](https://github.com/davidwendt)
+- Only exclude join keys that are indices from key columns ([#12271](https://github.com/rapidsai/cudf/pull/12271)) [@wence-](https://github.com/wence-)
+- Fix spill to device limit ([#12252](https://github.com/rapidsai/cudf/pull/12252)) [@madsbk](https://github.com/madsbk)
+- Correct behaviour of sort in `concat` for singleton concatenations ([#12247](https://github.com/rapidsai/cudf/pull/12247)) [@wence-](https://github.com/wence-)
+- Purge non-empty nulls for `superimpose_nulls` and `push_down_nulls` ([#12239](https://github.com/rapidsai/cudf/pull/12239)) [@ttnghia](https://github.com/ttnghia)
+- Patch CUB DeviceSegmentedSort and remove workaround ([#12234](https://github.com/rapidsai/cudf/pull/12234)) [@davidwendt](https://github.com/davidwendt)
+- Fix memory leak in udf_string::assign(&&) function ([#12206](https://github.com/rapidsai/cudf/pull/12206)) [@davidwendt](https://github.com/davidwendt)
+- Workaround thrust-copy-if limit in json get_tree_representation ([#12190](https://github.com/rapidsai/cudf/pull/12190)) [@davidwendt](https://github.com/davidwendt)
+- Fix page size calculation in Parquet writer ([#12182](https://github.com/rapidsai/cudf/pull/12182)) [@etseidl](https://github.com/etseidl)
+- Add cudf::detail::sizes_to_offsets_iterator to allow checking overflow in offsets ([#12180](https://github.com/rapidsai/cudf/pull/12180)) [@davidwendt](https://github.com/davidwendt)
+- Workaround thrust-copy-if limit in wordpiece-tokenizer ([#12168](https://github.com/rapidsai/cudf/pull/12168)) [@davidwendt](https://github.com/davidwendt)
+- Floor division uses integer division for integral arguments ([#12131](https://github.com/rapidsai/cudf/pull/12131)) [@wence-](https://github.com/wence-)
+
+## 📖 Documentation
+
+- Fix link to NVTX ([#12598](https://github.com/rapidsai/cudf/pull/12598)) [@sameerz](https://github.com/sameerz)
+- Include missing groupby functions in documentation ([#12580](https://github.com/rapidsai/cudf/pull/12580)) [@quasiben](https://github.com/quasiben)
+- Fix documentation author ([#12527](https://github.com/rapidsai/cudf/pull/12527)) [@bdice](https://github.com/bdice)
+- Update libcudf reduction docs for casting output types ([#12526](https://github.com/rapidsai/cudf/pull/12526)) [@davidwendt](https://github.com/davidwendt)
+- Add JSON reader page in user guide ([#12499](https://github.com/rapidsai/cudf/pull/12499)) [@GregoryKimball](https://github.com/GregoryKimball)
+- Link unsupported iteration API docstrings ([#12482](https://github.com/rapidsai/cudf/pull/12482)) [@galipremsagar](https://github.com/galipremsagar)
+- `strings_udf` doc update ([#12469](https://github.com/rapidsai/cudf/pull/12469)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Update cudf_assert docs with correct NDEBUG behavior ([#12464](https://github.com/rapidsai/cudf/pull/12464)) [@robertmaynard](https://github.com/robertmaynard)
+- Update pre-commit hooks guide ([#12395](https://github.com/rapidsai/cudf/pull/12395)) [@bdice](https://github.com/bdice)
+- Update test docs to not use detail comparison utilities ([#12332](https://github.com/rapidsai/cudf/pull/12332)) [@PointKernel](https://github.com/PointKernel)
+- Fix doxygen description for regex_program::compute_working_memory_size ([#12329](https://github.com/rapidsai/cudf/pull/12329)) [@davidwendt](https://github.com/davidwendt)
+- Add eval to docs. ([#12322](https://github.com/rapidsai/cudf/pull/12322)) [@vyasr](https://github.com/vyasr)
+- Turn on xfail_strict=true ([#12244](https://github.com/rapidsai/cudf/pull/12244)) [@wence-](https://github.com/wence-)
+- Update 10 minutes to cuDF ([#12114](https://github.com/rapidsai/cudf/pull/12114)) [@wence-](https://github.com/wence-)
+
+## 🚀 New Features
+
+- Use kvikIO as the default IO backend ([#12574](https://github.com/rapidsai/cudf/pull/12574)) [@vuule](https://github.com/vuule)
+- Use `has_nonempty_nulls` instead of `may_contain_non_empty_nulls` in `superimpose_nulls` and `push_down_nulls` ([#12560](https://github.com/rapidsai/cudf/pull/12560)) [@ttnghia](https://github.com/ttnghia)
+- Add strings methods removeprefix and removesuffix ([#12557](https://github.com/rapidsai/cudf/pull/12557)) [@davidwendt](https://github.com/davidwendt)
+- Add `regex_program` java APIs and unit tests ([#12548](https://github.com/rapidsai/cudf/pull/12548)) [@cindyyuanjiang](https://github.com/cindyyuanjiang)
+- Default `cudf::io::read_json` to nested JSON parser ([#12544](https://github.com/rapidsai/cudf/pull/12544)) [@vuule](https://github.com/vuule)
+- Make string quoting optional on CSV write ([#12539](https://github.com/rapidsai/cudf/pull/12539)) [@mythrocks](https://github.com/mythrocks)
+- Use new nvCOMP API to optimize the compression temp memory size ([#12533](https://github.com/rapidsai/cudf/pull/12533)) [@vuule](https://github.com/vuule)
+- Support "values" orient (array of arrays) in Nested JSON reader ([#12498](https://github.com/rapidsai/cudf/pull/12498)) [@karthikeyann](https://github.com/karthikeyann)
+- `one_hot_encode` to use experimental row comparators ([#12478](https://github.com/rapidsai/cudf/pull/12478)) [@divyegala](https://github.com/divyegala)
+- Support %W and %w format specifiers in cudf::strings::to_timestamps ([#12475](https://github.com/rapidsai/cudf/pull/12475)) [@davidwendt](https://github.com/davidwendt)
+- Add JSON Writer ([#12474](https://github.com/rapidsai/cudf/pull/12474)) [@karthikeyann](https://github.com/karthikeyann)
+- Refactor `thrust_copy_if` into `cudf::detail::copy_if_safe` ([#12455](https://github.com/rapidsai/cudf/pull/12455)) [@ttnghia](https://github.com/ttnghia)
+- Add trailing comma support for nested JSON reader ([#12448](https://github.com/rapidsai/cudf/pull/12448)) [@karthikeyann](https://github.com/karthikeyann)
+- Extract `tokenize_json.hpp` detail header from `src/io/json/nested_json.hpp` ([#12432](https://github.com/rapidsai/cudf/pull/12432)) [@ttnghia](https://github.com/ttnghia)
+- JNI bindings to write CSV ([#12425](https://github.com/rapidsai/cudf/pull/12425)) [@mythrocks](https://github.com/mythrocks)
+- Nested JSON depth benchmark ([#12371](https://github.com/rapidsai/cudf/pull/12371)) [@karthikeyann](https://github.com/karthikeyann)
+- Implement `lists::reverse` ([#12336](https://github.com/rapidsai/cudf/pull/12336)) [@ttnghia](https://github.com/ttnghia)
+- Use `device_read` in experimental `read_json` ([#12314](https://github.com/rapidsai/cudf/pull/12314)) [@vuule](https://github.com/vuule)
+- Implement JNI for `strings::reverse` ([#12283](https://github.com/rapidsai/cudf/pull/12283)) [@ttnghia](https://github.com/ttnghia)
+- Null element for parsing error in numeric types in JSON, CSV reader ([#12272](https://github.com/rapidsai/cudf/pull/12272)) [@karthikeyann](https://github.com/karthikeyann)
+- Add cudf::strings:like function with multiple patterns ([#12269](https://github.com/rapidsai/cudf/pull/12269)) [@davidwendt](https://github.com/davidwendt)
+- Add environment variable to control host memory allocation in `hostdevice_vector` ([#12251](https://github.com/rapidsai/cudf/pull/12251)) [@vuule](https://github.com/vuule)
+- Add cudf::strings::reverse function ([#12227](https://github.com/rapidsai/cudf/pull/12227)) [@davidwendt](https://github.com/davidwendt)
+- Selectively use dictionary encoding in Parquet writer ([#12211](https://github.com/rapidsai/cudf/pull/12211)) [@etseidl](https://github.com/etseidl)
+- Support `replace` in `strings_udf` ([#12207](https://github.com/rapidsai/cudf/pull/12207)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Add support to read binary encoded decimals in parquet ([#12205](https://github.com/rapidsai/cudf/pull/12205)) [@PointKernel](https://github.com/PointKernel)
+- Support regex EOL where the string ends with a new-line character ([#12181](https://github.com/rapidsai/cudf/pull/12181)) [@davidwendt](https://github.com/davidwendt)
+- Updating `stream_compaction/unique` to use new row comparators ([#12159](https://github.com/rapidsai/cudf/pull/12159)) [@divyegala](https://github.com/divyegala)
+- Add device buffer datasource ([#12024](https://github.com/rapidsai/cudf/pull/12024)) [@PointKernel](https://github.com/PointKernel)
+- Implement groupby apply with JIT ([#11452](https://github.com/rapidsai/cudf/pull/11452)) [@bwyogatama](https://github.com/bwyogatama)
+
+## 🛠️ Improvements
+
+- Update shared workflow branches ([#12696](https://github.com/rapidsai/cudf/pull/12696)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Pin `dask` and `distributed` for release ([#12695](https://github.com/rapidsai/cudf/pull/12695)) [@galipremsagar](https://github.com/galipremsagar)
+- Don't upload `libcudf-example` to Anaconda.org ([#12671](https://github.com/rapidsai/cudf/pull/12671)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Pin wheel dependencies to same RAPIDS release ([#12659](https://github.com/rapidsai/cudf/pull/12659)) [@sevagh](https://github.com/sevagh)
+- Use CTK 118/cp310 branch of wheel workflows ([#12602](https://github.com/rapidsai/cudf/pull/12602)) [@sevagh](https://github.com/sevagh)
+- Change ways to access `ptr` in `Buffer` ([#12587](https://github.com/rapidsai/cudf/pull/12587)) [@galipremsagar](https://github.com/galipremsagar)
+- Version a parquet writer xfail ([#12579](https://github.com/rapidsai/cudf/pull/12579)) [@galipremsagar](https://github.com/galipremsagar)
+- Remove column names ([#12578](https://github.com/rapidsai/cudf/pull/12578)) [@vuule](https://github.com/vuule)
+- Parquet reader optimization to address V100 regression. ([#12577](https://github.com/rapidsai/cudf/pull/12577)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Add support for `category` dtypes in CSV reader ([#12571](https://github.com/rapidsai/cudf/pull/12571)) [@galipremsagar](https://github.com/galipremsagar)
+- Remove `spill_lock` parameter from `SpillableBuffer.get_ptr()` ([#12564](https://github.com/rapidsai/cudf/pull/12564)) [@madsbk](https://github.com/madsbk)
+- Optimize `cudf::make_lists_column` ([#12547](https://github.com/rapidsai/cudf/pull/12547)) [@ttnghia](https://github.com/ttnghia)
+- Remove `cudf::strings::repeat_strings_output_sizes` from Java and JNI ([#12546](https://github.com/rapidsai/cudf/pull/12546)) [@ttnghia](https://github.com/ttnghia)
+- Test that cuInit is not called when RAPIDS_NO_INITIALIZE is set ([#12545](https://github.com/rapidsai/cudf/pull/12545)) [@wence-](https://github.com/wence-)
+- Rework repeat_strings to use sizes-to-offsets utility ([#12543](https://github.com/rapidsai/cudf/pull/12543)) [@davidwendt](https://github.com/davidwendt)
+- Replace exclusive_scan with sizes_to_offsets in cudf::lists::sequences ([#12541](https://github.com/rapidsai/cudf/pull/12541)) [@davidwendt](https://github.com/davidwendt)
+- Rework nvtext::ngrams_tokenize to use sizes-to-offsets utility ([#12540](https://github.com/rapidsai/cudf/pull/12540)) [@davidwendt](https://github.com/davidwendt)
+- Fix binary-ops gtests coded in namespace cudf::test ([#12536](https://github.com/rapidsai/cudf/pull/12536)) [@davidwendt](https://github.com/davidwendt)
+- More `@acquire_spill_lock()` and `as_buffer(..., exposed=False)` ([#12535](https://github.com/rapidsai/cudf/pull/12535)) [@madsbk](https://github.com/madsbk)
+- Guard CUDA runtime APIs with error checking ([#12531](https://github.com/rapidsai/cudf/pull/12531)) [@PointKernel](https://github.com/PointKernel)
+- Update TODOs from issue 10432. ([#12528](https://github.com/rapidsai/cudf/pull/12528)) [@bdice](https://github.com/bdice)
+- Update rapids-cmake definitions version in GitHub Actions style checks. ([#12511](https://github.com/rapidsai/cudf/pull/12511)) [@bdice](https://github.com/bdice)
+- Switch `engine=cudf` to the new `JSON` reader ([#12509](https://github.com/rapidsai/cudf/pull/12509)) [@galipremsagar](https://github.com/galipremsagar)
+- Fix SUM/MEAN aggregation type support. ([#12503](https://github.com/rapidsai/cudf/pull/12503)) [@bdice](https://github.com/bdice)
+- Stop using pandas._testing ([#12492](https://github.com/rapidsai/cudf/pull/12492)) [@vyasr](https://github.com/vyasr)
+- Fix ROLLING_TEST gtests coded in namespace cudf::test ([#12490](https://github.com/rapidsai/cudf/pull/12490)) [@davidwendt](https://github.com/davidwendt)
+- Fix erroneously skipped ORC ZSTD test ([#12486](https://github.com/rapidsai/cudf/pull/12486)) [@vuule](https://github.com/vuule)
+- Rework nvtext::generate_character_ngrams to use make_strings_children ([#12480](https://github.com/rapidsai/cudf/pull/12480)) [@davidwendt](https://github.com/davidwendt)
+- Raise warnings as errors in the test suite ([#12468](https://github.com/rapidsai/cudf/pull/12468)) [@vyasr](https://github.com/vyasr)
+- Remove `int32` hard-coding in python ([#12467](https://github.com/rapidsai/cudf/pull/12467)) [@galipremsagar](https://github.com/galipremsagar)
+- Use cudaMemcpyDefault. ([#12466](https://github.com/rapidsai/cudf/pull/12466)) [@bdice](https://github.com/bdice)
+- Update workflows for nightly tests ([#12462](https://github.com/rapidsai/cudf/pull/12462)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Build CUDA `11.8` and Python `3.10` Packages ([#12457](https://github.com/rapidsai/cudf/pull/12457)) [@ajschmidt8](https://github.com/ajschmidt8)
+- JNI build image default as cuda11.8 ([#12441](https://github.com/rapidsai/cudf/pull/12441)) [@pxLi](https://github.com/pxLi)
+- Re-enable `Recently Updated` Check ([#12435](https://github.com/rapidsai/cudf/pull/12435)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Rework remaining cudf::strings::from_xyz functions to use make_strings_children ([#12434](https://github.com/rapidsai/cudf/pull/12434)) [@vuule](https://github.com/vuule)
+- Build wheels alongside conda CI ([#12427](https://github.com/rapidsai/cudf/pull/12427)) [@sevagh](https://github.com/sevagh)
+- Remove arguments for checking exception messages in Python ([#12424](https://github.com/rapidsai/cudf/pull/12424)) [@vyasr](https://github.com/vyasr)
+- Clean up cuco usage ([#12421](https://github.com/rapidsai/cudf/pull/12421)) [@PointKernel](https://github.com/PointKernel)
+- Fix warnings in remaining modules ([#12406](https://github.com/rapidsai/cudf/pull/12406)) [@vyasr](https://github.com/vyasr)
+- Update `ops-bot.yaml` ([#12402](https://github.com/rapidsai/cudf/pull/12402)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Rework cudf::strings::integers_to_ipv4 to use make_strings_children utility ([#12401](https://github.com/rapidsai/cudf/pull/12401)) [@davidwendt](https://github.com/davidwendt)
+- Use `numpy.empty()` instead of `bytearray` to allocate host memory for spilling ([#12399](https://github.com/rapidsai/cudf/pull/12399)) [@madsbk](https://github.com/madsbk)
+- Deprecate chunksize from dask_cudf.read_csv ([#12394](https://github.com/rapidsai/cudf/pull/12394)) [@rjzamora](https://github.com/rjzamora)
+- Expose the RMM pool size in JNI ([#12390](https://github.com/rapidsai/cudf/pull/12390)) [@revans2](https://github.com/revans2)
+- Fix COPYING_TEST: gtests coded in namespace cudf::test ([#12387](https://github.com/rapidsai/cudf/pull/12387)) [@davidwendt](https://github.com/davidwendt)
+- Rework cudf::strings::url_encode to use make_strings_children utility ([#12385](https://github.com/rapidsai/cudf/pull/12385)) [@davidwendt](https://github.com/davidwendt)
+- Use make_strings_children in parse_data nested json reader ([#12382](https://github.com/rapidsai/cudf/pull/12382)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix warnings in test_datetime.py ([#12381](https://github.com/rapidsai/cudf/pull/12381)) [@vyasr](https://github.com/vyasr)
+- Mixed Join Benchmarks ([#12375](https://github.com/rapidsai/cudf/pull/12375)) [@divyegala](https://github.com/divyegala)
+- Fix warnings in dataframe.py ([#12369](https://github.com/rapidsai/cudf/pull/12369)) [@vyasr](https://github.com/vyasr)
+- Update conda recipes. ([#12368](https://github.com/rapidsai/cudf/pull/12368)) [@bdice](https://github.com/bdice)
+- Use gpu-latest-1 runner tag ([#12366](https://github.com/rapidsai/cudf/pull/12366)) [@bdice](https://github.com/bdice)
+- Rework cudf::strings::from_booleans to use make_strings_children ([#12365](https://github.com/rapidsai/cudf/pull/12365)) [@vuule](https://github.com/vuule)
+- Fix warnings in test modules up to test_dataframe.py ([#12355](https://github.com/rapidsai/cudf/pull/12355)) [@vyasr](https://github.com/vyasr)
+- JSON column performance optimization - struct column nulls ([#12354](https://github.com/rapidsai/cudf/pull/12354)) [@karthikeyann](https://github.com/karthikeyann)
+- Accelerate stable-segmented-sort with CUB segmented sort ([#12347](https://github.com/rapidsai/cudf/pull/12347)) [@davidwendt](https://github.com/davidwendt)
+- Add size check to make_offsets_child_column utility ([#12345](https://github.com/rapidsai/cudf/pull/12345)) [@davidwendt](https://github.com/davidwendt)
+- Enable max compression ratio small block optimization for ZSTD ([#12338](https://github.com/rapidsai/cudf/pull/12338)) [@vuule](https://github.com/vuule)
+- Fix warnings in test_monotonic.py ([#12334](https://github.com/rapidsai/cudf/pull/12334)) [@vyasr](https://github.com/vyasr)
+- Improve JSON column creation performance (list offsets) ([#12330](https://github.com/rapidsai/cudf/pull/12330)) [@karthikeyann](https://github.com/karthikeyann)
+- Upgrade to `arrow-10.0.1` ([#12327](https://github.com/rapidsai/cudf/pull/12327)) [@galipremsagar](https://github.com/galipremsagar)
+- Fix warnings in test_orc.py ([#12326](https://github.com/rapidsai/cudf/pull/12326)) [@vyasr](https://github.com/vyasr)
+- Fix warnings in test_groupby.py ([#12324](https://github.com/rapidsai/cudf/pull/12324)) [@vyasr](https://github.com/vyasr)
+- Fix `test_notebooks.sh` ([#12323](https://github.com/rapidsai/cudf/pull/12323)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Fix transform gtests coded in namespace cudf::test ([#12321](https://github.com/rapidsai/cudf/pull/12321)) [@davidwendt](https://github.com/davidwendt)
+- Fix `check_style.sh` script ([#12320](https://github.com/rapidsai/cudf/pull/12320)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Rework cudf::strings::from_timestamps to use make_strings_children ([#12317](https://github.com/rapidsai/cudf/pull/12317)) [@davidwendt](https://github.com/davidwendt)
+- Fix warnings in test_index.py ([#12313](https://github.com/rapidsai/cudf/pull/12313)) [@vyasr](https://github.com/vyasr)
+- Fix warnings in test_multiindex.py ([#12310](https://github.com/rapidsai/cudf/pull/12310)) [@vyasr](https://github.com/vyasr)
+- CSV, JSON reader to infer integer column with nulls as int64 instead of float64 ([#12309](https://github.com/rapidsai/cudf/pull/12309)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix warnings in test_indexing.py ([#12305](https://github.com/rapidsai/cudf/pull/12305)) [@vyasr](https://github.com/vyasr)
+- Fix warnings in test_joining.py ([#12304](https://github.com/rapidsai/cudf/pull/12304)) [@vyasr](https://github.com/vyasr)
+- Unpin `dask` and `distributed` for development ([#12302](https://github.com/rapidsai/cudf/pull/12302)) [@galipremsagar](https://github.com/galipremsagar)
+- Re-enable `sccache` for Jenkins builds ([#12297](https://github.com/rapidsai/cudf/pull/12297)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Define needs for pr-builder workflow. ([#12296](https://github.com/rapidsai/cudf/pull/12296)) [@bdice](https://github.com/bdice)
+- Forward merge 22.12 into 23.02 ([#12294](https://github.com/rapidsai/cudf/pull/12294)) [@vyasr](https://github.com/vyasr)
+- Fix warnings in test_stats.py ([#12293](https://github.com/rapidsai/cudf/pull/12293)) [@vyasr](https://github.com/vyasr)
+- Fix table gtests coded in namespace cudf::test ([#12292](https://github.com/rapidsai/cudf/pull/12292)) [@davidwendt](https://github.com/davidwendt)
+- Change cython for regex calls to use cudf::strings::regex_program ([#12289](https://github.com/rapidsai/cudf/pull/12289)) [@davidwendt](https://github.com/davidwendt)
+- Improved error reporting when reading multiple JSON files ([#12285](https://github.com/rapidsai/cudf/pull/12285)) [@vuule](https://github.com/vuule)
+- Deprecate Frame.sum_of_squares ([#12284](https://github.com/rapidsai/cudf/pull/12284)) [@vyasr](https://github.com/vyasr)
+- Remove deprecated code for 23.02 ([#12281](https://github.com/rapidsai/cudf/pull/12281)) [@vyasr](https://github.com/vyasr)
+- Clean up handling of max_page_size_bytes in Parquet writer ([#12277](https://github.com/rapidsai/cudf/pull/12277)) [@etseidl](https://github.com/etseidl)
+- Fix replace gtests coded in namespace cudf::test ([#12270](https://github.com/rapidsai/cudf/pull/12270)) [@davidwendt](https://github.com/davidwendt)
+- Add pandas nullable type support in `Index.to_pandas` ([#12268](https://github.com/rapidsai/cudf/pull/12268)) [@galipremsagar](https://github.com/galipremsagar)
+- Rework nvtext::detokenize to use indexalator for row indices ([#12267](https://github.com/rapidsai/cudf/pull/12267)) [@davidwendt](https://github.com/davidwendt)
+- Fix reduction gtests coded in namespace cudf::test ([#12257](https://github.com/rapidsai/cudf/pull/12257)) [@davidwendt](https://github.com/davidwendt)
+- Remove default parameters from cudf::detail::sort function declarations ([#12254](https://github.com/rapidsai/cudf/pull/12254)) [@davidwendt](https://github.com/davidwendt)
+- Add `duplicated` support for `Series`, `DataFrame` and `Index` ([#12246](https://github.com/rapidsai/cudf/pull/12246)) [@galipremsagar](https://github.com/galipremsagar)
+- Replace column/table test utilities with macros ([#12242](https://github.com/rapidsai/cudf/pull/12242)) [@PointKernel](https://github.com/PointKernel)
+- Rework cudf::strings::pad and zfill to use make_strings_children ([#12238](https://github.com/rapidsai/cudf/pull/12238)) [@davidwendt](https://github.com/davidwendt)
+- Fix sort gtests coded in namespace cudf::test ([#12237](https://github.com/rapidsai/cudf/pull/12237)) [@davidwendt](https://github.com/davidwendt)
+- Wrapping concat and file writes in `@acquire_spill_lock()` ([#12232](https://github.com/rapidsai/cudf/pull/12232)) [@madsbk](https://github.com/madsbk)
+- Rename `cudf::structs::detail::superimpose_parent_nulls` APIs ([#12230](https://github.com/rapidsai/cudf/pull/12230)) [@ttnghia](https://github.com/ttnghia)
+- Cover parsing to decimal types in `read_json` tests ([#12229](https://github.com/rapidsai/cudf/pull/12229)) [@vuule](https://github.com/vuule)
+- Spill Statistics ([#12223](https://github.com/rapidsai/cudf/pull/12223)) [@madsbk](https://github.com/madsbk)
+- Use CUDF_JNI_ENABLE_PROFILING to conditionally enable profiling support. ([#12221](https://github.com/rapidsai/cudf/pull/12221)) [@bdice](https://github.com/bdice)
+- Clean up of `test_spilling.py` ([#12220](https://github.com/rapidsai/cudf/pull/12220)) [@madsbk](https://github.com/madsbk)
+- Simplify repetitive boolean logic ([#12218](https://github.com/rapidsai/cudf/pull/12218)) [@vuule](https://github.com/vuule)
+- Add `Series.hasnans` and `Index.hasnans` ([#12214](https://github.com/rapidsai/cudf/pull/12214)) [@galipremsagar](https://github.com/galipremsagar)
+- Add cudf::strings:udf::replace function ([#12210](https://github.com/rapidsai/cudf/pull/12210)) [@davidwendt](https://github.com/davidwendt)
+- Adds in new java APIs for appending byte arrays to host columnar data ([#12208](https://github.com/rapidsai/cudf/pull/12208)) [@revans2](https://github.com/revans2)
+- Remove Python dependencies from Java CI. ([#12193](https://github.com/rapidsai/cudf/pull/12193)) [@bdice](https://github.com/bdice)
+- Fix null order in sort-based groupby and improve groupby tests ([#12191](https://github.com/rapidsai/cudf/pull/12191)) [@divyegala](https://github.com/divyegala)
+- Move strings children functions from cudf/strings/detail/utilities.cuh to new header ([#12185](https://github.com/rapidsai/cudf/pull/12185)) [@davidwendt](https://github.com/davidwendt)
+- Clean up existing JNI scalar to column code ([#12173](https://github.com/rapidsai/cudf/pull/12173)) [@revans2](https://github.com/revans2)
+- Remove JIT type names, refactor id_to_type. ([#12158](https://github.com/rapidsai/cudf/pull/12158)) [@bdice](https://github.com/bdice)
+- Update JNI version to 23.02.0-SNAPSHOT ([#12129](https://github.com/rapidsai/cudf/pull/12129)) [@pxLi](https://github.com/pxLi)
+- Minor refactor of cpp/src/io/parquet/page_data.cu ([#12126](https://github.com/rapidsai/cudf/pull/12126)) [@etseidl](https://github.com/etseidl)
+- Add codespell as a linter ([#12097](https://github.com/rapidsai/cudf/pull/12097)) [@benfred](https://github.com/benfred)
+- Enable specifying exceptions in error macros ([#12078](https://github.com/rapidsai/cudf/pull/12078)) [@vyasr](https://github.com/vyasr)
+- Move `_label_encoding` from Series to Column ([#12040](https://github.com/rapidsai/cudf/pull/12040)) [@shwina](https://github.com/shwina)
+- Add GitHub Actions Workflows ([#12002](https://github.com/rapidsai/cudf/pull/12002)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Consolidate dask-cudf `groupby_agg` calls in one place ([#10835](https://github.com/rapidsai/cudf/pull/10835)) [@charlesbluca](https://github.com/charlesbluca)
 
 # cuDF 22.12.00 (8 Dec 2022)
diff --git a/README.md b/README.md
index 68c2d4f6276..36c1ff1d1fa 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,5 @@
 # cuDF - GPU DataFrames
 
-[![Build Status](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/branches/job/cudf-branch-pipeline/badge/icon)](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/branches/job/cudf-branch-pipeline/)
-
 **NOTE:** For the latest stable [README.md](https://github.com/rapidsai/cudf/blob/main/README.md) ensure you are on the `main` branch.
 
 ## Resources
diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
deleted file mode 100755
index ec4f8d55372..00000000000
--- a/ci/benchmark/build.sh
+++ /dev/null
@@ -1,196 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
-#########################################
-# cuDF GPU build and test script for CI #
-#########################################
-set -e
-NUMARGS=$#
-ARGS=$*
-
-# Logger function for build status output
-function logger() {
-  echo -e "\n>>>> $@\n"
-}
-
-# Arg parsing function
-function hasArg {
-    (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
-}
-
-# Set path and build parallel level
-export PATH=/conda/bin:/usr/local/cuda/bin:$PATH
-export PARALLEL_LEVEL=4
-export CUDA_REL=${CUDA_VERSION%.*}
-export HOME="$WORKSPACE"
-
-# Parse git describe
-cd "$WORKSPACE"
-export GIT_DESCRIBE_TAG=`git describe --tags`
-export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
-
-# Set Benchmark Vars
-export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
-
-# Set `LIBCUDF_KERNEL_CACHE_PATH` environment variable to $HOME/.jitify-cache because
-# it's local to the container's virtual file system, and not shared with other CI jobs
-# like `/tmp` is.
-export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
-
-# Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=1
-
-# Dask version to install when `INSTALL_DASK_MAIN=0`
-export DASK_STABLE_VERSION="2022.12.0"
-
-function remove_libcudf_kernel_cache_dir {
-  EXITCODE=$?
-  logger "removing kernel cache dir: $LIBCUDF_KERNEL_CACHE_PATH"
-  rm -rf "$LIBCUDF_KERNEL_CACHE_PATH" || logger "could not rm -rf $LIBCUDF_KERNEL_CACHE_PATH"
-  exit $EXITCODE
-}
-
-trap remove_libcudf_kernel_cache_dir EXIT
-
-mkdir -p "$LIBCUDF_KERNEL_CACHE_PATH" || logger "could not mkdir -p $LIBCUDF_KERNEL_CACHE_PATH"
-
-################################################################################
-# SETUP - Check environment
-################################################################################
-
-logger "Check environment..."
-env
-
-logger "Check GPU usage..."
-nvidia-smi
-
-logger "Activate conda env..."
-. /opt/conda/etc/profile.d/conda.sh
-conda activate rapids
-
-# Enter dependencies to be shown in ASV tooltips.
-CUDF_DEPS=(librmm)
-LIBCUDF_DEPS=(librmm)
-
-conda install "rmm=$MINOR_VERSION.*" "cudatoolkit=$CUDA_REL" \
-              "rapids-build-env=$MINOR_VERSION.*" \
-              "rapids-notebook-env=$MINOR_VERSION.*" \
-              rapids-pytest-benchmark
-
-# https://docs.rapids.ai/maintainers/depmgmt/
-# conda remove -f rapids-build-env rapids-notebook-env
-# conda install "your-pkg=1.0.0"
-
-# Install the conda-forge or nightly version of dask and distributed
-if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
-    gpuci_logger "gpuci_mamba_retry install -c dask/label/dev 'dask/label/dev::dask' 'dask/label/dev::distributed'"
-    gpuci_mamba_retry install -c dask/label/dev "dask/label/dev::dask" "dask/label/dev::distributed"
-else
-    gpuci_logger "gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall"
-    gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall
-fi
-
-# Install the master version of streamz
-logger "pip install git+https://github.com/python-streamz/streamz.git@master --upgrade --no-deps"
-pip install "git+https://github.com/python-streamz/streamz.git@master" --upgrade --no-deps
-
-logger "Check versions..."
-python --version
-
-conda info
-conda config --show-sources
-conda list --show-channel-urls
-
-################################################################################
-# BUILD - Build libcudf, cuDF and dask_cudf from source
-################################################################################
-
-logger "Build libcudf..."
-"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests --ptds
-
-################################################################################
-# BENCHMARK - Run and parse libcudf and cuDF benchmarks
-################################################################################
-
-logger "Running benchmarks..."
-
-#Download GBench results Parser
-curl -L https://raw.githubusercontent.com/rapidsai/benchmark/main/parser/GBenchToASV.py --output GBenchToASV.py
-
-###
-# Generate Metadata for dependencies
-###
-
-# Concatenate dependency arrays, convert to JSON array,
-# and remove duplicates.
-X=("${CUDF_DEPS[@]}" "${LIBCUDF_DEPS[@]}")
-DEPS=$(printf '%s\n' "${X[@]}" | jq -R . | jq -s 'unique')
-
-# Build object with k/v pairs of "dependency:version"
-DEP_VER_DICT=$(jq -n '{}')
-for DEP in $(echo "${DEPS}" | jq -r '.[]'); do
-  VER=$(conda list | grep "^${DEP}" | awk '{print $2"-"$3}')
-  DEP_VER_DICT=$(echo "${DEP_VER_DICT}" | jq -c --arg DEP "${DEP}" --arg VER "${VER}" '. + { ($DEP): $VER }')
-done
-
-# Pass in an array of dependencies to get a dict of "dependency:version"
-function getReqs() {
-  local DEPS_ARR=("$@")
-  local REQS="{}"
-  for DEP in "${DEPS_ARR[@]}"; do
-    VER=$(echo "${DEP_VER_DICT}" | jq -r --arg DEP "${DEP}" '.[$DEP]')
-    REQS=$(echo "${REQS}" | jq -c --arg DEP "${DEP}" --arg VER "${VER}" '. + { ($DEP): $VER }')
-  done
-
-  echo "${REQS}"
-}
-
-###
-# Run LIBCUDF Benchmarks
-###
-
-REQS=$(getReqs "${LIBCUDF_DEPS[@]}")
-
-mkdir -p "$WORKSPACE/tmp/benchmark"
-touch "$WORKSPACE/tmp/benchmark/benchmarks.txt"
-ls ${GBENCH_BENCHMARKS_DIR} > "$WORKSPACE/tmp/benchmark/benchmarks.txt"
-
-#Disable error aborting while tests run, failed tests will not generate data
-logger "Running libcudf GBenchmarks..."
-cd ${GBENCH_BENCHMARKS_DIR}
-set +e
-while read BENCH;
-do
-  nvidia-smi
-  ./${BENCH} --benchmark_out=${BENCH}.json --benchmark_out_format=json
-  EXITCODE=$?
-  if [[ ${EXITCODE} != 0 ]]; then
-    rm ./${BENCH}.json
-    JOBEXITCODE=1
-  fi
-done < "$WORKSPACE/tmp/benchmark/benchmarks.txt"
-set -e
-
-rm "$WORKSPACE/tmp/benchmark/benchmarks.txt"
-cd "$WORKSPACE"
-mv ${GBENCH_BENCHMARKS_DIR}/*.json "$WORKSPACE/tmp/benchmark/"
-python GBenchToASV.py -d "$WORKSPACE/tmp/benchmark/" -t ${S3_ASV_DIR} -n libcudf -b branch-${MINOR_VERSION} -r "${REQS}"
-
-###
-# Run Python Benchmarks
-###
-
-#REQS=$(getReqs "${CUDF_DEPS[@]}")
-
-#BENCHMARK_META=$(jq -n \
-#  --arg NODE "${NODE_NAME}" \
-#  --arg BRANCH "branch-${MINOR_VERSION}" \
-#  --argjson REQS "${REQS}" '
-#  {
-#    "machineName": $NODE,
-#    "commitBranch": $BRANCH,
-#    "requirements": $REQS
-#  }
-#')
-
-#echo "Benchmark meta:"
-#echo "${BENCHMARK_META}" | jq "."
diff --git a/ci/checks/changelog.sh b/ci/checks/changelog.sh
deleted file mode 100755
index 0dfcf27298e..00000000000
--- a/ci/checks/changelog.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2018, NVIDIA CORPORATION.
-#########################
-# cuDF CHANGELOG Tester #
-#########################
-
-# Checkout main for comparison
-git checkout --force --quiet main
-
-# Switch back to tip of PR branch
-git checkout --force --quiet current-pr-branch
-
-# Ignore errors during searching
-set +e
-
-# Get list of modified files between main and PR branch
-CHANGELOG=`git diff --name-only main...current-pr-branch | grep CHANGELOG.md`
-# Check if CHANGELOG has PR ID
-PRNUM=`cat CHANGELOG.md | grep "$PR_ID"`
-RETVAL=0
-
-# Return status of check result
-if [ "$CHANGELOG" != "" -a "$PRNUM" != "" ] ; then
-  echo -e "\n\n>>>> PASSED: CHANGELOG.md has been updated with current PR information.\n\nPlease ensure the update meets the following criteria.\n"
-else
-  echo -e "\n\n>>>> FAILED: CHANGELOG.md has not been updated!\n\nPlease add a line describing this PR to CHANGELOG.md in the repository root directory. The line should meet the following criteria.\n"
-  RETVAL=1
-fi
-
-cat << EOF
-  It should be placed under the section for the appropriate release.
-  It should be placed under "New Features", "Improvements", or "Bug Fixes" as appropriate.
-  It should be formatted as '- PR #<PR number> <Description>'
-    Example format for #491 '- PR #491 Add CI test script to check for updates to CHANGELOG.md in PRs'
-
-
-EOF
-
-exit $RETVAL
diff --git a/ci/checks/style.sh b/ci/checks/style.sh
deleted file mode 100755
index d32d88f5574..00000000000
--- a/ci/checks/style.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
-#####################
-# cuDF Style Tester #
-#####################
-
-# Ignore errors and set path
-set +e
-PATH=/conda/bin:$PATH
-LC_ALL=C.UTF-8
-LANG=C.UTF-8
-
-# Activate common conda env
-. /opt/conda/etc/profile.d/conda.sh
-conda activate rapids
-
-FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.04/cmake-format-rapids-cmake.json
-export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
-mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
-wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
-
-# Run pre-commit checks
-pre-commit run --hook-stage manual --all-files --show-diff-on-failure
diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
deleted file mode 100755
index 7ffd032bce0..00000000000
--- a/ci/cpu/build.sh
+++ /dev/null
@@ -1,150 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
-##############################################
-# cuDF CPU conda build script for CI #
-##############################################
-set -e
-
-# Set path and build parallel level
-# FIXME: PATH variable shouldn't be necessary.
-# This should be removed once we either stop using the `remote-docker-plugin`
-# or the following issue is addressed: https://github.com/gpuopenanalytics/remote-docker-plugin/issues/47
-export PATH=/usr/local/gcc9/bin:/opt/conda/bin:/usr/local/cuda/bin:$PATH
-export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
-
-# Set home to the job's workspace
-export HOME="$WORKSPACE"
-
-# Determine CUDA release version
-export CUDA_REL=${CUDA_VERSION%.*}
-
-# Setup 'gpuci_conda_retry' for build retries (results in 2 total attempts)
-export GPUCI_CONDA_RETRY_MAX=1
-export GPUCI_CONDA_RETRY_SLEEP=30
-
-# Workaround to keep Jenkins builds working
-# until we migrate fully to GitHub Actions
-export RAPIDS_CUDA_VERSION="${CUDA}"
-export SCCACHE_BUCKET=rapids-sccache
-export SCCACHE_REGION=us-west-2
-export SCCACHE_IDLE_TIMEOUT=32768
-
-# Use Ninja to build, setup Conda Build Dir
-export CMAKE_GENERATOR="Ninja"
-export CONDA_BLD_DIR="$WORKSPACE/.conda-bld"
-
-# Whether to keep `dask/label/dev` channel in the env. If INSTALL_DASK_MAIN=0,
-# `dask/label/dev` channel is removed.
-export INSTALL_DASK_MAIN=1
-
-# Switch to project root; also root of repo checkout
-cd "$WORKSPACE"
-
-# If nightly build, append current YYMMDD to version
-if [[ "$BUILD_MODE" = "branch" && "$SOURCE_BRANCH" = branch-* ]] ; then
-    export VERSION_SUFFIX=`date +%y%m%d`
-fi
-
-################################################################################
-# SETUP - Check environment
-################################################################################
-
-gpuci_logger "Check environment variables"
-env
-
-gpuci_logger "Activate conda env"
-. /opt/conda/etc/profile.d/conda.sh
-conda activate rapids
-
-# Remove `rapidsai-nightly` & `dask/label/dev` channel if we are building main branch
-if [ "$SOURCE_BRANCH" = "main" ]; then
-    conda config --system --remove channels rapidsai-nightly
-    conda config --system --remove channels dask/label/dev
-elif [[ "${INSTALL_DASK_MAIN}" == 0 ]]; then
-    # Remove `dask/label/dev` channel if INSTALL_DASK_MAIN=0
-    conda config --system --remove channels dask/label/dev
-fi
-
-gpuci_logger "Check compiler versions"
-python --version
-
-gpuci_logger "Check conda environment"
-conda info
-conda config --show-sources
-conda list --show-channel-urls
-
-# FIX: Added to deal with Anaconda SSL verification issues during conda builds
-conda config --set ssl_verify False
-
-# TODO: Move boa install to gpuci/rapidsai
-gpuci_mamba_retry install boa
-################################################################################
-# BUILD - Conda package builds
-################################################################################
-
-if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
-    CONDA_BUILD_ARGS=""
-    CONDA_CHANNEL=""
-else
-    CONDA_BUILD_ARGS="--dirty --no-remove-work-dir"
-    CONDA_CHANNEL="-c $WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/"
-fi
-
-if [ "$BUILD_LIBCUDF" == '1' ]; then
-    gpuci_logger "Build conda pkg for libcudf"
-    gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcudf $CONDA_BUILD_ARGS
-
-    # BUILD_LIBCUDF == 1 means this job is being run on the cpu_build jobs,
-    # which is where we must also build the strings_udf package
-    mkdir -p ${CONDA_BLD_DIR}/strings_udf/work
-    STRINGS_UDF_BUILD_DIR=${CONDA_BLD_DIR}/strings_udf/work
-    gpuci_logger "Build conda pkg for cudf (python 3.8), for strings_udf"
-    gpuci_conda_retry mambabuild --no-build-id --croot ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/cudf ${CONDA_BUILD_ARGS} --python=3.8
-    gpuci_logger "Build conda pkg for cudf (python 3.9), for strings_udf"
-    gpuci_conda_retry mambabuild --no-build-id --croot ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/cudf ${CONDA_BUILD_ARGS} --python=3.9
-
-    gpuci_logger "Build conda pkg for strings_udf (python 3.8)"
-    gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} -c ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS --python=3.8
-    gpuci_logger "Build conda pkg for strings_udf (python 3.9)"
-    gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} -c ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS --python=3.9
-
-    mkdir -p ${CONDA_BLD_DIR}/libcudf/work
-    cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf/work
-    gpuci_logger "sccache stats"
-    sccache --show-stats
-
-    # Copy libcudf build metrics results
-    LIBCUDF_BUILD_DIR=$CONDA_BLD_DIR/libcudf/work/cpp/build
-    echo "Checking for build metrics log $LIBCUDF_BUILD_DIR/ninja_log.html"
-    if [[ -f "$LIBCUDF_BUILD_DIR/ninja_log.html" ]]; then
-        gpuci_logger "Copying build metrics results"
-        mkdir -p "$WORKSPACE/build-metrics"
-        cp "$LIBCUDF_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
-        cp "$LIBCUDF_BUILD_DIR/ninja.log" "$WORKSPACE/build-metrics/ninja.log"
-    fi
-fi
-
-if [ "$BUILD_CUDF" == '1' ]; then
-    gpuci_logger "Build conda pkg for cudf"
-    gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
-
-    gpuci_logger "Build conda pkg for dask-cudf"
-    gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
-
-    gpuci_logger "Build conda pkg for cudf_kafka"
-    gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
-
-    gpuci_logger "Build conda pkg for custreamz"
-    gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
-
-    gpuci_logger "Build conda pkg for strings_udf"
-    gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/strings_udf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
-
-fi
-################################################################################
-# UPLOAD - Conda packages
-################################################################################
-
-# Uploads disabled due to new GH Actions implementation
-# gpuci_logger "Upload conda pkgs"
-# source ci/cpu/upload.sh
diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh
deleted file mode 100755
index 32589042f7f..00000000000
--- a/ci/cpu/prebuild.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
-set -e
-
-# Always upload cudf packages
-export UPLOAD_CUDF=1
-export UPLOAD_LIBCUDF=1
-export UPLOAD_CUDF_KAFKA=1
-
-if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
-    # If project flash is not activated, always build both
-    export BUILD_LIBCUDF=1
-    export BUILD_CUDF=1
-fi
diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh
deleted file mode 100755
index 82c58673605..00000000000
--- a/ci/cpu/upload.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
-# Adopted from https://github.com/tmcdonell/travis-scripts/blob/dfaac280ac2082cd6bcaba3217428347899f2975/update-accelerate-buildbot.sh
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
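# NOTE: illustrative sketch, not part of the original script. The
# gpuci_retry / gpuci_conda_retry / gpuci_mamba_retry helpers used throughout
# these deleted scripts come from the gpuci-tools image, not this repository;
# this is an assumption about the retry pattern they implement, driven by the
# GPUCI_RETRY_MAX / GPUCI_RETRY_SLEEP variables exported below:
#
#   retry() {
#       local attempt=0
#       until "$@"; do
#           attempt=$((attempt + 1))
#           (( attempt > ${GPUCI_RETRY_MAX:-3} )) && return 1
#           sleep "${GPUCI_RETRY_SLEEP:-30}"
#       done
#   }
#
#   # e.g. retry anaconda -t "${MY_UPLOAD_KEY}" upload --skip-existing <pkg>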
- -set -e - -# Setup 'gpuci_retry' for upload retries (results in 4 total attempts) -export GPUCI_RETRY_MAX=3 -export GPUCI_RETRY_SLEEP=30 - -# Set default label options if they are not defined elsewhere -export LABEL_OPTION=${LABEL_OPTION:-"--label main"} - -# Skip uploads unless BUILD_MODE == "branch" -if [ "${BUILD_MODE}" != "branch" ]; then - echo "Skipping upload" - return 0 -fi - -# Skip uploads if there is no upload key -if [ -z "$MY_UPLOAD_KEY" ]; then - echo "No upload key" - return 0 -fi - -################################################################################ -# UPLOAD - Conda packages -################################################################################ - -gpuci_logger "Starting conda uploads" -if [[ "$BUILD_LIBCUDF" == "1" && "$UPLOAD_LIBCUDF" == "1" ]]; then - export LIBCUDF_FILES=$(conda build --no-build-id --croot "${CONDA_BLD_DIR}" conda/recipes/libcudf --output) - LIBCUDF_FILES=$(echo "$LIBCUDF_FILES" | sed 's/.*libcudf-example.*//') # skip libcudf-example pkg upload - gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing --no-progress $LIBCUDF_FILES -fi - -if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF" == "1" ]]; then - export CUDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/cudf --python=$PYTHON --output) - test -e ${CUDF_FILE} - echo "Upload cudf: ${CUDF_FILE}" - gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUDF_FILE} --no-progress - - export STRINGS_UDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/strings_udf --python=$PYTHON --output -c "${CONDA_BLD_DIR}") - test -e ${STRINGS_UDF_FILE} - echo "Upload strings_udf: ${STRINGS_UDF_FILE}" - gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${STRINGS_UDF_FILE} --no-progress - - export DASK_CUDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/dask-cudf --python=$PYTHON --output) - test -e ${DASK_CUDF_FILE} - echo "Upload dask-cudf: ${DASK_CUDF_FILE}" - gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${DASK_CUDF_FILE} --no-progress - - export CUSTREAMZ_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/custreamz --python=$PYTHON --output) - test -e ${CUSTREAMZ_FILE} - echo "Upload custreamz: ${CUSTREAMZ_FILE}" - gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUSTREAMZ_FILE} --no-progress -fi - -if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF_KAFKA" == "1" ]]; then - export CUDF_KAFKA_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/cudf_kafka --python=$PYTHON --output) - test -e ${CUDF_KAFKA_FILE} - echo "Upload cudf_kafka: ${CUDF_KAFKA_FILE}" - gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUDF_KAFKA_FILE} --no-progress -fi diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh deleted file mode 100755 index 51a2d9ab170..00000000000 --- a/ci/gpu/build.sh +++ /dev/null @@ -1,324 +0,0 @@ -#!/bin/bash -# Copyright (c) 2018-2023, NVIDIA CORPORATION. 
-############################################## -# cuDF GPU build and test script for CI # -############################################## -set -e -NUMARGS=$# -ARGS=$* - -# Arg parsing function -function hasArg { - (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") -} - -# Set path and build parallel level -export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} - -# Set home to the job's workspace -export HOME="$WORKSPACE" - -# Switch to project root; also root of repo checkout -cd "$WORKSPACE" - -# Determine CUDA release version -export CUDA_REL=${CUDA_VERSION%.*} -export CONDA_ARTIFACT_PATH="$WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/" - -# Workaround to keep Jenkins builds working -# until we migrate fully to GitHub Actions -export RAPIDS_CUDA_VERSION="${CUDA}" -export SCCACHE_BUCKET=rapids-sccache -export SCCACHE_REGION=us-west-2 -export SCCACHE_IDLE_TIMEOUT=32768 - -# Parse git describe -export GIT_DESCRIBE_TAG=`git describe --tags` -export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` -unset GIT_DESCRIBE_TAG - -# Dask & Distributed option to install main(nightly) or `conda-forge` packages. -export INSTALL_DASK_MAIN=1 - -# Dask version to install when `INSTALL_DASK_MAIN=0` -export DASK_STABLE_VERSION="2022.12.0" - -# ucx-py version -export UCX_PY_VERSION='0.31.*' - -################################################################################ -# TRAP - Setup trap for removing jitify cache -################################################################################ - -# Set `LIBCUDF_KERNEL_CACHE_PATH` environment variable to $HOME/.jitify-cache -# because it's local to the container's virtual file system, and not shared with -# other CI jobs like `/tmp` is -export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" - -function remove_libcudf_kernel_cache_dir { - EXITCODE=$? - gpuci_logger "TRAP: Removing kernel cache dir: $LIBCUDF_KERNEL_CACHE_PATH" - rm -rf "$LIBCUDF_KERNEL_CACHE_PATH" \ - || gpuci_logger "[ERROR] TRAP: Could not rm -rf $LIBCUDF_KERNEL_CACHE_PATH" - exit $EXITCODE -} - -# Set trap to run on exit -gpuci_logger "TRAP: Set trap to remove jitify cache on exit" -trap remove_libcudf_kernel_cache_dir EXIT - -mkdir -p "$LIBCUDF_KERNEL_CACHE_PATH" \ - || gpuci_logger "[ERROR] TRAP: Could not mkdir -p $LIBCUDF_KERNEL_CACHE_PATH" - -################################################################################ -# SETUP - Check environment -################################################################################ - -gpuci_logger "Check environment variables" -env - -gpuci_logger "Check GPU usage" -nvidia-smi - -gpuci_logger "Activate conda env" -. 
/opt/conda/etc/profile.d/conda.sh
-conda activate rapids
-
-# Remove `dask/label/dev` channel if INSTALL_DASK_MAIN=0
-if [ "$SOURCE_BRANCH" != "main" ] && [[ "${INSTALL_DASK_MAIN}" == 0 ]]; then
-    conda config --system --remove channels dask/label/dev
-    gpuci_mamba_retry install conda-forge::dask==$DASK_STABLE_VERSION conda-forge::distributed==$DASK_STABLE_VERSION conda-forge::dask-core==$DASK_STABLE_VERSION --force-reinstall
-fi
-
-gpuci_logger "Check conda environment"
-conda info
-conda config --show-sources
-conda list --show-channel-urls
-gpuci_logger "Check compiler versions"
-python --version
-
-function install_dask {
-    # Install the conda-forge or nightly version of dask and distributed
-    gpuci_logger "Install the conda-forge or nightly version of dask and distributed"
-    set -x
-    if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
-        gpuci_logger "gpuci_mamba_retry install -c dask/label/dev 'dask/label/dev::dask' 'dask/label/dev::distributed'"
-        gpuci_mamba_retry install -c dask/label/dev "dask/label/dev::dask" "dask/label/dev::distributed"
-        conda list
-    else
-        gpuci_logger "gpuci_mamba_retry install conda-forge::dask==$DASK_STABLE_VERSION conda-forge::distributed==$DASK_STABLE_VERSION conda-forge::dask-core==$DASK_STABLE_VERSION --force-reinstall"
-        gpuci_mamba_retry install conda-forge::dask==$DASK_STABLE_VERSION conda-forge::distributed==$DASK_STABLE_VERSION conda-forge::dask-core==$DASK_STABLE_VERSION --force-reinstall
-    fi
-    # Install the main version of streamz
-    gpuci_logger "Install the main version of streamz"
-    # Need to uninstall streamz that is already in the env.
-    pip uninstall -y streamz
-    pip install "git+https://github.com/python-streamz/streamz.git@master" --upgrade --no-deps
-    set +x
-}
-
-install_dask
-
-if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
-
-    gpuci_logger "Install dependencies"
-    gpuci_mamba_retry install -y \
-        "cudatoolkit=$CUDA_REL" \
-        "rapids-build-env=$MINOR_VERSION.*" \
-        "rapids-notebook-env=$MINOR_VERSION.*" \
-        "dask-cuda=${MINOR_VERSION}" \
-        "rmm=$MINOR_VERSION.*" \
-        "ucx-py=${UCX_PY_VERSION}"
-
-    # https://docs.rapids.ai/maintainers/depmgmt/
-    # gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
-    # gpuci_mamba_retry install -y "your-pkg=1.0.0"
-
-    ################################################################################
-    # BUILD - Build libcudf, cuDF, libcudf_kafka, dask_cudf, and strings_udf from source
-    ################################################################################
-
-    gpuci_logger "Build from source"
-    "$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka strings_udf benchmarks tests --ptds
-
-    ################################################################################
-    # TEST - Run GoogleTest
-    ################################################################################
-
-    set +e -Eo pipefail
-    EXITCODE=0
-    trap "EXITCODE=1" ERR
-
-    if hasArg --skip-tests; then
-        gpuci_logger "Skipping Tests"
-        exit 0
-    else
-        gpuci_logger "Check GPU usage"
-        nvidia-smi
-
-        gpuci_logger "GoogleTests"
-        set -x
-        cd "$WORKSPACE/cpp/build"
-
-        for gt in "$WORKSPACE/cpp/build/gtests/"* ; do
-            test_name=$(basename ${gt})
-            echo "Running GoogleTest $test_name"
-            ${gt} --gtest_output=xml:"$WORKSPACE/test-results/"
-        done
-    fi
-else
-    # Project Flash
-
-    if hasArg --skip-tests; then
-        gpuci_logger "Skipping Tests"
-        exit 0
-    fi
-
-    gpuci_logger "Check GPU usage"
-    nvidia-smi
-
-    gpuci_logger "Installing libcudf, libcudf_kafka and libcudf-tests"
-
gpuci_mamba_retry install -y -c ${CONDA_ARTIFACT_PATH} libcudf libcudf_kafka libcudf-tests - - # TODO: Move boa install to gpuci/rapidsai - gpuci_mamba_retry install boa - gpuci_logger "Building cudf, dask-cudf, cudf_kafka and custreamz" - export CONDA_BLD_DIR="$WORKSPACE/.conda-bld" - gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON -c ${CONDA_ARTIFACT_PATH} - gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON -c ${CONDA_ARTIFACT_PATH} - gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON -c ${CONDA_ARTIFACT_PATH} - gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON -c ${CONDA_ARTIFACT_PATH} - - # the CUDA component of strings_udf must be built on cuda 11.5 just like libcudf - # but because there is no separate python package, we must also build the python on the 11.5 jobs - # this means that at this point (on the GPU test jobs) the whole package is already built and has been - # copied by CI from the upstream 11.5 jobs into $CONDA_ARTIFACT_PATH - gpuci_logger "Installing cudf, dask-cudf, cudf_kafka, and custreamz" - gpuci_mamba_retry install cudf dask-cudf cudf_kafka custreamz -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}" - - gpuci_logger "Check current conda environment" - conda list --show-channel-urls - - gpuci_logger "GoogleTests" - - # Set up library for finding incorrect default stream usage. - cd "$WORKSPACE/cpp/tests/utilities/identify_stream_usage/" - mkdir build && cd build && cmake .. -GNinja && ninja && ninja test - STREAM_IDENTIFY_LIB="$WORKSPACE/cpp/tests/utilities/identify_stream_usage/build/libidentify_stream_usage.so" - - # Run libcudf and libcudf_kafka gtests from libcudf-tests package - for gt in "$CONDA_PREFIX/bin/gtests/libcudf"*/* ; do - test_name=$(basename ${gt}) - - echo "Running GoogleTest $test_name" - if [[ ${test_name} == "SPAN_TEST" ]]; then - # This one test is specifically designed to test using a thrust device - # vector, so we expect and allow it to include default stream usage. 
- gtest_filter="SpanTest.CanConstructFromDeviceContainers" - GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" --gtest_filter="-${gtest_filter}" - ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" --gtest_filter="${gtest_filter}" - else - GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" - fi - done - - export LIB_BUILD_DIR="$WORKSPACE/ci/artifacts/cudf/cpu/libcudf_work/cpp/build" - # Copy libcudf build time results - echo "Checking for build time log $LIB_BUILD_DIR/ninja_log.xml" - if [[ -f "$LIB_BUILD_DIR/ninja_log.xml" ]]; then - gpuci_logger "Copying build time results" - cp "$LIB_BUILD_DIR/ninja_log.xml" "$WORKSPACE/test-results/buildtimes-junit.xml" - fi - - ################################################################################ - # MEMCHECK - Run compute-sanitizer on GoogleTest (only in nightly builds) - ################################################################################ - if [[ "$BUILD_MODE" == "branch" && "$BUILD_TYPE" == "gpu" ]]; then - if [[ "$COMPUTE_SANITIZER_ENABLE" == "true" ]]; then - gpuci_logger "Memcheck on GoogleTests with rmm_mode=cuda" - export GTEST_CUDF_RMM_MODE=cuda - COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck" - mkdir -p "$WORKSPACE/test-results/" - for gt in "$CONDA_PREFIX/bin/gtests/libcudf"*/* ; do - test_name=$(basename ${gt}) - if [[ "$test_name" == "ERROR_TEST" ]]; then - continue - fi - echo "Running GoogleTest $test_name" - ${COMPUTE_SANITIZER_CMD} ${gt} | tee "$WORKSPACE/test-results/${test_name}.cs.log" - done - unset GTEST_CUDF_RMM_MODE - # test-results/*.cs.log are processed in gpuci - fi - fi -fi - -# Both regular and Project Flash proceed here - -# set environment variable for numpy 1.16 -# will be enabled for later versions by default -np_ver=$(python -c "import numpy; print('.'.join(numpy.__version__.split('.')[:-1]))") -if [ "$np_ver" == "1.16" ];then - export NUMPY_EXPERIMENTAL_ARRAY_FUNCTION=1 -fi - -################################################################################ -# TEST - Run py.test, notebooks -################################################################################ - -cd "$WORKSPACE/python/cudf/cudf" -# It is essential to cd into $WORKSPACE/python/cudf/cudf as `pytest-xdist` + `coverage` seem to work only at this directory level. 
-gpuci_logger "Check conda packages" -conda list -gpuci_logger "Python py.test for cuDF" -py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope tests - -gpuci_logger "Python py.tests for cuDF with spilling (CUDF_SPILL_DEVICE_LIMIT=1)" -# Due to time concerns, we only run tests marked "spilling" -CUDF_SPILL=on CUDF_SPILL_DEVICE_LIMIT=1 py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov-append --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope -m spilling tests - -cd "$WORKSPACE/python/dask_cudf" -gpuci_logger "Python py.test for dask-cudf" -py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term dask_cudf - -cd "$WORKSPACE/python/custreamz" -gpuci_logger "Python py.test for cuStreamz" -py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term custreamz - - -# only install strings_udf after cuDF is finished testing without its presence -gpuci_logger "Installing strings_udf" -gpuci_mamba_retry install strings_udf -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}" - -cd "$WORKSPACE/python/strings_udf/strings_udf" -gpuci_logger "Python py.test for strings_udf" -py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests - -# retest cuDF UDFs -cd "$WORKSPACE/python/cudf/cudf" -gpuci_logger "Python py.test retest cuDF UDFs" -py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-strings-udf-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf-strings-udf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-strings-udf-coverage.xml" --cov-report term --dist=loadscope tests/test_udf_masked_ops.py - - -# Run benchmarks with both cudf and pandas to ensure compatibility is maintained. -# Benchmarks are run in DEBUG_ONLY mode, meaning that only small data sizes are used. -# Therefore, these runs only verify that benchmarks are valid. -# They do not generate meaningful performance measurements. 
-cd "$WORKSPACE/python/cudf" -gpuci_logger "Python pytest for cuDF benchmarks" -CUDF_BENCHMARKS_DEBUG_ONLY=ON pytest -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" -v --dist=loadscope benchmarks - -gpuci_logger "Python pytest for cuDF benchmarks using pandas" -CUDF_BENCHMARKS_USE_PANDAS=ON CUDF_BENCHMARKS_DEBUG_ONLY=ON pytest -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" -v --dist=loadscope benchmarks - -gpuci_logger "Test notebooks" -"$WORKSPACE/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log -python "$WORKSPACE/ci/utils/nbtestlog2junitxml.py" nbtest.log - -if [ -n "${CODECOV_TOKEN}" ]; then - codecov -t $CODECOV_TOKEN -fi - -return ${EXITCODE} diff --git a/ci/gpu/java.sh b/ci/gpu/java.sh deleted file mode 100755 index 2db9cd57eb8..00000000000 --- a/ci/gpu/java.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/bin/bash -# Copyright (c) 2018-2022, NVIDIA CORPORATION. -############################################## -# cuDF GPU build and test script for CI # -############################################## -set -e -NUMARGS=$# -ARGS=$* - -# Arg parsing function -function hasArg { - (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") -} - -# Set path and build parallel level -export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} - -# Set home to the job's workspace -export HOME="$WORKSPACE" - -# Switch to project root; also root of repo checkout -cd "$WORKSPACE" - -# Determine CUDA release version -export CUDA_REL=${CUDA_VERSION%.*} -export CONDA_ARTIFACT_PATH="$WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/" - -# Parse git describe -export GIT_DESCRIBE_TAG=`git describe --tags` -export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` - -################################################################################ -# TRAP - Setup trap for removing jitify cache -################################################################################ - -# Set `LIBCUDF_KERNEL_CACHE_PATH` environment variable to $HOME/.jitify-cache -# because it's local to the container's virtual file system, and not shared with -# other CI jobs like `/tmp` is -export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" - -function remove_libcudf_kernel_cache_dir { - EXITCODE=$? - gpuci_logger "TRAP: Removing kernel cache dir: $LIBCUDF_KERNEL_CACHE_PATH" - rm -rf "$LIBCUDF_KERNEL_CACHE_PATH" \ - || gpuci_logger "[ERROR] TRAP: Could not rm -rf $LIBCUDF_KERNEL_CACHE_PATH" - exit $EXITCODE -} - -# Set trap to run on exit -gpuci_logger "TRAP: Set trap to remove jitify cache on exit" -trap remove_libcudf_kernel_cache_dir EXIT - -mkdir -p "$LIBCUDF_KERNEL_CACHE_PATH" \ - || gpuci_logger "[ERROR] TRAP: Could not mkdir -p $LIBCUDF_KERNEL_CACHE_PATH" - -################################################################################ -# SETUP - Check environment -################################################################################ - -gpuci_logger "Check environment variables" -env - -gpuci_logger "Check GPU usage" -nvidia-smi - -gpuci_logger "Activate conda env" -. /opt/conda/etc/profile.d/conda.sh -conda activate rapids - -gpuci_logger "Check conda environment" -conda info -conda config --show-sources -conda list --show-channel-urls - -gpuci_logger "Install dependencies" -gpuci_mamba_retry install -y \ - "cudatoolkit=$CUDA_REL" \ - "rapids-build-env=$MINOR_VERSION.*" \ - "rmm=$MINOR_VERSION.*" \ - "openjdk=8.*" \ - "maven" -# "mamba install openjdk" adds an activation script to set JAVA_HOME but this is -# not triggered on installation. 
Re-activating the conda environment will set -# this environment variable so that CMake can find JNI. -conda activate rapids - -# https://docs.rapids.ai/maintainers/depmgmt/ -# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env -# gpuci_mamba_retry install -y "your-pkg=1.0.0" - -gpuci_logger "Check conda environment" -conda info -conda config --show-sources -conda list --show-channel-urls - -################################################################################ -# INSTALL - Install libcudf artifacts -################################################################################ - -gpuci_logger "Installing libcudf" -gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} libcudf - -################################################################################ -# TEST - Run java tests -################################################################################ - -gpuci_logger "Check GPU usage" -nvidia-smi - -gpuci_logger "Running Java Tests" -cd ${WORKSPACE}/java -mvn test -B -DCUDF_JNI_ARROW_STATIC=OFF - -return ${EXITCODE} diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh deleted file mode 100755 index 36d093d0d28..00000000000 --- a/ci/gpu/test-notebooks.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -# Copyright (c) 2020-2022, NVIDIA CORPORATION. - -NOTEBOOKS_DIR="$WORKSPACE/notebooks" -NBTEST="$WORKSPACE/ci/utils/nbtest.sh" -LIBCUDF_KERNEL_CACHE_PATH="$WORKSPACE/.jitcache" - -cd ${NOTEBOOKS_DIR} -TOPLEVEL_NB_FOLDERS=$(find . -name *.ipynb |cut -d'/' -f2|sort -u) - -# Add notebooks that should be skipped here -# (space-separated list of filenames without paths) - -SKIPNBS="" - -## Check env -env - -EXITCODE=0 - -# Always run nbtest in all TOPLEVEL_NB_FOLDERS, set EXITCODE to failure -# if any run fails - -cd ${NOTEBOOKS_DIR} -for nb in $(find . -name "*.ipynb"); do - nbBasename=$(basename ${nb}) - # Skip all NBs that use dask (in the code or even in their name) - if ((echo ${nb}|grep -qi dask) || \ - (grep -q dask ${nb})); then - echo "--------------------------------------------------------------------------------" - echo "SKIPPING: ${nb} (suspected Dask usage, not currently automatable)" - echo "--------------------------------------------------------------------------------" - elif (echo " ${SKIPNBS} " | grep -q " ${nbBasename} "); then - echo "--------------------------------------------------------------------------------" - echo "SKIPPING: ${nb} (listed in skip list)" - echo "--------------------------------------------------------------------------------" - else - nvidia-smi - ${NBTEST} ${nbBasename} - EXITCODE=$((EXITCODE | $?)) - rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/* - fi -done - - -nvidia-smi - -exit ${EXITCODE} diff --git a/ci/local/README.md b/ci/local/README.md deleted file mode 100644 index 7754bcaf647..00000000000 --- a/ci/local/README.md +++ /dev/null @@ -1,57 +0,0 @@ -## Purpose - -This script is designed for developer and contributor use. This tool mimics the actions of gpuCI on your local machine. This allows you to test and even debug your code inside a gpuCI base container before pushing your code as a GitHub commit. -The script can be helpful in locally triaging and debugging RAPIDS continuous integration failures. 
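In effect, the script wraps a `docker run` of a gpuCI development image with your checkout volume-mounted into it. A hand-rolled equivalent, for orientation only (the image tag follows the example used below and may not exist for every release; the exact GPU flag the script picks depends on your Docker version):

```bash
# Roughly what ci/local/build.sh automates: run the gpuCI image with the local
# checkout mounted, then build and test inside the container.
docker run --rm -it --gpus all \
    -v ~/rapids/cudf:/rapids/cudf \
    gpuci/rapidsai:22.02-cuda11.5-devel-ubuntu20.04-py3.8 \
    bash -c "cd /rapids/cudf && source ci/gpu/build.sh"
```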
-
-## Requirements
-
-```
-nvidia-docker
-```
-
-## Usage
-
-```
-bash build.sh [-h] [-H] [-s] [-r <repo path>] [-i <image name>]
-Build and test your local repository using a base gpuCI Docker image
-
-where:
-    -H   Show this help text
-    -r   Path to repository (defaults to working directory)
-    -i   Use Docker image (default is gpuci/rapidsai:${NIGHTLY_VERSION}-cuda11.5-devel-ubuntu20.04-py3.8)
-    -s   Skip building and testing and start an interactive shell in a container of the Docker image
-```
-
-Example Usage:
-`bash build.sh -r ~/rapids/cudf -i gpuci/rapidsai:22.02-cuda11.5-devel-ubuntu20.04-py3.8`
-
-For a full list of available gpuCI docker images, visit our [DockerHub](https://hub.docker.com/r/gpuci/rapidsai/tags) page.
-
-Style Check:
-```bash
-$ bash ci/local/build.sh -r ~/rapids/cudf -s
-$ source activate rapids  # Activate gpuCI conda environment
-$ cd rapids
-$ flake8 python
-```
-
-## Information
-
-There are some caveats to be aware of when using this script, especially if you plan on developing from within the container itself.
-
-### Docker Image Build Repository
-
-The docker image will generate build artifacts in a folder on your machine located in the `root` directory of the repository you passed to the script. For the above example, the directory is named `~/rapids/cudf/build_rapidsai_cuda11.5-ubuntu20.04-py3.8/`. Feel free to remove this directory after the script is finished.
-
-*Note*: The script *will not* override your local build repository. Your local environment stays intact.
-
-### Where The User is Dumped
-
-The script will build your repository and run all tests. If any tests fail, it dumps the user into the docker container itself to allow you to debug from within the container. If all the tests pass as expected, the container exits and is automatically removed. Remember to exit the container if tests fail and you do not wish to debug within the container itself.
-
-### Container File Structure
-
-Your repository will be located in the `/rapids/` folder of the container. This folder is volume mounted from the local machine. Any changes to the code in this repository are replicated onto the local machine. The `cpp/build` and `python/build` directories within your repository are on separate mounts to avoid conflicting with your local build artifacts.
diff --git a/ci/local/build.sh b/ci/local/build.sh
deleted file mode 100755
index f6479cd76cc..00000000000
--- a/ci/local/build.sh
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
-
-GIT_DESCRIBE_TAG=`git describe --tags`
-MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
-
-DOCKER_IMAGE="gpuci/rapidsai:${MINOR_VERSION}-cuda11.5-devel-ubuntu20.04-py3.8"
-REPO_PATH=${PWD}
-RAPIDS_DIR_IN_CONTAINER="/rapids"
-CPP_BUILD_DIR="cpp/build"
-PYTHON_BUILD_DIR="python/build"
-CONTAINER_SHELL_ONLY=0
-
-SHORTHELP="$(basename "$0") [-h] [-H] [-s] [-r <repo path>] [-i <image name>]"
-LONGHELP="${SHORTHELP}
-Build and test your local repository using a base gpuCI Docker image
-
-where:
-    -H   Show this help text
-    -r   Path to repository (defaults to working directory)
-    -i   Use Docker image (default is ${DOCKER_IMAGE})
-    -s   Skip building and testing and start an interactive shell in a container of the Docker image
-"
-
-# Limit GPUs available to container based on CUDA_VISIBLE_DEVICES
-if [[ -z "${CUDA_VISIBLE_DEVICES}" ]]; then
-    NVIDIA_VISIBLE_DEVICES="all"
-else
-    NVIDIA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
-fi
-
-while getopts ":hHr:i:s" option; do
-    case ${option} in
-        r)
-            REPO_PATH=${OPTARG}
-            ;;
-        i)
-            DOCKER_IMAGE=${OPTARG}
-            ;;
-        s)
-            CONTAINER_SHELL_ONLY=1
-            ;;
-        h)
-            echo "${SHORTHELP}"
-            exit 0
-            ;;
-        H)
-            echo "${LONGHELP}"
-            exit 0
-            ;;
-        *)
-            echo "ERROR: Invalid flag"
-            echo "${SHORTHELP}"
-            exit 1
-            ;;
-    esac
-done
-
-REPO_PATH_IN_CONTAINER="${RAPIDS_DIR_IN_CONTAINER}/$(basename "${REPO_PATH}")"
-CPP_BUILD_DIR_IN_CONTAINER="${RAPIDS_DIR_IN_CONTAINER}/$(basename "${REPO_PATH}")/${CPP_BUILD_DIR}"
-PYTHON_BUILD_DIR_IN_CONTAINER="${RAPIDS_DIR_IN_CONTAINER}/$(basename "${REPO_PATH}")/${PYTHON_BUILD_DIR}"
-
-# BASE_CONTAINER_BUILD_DIR is named after the image name, allowing for
-# multiple image builds to coexist on the local filesystem. This will
-# be mapped to the typical BUILD_DIR inside of the container. Builds
-# running in the container generate build artifacts just as they would
-# in a bare-metal environment, and the host filesystem is able to
-# maintain the host build in BUILD_DIR as well.
-# shellcheck disable=SC2001,SC2005,SC2046
-BASE_CONTAINER_BUILD_DIR=${REPO_PATH}/build_$(echo $(basename "${DOCKER_IMAGE}")|sed -e 's/:/_/g')
-CPP_CONTAINER_BUILD_DIR=${BASE_CONTAINER_BUILD_DIR}/cpp
-PYTHON_CONTAINER_BUILD_DIR=${BASE_CONTAINER_BUILD_DIR}/python
-
-BUILD_SCRIPT="#!/bin/bash
-set -e
-WORKSPACE=${REPO_PATH_IN_CONTAINER}
-PREBUILD_SCRIPT=${REPO_PATH_IN_CONTAINER}/ci/gpu/prebuild.sh
-BUILD_SCRIPT=${REPO_PATH_IN_CONTAINER}/ci/gpu/build.sh
-if [ -f \${PREBUILD_SCRIPT} ]; then
-    source \${PREBUILD_SCRIPT}
-fi
-yes | source \${BUILD_SCRIPT}
-"
-
-if (( CONTAINER_SHELL_ONLY == 0 )); then
-    COMMAND="${CPP_BUILD_DIR_IN_CONTAINER}/build.sh || bash"
-else
-    COMMAND="bash"
-fi
-
-# Create the build dir for the container to mount, generate the build script inside of it
-mkdir -p "${BASE_CONTAINER_BUILD_DIR}"
-mkdir -p "${CPP_CONTAINER_BUILD_DIR}"
-mkdir -p "${PYTHON_CONTAINER_BUILD_DIR}"
-# Create build directories. This is to ensure correct owner for directories. If
-# directories don't exist there is a side effect from docker volume mounting creating
-# build directories owned by root (volume mount point(s)).
-mkdir -p "${REPO_PATH}/${CPP_BUILD_DIR}"
-mkdir -p "${REPO_PATH}/${PYTHON_BUILD_DIR}"
-
-echo "${BUILD_SCRIPT}" > "${CPP_CONTAINER_BUILD_DIR}/build.sh"
-chmod ugo+x "${CPP_CONTAINER_BUILD_DIR}/build.sh"
-
-# Mount passwd and group files to docker. This allows docker to resolve username and group,
-# avoiding these nags:
-#   * groups: cannot find name for group ID ID
-#   * I have no name!@id:/$
-# For LDAP users, user information is not present in the system /etc/passwd and /etc/group files.
-# Hence we generate dummy files for LDAP users, which docker uses to resolve username and group.
-
-PASSWD_FILE="/etc/passwd"
-GROUP_FILE="/etc/group"
-
-USER_FOUND=$(grep -wc "$(whoami)" < "$PASSWD_FILE")
-if [ "$USER_FOUND" == 0 ]; then
-    echo "Local User not found, LDAP WAR for docker mounts activated. Creating dummy passwd and group"
-    echo "files to allow docker resolve username and group"
-    cp "$PASSWD_FILE" /tmp/passwd
-    PASSWD_FILE="/tmp/passwd"
-    cp "$GROUP_FILE" /tmp/group
-    GROUP_FILE="/tmp/group"
-    echo "$(whoami):x:$(id -u):$(id -g):$(whoami),,,:$HOME:$SHELL" >> "$PASSWD_FILE"
-    echo "$(whoami):x:$(id -g):" >> "$GROUP_FILE"
-fi
-
-# Run the generated build script in a container
-docker pull "${DOCKER_IMAGE}"
-
-DOCKER_MAJOR=$(docker -v|sed 's/[^[0-9]*\([0-9]*\).*/\1/')
-GPU_OPTS="--gpus device=${NVIDIA_VISIBLE_DEVICES}"
-if [ "$DOCKER_MAJOR" -lt 19 ]
-then
-    GPU_OPTS="--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES='${NVIDIA_VISIBLE_DEVICES}'"
-fi
-
-docker run --rm -it ${GPU_OPTS} \
-    -u "$(id -u)":"$(id -g)" \
-    -v "${REPO_PATH}":"${REPO_PATH_IN_CONTAINER}" \
-    -v "${CPP_CONTAINER_BUILD_DIR}":"${CPP_BUILD_DIR_IN_CONTAINER}" \
-    -v "${PYTHON_CONTAINER_BUILD_DIR}":"${PYTHON_BUILD_DIR_IN_CONTAINER}" \
-    -v "$PASSWD_FILE":/etc/passwd:ro \
-    -v "$GROUP_FILE":/etc/group:ro \
-    --cap-add=SYS_PTRACE \
-    "${DOCKER_IMAGE}" bash -c "${COMMAND}"
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 555a67d9cd6..d2be7d5f222 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -43,9 +43,6 @@ sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/
 # Strings UDF update
 sed_runner 's/'"strings_udf_version .*)"'/'"strings_udf_version ${NEXT_FULL_TAG})"'/g' python/strings_udf/CMakeLists.txt
 
-# Groupby UDF update
-sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' python/cudf/udf_cpp/CMakeLists.txt
-
 # cpp libcudf_kafka update
 sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt
@@ -56,7 +53,6 @@ sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g'
 sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake
 
 # cmake-format rapids-cmake definitions
-sed_runner 's/'"branch-.*\/cmake-format-rapids-cmake.json"'/'"branch-${NEXT_SHORT_TAG}\/cmake-format-rapids-cmake.json"'/g' ci/checks/style.sh
 sed_runner 's/'"branch-.*\/cmake-format-rapids-cmake.json"'/'"branch-${NEXT_SHORT_TAG}\/cmake-format-rapids-cmake.json"'/g' ci/check_style.sh
 
 # doxyfile update
@@ -84,10 +80,6 @@ sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md
 sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/basic/CMakeLists.txt
 sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/strings/CMakeLists.txt
 
-# ucx-py version update
-sed_runner "s/export UCX_PY_VERSION=.*/export UCX_PY_VERSION='${NEXT_UCX_PY_VERSION}'/g" ci/gpu/build.sh
-sed_runner "s/export UCX_PY_VERSION=.*/export UCX_PY_VERSION='${NEXT_UCX_PY_VERSION}'/g" ci/gpu/java.sh
-
 # Need to distutils-normalize the original version
 NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import
packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 5b1e8aa398c..0be72486319 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -21,7 +21,6 @@ set -u CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/ mkdir -p "${RAPIDS_TESTS_DIR}" -SUITEERROR=0 rapids-print-env @@ -32,15 +31,14 @@ rapids-mamba-retry install \ rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e -# TODO: Disabling stream identification for now. -# Set up library for finding incorrect default stream usage. -#pushd "cpp/tests/utilities/identify_stream_usage/" -#mkdir build && cd build && cmake .. -GNinja && ninja && ninja test -#STREAM_IDENTIFY_LIB="$(realpath build/libidentify_stream_usage.so)" -#echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB}" -#popd +# Get library for finding incorrect default stream usage. +STREAM_IDENTIFY_LIB="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage.so" + +echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB}" # Run libcudf and libcudf_kafka gtests from libcudf-tests package rapids-logger "Run gtests" @@ -50,22 +48,21 @@ rapids-logger "Run gtests" for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do test_name=$(basename ${gt}) echo "Running gtest $test_name" - ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} - # TODO: Disabling stream identification for now. - #if [[ ${test_name} == "SPAN_TEST" ]]; then - # # This one test is specifically designed to test using a thrust device - # # vector, so we expect and allow it to include default stream usage. - # gtest_filter="SpanTest.CanConstructFromDeviceContainers" - # GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="-${gtest_filter}" && \ - # ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="${gtest_filter}" - #else - # GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} - #fi - - exitcode=$? - if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: GTest ${gt}" + + # TODO: This strategy for using the stream lib will need to change when we + # switch to invoking ctest. For one, we will want to set the test + # properties to use the lib (which means that the decision will be made at + # CMake-configure time instead of runtime). We may also need to leverage + # something like gtest_discover_tests to be able to filter on the + # underlying test names. + if [[ ${test_name} == "SPAN_TEST" ]]; then + # This one test is specifically designed to test using a thrust device + # vector, so we expect and allow it to include default stream usage. 
+ gtest_filter="SpanTest.CanConstructFromDeviceContainers" + GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="-${gtest_filter}" && \ + ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="${gtest_filter}" + else + GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} fi done @@ -82,7 +79,8 @@ if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then ${COMPUTE_SANITIZER_CMD} ${gt} | tee "${RAPIDS_TESTS_DIR}${test_name}.cs.log" done unset GTEST_CUDF_RMM_MODE - # TODO: test-results/*.cs.log are processed in gpuci + # TODO: test-results/*.cs.log are processed in CI fi -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_java.sh b/ci/test_java.sh index a0ba7c41607..f905aaa1178 100755 --- a/ci/test_java.sh +++ b/ci/test_java.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. set -euo pipefail @@ -13,6 +13,8 @@ rapids-dependency-file-generator \ rapids-mamba-retry env create --force -f env.yaml -n test +export CMAKE_GENERATOR=Ninja + # Temporarily allow unbound variables for conda activation. set +u conda activate test @@ -27,22 +29,17 @@ rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ libcudf -SUITEERROR=0 - rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e rapids-logger "Run Java tests" pushd java mvn test -B -DCUDF_JNI_ARROW_STATIC=OFF -DCUDF_JNI_ENABLE_PROFILING=OFF -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf Java" -fi popd -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh index f1e17162195..7f5f35219b0 100755 --- a/ci/test_notebooks.sh +++ b/ci/test_notebooks.sh @@ -36,9 +36,8 @@ pushd notebooks # (space-separated list of filenames without paths) SKIPNBS="" -# Set SUITEERROR to failure if any run fails -SUITEERROR=0 - +EXITCODE=0 +trap "EXITCODE=1" ERR set +e for nb in $(find . -name "*.ipynb"); do nbBasename=$(basename ${nb}) @@ -55,8 +54,8 @@ for nb in $(find . -name "*.ipynb"); do else nvidia-smi ${NBTEST} ${nbBasename} - SUITEERROR=$((SUITEERROR | $?)) fi done -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh index 107540c0192..0e922c105dd 100755 --- a/ci/test_python_common.sh +++ b/ci/test_python_common.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Common setup steps shared by Python test jobs @@ -27,7 +27,6 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" -SUITEERROR=0 rapids-print-env diff --git a/ci/test_python_cudf.sh b/ci/test_python_cudf.sh index bea162a9318..bb33d8473ce 100755 --- a/ci/test_python_cudf.sh +++ b/ci/test_python_cudf.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
# Common setup steps shared by Python test jobs source "$(dirname "$0")/test_python_common.sh" @@ -7,11 +7,12 @@ source "$(dirname "$0")/test_python_common.sh" rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e rapids-logger "pytest cudf" pushd python/cudf/cudf -# (TODO: Copied the comment below from gpuCI, need to verify on GitHub Actions) # It is essential to cd into python/cudf/cudf as `pytest-xdist` + `coverage` seem to work only at this directory level. pytest \ --cache-clear \ @@ -24,12 +25,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-coverage.xml" \ --cov-report=term \ tests -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf" -fi popd # Run benchmarks with both cudf and pandas to ensure compatibility is maintained. @@ -48,12 +43,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-benchmark-coverage.xml" \ --cov-report=term \ benchmarks -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf" -fi rapids-logger "pytest for cudf benchmarks using pandas" CUDF_BENCHMARKS_USE_PANDAS=ON \ @@ -67,12 +56,7 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-benchmark-pandas-coverage.xml" \ --cov-report=term \ benchmarks -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf" -fi popd -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh index d7a5e288193..b79cd44cdbe 100755 --- a/ci/test_python_other.sh +++ b/ci/test_python_other.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Common setup steps shared by Python test jobs source "$(dirname "$0")/test_python_common.sh" @@ -12,6 +12,8 @@ rapids-mamba-retry install \ rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e rapids-logger "pytest dask_cudf" @@ -26,12 +28,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \ --cov-report=term \ dask_cudf -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in dask-cudf" -fi popd rapids-logger "pytest custreamz" @@ -46,12 +42,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \ --cov-report=term \ custreamz -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in custreamz" -fi popd set -e @@ -73,12 +63,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/strings-udf-coverage.xml" \ --cov-report=term \ tests -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in strings_udf" -fi popd rapids-logger "pytest cudf with strings_udf" @@ -94,12 +78,7 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-strings-udf-coverage.xml" \ --cov-report=term \ tests/test_udf_masked_ops.py -exitcode=$? 
- -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf with strings_udf" -fi popd -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index df013c492e8..675df3891c3 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -21,8 +21,8 @@ dependencies: - cxx-compiler - cython>=0.29,<0.30 - dask-cuda=23.04.* -- dask>=2022.12.0 -- distributed>=2022.12.0 +- dask>=2023.1.1 +- distributed>=2023.1.1 - dlpack>=0.5,<0.6.0a0 - doxygen=1.8.20 - fastavro>=0.22.9 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 205ca2a995c..0d5b5d16e08 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -1,10 +1,11 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. -{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version = environ['CONDA_PY'] %} {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} {% set cuda_major = cuda_version.split('.')[0] %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} package: name: cudf @@ -15,7 +16,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - PARALLEL_LEVEL - CMAKE_GENERATOR diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 5fa0411803b..5cbea78e82b 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -1,9 +1,10 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. -{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version = environ['CONDA_PY'] %} {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} package: name: cudf_kafka @@ -14,7 +15,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - PARALLEL_LEVEL - CMAKE_GENERATOR diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 13d54011e02..af5705341e6 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -1,9 +1,10 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. -{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %}
 {% set py_version = environ['CONDA_PY'] %}
 {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
 
 package:
   name: custreamz
@@ -14,7 +15,7 @@ source:
 
 build:
   number: {{ GIT_DESCRIBE_NUMBER }}
-  string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+  string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
   script_env:
     - PARALLEL_LEVEL
     - CMAKE_GENERATOR
@@ -38,8 +39,8 @@ requirements:
     - python
     - streamz
    - cudf ={{ version }}
-    - dask >=2022.12.0
-    - distributed >=2022.12.0
+    - dask >=2023.1.1
+    - distributed >=2023.1.1
     - python-confluent-kafka >=1.7.0,<1.8.0a0
     - cudf_kafka ={{ version }}
diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml
index feb9e53e37f..3ee3d4d3952 100644
--- a/conda/recipes/dask-cudf/meta.yaml
+++ b/conda/recipes/dask-cudf/meta.yaml
@@ -1,10 +1,11 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
 
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
+{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
 {% set py_version = environ['CONDA_PY'] %}
 {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
 {% set cuda_major = cuda_version.split('.')[0] %}
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
 
 package:
   name: dask-cudf
@@ -15,7 +16,7 @@ source:
 
 build:
   number: {{ GIT_DESCRIBE_NUMBER }}
-  string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+  string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
   script_env:
     - PARALLEL_LEVEL
    - CMAKE_GENERATOR
@@ -34,14 +35,14 @@ requirements:
   host:
     - python
     - cudf ={{ version }}
-    - dask >=2022.12.0
-    - distributed >=2022.12.0
+    - dask >=2023.1.1
+    - distributed >=2023.1.1
     - cudatoolkit ={{ cuda_version }}
   run:
     - python
     - cudf ={{ version }}
-    - dask >=2022.12.0
-    - distributed >=2022.12.0
+    - dask >=2023.1.1
+    - distributed >=2023.1.1
     - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
 
 test:
diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh
index f56610bea86..0c2f628dcf2 100644
--- a/conda/recipes/dask-cudf/run_test.sh
+++ b/conda/recipes/dask-cudf/run_test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 set -e
 
@@ -17,12 +17,20 @@ if [ "${ARCH}" = "aarch64" ]; then
     exit 0
 fi
 
-# Install the latest version of dask and distributed
-logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
+# Dask & Distributed option to install main (nightly) or `conda-forge` packages.
+export INSTALL_DASK_MAIN=1
 
-logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
+# Dask version to install when `INSTALL_DASK_MAIN=0`
+export DASK_STABLE_VERSION="2023.1.1"
+
+# Install the conda-forge or nightly version of dask and distributed
+if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
+    rapids-logger "rapids-mamba-retry install -c dask/label/dev 'dask/label/dev::dask' 'dask/label/dev::distributed'"
+    rapids-mamba-retry install -c dask/label/dev "dask/label/dev::dask" "dask/label/dev::distributed"
+else
+    rapids-logger "rapids-mamba-retry install conda-forge::dask==$DASK_STABLE_VERSION conda-forge::distributed==$DASK_STABLE_VERSION conda-forge::dask-core==$DASK_STABLE_VERSION --force-reinstall"
+    rapids-mamba-retry install conda-forge::dask==$DASK_STABLE_VERSION conda-forge::distributed==$DASK_STABLE_VERSION conda-forge::dask-core==$DASK_STABLE_VERSION --force-reinstall
+fi
 
 logger "python -c 'import dask_cudf'"
 python -c "import dask_cudf"
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 911080ebdb6..b0b86b427b7 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -1,10 +1,11 @@
 # Copyright (c) 2018-2023, NVIDIA CORPORATION.
 
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
+{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
 {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
 {% set cuda_major = cuda_version.split('.')[0] %}
 {% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
 
 package:
   name: libcudf-split
@@ -52,7 +53,7 @@ outputs:
     script: install_libcudf.sh
     build:
       number: {{ GIT_DESCRIBE_NUMBER }}
-      string: cuda{{ cuda_major }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
      run_exports:
         - {{ pin_subpackage("libcudf", max_pin="x.x") }}
       ignore_run_exports_from:
@@ -308,7 +309,7 @@ outputs:
     script: install_libcudf_kafka.sh
     build:
       number: {{ GIT_DESCRIBE_NUMBER }}
-      string: {{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      string: {{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
     ignore_run_exports_from:
       - {{ compiler('cuda') }}
     requirements:
@@ -331,7 +332,7 @@ outputs:
     script: install_libcudf_example.sh
     build:
       number: {{ GIT_DESCRIBE_NUMBER }}
-      string: {{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      string: {{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
     ignore_run_exports_from:
       - {{ compiler('cuda') }}
     requirements:
@@ -358,7 +359,7 @@ outputs:
     script: install_libcudf_tests.sh
     build:
       number: {{ GIT_DESCRIBE_NUMBER }}
-      string: cuda{{ cuda_major }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
     ignore_run_exports_from:
       - {{ compiler('cuda') }}
     requirements:
diff --git a/conda/recipes/strings_udf/meta.yaml b/conda/recipes/strings_udf/meta.yaml
index 0928c5d3315..93316a92c22 100644
--- a/conda/recipes/strings_udf/meta.yaml
+++ b/conda/recipes/strings_udf/meta.yaml
@@ -1,10 +1,11 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION. -{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version = environ['CONDA_PY'] %} {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} {% set cuda_major = cuda_version.split('.')[0] %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} package: name: strings_udf @@ -15,7 +16,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - PARALLEL_LEVEL - CMAKE_GENERATOR diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 19c118016bf..d402a47628c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -71,6 +71,18 @@ option(CUDA_ENABLE_LINEINFO option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON) # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) + +set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON) +if(${CUDA_STATIC_RUNTIME}) + set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL OFF) +endif() +option( + CUDF_BUILD_STREAMS_TEST_UTIL + "Whether to build the utilities for stream testing contained in libcudf" + ${DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL} +) +mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL) + option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF) mark_as_advanced(USE_LIBARROW_FROM_PYARROW) @@ -754,10 +766,34 @@ if(CUDF_BUILD_TESTUTIL) cudftestutil PUBLIC "$" "$" ) - add_library(cudf::cudftestutil ALIAS cudftestutil) endif() + +# * build cudf_identify_stream_usage -------------------------------------------------------------- + +if(CUDF_BUILD_STREAMS_TEST_UTIL) + if(CUDA_STATIC_RUNTIME) + message( + FATAL_ERROR + "Stream identification cannot be used with a static CUDA runtime. Please set CUDA_STATIC_RUNTIME=OFF or CUDF_BUILD_STREAMS_TEST_UTIL=OFF." + ) + endif() + + # Libraries for stream-related testing. + add_library(cudf_identify_stream_usage SHARED tests/utilities/identify_stream_usage.cpp) + + set_target_properties( + cudf_identify_stream_usage + PROPERTIES # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + ) + target_link_libraries(cudf_identify_stream_usage PUBLIC CUDA::cudart rmm::rmm) + add_library(cudf::cudf_identify_stream_usage ALIAS cudf_identify_stream_usage) +endif() + # ################################################################################################## # * add tests ------------------------------------------------------------------------------------- @@ -784,12 +820,9 @@ if(CUDF_BUILD_BENCHMARKS) include(${rapids-cmake-dir}/cpm/gbench.cmake) rapids_cpm_gbench() - # Find or install NVBench Temporarily force downloading of fmt because current versions of nvbench - # do not support the latest version of fmt, which is automatically pulled into our conda - # environments by mamba. 
- set(CPM_DOWNLOAD_fmt TRUE) - include(${rapids-cmake-dir}/cpm/nvbench.cmake) - rapids_cpm_nvbench() + # Find or install nvbench + include(cmake/thirdparty/get_nvbench.cmake) + add_subdirectory(benchmarks) endif() @@ -833,6 +866,10 @@ if(CUDF_BUILD_TESTUTIL) ) endif() +if(CUDF_BUILD_STREAMS_TEST_UTIL) + install(TARGETS cudf_identify_stream_usage DESTINATION ${lib_dir}) +endif() + set(doc_string [=[ Provide targets for the cudf library. diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 6f67cb32b0a..c5ae3345da5 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -169,7 +169,10 @@ ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp) # ################################################################################################## # * sort benchmark -------------------------------------------------------------------------------- ConfigureBench(SORT_BENCH sort/rank.cpp sort/sort.cpp sort/sort_strings.cpp) -ConfigureNVBench(SORT_NVBENCH sort/segmented_sort.cpp sort/sort_lists.cpp sort/sort_structs.cpp) +ConfigureNVBench( + SORT_NVBENCH sort/rank_lists.cpp sort/rank_structs.cpp sort/segmented_sort.cpp + sort/sort_lists.cpp sort/sort_structs.cpp +) # ################################################################################################## # * quantiles benchmark diff --git a/cpp/benchmarks/fixture/rmm_pool_raii.hpp b/cpp/benchmarks/fixture/rmm_pool_raii.hpp index 60586ef878b..465c53a91ea 100644 --- a/cpp/benchmarks/fixture/rmm_pool_raii.hpp +++ b/cpp/benchmarks/fixture/rmm_pool_raii.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,4 +69,15 @@ class rmm_pool_raii { std::shared_ptr mr; }; +/** + * Base fixture for cudf benchmarks using nvbench. + * + * Initializes the default memory resource to use the RMM pool device resource. + */ +struct nvbench_base_fixture { + rmm_pool_raii _mr; +}; + } // namespace cudf + +#define NVBENCH_ENVIRONMENT cudf::nvbench_base_fixture diff --git a/cpp/benchmarks/groupby/group_max.cpp b/cpp/benchmarks/groupby/group_max.cpp index 4956cce0daf..077558f8709 100644 --- a/cpp/benchmarks/groupby/group_max.cpp +++ b/cpp/benchmarks/groupby/group_max.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,6 @@ template void bench_groupby_max(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; const auto size = static_cast(state.get_int64("num_rows")); auto const keys = [&] { diff --git a/cpp/benchmarks/groupby/group_nunique.cpp b/cpp/benchmarks/groupby/group_nunique.cpp index 05698c04058..f74ed95200e 100644 --- a/cpp/benchmarks/groupby/group_nunique.cpp +++ b/cpp/benchmarks/groupby/group_nunique.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,6 @@ auto make_aggregation_request_vector(cudf::column_view const& values, Args&&... 
template void bench_groupby_nunique(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; const auto size = static_cast(state.get_int64("num_rows")); auto const keys = [&] { diff --git a/cpp/benchmarks/groupby/group_rank.cpp b/cpp/benchmarks/groupby/group_rank.cpp index f573b63a75d..2a70b95890b 100644 --- a/cpp/benchmarks/groupby/group_rank.cpp +++ b/cpp/benchmarks/groupby/group_rank.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,6 @@ static void nvbench_groupby_rank(nvbench::state& state, { using namespace cudf; constexpr auto dtype = type_to_id(); - cudf::rmm_pool_raii pool_raii; bool const is_sorted = state.get_int64("is_sorted"); cudf::size_type const column_size = state.get_int64("data_size"); diff --git a/cpp/benchmarks/groupby/group_struct_keys.cpp b/cpp/benchmarks/groupby/group_struct_keys.cpp index cc6f0faaf41..53ef12ffeaa 100644 --- a/cpp/benchmarks/groupby/group_struct_keys.cpp +++ b/cpp/benchmarks/groupby/group_struct_keys.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,8 +29,6 @@ void bench_groupby_struct_keys(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; - using Type = int; using column_wrapper = cudf::test::fixed_width_column_wrapper; std::default_random_engine generator; diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp index 27fea856332..026045acee7 100644 --- a/cpp/benchmarks/io/csv/csv_reader_input.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
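The `cudf::rmm_pool_raii` deletions above, and the many identical deletions in the rest of this patch, rely on the `nvbench_base_fixture` and `NVBENCH_ENVIRONMENT` definitions added to `rmm_pool_raii.hpp` earlier: the patched nvbench `main` (see `nvbench_global_setup.diff` further down) constructs `NVBENCH_ENVIRONMENT()` once before any benchmark runs, so the RMM pool resource is created once per benchmark binary instead of once per benchmark function. A minimal sketch of what a benchmark looks like after this change; the benchmark name and body are hypothetical:

#include <nvbench/nvbench.cuh>

// The process-wide fixture (NVBENCH_ENVIRONMENT == cudf::nvbench_base_fixture)
// has already installed the RMM pool by the time this function is entered,
// so no local cudf::rmm_pool_raii is declared.
static void bench_example(nvbench::state& state)  // hypothetical benchmark
{
  state.exec([](nvbench::launch& launch) {
    // device work submitted on launch.get_stream() allocates from the pool
  });
}
NVBENCH_BENCH(bench_example);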
@@ -64,29 +64,26 @@ void csv_read_common(DataType const& data_types, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template -void BM_csv_read_input(nvbench::state& state, nvbench::type_list>) +template +void BM_csv_read_input(nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group(static_cast(DataType)); - auto const source_type = io_type::FILEPATH; + auto const source_type = IOType; csv_read_common(d_type, source_type, state); } -template -void BM_csv_read_io(nvbench::state& state, nvbench::type_list>) +template +void BM_csv_read_io(nvbench::state& state, nvbench::type_list>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), static_cast(data_type::TIMESTAMP), static_cast(data_type::DURATION), static_cast(data_type::STRING)}); - auto const source_type = IO; + auto const source_type = IOType; csv_read_common(d_type, source_type, state); } @@ -101,9 +98,11 @@ using d_type_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(BM_csv_read_input, NVBENCH_TYPE_AXES(d_type_list)) +NVBENCH_BENCH_TYPES(BM_csv_read_input, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) .set_name("csv_read_data_type") - .set_type_axes_names({"data_type"}) + .set_type_axes_names({"data_type", "io"}) .set_min_samples(4); NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list)) diff --git a/cpp/benchmarks/io/csv/csv_reader_options.cpp b/cpp/benchmarks/io/csv/csv_reader_options.cpp index 04522c16d5c..2d0e0e5754e 100644 --- a/cpp/benchmarks/io/csv/csv_reader_options.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_options.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,8 +32,6 @@ void BM_csv_read_varying_options( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const data_types = dtypes_for_column_selection(get_type_or_group({static_cast(data_type::INTEGRAL), static_cast(data_type::FLOAT), diff --git a/cpp/benchmarks/io/fst.cu b/cpp/benchmarks/io/fst.cu index 6d318db12de..7acf69e9d8e 100644 --- a/cpp/benchmarks/io/fst.cu +++ b/cpp/benchmarks/io/fst.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
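In the CSV change above, the IO source becomes a second nvbench type axis (`nvbench::enum_type<IOType>`) instead of the hard-coded `io_type::FILEPATH`, so a single registration measures every data-type/IO combination and reports it under the axis name "io". A self-contained sketch of the same two-axis mechanics, using a stand-in enum; `io_kind`, `bench_io`, and the axis values here are hypothetical, not cudf names:

#include <nvbench/nvbench.cuh>

#include <cstdint>

enum class io_kind : int32_t { FILEPATH, HOST_BUFFER };  // hypothetical stand-in

// Maps each enum value to the string shown for the "io" axis.
NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
  io_kind,
  [](io_kind v) { return v == io_kind::FILEPATH ? "FILEPATH" : "HOST_BUFFER"; },
  [](io_kind) { return ""; })

template <io_kind IO>
void bench_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
{
  state.exec([](nvbench::launch&) { /* read through the IO-specific source */ });
}

// Each enum_type_list entry becomes one value of the "io" type axis.
NVBENCH_BENCH_TYPES(bench_io,
                    NVBENCH_TYPE_AXES(nvbench::enum_type_list<io_kind::FILEPATH,
                                                              io_kind::HOST_BUFFER>))
  .set_name("io_axis_demo")
  .set_type_axes_names({"io"});

nvbench's usual benchmark and axis selection flags can then restrict a run to a single combination, so widening the axis space does not force every configuration to run each time.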
@@ -78,9 +78,6 @@ constexpr std::size_t single_item = 1; void BM_FST_JSON(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); auto const string_size{size_type(state.get_int64("string_size"))}; @@ -116,9 +113,6 @@ void BM_FST_JSON(nvbench::state& state) void BM_FST_JSON_no_outidx(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); auto const string_size{size_type(state.get_int64("string_size"))}; @@ -154,9 +148,6 @@ void BM_FST_JSON_no_outidx(nvbench::state& state) void BM_FST_JSON_no_out(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); auto const string_size{size_type(state.get_int64("string_size"))}; @@ -190,9 +181,6 @@ void BM_FST_JSON_no_out(nvbench::state& state) void BM_FST_JSON_no_str(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); auto const string_size{size_type(state.get_int64("string_size"))}; diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp index 2abae88dca3..416cf403671 100644 --- a/cpp/benchmarks/io/json/nested_json.cpp +++ b/cpp/benchmarks/io/json/nested_json.cpp @@ -157,9 +157,6 @@ auto make_test_json_data(cudf::size_type string_size, rmm::cuda_stream_view stre void BM_NESTED_JSON(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - auto const string_size{cudf::size_type(state.get_int64("string_size"))}; auto const default_options = cudf::io::json_reader_options{}; @@ -189,9 +186,6 @@ NVBENCH_BENCH(BM_NESTED_JSON) void BM_NESTED_JSON_DEPTH(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - auto const string_size{cudf::size_type(state.get_int64("string_size"))}; auto const depth{cudf::size_type(state.get_int64("depth"))}; diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index 3f8c096140e..4705c083c02 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -57,11 +57,10 @@ void orc_read_common(cudf::io::orc_writer_options const& opts, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template -void BM_orc_read_data(nvbench::state& state, nvbench::type_list>) +template +void BM_orc_read_data(nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group(static_cast(DataType)); cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); @@ -72,20 +71,18 @@ void BM_orc_read_data(nvbench::state& state, nvbench::type_listview(); - cuio_source_sink_pair 
source_sink(io_type::HOST_BUFFER); + cuio_source_sink_pair source_sink(IOType); cudf::io::orc_writer_options opts = cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view); orc_read_common(opts, source_sink, state); } -template +template void BM_orc_read_io_compression( nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) + nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), @@ -103,7 +100,7 @@ void BM_orc_read_io_compression( data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); auto const view = tbl->view(); - cuio_source_sink_pair source_sink(IO); + cuio_source_sink_pair source_sink(IOType); cudf::io::orc_writer_options opts = cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view) .compression(Compression); @@ -126,9 +123,11 @@ using io_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(BM_orc_read_data, NVBENCH_TYPE_AXES(d_type_list)) +NVBENCH_BENCH_TYPES(BM_orc_read_data, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) .set_name("orc_read_decode") - .set_type_axes_names({"data_type"}) + .set_type_axes_names({"data_type", "io"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); diff --git a/cpp/benchmarks/io/orc/orc_reader_options.cpp b/cpp/benchmarks/io/orc/orc_reader_options.cpp index 1b7d33ccd19..1e841f744ae 100644 --- a/cpp/benchmarks/io/orc/orc_reader_options.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_options.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,8 +56,6 @@ void BM_orc_read_varying_options(nvbench::state& state, nvbench::enum_type, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const num_chunks = RowSelection == row_selection::ALL ? 1 : chunked_read_num_chunks; auto const use_index = UsesIndex == uses_index::YES; diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index 545f8d10122..67bf4cb750b 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -46,8 +46,6 @@ constexpr cudf::size_type num_cols = 64; template void BM_orc_write_encode(nvbench::state& state, nvbench::type_list>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group(static_cast(DataType)); cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); @@ -90,8 +88,6 @@ void BM_orc_write_io_compression( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), @@ -141,8 +137,6 @@ void BM_orc_write_statistics( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), diff --git a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp index 592eae96362..eda70bc05e6 100644 --- a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp +++ b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,8 +35,6 @@ constexpr int64_t data_size = 512 << 20; void nvbench_orc_write(nvbench::state& state) { - cudf::rmm_pool_raii rmm_pool; - cudf::size_type num_cols = state.get_int64("num_columns"); auto tbl = create_random_table( @@ -79,8 +77,6 @@ void nvbench_orc_write(nvbench::state& state) void nvbench_orc_chunked_write(nvbench::state& state) { - cudf::rmm_pool_raii rmm_pool; - cudf::size_type num_cols = state.get_int64("num_columns"); cudf::size_type num_tables = state.get_int64("num_chunks"); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 36a62903f31..e04dfbbc799 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -57,16 +57,15 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template -void BM_parquet_read_data(nvbench::state& state, nvbench::type_list>) +template +void BM_parquet_read_data( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group(static_cast(DataType)); cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); auto const compression = cudf::io::compression_type::SNAPPY; - auto const source_type = io_type::FILEPATH; auto const tbl = create_random_table(cycle_dtypes(d_type, num_cols), @@ -74,7 +73,7 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_listview(); - cuio_source_sink_pair source_sink(source_type); + cuio_source_sink_pair source_sink(IOType); cudf::io::parquet_writer_options write_opts = cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) .compression(compression); @@ -82,13 +81,11 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_list +template void BM_parquet_read_io_compression( nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) + 
nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), @@ -101,7 +98,7 @@ void BM_parquet_read_io_compression( cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); auto const compression = Compression; - auto const source_type = IO; + auto const source_type = IOType; auto const tbl = create_random_table(cycle_dtypes(d_type, num_cols), @@ -133,9 +130,11 @@ using io_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list)) +NVBENCH_BENCH_TYPES(BM_parquet_read_data, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) .set_name("parquet_read_decode") - .set_type_axes_names({"data_type"}) + .set_type_axes_names({"data_type", "io"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp index 6e187afd6ab..3fd46fa08f2 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp @@ -57,8 +57,6 @@ void BM_parquet_read_options(nvbench::state& state, nvbench::enum_type, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto constexpr str_to_categories = ConvertsStrings == converts_strings::YES; auto constexpr uses_pd_metadata = UsesPandasMetadata == uses_pandas_metadata::YES; diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp index a0b076abfda..d3d22e06086 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,8 +46,6 @@ constexpr cudf::size_type num_cols = 64; template void BM_parq_write_encode(nvbench::state& state, nvbench::type_list>) { - cudf::rmm_pool_raii rmm_pool; - auto const data_types = get_type_or_group(static_cast(DataType)); cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); @@ -90,8 +88,6 @@ void BM_parq_write_io_compression( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const data_types = get_type_or_group({static_cast(data_type::INTEGRAL), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp index 11b29cc2297..ed70f53cad8 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -33,8 +33,6 @@ constexpr int64_t data_size = 512 << 20; void PQ_write(nvbench::state& state) { - cudf::rmm_pool_raii rmm_pool; - cudf::size_type const num_cols = state.get_int64("num_cols"); auto const tbl = create_random_table(cycle_dtypes({cudf::type_id::INT32}, num_cols), @@ -67,8 +65,6 @@ void PQ_write(nvbench::state& state) void PQ_write_chunked(nvbench::state& state) { - cudf::rmm_pool_raii rmm_pool; - cudf::size_type const num_cols = state.get_int64("num_cols"); cudf::size_type const num_tables = state.get_int64("num_chunks"); diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index c3b7c585055..41b5ddb567e 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -116,8 +116,6 @@ template static void bench_multibyte_split(nvbench::state& state, nvbench::type_list>) { - cudf::rmm_pool_raii pool_raii; - auto const delim_size = state.get_int64("delim_size"); auto const delim_percent = state.get_int64("delim_percent"); auto const file_size_approx = state.get_int64("size_approx"); @@ -209,10 +207,21 @@ using source_type_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_TYPE_AXES(source_type_list)) - .set_name("multibyte_split") +NVBENCH_BENCH_TYPES(bench_multibyte_split, + NVBENCH_TYPE_AXES(nvbench::enum_type_list)) + .set_name("multibyte_split_delimiters") + .set_min_samples(4) .add_int64_axis("strip_delimiters", {0, 1}) .add_int64_axis("delim_size", {1, 4, 7}) .add_int64_axis("delim_percent", {1, 25}) + .add_int64_power_of_two_axis("size_approx", {15}) + .add_int64_axis("byte_range_percent", {50}); + +NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_TYPE_AXES(source_type_list)) + .set_name("multibyte_split_source") + .set_min_samples(4) + .add_int64_axis("strip_delimiters", {1}) + .add_int64_axis("delim_size", {1}) + .add_int64_axis("delim_percent", {1}) .add_int64_power_of_two_axis("size_approx", {15, 30}) - .add_int64_axis("byte_range_percent", {1, 5, 25, 50, 100}); + .add_int64_axis("byte_range_percent", {10, 100}); diff --git a/cpp/benchmarks/join/join.cu b/cpp/benchmarks/join/join.cu index 053eb6c2852..647e37aa97d 100644 --- a/cpp/benchmarks/join/join.cu +++ b/cpp/benchmarks/join/join.cu @@ -23,9 +23,6 @@ void nvbench_inner_join(nvbench::state& state, { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, cudf::null_equality compare_nulls, @@ -43,9 +40,6 @@ void nvbench_left_join(nvbench::state& state, { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, cudf::null_equality compare_nulls, @@ -63,9 +57,6 @@ void nvbench_full_join(nvbench::state& state, { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, cudf::null_equality compare_nulls, diff --git a/cpp/benchmarks/join/mixed_join.cu b/cpp/benchmarks/join/mixed_join.cu index b7da5e2c0b3..1420625bbcd 100644 --- a/cpp/benchmarks/join/mixed_join.cu +++ b/cpp/benchmarks/join/mixed_join.cu @@ -23,9 +23,6 @@ void nvbench_mixed_inner_join( { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii 
pool_raii; - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -50,9 +47,6 @@ void nvbench_mixed_left_join( { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -77,9 +71,6 @@ void nvbench_mixed_full_join( { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -104,9 +95,6 @@ void nvbench_mixed_left_semi_join( { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -131,9 +119,6 @@ void nvbench_mixed_left_anti_join( { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, diff --git a/cpp/benchmarks/reduction/distinct_count.cpp b/cpp/benchmarks/reduction/distinct_count.cpp index 489d7935809..d2218c270a8 100644 --- a/cpp/benchmarks/reduction/distinct_count.cpp +++ b/cpp/benchmarks/reduction/distinct_count.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,8 +24,6 @@ template static void bench_reduction_distinct_count(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; - auto const dtype = cudf::type_to_id(); auto const size = static_cast(state.get_int64("num_rows")); auto const null_probability = state.get_float64("null_probability"); diff --git a/cpp/benchmarks/reduction/rank.cpp b/cpp/benchmarks/reduction/rank.cpp index 5022e029d97..41295f787fc 100644 --- a/cpp/benchmarks/reduction/rank.cpp +++ b/cpp/benchmarks/reduction/rank.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,8 +26,6 @@ template static void nvbench_reduction_scan(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; - auto const dtype = cudf::type_to_id(); double const null_probability = state.get_float64("null_probability"); diff --git a/cpp/benchmarks/reduction/scan_structs.cpp b/cpp/benchmarks/reduction/scan_structs.cpp index 92016041c9a..d5b19faf773 100644 --- a/cpp/benchmarks/reduction/scan_structs.cpp +++ b/cpp/benchmarks/reduction/scan_structs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,8 +28,6 @@ static constexpr cudf::size_type max_str_length = 32; static void nvbench_structs_scan(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; - auto const null_probability = [&] { auto const null_prob_val = state.get_float64("null_probability"); return null_prob_val > 0 ? std::optional{null_prob_val} : std::nullopt; diff --git a/cpp/benchmarks/reduction/segment_reduce.cu b/cpp/benchmarks/reduction/segment_reduce.cu index e063adb25f9..127b3598dae 100644 --- a/cpp/benchmarks/reduction/segment_reduce.cu +++ b/cpp/benchmarks/reduction/segment_reduce.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -86,9 +86,6 @@ template void BM_Simple_Segmented_Reduction(nvbench::state& state, nvbench::type_list>) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - auto const column_size{cudf::size_type(state.get_int64("column_size"))}; auto const num_segments{cudf::size_type(state.get_int64("num_segments"))}; diff --git a/cpp/benchmarks/search/contains.cpp b/cpp/benchmarks/search/contains.cpp index 8daa975d4ed..01a0a37b21a 100644 --- a/cpp/benchmarks/search/contains.cpp +++ b/cpp/benchmarks/search/contains.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,6 @@ std::unique_ptr create_column_data(cudf::size_type n_rows, bool ha static void nvbench_contains_scalar(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; using Type = int; auto const has_nulls = static_cast(state.get_int64("has_nulls")); diff --git a/cpp/benchmarks/sort/nested_types_common.hpp b/cpp/benchmarks/sort/nested_types_common.hpp new file mode 100644 index 00000000000..c4851823534 --- /dev/null +++ b/cpp/benchmarks/sort/nested_types_common.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include + +#include + +#include + +inline std::unique_ptr create_lists_data(nvbench::state& state) +{ + const size_t size_bytes(state.get_int64("size_bytes")); + const cudf::size_type depth{static_cast(state.get_int64("depth"))}; + auto const null_frequency{state.get_float64("null_frequency")}; + + data_profile table_profile; + table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5); + table_profile.set_list_depth(depth); + table_profile.set_null_probability(null_frequency); + return create_random_table({cudf::type_id::LIST}, table_size_bytes{size_bytes}, table_profile); +} + +inline std::unique_ptr create_structs_data(nvbench::state& state, + cudf::size_type const n_cols = 1) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + const cudf::size_type n_rows{static_cast(state.get_int64("NumRows"))}; + const cudf::size_type depth{static_cast(state.get_int64("Depth"))}; + const bool nulls{static_cast(state.get_int64("Nulls"))}; + + // Create columns with values in the range [0,100) + std::vector columns; + columns.reserve(n_cols); + std::generate_n(std::back_inserter(columns), n_cols, [&]() { + auto const elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + if (!nulls) return column_wrapper(elements, elements + n_rows); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; }); + return column_wrapper(elements, elements + n_rows, valids); + }); + + std::vector> cols; + std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { + return col.release(); + }); + + std::vector> child_cols = std::move(cols); + // Nest the child columns in a struct, then nest that struct column inside another + // struct column up to the desired depth + for (int i = 0; i < depth; i++) { + std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 100 * (i + 1)); + std::generate_n( + std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); + cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); + child_cols = std::vector>{}; + child_cols.push_back(struct_col.release()); + } + + // Create table view + return std::make_unique(std::move(child_cols)); +} diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp index 2c26f4fa15d..6d0a8e5aedd 100644 --- a/cpp/benchmarks/sort/rank.cpp +++ b/cpp/benchmarks/sort/rank.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ static void BM_rank(benchmark::State& state, bool nulls) // Create columns with values in the range [0,100) data_profile profile = data_profile_builder().cardinality(0).distribution( cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); - profile.set_null_probability(nulls ? std::optional{0.01} : std::nullopt); + profile.set_null_probability(nulls ? 
std::optional{0.2} : std::nullopt); auto keys = create_random_column(cudf::type_to_id(), row_count{n_rows}, profile); for (auto _ : state) { diff --git a/cpp/benchmarks/sort/rank_lists.cpp b/cpp/benchmarks/sort/rank_lists.cpp new file mode 100644 index 00000000000..49dc409ebfc --- /dev/null +++ b/cpp/benchmarks/sort/rank_lists.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nested_types_common.hpp" +#include "rank_types_common.hpp" + +#include + +#include + +#include + +template <cudf::rank_method method> +void nvbench_rank_lists(nvbench::state& state, nvbench::type_list<nvbench::enum_type<method>>) +{ + auto const table = create_lists_data(state); + + auto const null_frequency{state.get_float64("null_frequency")}; + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::rank(table->view().column(0), + method, + cudf::order::ASCENDING, + null_frequency ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_rank_lists, NVBENCH_TYPE_AXES(methods)) + .set_name("rank_lists") + .add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28}) + .add_int64_axis("depth", {1, 4}) + .add_float64_axis("null_frequency", {0, 0.2}); diff --git a/cpp/benchmarks/sort/rank_structs.cpp b/cpp/benchmarks/sort/rank_structs.cpp new file mode 100644 index 00000000000..c0227e85191 --- /dev/null +++ b/cpp/benchmarks/sort/rank_structs.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nested_types_common.hpp" +#include "rank_types_common.hpp" + +#include + +#include + +template <cudf::rank_method method> +void nvbench_rank_structs(nvbench::state& state, nvbench::type_list<nvbench::enum_type<method>>) +{ + auto const table = create_structs_data(state); + + const bool nulls{static_cast<bool>(state.get_int64("Nulls"))}; + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::rank(table->view().column(0), + method, + cudf::order::ASCENDING, + nulls ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_rank_structs, NVBENCH_TYPE_AXES(methods)) + .set_name("rank_structs") + .add_int64_power_of_two_axis("NumRows", {10, 18, 26}) + .add_int64_axis("Depth", {0, 1, 8}) + .add_int64_axis("Nulls", {0, 1}); diff --git a/cpp/benchmarks/sort/rank_types_common.hpp b/cpp/benchmarks/sort/rank_types_common.hpp new file mode 100644 index 00000000000..adb58606c42 --- /dev/null +++ b/cpp/benchmarks/sort/rank_types_common.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +enum class rank_method : int32_t {}; + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::rank_method, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "FIRST"; + case cudf::rank_method::AVERAGE: return "AVERAGE"; + case cudf::rank_method::MIN: return "MIN"; + case cudf::rank_method::MAX: return "MAX"; + case cudf::rank_method::DENSE: return "DENSE"; + default: return "unknown"; + } + }, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "cudf::rank_method::FIRST"; + case cudf::rank_method::AVERAGE: return "cudf::rank_method::AVERAGE"; + case cudf::rank_method::MIN: return "cudf::rank_method::MIN"; + case cudf::rank_method::MAX: return "cudf::rank_method::MAX"; + case cudf::rank_method::DENSE: return "cudf::rank_method::DENSE"; + default: return "unknown"; + } + }) + +using methods = nvbench::enum_type_list<cudf::rank_method::FIRST, cudf::rank_method::AVERAGE, cudf::rank_method::MIN, cudf::rank_method::MAX, cudf::rank_method::DENSE>; diff --git a/cpp/benchmarks/sort/segmented_sort.cpp b/cpp/benchmarks/sort/segmented_sort.cpp index e3459291caf..22d2b1c4029 100644 --- a/cpp/benchmarks/sort/segmented_sort.cpp +++ b/cpp/benchmarks/sort/segmented_sort.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,8 +26,6 @@ void nvbench_segmented_sort(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; - auto const stable = static_cast<bool>(state.get_int64("stable")); auto const dtype = cudf::type_to_id(); auto const size_bytes = static_cast(state.get_int64("size_bytes")); diff --git a/cpp/benchmarks/sort/sort_lists.cpp b/cpp/benchmarks/sort/sort_lists.cpp index dac865de479..b55b60f5ec9 100644 --- a/cpp/benchmarks/sort/sort_lists.cpp +++ b/cpp/benchmarks/sort/sort_lists.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,7 @@ * limitations under the License.
*/ -#include -#include +#include "nested_types_common.hpp" #include @@ -23,18 +22,7 @@ void nvbench_sort_lists(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; - - const size_t size_bytes(state.get_int64("size_bytes")); - const cudf::size_type depth{static_cast(state.get_int64("depth"))}; - auto const null_frequency{state.get_float64("null_frequency")}; - - data_profile table_profile; - table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5); - table_profile.set_list_depth(depth); - table_profile.set_null_probability(null_frequency); - auto const table = - create_random_table({cudf::type_id::LIST}, table_size_bytes{size_bytes}, table_profile); + auto const table = create_lists_data(state); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp index 9b6c32940f5..1d54fa42f6f 100644 --- a/cpp/benchmarks/sort/sort_structs.cpp +++ b/cpp/benchmarks/sort/sort_structs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,65 +14,19 @@ * limitations under the License. */ -#include - -#include +#include "nested_types_common.hpp" #include #include -#include - void nvbench_sort_struct(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; - - using Type = int; - using column_wrapper = cudf::test::fixed_width_column_wrapper; - std::default_random_engine generator; - std::uniform_int_distribution distribution(0, 100); - - const cudf::size_type n_rows{static_cast(state.get_int64("NumRows"))}; - const cudf::size_type n_cols{1}; - const cudf::size_type depth{static_cast(state.get_int64("Depth"))}; - const bool nulls{static_cast(state.get_int64("Nulls"))}; - - // Create columns with values in the range [0,100) - std::vector columns; - columns.reserve(n_cols); - std::generate_n(std::back_inserter(columns), n_cols, [&]() { - auto const elements = cudf::detail::make_counting_transform_iterator( - 0, [&](auto row) { return distribution(generator); }); - if (!nulls) return column_wrapper(elements, elements + n_rows); - auto valids = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; }); - return column_wrapper(elements, elements + n_rows, valids); - }); - - std::vector> cols; - std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { - return col.release(); - }); - - std::vector> child_cols = std::move(cols); - // Lets add some layers - for (int i = 0; i < depth; i++) { - std::vector struct_validity; - std::uniform_int_distribution bool_distribution(0, 100 * (i + 1)); - std::generate_n( - std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); - cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); - child_cols = std::vector>{}; - child_cols.push_back(struct_col.release()); - } - - // Create table view - auto const input = cudf::table(std::move(child_cols)); + auto const input = create_structs_data(state); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; - cudf::detail::sorted_order(input, {}, {}, stream_view, rmm::mr::get_current_device_resource()); + cudf::detail::sorted_order(*input, {}, {}, 
stream_view, rmm::mr::get_current_device_resource()); }); } diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp index 512554ff1bc..81eafa3044f 100644 --- a/cpp/benchmarks/stream_compaction/distinct.cpp +++ b/cpp/benchmarks/stream_compaction/distinct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,8 +29,6 @@ NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::ti template void nvbench_distinct(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; - cudf::size_type const num_rows = state.get_int64("NumRows"); data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution( @@ -61,8 +59,6 @@ NVBENCH_BENCH_TYPES(nvbench_distinct, NVBENCH_TYPE_AXES(data_type)) template void nvbench_distinct_list(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; - auto const size = state.get_int64("ColumnSize"); auto const dtype = cudf::type_to_id(); double const null_probability = state.get_float64("null_probability"); diff --git a/cpp/benchmarks/stream_compaction/unique.cpp b/cpp/benchmarks/stream_compaction/unique.cpp index 9a0f4c3b743..dafb9d506c7 100644 --- a/cpp/benchmarks/stream_compaction/unique.cpp +++ b/cpp/benchmarks/stream_compaction/unique.cpp @@ -54,8 +54,6 @@ void nvbench_unique(nvbench::state& state, nvbench::type_list(); double const null_probability = state.get_float64("null_probability"); diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp index de7382f5a75..d86c31480dd 100644 --- a/cpp/benchmarks/string/like.cpp +++ b/cpp/benchmarks/string/like.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -71,7 +71,6 @@ std::unique_ptr build_input_column(cudf::size_type n_rows, int32_t static void bench_like(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; auto const n_rows = static_cast(state.get_int64("num_rows")); auto const hit_rate = static_cast(state.get_int64("hit_rate")); diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp index 1844e93bc53..fe015b27f13 100644 --- a/cpp/benchmarks/string/repeat_strings.cpp +++ b/cpp/benchmarks/string/repeat_strings.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -79,42 +79,6 @@ static void BM_repeat_strings_column_times(benchmark::State& state) (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t))); } -static void BM_compute_output_strings_sizes(benchmark::State& state) -{ - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); - auto const table = create_data_table(2, n_rows, max_str_length); - auto const strings_col = cudf::strings_column_view(table->view().column(0)); - auto const repeat_times_col = table->view().column(1); - - for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::strings::repeat_strings_output_sizes(strings_col, repeat_times_col); - } - - state.SetBytesProcessed(state.iterations() * - (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t))); -} - -static void BM_repeat_strings_column_times_precomputed_sizes(benchmark::State& state) -{ - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); - auto const table = create_data_table(2, n_rows, max_str_length); - auto const strings_col = cudf::strings_column_view(table->view().column(0)); - auto const repeat_times_col = table->view().column(1); - [[maybe_unused]] auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strings_col, repeat_times_col); - - for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::strings::repeat_strings(strings_col, repeat_times_col, *sizes); - } - - state.SetBytesProcessed(state.iterations() * - (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t))); -} - static void generate_bench_args(benchmark::internal::Benchmark* b) { int const min_rows = 1 << 8; @@ -145,23 +109,5 @@ class RepeatStrings : public cudf::benchmark { ->UseManualTime() \ ->Unit(benchmark::kMillisecond); -#define COMPUTE_OUTPUT_STRINGS_SIZES_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(RepeatStrings, name) \ - (::benchmark::State & st) { BM_compute_output_strings_sizes(st); } \ - BENCHMARK_REGISTER_F(RepeatStrings, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -#define REPEAT_STRINGS_COLUMN_TIMES_PRECOMPUTED_SIZES_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(RepeatStrings, name) \ - (::benchmark::State & st) { BM_repeat_strings_column_times_precomputed_sizes(st); } \ - BENCHMARK_REGISTER_F(RepeatStrings, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(scalar_times) REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(column_times) -COMPUTE_OUTPUT_STRINGS_SIZES_BENCHMARK_DEFINE(compute_output_strings_sizes) -REPEAT_STRINGS_COLUMN_TIMES_PRECOMPUTED_SIZES_BENCHMARK_DEFINE(precomputed_sizes) diff --git a/cpp/benchmarks/string/reverse.cpp b/cpp/benchmarks/string/reverse.cpp index 7b08897079b..4c3846c79bb 100644 --- a/cpp/benchmarks/string/reverse.cpp +++ b/cpp/benchmarks/string/reverse.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
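The two benchmarks deleted above exercised `cudf::strings::repeat_strings_output_sizes` and the precomputed-sizes overload of `repeat_strings`; both are removed from `repeat_strings.hpp` later in this patch, with output sizes now computed inside libcudf. A minimal sketch of the surviving call, matching the signatures shown in the header diff below; the wrapper name is hypothetical and column construction is elided:

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/strings/repeat_strings.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include <memory>

// repeat_all is a hypothetical helper. There is no longer a separate
// repeat_strings_output_sizes step; sizes are computed internally.
std::unique_ptr<cudf::column> repeat_all(cudf::strings_column_view const& input,
                                         cudf::column_view const& repeat_times)
{
  return cudf::strings::repeat_strings(input, repeat_times);
}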
@@ -25,7 +25,6 @@ static void bench_reverse(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; auto const num_rows = static_cast(state.get_int64("num_rows")); auto const row_width = static_cast(state.get_int64("row_width")); diff --git a/cpp/cmake/thirdparty/get_nvbench.cmake b/cpp/cmake/thirdparty/get_nvbench.cmake new file mode 100644 index 00000000000..f0642145fa0 --- /dev/null +++ b/cpp/cmake/thirdparty/get_nvbench.cmake @@ -0,0 +1,28 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds nvbench and applies any needed patches. +function(find_and_configure_nvbench) + + include(${rapids-cmake-dir}/cpm/nvbench.cmake) + include(${rapids-cmake-dir}/cpm/package_override.cmake) + + set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches") + rapids_cpm_package_override("${cudf_patch_dir}/nvbench_override.json") + + rapids_cpm_nvbench() + +endfunction() + +find_and_configure_nvbench() diff --git a/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff b/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff new file mode 100644 index 00000000000..0487b0a1ac3 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff @@ -0,0 +1,27 @@ +diff --git a/nvbench/main.cuh b/nvbench/main.cuh +index 0ba82d7..7ab02c1 100644 +--- a/nvbench/main.cuh ++++ b/nvbench/main.cuh +@@ -54,6 +54,14 @@ + // clang-format on + #endif + ++#ifndef NVBENCH_ENVIRONMENT ++namespace nvbench { ++struct no_environment ++{}; ++} ++#define NVBENCH_ENVIRONMENT nvbench::no_environment ++#endif ++ + #define NVBENCH_MAIN_PARSE(argc, argv) \ + nvbench::option_parser parser; \ + parser.parse(argc, argv) +@@ -77,6 +85,7 @@ + printer.set_total_state_count(total_states); \ + \ + printer.set_completed_state_count(0); \ ++ NVBENCH_ENVIRONMENT(); \ + for (auto &bench_ptr : benchmarks) \ + { \ + bench_ptr->set_printer(printer); \ diff --git a/cpp/cmake/thirdparty/patches/nvbench_override.json b/cpp/cmake/thirdparty/patches/nvbench_override.json new file mode 100644 index 00000000000..7be868081b6 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/nvbench_override.json @@ -0,0 +1,19 @@ + +{ + "packages" : { + "nvbench" : { + "patches" : [ + { + "file" : "${current_json_dir}/nvbench_global_setup.diff", + "issue" : "Fix add support for global setup to initialize RMM in nvbench [https://github.com/NVIDIA/nvbench/pull/123]", + "fixed_in" : "" + }, + { + "file" : "nvbench/use_existing_fmt.diff", + "issue" : "Fix add support for using an existing fmt [https://github.com/NVIDIA/nvbench/pull/125]", + "fixed_in" : "" + } + ] + } + } +} diff --git a/cpp/include/cudf/detail/segmented_reduction.cuh b/cpp/include/cudf/detail/segmented_reduction.cuh index 9a49c1abe38..1c39d5eab1e 100644 --- a/cpp/include/cudf/detail/segmented_reduction.cuh +++ b/cpp/include/cudf/detail/segmented_reduction.cuh @@ 
-145,10 +145,10 @@ void segmented_reduce(InputIterator d_in, size_type* d_valid_counts, rmm::cuda_stream_view stream) { - using OutputType = typename thrust::iterator_value<OutputIterator>::type; - using IntermediateType = typename thrust::iterator_value<InputIterator>::type; - auto num_segments = static_cast<size_type>(std::distance(d_offset_begin, d_offset_end)); - auto const binary_op = op.get_binary_op(); + using OutputType = typename thrust::iterator_value<OutputIterator>::type; + using IntermediateType = typename thrust::iterator_value<InputIterator>::type; + auto num_segments = static_cast<size_type>(std::distance(d_offset_begin, d_offset_end)) - 1; + auto const binary_op = op.get_binary_op(); auto const initial_value = op.template get_identity<OutputType>(); rmm::device_uvector<IntermediateType> intermediate_result{static_cast<std::size_t>(num_segments),
diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp index 0e6ee2126d3..26fe5f95983 100644 --- a/cpp/include/cudf/strings/repeat_strings.hpp +++ b/cpp/include/cudf/strings/repeat_strings.hpp
@@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -32,15 +32,15 @@ namespace strings { */ /** - * @brief Repeat the given string scalar by a given number of times. + * @brief Repeat the given string scalar a given number of times * * An output string scalar is generated by repeating the input string by a number of times given by - * the @p `repeat_times` parameter. + * the `repeat_times` parameter. * * In special cases: - * - If @p `repeat_times` is not a positive value, an empty (valid) string scalar will be returned. + * - If `repeat_times` is not a positive value, an empty (valid) string scalar will be returned. * - An invalid input scalar will always result in an invalid output scalar regardless of the - * value of @p `repeat_times` parameter. + * value of `repeat_times` parameter. * * @code{.pseudo} * Example:
@@ -50,13 +50,13 @@ namespace strings { * @endcode * * @throw cudf::logic_error if the size of the output string scalar exceeds the maximum value that - * can be stored by the index type - * (i.e., @code input.size() * repeat_times > numeric_limits<size_type>::max() @endcode). + * can be stored by the index type: + * `input.size() * repeat_times > max of size_type` * - * @param input The scalar containing the string to repeat. - * @param repeat_times The number of times the input string is repeated. - * @param mr Device memory resource used to allocate the returned string scalar. - * @return New string scalar in which the input string is repeated. + * @param input The scalar containing the string to repeat + * @param repeat_times The number of times the input string is repeated + * @param mr Device memory resource used to allocate the returned string scalar + * @return New string scalar in which the input string is repeated */ std::unique_ptr<string_scalar> repeat_string( string_scalar const& input,
@@ -64,19 +64,16 @@ std::unique_ptr<string_scalar> repeat_string( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Repeat each string in the given strings column by a given number of times. + * @brief Repeat each string in the given strings column a given number of times * - * An output strings column is generated by repeating each string from the input strings column by a - * number of times given by the @p `repeat_times` parameter.
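Note: the `segmented_reduce` fix above accounts for the offsets range having one more entry than there are segments: N+1 offsets bound N segments, so `num_segments` must be `distance(d_offset_begin, d_offset_end) - 1`. A self-contained illustration of that invariant:

#include <cassert>
#include <vector>

int main()
{
  // Offsets {0, 3, 3, 7} describe 3 segments: [0,3), [3,3) and [3,7).
  std::vector<int> const offsets{0, 3, 3, 7};
  auto const num_segments = static_cast<int>(offsets.size()) - 1;
  assert(num_segments == 3);
  for (int i = 0; i < num_segments; ++i) {
    auto const begin = offsets[i];
    auto const end   = offsets[i + 1];  // segment i reduces elements in [begin, end)
    assert(begin <= end);
  }
  return 0;
}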
+ * An output strings column is generated by repeating each string from the input strings column by + * the number of times given by the `repeat_times` parameter. * * In special cases: - * - If @p `repeat_times` is not a positive number, a non-null input string will always result in + * - If `repeat_times` is not a positive number, a non-null input string will always result in * an empty output string. * - A null input string will always result in a null output string regardless of the value of the - * @p `repeat_times` parameter. - * - * The caller is responsible for checking the output column size will not exceed the maximum size of - * a strings column (number of total characters is less than the max size_type value). + * `repeat_times` parameter. * * @code{.pseudo} * Example: @@ -85,10 +82,10 @@ std::unique_ptr repeat_string( * out is ['aaaaaa', null, '', 'bbcbbcbbc'] * @endcode * - * @param input The column containing strings to repeat. - * @param repeat_times The number of times each input string is repeated. - * @param mr Device memory resource used to allocate the returned strings column. - * @return New column containing the repeated strings. + * @param input The column containing strings to repeat + * @param repeat_times The number of times each input string is repeated + * @param mr Device memory resource used to allocate the returned strings column + * @return New column containing the repeated strings */ std::unique_ptr repeat_strings( strings_column_view const& input, @@ -97,11 +94,10 @@ std::unique_ptr repeat_strings( /** * @brief Repeat each string in the given strings column by the numbers of times given in another - * numeric column. + * numeric column * * An output strings column is generated by repeating each of the input string by a number of times - * given by the corresponding row in a @p `repeat_times` numeric column. The computational time can - * be reduced if sizes of the output strings are known and provided. + * given by the corresponding row in a `repeat_times` numeric column. * * In special cases: * - Any null row (from either the input strings column or the `repeat_times` column) will always @@ -109,9 +105,6 @@ std::unique_ptr repeat_strings( * - If any value in the `repeat_times` column is not a positive number and its corresponding input * string is not null, the output string will be an empty string. * - * The caller is responsible for checking the output column size will not exceed the maximum size of - * a strings column (number of total characters is less than the max size_type value). - * * @code{.pseudo} * Example: * strs = ['aa', null, '', 'bbc-'] @@ -120,51 +113,16 @@ std::unique_ptr repeat_strings( * out is ['aa', null, '', 'bbc-bbc-bbc-bbc-'] * @endcode * - * @throw cudf::logic_error if the input `repeat_times` column has data type other than integer. + * @throw cudf::logic_error if the input `repeat_times` is not an integer type * @throw cudf::logic_error if the input columns have different sizes. * - * @param input The column containing strings to repeat. + * @param input The column containing strings to repeat * @param repeat_times The column containing numbers of times that the corresponding input strings - * are repeated. - * @param output_strings_sizes The optional column containing pre-computed sizes of the output - * strings. - * @param mr Device memory resource used to allocate the returned strings column. 
+ * are repeated + * @param mr Device memory resource used to allocate the returned strings column * @return New column containing the repeated strings. */ std::unique_ptr<column> repeat_strings( - strings_column_view const& input, - column_view const& repeat_times, - std::optional<column_view> output_strings_sizes = std::nullopt, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Compute sizes of the output strings if each string in the input strings column - * is repeated by the numbers of times given in another numeric column. - * - * The output column storing string output sizes is not nullable. These string sizes are - * also summed up and returned (in an `int64_t` value), which can be used to detect if the input - * strings column can be safely repeated without data corruption due to overflow in string indexing. - * - * @code{.pseudo} - * Example: - * strs = ['aa', null, '', 'bbc-'] - * repeat_times = [ 1, 2, 3, 4 ] - * [output_sizes, total_size] = repeat_strings_output_sizes(strs, repeat_times) - * out is [2, 0, 0, 16], and total_size = 18 - * @endcode - * - * @throw cudf::logic_error if the input `repeat_times` column has data type other than integer. - * @throw cudf::logic_error if the input columns have different sizes. - * - * @param input The column containing strings to repeat. - * @param repeat_times The column containing numbers of times that the corresponding input strings - * are repeated. - * @param mr Device memory resource used to allocate the returned strings column. - * @return A pair with the first item is an int32_t column containing sizes of the output strings, - * and the second item is an int64_t number containing the total sizes (in bytes) of the - * output strings column. - */ -std::pair<std::unique_ptr<column>, int64_t> repeat_strings_output_sizes( strings_column_view const& input, column_view const& repeat_times, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 0dc0f4e5315..f9ffbfcdf7b 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh
@@ -245,6 +245,16 @@ using optional_dremel_view = thrust::optional<detail::dremel_device_view const>; * second letter in both words is the first non-equal letter, and `a < b`, thus * `aac < abb`. * + * @note The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalElementComparator A relational comparator functor that compares individual values * rather than logical elements, defaults to `NaN` aware relational comparator that evaluates `NaN`
@@ -857,6 +867,16 @@ class self_comparator { * * `F(i,j)` returns true if and only if row `i` compares lexicographically less than row `j`.
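Note: the `@note`/`@tparam has_nested_columns` blocks added throughout this header describe a runtime-to-compile-time dispatch: callers query `cudf::detail::has_nested_columns(table)` once at runtime and then instantiate either the `<true>` or the `<false>` specialization. A minimal sketch of the idiom (names hypothetical, not cudf API):

#include <iostream>

// Comparator whose nested-type support is compiled in only when requested.
template <bool has_nested_columns>
struct row_comparator_sketch {
  bool operator()(int lhs, int rhs) const { return lhs == rhs; }
};

// Generic helper: one body, two instantiations.
template <typename F>
void dispatch(bool has_nested, F&& body)
{
  if (has_nested) {
    body(row_comparator_sketch<true>{});   // nested-type overloads compiled
  } else {
    body(row_comparator_sketch<false>{});  // primitive-only, leaner kernel
  }
}

int main()
{
  dispatch(false, [](auto comp) { std::cout << comp(1, 1) << '\n'; });
  return 0;
}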
* + * @note The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalElementComparator A relational comparator functor that compares individual * values rather than logical elements, defaults to `NaN` aware relational comparator that
@@ -1009,6 +1029,16 @@ class two_table_comparator { * only if row `i` of the right table compares lexicographically less than row * `j` of the left table. * + * @note The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalElementComparator A relational comparator functor that compares individual * values rather than logical elements, defaults to `NaN` aware relational comparator that
@@ -1131,11 +1161,22 @@ struct nan_equal_physical_equality_comparator { * returns false, representing unequal rows. If the rows are compared without mismatched elements, * the rows are equal. * + * @note The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual values * rather than logical elements, defaults to a comparator for which `NaN == NaN`. */ -template <typename Nullate, - typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator> +template <bool has_nested_columns, + typename Nullate, + typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator> class device_row_comparator { friend class self_comparator; ///< Allow self_comparator to access private members
@@ -1246,14 +1287,14 @@ class device_row_comparator { template <typename Element, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<Element>() and - not cudf::is_nested<Element>()), + (not has_nested_columns or not cudf::is_nested<Element>())), typename... Args> __device__ bool operator()(Args...) { CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); } - template <typename Element, CUDF_ENABLE_IF(cudf::is_nested<Element>())> + template <typename Element, CUDF_ENABLE_IF(has_nested_columns and cudf::is_nested<Element>())> __device__ bool operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept {
@@ -1437,6 +1478,16 @@ class self_comparator { * * `F(i,j)` returns true if and only if row `i` compares equal to row `j`. * + * @note The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a comparator for which `NaN == NaN`.
@@ -1445,13 +1496,15 @@ class self_comparator { * @param comparator Physical element equality comparison functor. * @return A binary callable object */ - template <typename Nullate, - typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator> + template <bool has_nested_columns, + typename Nullate, + typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator> auto equal_to(Nullate nullate = {}, null_equality nulls_are_equal = null_equality::EQUAL, PhysicalEqualityComparator comparator = {}) const noexcept { - return device_row_comparator{nullate, *d_t, *d_t, nulls_are_equal, comparator}; + return device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>{ + nullate, *d_t, *d_t, nulls_are_equal, comparator}; } private:
@@ -1539,6 +1592,16 @@ class two_table_comparator { * Similarly, `F(rhs_index_type i, lhs_index_type j)` returns true if and only if row `i` of the * right table compares equal to row `j` of the left table. * + * @note The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a `NaN == NaN` equality comparator.
@@ -1547,14 +1610,16 @@ class two_table_comparator { * @param comparator Physical element equality comparison functor. * @return A binary callable object */ - template <typename Nullate, - typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator> + template <bool has_nested_columns, + typename Nullate, + typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator> auto equal_to(Nullate nullate = {}, null_equality nulls_are_equal = null_equality::EQUAL, PhysicalEqualityComparator comparator = {}) const noexcept { return strong_index_comparator_adapter{ - device_row_comparator(nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)}; + device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>( + nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)}; } private:
diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index 2fcf1ce4e32..d167f0fe3c5 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh
@@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -106,6 +106,36 @@ void apply_struct_binary_op(mutable_column_view& out, } } +template <typename OptionalIteratorType, typename DeviceComparatorType> +struct struct_equality_functor { + struct_equality_functor(OptionalIteratorType optional_iter, + DeviceComparatorType device_comparator, + bool is_lhs_scalar, + bool is_rhs_scalar, + bool preserve_output) + : _optional_iter(optional_iter), + _device_comparator(device_comparator), + _is_lhs_scalar(is_lhs_scalar), + _is_rhs_scalar(is_rhs_scalar), + _preserve_output(preserve_output) + { + } + + auto __device__ operator()(size_type i) const noexcept + { + auto const lhs = cudf::experimental::row::lhs_index_type{_is_lhs_scalar ? 0 : i}; + auto const rhs = cudf::experimental::row::rhs_index_type{_is_rhs_scalar ? 0 : i}; + return _optional_iter[i].has_value() and (_device_comparator(lhs, rhs) == _preserve_output); + } + + private: + OptionalIteratorType _optional_iter; + DeviceComparatorType _device_comparator; + bool _is_lhs_scalar; + bool _is_rhs_scalar; + bool _preserve_output; +}; + template <typename PhysicalEqualityComparator> void apply_struct_equality_op(mutable_column_view& out,
@@ -125,26 +155,37 @@ void apply_struct_equality_op(mutable_column_view& out, auto trhs = table_view{{rhs}}; auto table_comparator = cudf::experimental::row::equality::two_table_comparator{tlhs, trhs, stream}; - auto device_comparator = - table_comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, - null_equality::EQUAL, - comparator); auto outd = column_device_view::create(out, stream); auto optional_iter = cudf::detail::make_optional_iterator<bool>(*outd, nullate::DYNAMIC{out.has_nulls()}); - thrust::tabulate(rmm::exec_policy(stream), - out.begin<bool>(), - out.end<bool>(), - [optional_iter, - is_lhs_scalar, - is_rhs_scalar, - preserve_output = (op != binary_operator::NOT_EQUAL), - device_comparator] __device__(size_type i) { - auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; - auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; - return optional_iter[i].has_value() and - (device_comparator(lhs, rhs) == preserve_output); - }); + + auto const comparator_helper = [&](auto const device_comparator) { + thrust::tabulate(rmm::exec_policy(stream), + out.begin<bool>(), + out.end<bool>(), + struct_equality_functor<decltype(optional_iter), decltype(device_comparator)>( + optional_iter, + device_comparator, + is_lhs_scalar, + is_rhs_scalar, + op != binary_operator::NOT_EQUAL)); + }; + + if (cudf::detail::has_nested_columns(tlhs) or cudf::detail::has_nested_columns(trhs)) { + auto device_comparator = table_comparator.equal_to<true>( + nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, + null_equality::EQUAL, + comparator); + + comparator_helper(device_comparator); + } else { + auto device_comparator = table_comparator.equal_to<false>( + nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, + null_equality::EQUAL, + comparator); + + comparator_helper(device_comparator); + } }
} // namespace cudf::binops::compiled::detail
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 50173d6a987..72ac6255549 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu
@@ -68,12 +68,13 @@ namespace { // TODO: replace it with `cuco::static_map` // https://github.com/rapidsai/cudf/issues/10401 -using map_type = concurrent_unordered_map< - cudf::size_type, - cudf::size_type, - cudf::experimental::row::hash::device_row_hasher<cudf::detail::default_hash, cudf::nullate::DYNAMIC>, - cudf::experimental::row::equality::device_row_comparator<cudf::nullate::DYNAMIC>>; +template <typename ComparatorType> +using map_type = + concurrent_unordered_map<cudf::size_type, + cudf::size_type, + cudf::experimental::row::hash::device_row_hasher<cudf::detail::default_hash, cudf::nullate::DYNAMIC>, + ComparatorType>; /** * @brief List of aggregation operations that can be computed with a hash-based *
@@ -189,13 +190,14 @@ class groupby_simple_aggregations_collector final } }; +template <typename ComparatorType> class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer { column_view col; data_type result_type; cudf::detail::result_cache* sparse_results; cudf::detail::result_cache* dense_results; device_span<size_type const> gather_map; - map_type const& map; + map_type<ComparatorType> const& map; bitmask_type const* __restrict__ row_bitmask; rmm::cuda_stream_view stream; rmm::mr::device_memory_resource* mr;
@@ -207,7 +209,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span<size_type const> gather_map, - map_type const& map, + map_type<ComparatorType> const& map, bitmask_type const* row_bitmask, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
@@ -336,7 +338,7 @@ class hash_compound_agg_finalizer final rmm::exec_policy(stream), thrust::make_counting_iterator(0), col.size(), - ::cudf::detail::var_hash_functor<map_type>{ + ::cudf::detail::var_hash_functor<map_type<ComparatorType>>{ map, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof}); sparse_results->add_result(col, agg, std::move(var_result)); dense_results->add_result(col, agg, to_dense_agg_result(agg));
@@ -394,12 +396,13 @@ flatten_single_pass_aggs(host_span<aggregation_request const> requests) * * @see groupby_null_templated() */ +template <typename ComparatorType> void sparse_to_dense_results(table_view const& keys, host_span<aggregation_request const> requests, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span<size_type const> gather_map, - map_type const& map, + map_type<ComparatorType> const& map, bool keys_have_nulls, null_policy include_null_keys, rmm::cuda_stream_view stream,
@@ -461,10 +464,11 @@ auto create_sparse_results_table(table_view const& flattened_values, * @brief Computes all aggregations from `requests` that require a single pass * over the data and 
stores the results in `sparse_results` */ +template <typename ComparatorType> void compute_single_pass_aggs(table_view const& keys, host_span<aggregation_request const> requests, cudf::detail::result_cache* sparse_results, - map_type& map, + map_type<ComparatorType>& map, bool keys_have_nulls, null_policy include_null_keys, rmm::cuda_stream_view stream)
@@ -484,16 +488,16 @@ void compute_single_pass_aggs(table_view const& keys, auto row_bitmask = skip_key_rows_with_nulls ? cudf::detail::bitmask_and(keys, stream).first : rmm::device_buffer{}; - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - keys.num_rows(), - hash::compute_single_pass_aggs_fn<map_type>{map, - *d_values, - *d_sparse_table, - d_aggs.data(), - static_cast<bitmask_type*>(row_bitmask.data()), - skip_key_rows_with_nulls}); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + keys.num_rows(), + hash::compute_single_pass_aggs_fn<map_type<ComparatorType>>{ + map, + *d_values, + *d_sparse_table, + d_aggs.data(), + static_cast<bitmask_type*>(row_bitmask.data()), + skip_key_rows_with_nulls}); // Add results back to sparse_results cache auto sparse_result_cols = sparse_table.release(); for (size_t i = 0; i < aggs.size(); i++) {
@@ -507,7 +511,8 @@ void compute_single_pass_aggs(table_view const& keys, * @brief Computes and returns a device vector containing all populated keys in * `map`. */ -rmm::device_uvector<size_type> extract_populated_keys(map_type const& map, +template <typename ComparatorType> +rmm::device_uvector<size_type> extract_populated_keys(map_type<ComparatorType> const& map, size_type num_keys, rmm::cuda_stream_view stream) {
@@ -566,52 +571,60 @@ std::unique_ptr<table> groupby(table_view const& keys, auto preprocessed_keys = cudf::experimental::row::hash::preprocessed_table::create(keys, stream); auto const comparator = cudf::experimental::row::equality::self_comparator{preprocessed_keys}; auto const row_hash = cudf::experimental::row::hash::row_hasher{std::move(preprocessed_keys)}; - auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); auto const d_row_hash = row_hash.device_hasher(has_null); size_type constexpr unused_key{std::numeric_limits<size_type>::max()}; size_type constexpr unused_value{std::numeric_limits<size_type>::max()}; - using allocator_type = typename map_type::allocator_type; - - auto map = map_type::create(compute_hash_table_size(num_keys), stream, unused_key, unused_value, d_row_hash, d_key_equal, allocator_type()); - // Cache of sparse results where the location of aggregate value in each // column is indexed by the hash map cudf::detail::result_cache sparse_results(requests.size()); - // Compute all single pass aggs first - compute_single_pass_aggs( keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); - - // Extract the populated indices from the hash map and create a gather map. - // Gathering using this map from sparse results will give dense results. - auto gather_map = extract_populated_keys(*map, keys.num_rows(), stream); - - // Compact all results from sparse_results and insert into cache - sparse_to_dense_results(keys, - requests, - &sparse_results, - cache, - gather_map, - *map, - keys_have_nulls, - include_null_keys, - stream, - mr); - - return cudf::detail::gather(keys, - gather_map, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); + auto const comparator_helper = [&](auto const d_key_equal) { + using allocator_type = typename map_type<decltype(d_key_equal)>::allocator_type; + + auto const map = map_type<decltype(d_key_equal)>::create(compute_hash_table_size(num_keys), + stream, + unused_key, + unused_value, + d_row_hash, + d_key_equal, + allocator_type()); + // Compute all single pass aggs first + compute_single_pass_aggs( + keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); + + // Extract the populated indices from the hash map and create a gather map. + // Gathering using this map from sparse results will give dense results. + auto gather_map = extract_populated_keys(*map, keys.num_rows(), stream); + + // Compact all results from sparse_results and insert into cache + sparse_to_dense_results(keys, + requests, + &sparse_results, + cache, + gather_map, + *map, + keys_have_nulls, + include_null_keys, + stream, + mr); + + return cudf::detail::gather(keys, + gather_map, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + }; + + if (cudf::detail::has_nested_columns(keys)) { + auto const d_key_equal = comparator.equal_to<true>(has_null, null_keys_are_equal); + return comparator_helper(d_key_equal); + } else { + auto const d_key_equal = comparator.equal_to<false>(has_null, null_keys_are_equal); + return comparator_helper(d_key_equal); + } }
} // namespace
diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index c411e654913..cf81253483e 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu
@@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
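Note: in the `groupby()` hunks above, the two branches look identical apart from `equal_to<true>`/`equal_to<false>`, but they produce `d_key_equal` values of different types, and therefore different `map_type<decltype(d_key_equal)>` instantiations; the generic `comparator_helper` lambda is what keeps the body from being written twice. A stripped-down sketch of that shape (names hypothetical):

#include <cstdio>

template <typename KeyEqual>
struct hash_map_sketch {
  KeyEqual eq;                        // map type depends on the comparator type
  bool same(int a, int b) const { return eq(a, b); }
};

template <typename KeyEqual>
void build_and_use(KeyEqual eq)       // stands in for comparator_helper
{
  hash_map_sketch<KeyEqual> map{eq};
  std::printf("%d\n", map.same(2, 2));
}

int main()
{
  bool const has_nested = false;      // stands in for has_nested_columns(keys)
  if (has_nested) {
    build_and_use([](int a, int b) { return a == b; });
  } else {
    build_and_use([](int a, int b) { return a == b; });  // a distinct lambda type
  }
  return 0;
}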
@@ -33,10 +33,10 @@ namespace groupby { namespace detail { namespace { -template +template struct is_unique_iterator_fn { using comparator_type = - typename cudf::experimental::row::equality::device_row_comparator; + typename cudf::experimental::row::equality::device_row_comparator; Nullate nulls; column_device_view const v; @@ -91,24 +91,35 @@ std::unique_ptr group_nunique(column_view const& values, auto const values_view = table_view{{values}}; auto const comparator = cudf::experimental::row::equality::self_comparator{values_view, stream}; - auto const d_equal = comparator.equal_to( - cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); auto const d_values_view = column_device_view::create(values, stream); - auto const is_unique_iterator = - thrust::make_transform_iterator(thrust::counting_iterator(0), - is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, - *d_values_view, - d_equal, - null_handling, - group_offsets.data(), - group_labels.data()}); - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - is_unique_iterator, - thrust::make_discard_iterator(), - result->mutable_view().begin()); + + auto const comparator_helper = [&](auto const d_equal) { + auto const is_unique_iterator = + thrust::make_transform_iterator(thrust::counting_iterator(0), + is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, + *d_values_view, + d_equal, + null_handling, + group_offsets.data(), + group_labels.data()}); + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + is_unique_iterator, + thrust::make_discard_iterator(), + result->mutable_view().begin()); + }; + + if (cudf::detail::has_nested_columns(values_view)) { + auto const d_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); + comparator_helper(d_equal); + } else { + auto const d_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); + comparator_helper(d_equal); + } return result; } diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 149f026ffe6..479ce166724 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,6 +41,38 @@ namespace groupby { namespace detail { namespace { +template +struct unique_identifier { + unique_identifier(size_type const* labels, + size_type const* offsets, + permuted_equal_t permuted_equal, + value_resolver resolver) + : _labels(labels), _offsets(offsets), _permuted_equal(permuted_equal), _resolver(resolver) + { + } + + auto __device__ operator()(size_type row_index) const noexcept + { + auto const group_start = _offsets[_labels[row_index]]; + if constexpr (forward) { + // First value of equal values is 1. + return _resolver(row_index == group_start || !_permuted_equal(row_index, row_index - 1), + row_index - group_start); + } else { + auto const group_end = _offsets[_labels[row_index] + 1]; + // Last value of equal values is 1. 
+ return _resolver(row_index + 1 == group_end || !_permuted_equal(row_index, row_index + 1), + row_index - group_start); + } + } + + private: + size_type const* _labels; + size_type const* _offsets; + permuted_equal_t _permuted_equal; + value_resolver _resolver; +}; + /** * @brief generate grouped row ranks or dense ranks using a row comparison then scan the results * @@ -71,36 +103,34 @@ std::unique_ptr rank_generator(column_view const& grouped_values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + auto const grouped_values_view = table_view{{grouped_values}}; auto const comparator = - cudf::experimental::row::equality::self_comparator{table_view{{grouped_values}}, stream}; - auto const d_equal = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); - auto const permuted_equal = - permuted_row_equality_comparator(d_equal, value_order.begin()); + cudf::experimental::row::equality::self_comparator{grouped_values_view, stream}; auto ranks = make_fixed_width_column( data_type{type_to_id()}, grouped_values.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - auto unique_identifier = [labels = group_labels.begin(), - offsets = group_offsets.begin(), - permuted_equal, - resolver] __device__(size_type row_index) { - auto const group_start = offsets[labels[row_index]]; - if constexpr (forward) { - // First value of equal values is 1. - return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), - row_index - group_start); - } else { - auto const group_end = offsets[labels[row_index] + 1]; - // Last value of equal values is 1. - return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), - row_index - group_start); - } + auto const comparator_helper = [&](auto const d_equal) { + auto const permuted_equal = + permuted_row_equality_comparator(d_equal, value_order.begin()); + + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + unique_identifier( + group_labels.begin(), group_offsets.begin(), permuted_equal, resolver)); }; - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - unique_identifier); + + if (cudf::detail::has_nested_columns(grouped_values_view)) { + auto const d_equal = + comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); + comparator_helper(d_equal); + } else { + auto const d_equal = + comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); + comparator_helper(d_equal); + } auto [group_labels_begin, mutable_rank_begin] = [&]() { if constexpr (forward) { diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index 3be090159a7..b53955472b1 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
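Note: `unique_identifier` above replaces an extended device lambda with a named functor so that the permuted comparator's type can appear as a template parameter; the same conversion recurs in rank_scan.cu, rank.cu and struct_binary_ops.cuh, with captured variables becoming constructor-initialized members. A minimal host-side sketch of the conversion (hypothetical names):

#include <iostream>

// Before: auto fn = [comp](int i) { return i == 0 || !comp(i, i - 1); };
// After: the capture becomes a stored member and the comparator type a parameter.
template <typename Comparator>
struct adjacent_unequal_fn {
  Comparator comp;
  bool operator()(int i) const { return i == 0 || !comp(i, i - 1); }
};

int main()
{
  auto const comp = [](int a, int b) { return a / 3 == b / 3; };  // toy row equality
  adjacent_unequal_fn<decltype(comp)> fn{comp};
  for (int i = 0; i < 6; ++i) std::cout << fn(i);  // prints 100100: starts of runs
  std::cout << '\n';
  return 0;
}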
@@ -149,17 +149,28 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_offsets( _group_offsets = std::make_unique(num_keys(stream) + 1, stream); - auto const comparator = cudf::experimental::row::equality::self_comparator{_keys, stream}; - auto const d_key_equal = comparator.equal_to( - cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL); + auto const comparator = cudf::experimental::row::equality::self_comparator{_keys, stream}; + auto const sorted_order = key_sort_order(stream).data(); decltype(_group_offsets->begin()) result_end; - result_end = thrust::unique_copy(rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(num_keys(stream)), - _group_offsets->begin(), - permuted_row_equality_comparator(d_key_equal, sorted_order)); + if (cudf::detail::has_nested_columns(_keys)) { + auto const d_key_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL); + result_end = thrust::unique_copy(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(num_keys(stream)), + _group_offsets->begin(), + permuted_row_equality_comparator(d_key_equal, sorted_order)); + } else { + auto const d_key_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL); + result_end = thrust::unique_copy(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(num_keys(stream)), + _group_offsets->begin(), + permuted_row_equality_comparator(d_key_equal, sorted_order)); + } size_type num_groups = thrust::distance(_group_offsets->begin(), result_end); _group_offsets->set_element(num_groups, num_keys(stream), stream); diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index 49d035e6cb9..9849629015d 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -581,14 +581,13 @@ std::unique_ptr make_column_names_column(host_span 0, "Unexpected empty strings column."); - string_scalar d_line_terminator{line_terminator}; auto p_str_col_w_nl = cudf::strings::detail::join_strings(str_column_view, d_line_terminator, string_scalar("", false), @@ -609,15 +608,6 @@ void write_chunked(data_sink* out_sink, out_sink->host_write(h_bytes.data(), total_num_bytes); } - - // Needs newline at the end, to separate from next chunk - if (options.is_enabled_lines()) { - if (out_sink->is_device_write_preferred(d_line_terminator.size())) { - out_sink->device_write(d_line_terminator.data(), d_line_terminator.size(), stream); - } else { - out_sink->host_write(line_terminator.data(), line_terminator.size()); - } - } } void write_json(data_sink* out_sink, @@ -697,7 +687,16 @@ void write_json(data_sink* out_sink, // struct converter for the table auto str_concat_col = converter(sub_view.begin(), sub_view.end(), user_column_names); - write_chunked(out_sink, str_concat_col->view(), line_terminator, options, stream, mr); + write_chunked(out_sink, str_concat_col->view(), d_line_terminator, options, stream, mr); + + // Needs line_terminator at the end, to separate from next chunk + if (&sub_view != &vector_views.back() or options.is_enabled_lines()) { + if (out_sink->is_device_write_preferred(d_line_terminator.size())) { + out_sink->device_write(d_line_terminator.data(), d_line_terminator.size(), stream); + } else { + out_sink->host_write(line_terminator.data(), line_terminator.size()); + } + } } } else { if (options.is_enabled_lines()) { diff --git a/cpp/src/io/parquet/page_data.cu 
b/cpp/src/io/parquet/page_data.cu index 23d130e1585..ee115e7432a 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -104,20 +104,41 @@ struct page_state_s { * specified row bounds * * @param s The page to be checked - * @param min_row The starting row index + * @param start_row The starting row index * @param num_rows The number of rows * * @return True if the page spans the beginning or the end of the row bounds */ -inline __device__ bool is_bounds_page(page_state_s* const s, size_t min_row, size_t num_rows) +inline __device__ bool is_bounds_page(page_state_s* const s, size_t start_row, size_t num_rows) { size_t const page_begin = s->col.start_row + s->page.chunk_row; size_t const page_end = page_begin + s->page.num_rows; - size_t const begin = min_row; - size_t const end = min_row + num_rows; + size_t const begin = start_row; + size_t const end = start_row + num_rows; + return ((page_begin <= begin && page_end >= begin) || (page_begin <= end && page_end >= end)); } +/** + * @brief Returns whether or not a page is completely contained within the specified + * row bounds + * + * @param s The page to be checked + * @param start_row The starting row index + * @param num_rows The number of rows + * + * @return True if the page is completely contained within the row bounds + */ +inline __device__ bool is_page_contained(page_state_s* const s, size_t start_row, size_t num_rows) +{ + size_t const page_begin = s->col.start_row + s->page.chunk_row; + size_t const page_end = page_begin + s->page.num_rows; + size_t const begin = start_row; + size_t const end = start_row + num_rows; + + return page_begin >= begin && page_end <= end; +} + /** * @brief Read a 32-bit varint integer * @@ -1728,10 +1749,11 @@ __global__ void __launch_bounds__(block_size) auto const thread_depth = depth + t; if (thread_depth < s->page.num_output_nesting_levels) { // if we are not a bounding page (as checked above) then we are either - // returning 0 rows from the page (completely outside the bounds) or all - // rows in the page (completely within the bounds) + // returning all rows/values from this page, or 0 of them pp->nesting[thread_depth].batch_size = - s->num_rows == 0 ? 0 : pp->nesting[thread_depth].size; + (s->num_rows == 0 && !is_page_contained(s, min_row, num_rows)) + ? 0 + : pp->nesting[thread_depth].size; } depth += blockDim.x; } @@ -1838,7 +1860,19 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; // if we have no work to do (eg, in a skip_rows/num_rows case) in this page. - if (s->num_rows == 0 && !(has_repetition && is_bounds_page(s, min_row, num_rows))) { return; } + // + // corner case: in the case of lists, we can have pages that contain "0" rows if the current row + // starts before this page and ends after this page: + // P0 P1 P2 + // |---------|---------|----------| + // ^------------------^ + // row start row end + // P1 will contain 0 rows + // + if (s->num_rows == 0 && !(has_repetition && (is_bounds_page(s, min_row, num_rows) || + is_page_contained(s, min_row, num_rows)))) { + return; + } if (s->dict_base) { out_thread0 = (s->dict_bits > 0) ? 64 : 32; diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 0142e736fd0..05fe82d1713 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
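Note: `is_page_contained` above complements `is_bounds_page`: with lists, a page can hold zero rows of its own yet sit wholly inside the requested [start_row, start_row + num_rows) window, and such pages must still be decoded. A 1-D interval sketch of the two predicates:

#include <cstdio>

struct span { long begin; long end; };  // half-open [begin, end)

// Page touches the start or the end boundary of the row window.
bool is_bounds(span page, span rows)
{
  return (page.begin <= rows.begin && page.end >= rows.begin) ||
         (page.begin <= rows.end && page.end >= rows.end);
}

// Page lies entirely inside the row window.
bool is_contained(span page, span rows)
{
  return page.begin >= rows.begin && page.end <= rows.end;
}

int main()
{
  span const rows{10, 50};
  std::printf("%d %d\n", is_bounds({5, 15}, rows), is_contained({5, 15}, rows));    // 1 0
  std::printf("%d %d\n", is_bounds({20, 30}, rows), is_contained({20, 30}, rows));  // 0 1
  return 0;
}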
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -267,7 +267,7 @@ void index_of_nested_types(InputIterator input_it, auto const has_nulls = has_nested_nulls(child_tview) || has_nested_nulls(keys_tview); auto const comparator = cudf::experimental::row::equality::two_table_comparator(child_tview, keys_tview, stream); - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + auto const d_comp = comparator.equal_to<true>(nullate::DYNAMIC{has_nulls}); auto const do_search = [=](auto const key_validity_iter) { thrust::transform(
diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index c6909bfd601..538763099d3 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu
@@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -32,6 +32,23 @@ namespace cudf { namespace detail { namespace { +template <typename device_comparator_type, typename value_resolver> +struct rank_equality_functor { + rank_equality_functor(device_comparator_type comparator, value_resolver resolver) + : _comparator(comparator), _resolver(resolver) + { + } + + auto __device__ operator()(size_type row_index) const noexcept + { + return _resolver(row_index == 0 || !_comparator(row_index, row_index - 1), row_index); + } + + private: + device_comparator_type _comparator; + value_resolver _resolver; +}; + /** * @brief generate row ranks or dense ranks using a row comparison then scan the results *
@@ -51,20 +68,30 @@ std::unique_ptr<column> rank_generator(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto comp = cudf::experimental::row::equality::self_comparator(table_view{{order_by}}, stream); - auto const device_comparator = - comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + auto const order_by_tview = table_view{{order_by}}; + auto comp = cudf::experimental::row::equality::self_comparator(order_by_tview, stream); + auto ranks = make_fixed_width_column( data_type{type_to_id<size_type>()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin<size_type>(), - mutable_ranks.end<size_type>(), - [comparator = device_comparator, resolver] __device__(size_type row_index) { - return resolver(row_index == 0 || !comparator(row_index, row_index - 1), - row_index); - }); + auto const comparator_helper = [&](auto const device_comparator) { + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin<size_type>(), + mutable_ranks.end<size_type>(), + rank_equality_functor<decltype(device_comparator), value_resolver>( + device_comparator, resolver)); + }; + + if (cudf::detail::has_nested_columns(order_by_tview)) { + auto const device_comparator = + comp.equal_to<true>(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + comparator_helper(device_comparator); + } else { + auto const device_comparator = + comp.equal_to<false>(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + comparator_helper(device_comparator); + } thrust::inclusive_scan(rmm::exec_policy(stream), mutable_ranks.begin<size_type>(),
diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index 76ed864a92d..5fe7b91e28a 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh
@@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -115,37 +115,12 @@ std::unique_ptr fixed_point_reduction( { using Type = device_storage_type_t; - auto dcol = cudf::column_device_view::create(col, stream); - auto simple_op = Op{}; - - // Cast initial value - std::optional const initial_value = [&] { - if (init.has_value() && init.value().get().is_valid()) { - using ScalarType = cudf::scalar_type_t; - return std::optional( - static_cast(&init.value().get())->value(stream)); - } else { - return std::optional(std::nullopt); - } - }(); - - auto result = [&] { - if (col.has_nulls()) { - auto f = simple_op.template get_null_replacing_element_transformer(); - auto it = thrust::make_transform_iterator(dcol->pair_begin(), f); - return cudf::reduction::detail::reduce(it, col.size(), simple_op, initial_value, stream, mr); - } else { - auto f = simple_op.template get_element_transformer(); - auto it = thrust::make_transform_iterator(dcol->begin(), f); - return cudf::reduction::detail::reduce(it, col.size(), simple_op, initial_value, stream, mr); - } - }(); + auto result = simple_reduction(col, init, stream, mr); auto const scale = [&] { if (std::is_same_v) { auto const valid_count = static_cast(col.size() - col.null_count()); - return numeric::scale_type{col.type().scale() * - (valid_count + (initial_value.has_value() ? 1 : 0))}; + return numeric::scale_type{col.type().scale() * (valid_count + (init.has_value() ? 1 : 0))}; } else if (std::is_same_v) { return numeric::scale_type{col.type().scale() * 2}; } diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index 8c500e1e757..093a1f8f1ed 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -99,7 +99,6 @@ struct contains_scalar_dispatch { auto const comparator = cudf::experimental::row::equality::two_table_comparator(haystack_tv, needle_tv, stream); - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); auto const begin = cudf::experimental::row::lhs_iterator(0); auto const end = begin + haystack.size(); @@ -108,6 +107,7 @@ struct contains_scalar_dispatch { auto const check_nulls = haystack.has_nulls(); auto const haystack_cdv_ptr = column_device_view::create(haystack, stream); + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); return thrust::count_if( rmm::exec_policy(stream), begin, diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 639dc503ce4..c1cc4659a19 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -204,27 +204,45 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; - // Insert only rows that do not have any null at any level. 
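Note: the hunk below moves this comment past the new `insert_map` helper; the `insert_if` call it describes uses a counting-iterator stencil plus a `row_is_valid` predicate so that rows containing nulls at any nesting level never enter the hash map. A host-side sketch of that filtered-insert idea (hypothetical names, not the cuco API):

#include <cstddef>
#include <unordered_set>
#include <vector>

// Insert key i only where the stencil says the row is valid.
std::unordered_set<int> insert_if_valid(std::vector<int> const& keys,
                                        std::vector<bool> const& row_is_valid)
{
  std::unordered_set<int> set;
  for (std::size_t i = 0; i < keys.size(); ++i) {
    if (row_is_valid[i]) { set.insert(keys[i]); }  // rows with nulls are skipped
  }
  return set;
}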
auto const insert_map = [&](auto const value_comp) { - auto const d_eqcomp = strong_index_comparator_adapter{ - comparator.equal_to(nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert_if(haystack_it, - haystack_it + haystack.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - d_hasher, - d_eqcomp, - stream.value()); + if (cudf::detail::has_nested_columns(haystack)) { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert_if(haystack_it, + haystack_it + haystack.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + d_hasher, + d_eqcomp, + stream.value()); + } else { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert_if(haystack_it, + haystack_it + haystack.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + d_hasher, + d_eqcomp, + stream.value()); + } }; + // Insert only rows that do not have any null at any level. dispatch_nan_comparator(compare_nans, insert_map); - } else { // haystack_doesn't_have_nulls || compare_nulls == null_equality::EQUAL auto const insert_map = [&](auto const value_comp) { - auto const d_eqcomp = strong_index_comparator_adapter{ - comparator.equal_to(nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert( - haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); + if (cudf::detail::has_nested_columns(haystack)) { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert( + haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); + } else { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert( + haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); + } }; dispatch_nan_comparator(compare_nans, insert_map); @@ -247,14 +265,25 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack cudf::experimental::row::equality::two_table_comparator(haystack, needles, stream); auto const check_contains = [&](auto const value_comp) { - auto const d_eqcomp = - comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); - map.contains(needles_it, - needles_it + needles.num_rows(), - contained.begin(), - d_hasher, - d_eqcomp, - stream.value()); + if (cudf::detail::has_nested_columns(haystack) or cudf::detail::has_nested_columns(needles)) { + auto const d_eqcomp = + comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); + map.contains(needles_it, + needles_it + needles.num_rows(), + contained.begin(), + d_hasher, + d_eqcomp, + stream.value()); + } else { + auto const d_eqcomp = + comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); + map.contains(needles_it, + needles_it + needles.num_rows(), + contained.begin(), + d_hasher, + d_eqcomp, + stream.value()); + } }; dispatch_nan_comparator(compare_nans, check_contains); diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index 99e99704c10..fd65e38d467 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -47,22 +47,23 @@ namespace cudf { namespace detail { namespace { -// Functor to identify unique elements in a sorted order table/column -template -struct unique_comparator { - unique_comparator(table_device_view device_table, Iterator const sorted_order, bool has_nulls) - : comparator(nullate::DYNAMIC{has_nulls}, device_table, device_table, null_equality::EQUAL), - permute(sorted_order) + +template +struct unique_functor { + unique_functor(PermutationIteratorType permute, DeviceComparatorType device_comparator) + : _permute(permute), _device_comparator(device_comparator) { } - __device__ ReturnType operator()(size_type index) const noexcept + + auto __device__ operator()(size_type index) const noexcept { - return index == 0 || not comparator(permute[index], permute[index - 1]); - }; + return static_cast(index == 0 || + not _device_comparator(_permute[index], _permute[index - 1])); + } private: - row_equality_comparator comparator; - Iterator const permute; + PermutationIteratorType _permute; + DeviceComparatorType _device_comparator; }; // Assign rank from 1 to n unique values. Equal values get same rank value. @@ -70,17 +71,39 @@ rmm::device_uvector sorted_dense_rank(column_view input_col, column_view sorted_order_view, rmm::cuda_stream_view stream) { - auto device_table = table_device_view::create(table_view{{input_col}}, stream); + auto const t_input = table_view{{input_col}}; + auto const comparator = cudf::experimental::row::equality::self_comparator{t_input, stream}; + + auto const sorted_index_order = thrust::make_permutation_iterator( + sorted_order_view.begin(), thrust::make_counting_iterator(0)); + auto const input_size = input_col.size(); rmm::device_uvector dense_rank_sorted(input_size, stream); - auto sorted_index_order = thrust::make_permutation_iterator( - sorted_order_view.begin(), thrust::make_counting_iterator(0)); - auto conv = unique_comparator( - *device_table, sorted_index_order, input_col.has_nulls()); - auto unique_it = cudf::detail::make_counting_transform_iterator(0, conv); - thrust::inclusive_scan( - rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); + auto const comparator_helper = [&](auto const device_comparator) { + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input_size), + dense_rank_sorted.data(), + unique_functor{ + sorted_index_order, device_comparator}); + }; + + if (cudf::detail::has_nested_columns(t_input)) { + auto const device_comparator = + comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); + comparator_helper(device_comparator); + } else { + auto const device_comparator = + comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); + comparator_helper(device_comparator); + } + + thrust::inclusive_scan(rmm::exec_policy(stream), + dense_rank_sorted.begin(), + dense_rank_sorted.end(), + dense_rank_sorted.data()); + return dense_rank_sorted; } diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 8f462f58e4e..e15d54b4251 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -55,7 +55,8 @@ rmm::device_uvector get_distinct_indices(table_view const& input, auto const 
preprocessed_input = cudf::experimental::row::hash::preprocessed_table::create(input, stream); - auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; + auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; + auto const has_nested_columns = cudf::detail::has_nested_columns(input); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); @@ -66,8 +67,13 @@ rmm::device_uvector get_distinct_indices(table_view const& input, size_type{0}, [] __device__(size_type const i) { return cuco::make_pair(i, i); }); auto const insert_keys = [&](auto const value_comp) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + if (has_nested_columns) { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + } else { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + } }; if (nans_equal == nan_equality::ALL_EQUAL) { @@ -92,6 +98,7 @@ rmm::device_uvector get_distinct_indices(table_view const& input, std::move(preprocessed_input), input.num_rows(), has_nulls, + has_nested_columns, keep, nulls_equal, nans_equal, diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index 468561273b3..020e6a495bc 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
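Note: `hash_reduce_by_row` (next hunks) now receives the `has_nested_columns` flag computed once in `get_distinct_indices` above instead of re-deriving it. The reduction itself implements the `duplicate_keep_option` semantics; a host-side sketch of the KEEP_FIRST case, assuming equal rows share one map slot (hypothetical, not the cudf kernel):

#include <algorithm>
#include <unordered_map>
#include <vector>

// KEEP_FIRST: every group of equal keys reduces to its smallest row index.
std::vector<int> keep_first_indices(std::vector<int> const& keys)
{
  std::unordered_map<int, int> slot;  // key -> reduced row index
  for (int i = 0; i < static_cast<int>(keys.size()); ++i) {
    auto const [it, inserted] = slot.try_emplace(keys[i], i);
    if (!inserted) { it->second = std::min(it->second, i); }  // redundant here, shows the reduce
  }
  std::vector<int> out;
  out.reserve(slot.size());
  for (auto const& [key, idx] : slot) { out.push_back(idx); }
  return out;
}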
diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu
index 468561273b3..020e6a495bc 100644
--- a/cpp/src/stream_compaction/distinct_reduce.cu
+++ b/cpp/src/stream_compaction/distinct_reduce.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -93,6 +93,7 @@ rmm::device_uvector<size_type> hash_reduce_by_row(
   std::shared_ptr const preprocessed_input,
   size_type num_rows,
   cudf::nullate::DYNAMIC has_nulls,
+  bool has_nested_columns,
   duplicate_keep_option keep,
   null_equality nulls_equal,
   nan_equality nans_equal,
@@ -115,13 +116,23 @@ rmm::device_uvector<size_type> hash_reduce_by_row(
   auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input);

   auto const reduce_by_row = [&](auto const value_comp) {
-    auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp);
-    thrust::for_each(
-      rmm::exec_policy(stream),
-      thrust::make_counting_iterator(0),
-      thrust::make_counting_iterator(num_rows),
-      reduce_by_row_fn{
-        map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()});
+    if (has_nested_columns) {
+      auto const key_equal = row_comp.equal_to<true>(has_nulls, nulls_equal, value_comp);
+      thrust::for_each(
+        rmm::exec_policy(stream),
+        thrust::make_counting_iterator(0),
+        thrust::make_counting_iterator(num_rows),
+        reduce_by_row_fn{
+          map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()});
+    } else {
+      auto const key_equal = row_comp.equal_to<false>(has_nulls, nulls_equal, value_comp);
+      thrust::for_each(
+        rmm::exec_policy(stream),
+        thrust::make_counting_iterator(0),
+        thrust::make_counting_iterator(num_rows),
+        reduce_by_row_fn{
+          map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()});
+    }
   };

   if (nans_equal == nan_equality::ALL_EQUAL) {
diff --git a/cpp/src/stream_compaction/distinct_reduce.cuh b/cpp/src/stream_compaction/distinct_reduce.cuh
index c8a0c2869c8..e360d03280a 100644
--- a/cpp/src/stream_compaction/distinct_reduce.cuh
+++ b/cpp/src/stream_compaction/distinct_reduce.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -65,6 +65,7 @@ auto constexpr reduction_init_value(duplicate_keep_option keep)
  *        comparisons
  * @param num_rows The number of all input rows
  * @param has_nulls Indicate whether the input rows has any nulls at any nested levels
+ * @param has_nested_columns Indicates whether the input table has any nested columns
  * @param keep The parameter to determine what type of reduction to perform
  * @param nulls_equal Flag to specify whether null elements should be considered as equal
  * @param stream CUDA stream used for device memory operations and kernel launches
@@ -76,6 +77,7 @@ rmm::device_uvector<size_type> hash_reduce_by_row(
   std::shared_ptr const preprocessed_input,
   size_type num_rows,
   cudf::nullate::DYNAMIC has_nulls,
+  bool has_nested_columns,
   duplicate_keep_option keep,
   null_equality nulls_equal,
   nan_equality nans_equal,
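hash_reduce_by_row collapses every group of hash-equal rows down to one surviving row index according to duplicate_keep_option. A host-side sketch of those semantics as I read them from this diff (KEEP_FIRST keeps the minimum index per key, KEEP_LAST the maximum, KEEP_NONE only keys that occur exactly once); the device version performs the same reduction with atomics over a hash map, so this is an illustration, not the actual kernel logic:

#include <algorithm>
#include <map>
#include <vector>

enum class duplicate_keep { first, last, none };

// For each key, decide which row index survives the reduction.
std::vector<int> kept_indices(std::vector<int> const& keys, duplicate_keep keep)
{
  struct info { int min_idx, max_idx, count; };
  std::map<int, info> agg;
  for (int i = 0; i < static_cast<int>(keys.size()); ++i) {
    auto [it, inserted] = agg.try_emplace(keys[i], info{i, i, 1});
    if (!inserted) {
      it->second.min_idx = std::min(it->second.min_idx, i);
      it->second.max_idx = std::max(it->second.max_idx, i);
      ++it->second.count;
    }
  }
  std::vector<int> out;
  for (auto const& [key, v] : agg) {
    if (keep == duplicate_keep::first) out.push_back(v.min_idx);
    if (keep == duplicate_keep::last) out.push_back(v.max_idx);
    if (keep == duplicate_keep::none && v.count == 1) out.push_back(v.min_idx);
  }
  return out;
}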
diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu
index 369b63995e3..511a7b7ae1c 100644
--- a/cpp/src/stream_compaction/unique.cu
+++ b/cpp/src/stream_compaction/unique.cu
@@ -65,28 +65,40 @@ std::unique_ptr<table> unique(table_view const& input,
   auto mutable_view = mutable_column_device_view::create(*unique_indices, stream);
   auto keys_view    = input.select(keys);

-  auto comp = cudf::experimental::row::equality::self_comparator(keys_view, stream);
-  auto row_equal = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal);
+  auto comp = cudf::experimental::row::equality::self_comparator(keys_view, stream);

-  // get indices of unique rows
-  auto result_end = unique_copy(thrust::counting_iterator<size_type>(0),
-                                thrust::counting_iterator<size_type>(num_rows),
-                                mutable_view->begin<size_type>(),
-                                row_equal,
-                                keep,
-                                stream);
-  auto indices_view =
-    cudf::detail::slice(column_view(*unique_indices),
-                        0,
-                        thrust::distance(mutable_view->begin<size_type>(), result_end));
+  auto const comparator_helper = [&](auto const row_equal) {
+    // get indices of unique rows
+    auto result_end = unique_copy(thrust::counting_iterator<size_type>(0),
+                                  thrust::counting_iterator<size_type>(num_rows),
+                                  mutable_view->begin<size_type>(),
+                                  row_equal,
+                                  keep,
+                                  stream);

-  // gather unique rows and return
-  return detail::gather(input,
-                        indices_view,
-                        out_of_bounds_policy::DONT_CHECK,
-                        detail::negative_index_policy::NOT_ALLOWED,
-                        stream,
-                        mr);
+    auto indices_view =
+      cudf::detail::slice(column_view(*unique_indices),
+                          0,
+                          thrust::distance(mutable_view->begin<size_type>(), result_end));
+
+    // gather unique rows and return
+    return detail::gather(input,
+                          indices_view,
+                          out_of_bounds_policy::DONT_CHECK,
+                          detail::negative_index_policy::NOT_ALLOWED,
+                          stream,
+                          mr);
+  };
+
+  if (cudf::detail::has_nested_columns(keys_view)) {
+    auto row_equal =
+      comp.equal_to<true>(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal);
+    return comparator_helper(row_equal);
+  } else {
+    auto row_equal =
+      comp.equal_to<false>(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal);
+    return comparator_helper(row_equal);
+  }
 }

 }  // namespace detail
diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu
index cc283fbcee2..3784b535a5b 100644
--- a/cpp/src/strings/repeat_strings.cu
+++ b/cpp/src/strings/repeat_strings.cu
@@ -176,7 +176,7 @@ namespace {
  *        separate number of times.
  */
 template <typename Iterator>
-struct compute_size_and_repeat_separately_fn {
+struct compute_sizes_and_repeat_fn {
   column_device_view const strings_dv;
   column_device_view const repeat_times_dv;
   Iterator const repeat_times_iter;
@@ -189,146 +189,63 @@ struct compute_size_and_repeat_separately_fn {
   // If d_chars != nullptr: only repeat strings.
   char* d_chars{nullptr};

-  __device__ int64_t operator()(size_type const idx) const noexcept
+  __device__ void operator()(size_type const idx) const noexcept
   {
     auto const string_is_valid = !strings_has_nulls || strings_dv.is_valid_nocheck(idx);
     auto const rtimes_is_valid = !rtimes_has_nulls || repeat_times_dv.is_valid_nocheck(idx);

     // Any null input (either string or repeat_times value) will result in a null output.
     auto const is_valid = string_is_valid && rtimes_is_valid;
+    if (!is_valid) {
+      if (!d_chars) { d_offsets[idx] = 0; }
+      return;
+    }

-    // When the input string is null, `repeat_times` and `string_size` are also set to 0.
-    // This makes sure that if `repeat_times > 0` then we will always have a valid input string,
-    // and if `repeat_times <= 0` we will never copy anything to the output.
-    auto const repeat_times = is_valid ? repeat_times_iter[idx] : size_type{0};
-    auto const string_size =
-      is_valid ? strings_dv.element<string_view>(idx).size_bytes() : size_type{0};
-
-    // The output_size is returned, and it needs to be an int64_t number to prevent overflow.
- auto const output_size = - repeat_times > 0 ? static_cast(repeat_times) * static_cast(string_size) - : int64_t{0}; + auto repeat_times = repeat_times_iter[idx]; + auto const d_str = strings_dv.element(idx); if (!d_chars) { - // If overflow happen, the stored value of output string size will be incorrect due to - // downcasting. In such cases, the entire output string size array should be discarded. - d_offsets[idx] = static_cast(output_size); - } else if (repeat_times > 0 && string_size > 0) { - auto const d_str = strings_dv.element(idx); - auto const input_ptr = d_str.data(); - auto output_ptr = d_chars + d_offsets[idx]; - for (size_type repeat_idx = 0; repeat_idx < repeat_times; ++repeat_idx) { - output_ptr = copy_and_increment(output_ptr, input_ptr, string_size); + // repeat_times could be negative + d_offsets[idx] = (repeat_times > 0) ? (repeat_times * d_str.size_bytes()) : 0; + } else { + auto output_ptr = d_chars + d_offsets[idx]; + while (repeat_times-- > 0) { + output_ptr = copy_and_increment(output_ptr, d_str.data(), d_str.size_bytes()); } } - - // The output_size value may be used to sum up to detect overflow at the caller site. - // The caller can detect overflow easily by checking `SUM(output_size) > INT_MAX`. - return output_size; } }; -/** - * @brief Creates child offsets and chars columns by applying the template function that - * can be used for computing the output size of each string as well as create the output. - * - * This function is similar to `strings::detail::make_strings_children`, except that it accepts an - * optional input `std::optional` that can contain the precomputed sizes of the output - * strings. - * - * @deprecated This will be removed with issue 12542 - */ -template -auto make_strings_children(Func fn, - size_type exec_size, - size_type strings_count, - std::optional output_strings_sizes, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto offsets_column = make_numeric_column( - data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); - - auto offsets_view = offsets_column->mutable_view(); - auto d_offsets = offsets_view.template data(); - fn.d_offsets = d_offsets; - - // This may be called twice -- once for offsets and once for chars. - auto for_each_fn = [exec_size, stream](Func& fn) { - thrust::for_each_n( - rmm::exec_policy(stream), thrust::make_counting_iterator(0), exec_size, fn); - }; - - if (!output_strings_sizes.has_value()) { - // Compute the output sizes only if they are not given. - for_each_fn(fn); - - // Compute the offsets values. - auto const bytes = - cudf::detail::sizes_to_offsets(d_offsets, d_offsets + strings_count + 1, d_offsets, stream); - CUDF_EXPECTS(bytes <= static_cast(std::numeric_limits::max()), - "Size of output exceeds column size limit"); - } else { - // Compute the offsets values from the provided output string sizes. - auto const string_sizes = output_strings_sizes.value(); - CUDF_CUDA_TRY(cudaMemsetAsync(d_offsets, 0, sizeof(offset_type), stream.value())); - thrust::inclusive_scan(rmm::exec_policy(stream), - string_sizes.template begin(), - string_sizes.template end(), - d_offsets + 1); - } - - // Now build the chars column - auto const bytes = cudf::detail::get_value(offsets_view, strings_count, stream); - auto chars_column = create_chars_child_column(bytes, stream, mr); - - // Execute the function fn again to fill the chars column. 
- // Note that if the output chars column has zero size, the function fn should not be called to - // avoid accidentally overwriting the offsets. - if (bytes > 0) { - fn.d_chars = chars_column->mutable_view().template data(); - for_each_fn(fn); - } - - return std::pair(std::move(offsets_column), std::move(chars_column)); -} - } // namespace std::unique_ptr repeat_strings(strings_column_view const& input, column_view const& repeat_times, - std::optional output_strings_sizes, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(input.size() == repeat_times.size(), "The input columns must have the same size."); CUDF_EXPECTS(cudf::is_index_type(repeat_times.type()), "repeat_strings expects an integer type for the `repeat_times` input column."); - if (output_strings_sizes.has_value()) { - auto const output_sizes = output_strings_sizes.value(); - CUDF_EXPECTS(input.size() == output_sizes.size() && - (!output_sizes.nullable() || !output_sizes.has_nulls()), - "The given column of output string sizes is invalid."); - } auto const strings_count = input.size(); if (strings_count == 0) { return make_empty_column(type_id::STRING); } auto const strings_dv_ptr = column_device_view::create(input.parent(), stream); auto const repeat_times_dv_ptr = column_device_view::create(repeat_times, stream); - auto const strings_has_nulls = input.has_nulls(); - auto const rtimes_has_nulls = repeat_times.has_nulls(); auto const repeat_times_iter = cudf::detail::indexalator_factory::make_input_iterator(repeat_times); - auto const fn = compute_size_and_repeat_separately_fn{ - *strings_dv_ptr, *repeat_times_dv_ptr, repeat_times_iter, strings_has_nulls, rtimes_has_nulls}; - - auto [offsets_column, chars_column] = - make_strings_children(fn, strings_count, strings_count, output_strings_sizes, stream, mr); - - // We generate new bitmask by AND of the input columns' bitmasks. - // Note that if the input columns are nullable, the output column will also be nullable (which may - // not have nulls). + auto const fn = + compute_sizes_and_repeat_fn{*strings_dv_ptr, + *repeat_times_dv_ptr, + repeat_times_iter, + input.has_nulls(), + repeat_times.has_nulls()}; + + auto [offsets_column, chars_column] = make_strings_children(fn, strings_count, stream, mr); + + // We generate new bitmask by AND of the two input columns' bitmasks. + // Note that if either of the input columns are nullable, the output column will also be nullable + // but may not have nulls. 
   auto [null_mask, null_count] =
     cudf::detail::bitmask_and(table_view{{input.parent(), repeat_times}}, stream, mr);
@@ -338,52 +255,6 @@ std::unique_ptr<column> repeat_strings(strings_column_view const& input,
                              null_count,
                              std::move(null_mask));
 }
-
-std::pair<std::unique_ptr<column>, int64_t> repeat_strings_output_sizes(
-  strings_column_view const& input,
-  column_view const& repeat_times,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr)
-{
-  CUDF_EXPECTS(input.size() == repeat_times.size(), "The input columns must have the same size.");
-  CUDF_EXPECTS(
-    cudf::is_index_type(repeat_times.type()),
-    "repeat_strings_output_sizes expects an integer type for the `repeat_times` input column.");
-
-  auto const strings_count = input.size();
-  if (strings_count == 0) {
-    return std::pair(make_empty_column(type_to_id<size_type>()), int64_t{0});
-  }
-
-  auto output_sizes = make_numeric_column(
-    data_type{type_to_id<size_type>()}, strings_count, mask_state::UNALLOCATED, stream, mr);
-
-  auto const strings_dv_ptr      = column_device_view::create(input.parent(), stream);
-  auto const repeat_times_dv_ptr = column_device_view::create(repeat_times, stream);
-  auto const strings_has_nulls   = input.has_nulls();
-  auto const rtimes_has_nulls    = repeat_times.has_nulls();
-  auto const repeat_times_iter =
-    cudf::detail::indexalator_factory::make_input_iterator(repeat_times);
-
-  auto const fn = compute_size_and_repeat_separately_fn<decltype(repeat_times_iter)>{
-    *strings_dv_ptr,
-    *repeat_times_dv_ptr,
-    repeat_times_iter,
-    strings_has_nulls,
-    rtimes_has_nulls,
-    output_sizes->mutable_view().template begin<size_type>()};
-
-  auto const total_bytes =
-    thrust::transform_reduce(rmm::exec_policy(stream),
-                             thrust::make_counting_iterator(0),
-                             thrust::make_counting_iterator(strings_count),
-                             fn,
-                             int64_t{0},
-                             thrust::plus{});
-
-  return std::pair(std::move(output_sizes), total_bytes);
-}
-
 }  // namespace detail

 std::unique_ptr<column> repeat_string(string_scalar const& input,
@@ -404,21 +275,10 @@ std::unique_ptr<column> repeat_strings(strings_column_view const& input,

 std::unique_ptr<column> repeat_strings(strings_column_view const& input,
                                        column_view const& repeat_times,
-                                       std::optional<column_view> output_strings_sizes,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::repeat_strings(
-    input, repeat_times, output_strings_sizes, cudf::get_default_stream(), mr);
-}
-
-std::pair<std::unique_ptr<column>, int64_t> repeat_strings_output_sizes(
-  strings_column_view const& input,
-  column_view const& repeat_times,
-  rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::repeat_strings_output_sizes(input, repeat_times, cudf::get_default_stream(), mr);
+  return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr);
 }

 }  // namespace strings
diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu
index 8f0a44585bf..3f3dd422f9d 100644
--- a/cpp/src/transform/one_hot_encode.cu
+++ b/cpp/src/transform/one_hot_encode.cu
@@ -36,6 +36,25 @@ namespace cudf {
 namespace detail {

+template <typename DeviceComparatorType>
+struct ohe_equality_functor {
+  ohe_equality_functor(size_type input_size, DeviceComparatorType d_equal)
+    : _input_size(input_size), _d_equal(d_equal)
+  {
+  }
+
+  auto __device__ operator()(size_type i) const noexcept
+  {
+    auto const element_index  = cudf::experimental::row::lhs_index_type{i % _input_size};
+    auto const category_index = cudf::experimental::row::rhs_index_type{i / _input_size};
+    return _d_equal(element_index, category_index);
+  }
+
+  private:
+  size_type _input_size;
+  DeviceComparatorType _d_equal;
+};
+
 std::pair<std::unique_ptr<column>, table_view> one_hot_encode(column_view const& input,
                                                               column_view const& categories,
                                                               rmm::cuda_stream_view stream,
@@ -59,19 +78,24 @@ std::pair<std::unique_ptr<column>, table_view> one_hot_encode(column_view const&
   auto const t_rhs = table_view{{categories}};
   auto const comparator =
     cudf::experimental::row::equality::two_table_comparator{t_lhs, t_rhs, stream};
-  auto const d_equal =
-    comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)});
-
-  thrust::transform(
-    rmm::exec_policy(stream),
-    thrust::make_counting_iterator(0),
-    thrust::make_counting_iterator(total_size),
-    all_encodings->mutable_view().begin<bool>(),
-    [input_size = input.size(), d_equal] __device__(size_type i) {
-      auto const element_index  = cudf::experimental::row::lhs_index_type{i % input_size};
-      auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size};
-      return d_equal(element_index, category_index);
-    });
+
+  auto const comparator_helper = [&](auto const d_equal) {
+    thrust::transform(rmm::exec_policy(stream),
+                      thrust::make_counting_iterator(0),
+                      thrust::make_counting_iterator(total_size),
+                      all_encodings->mutable_view().begin<bool>(),
+                      ohe_equality_functor(input.size(), d_equal));
+  };
+
+  if (cudf::detail::has_nested_columns(t_lhs) or cudf::detail::has_nested_columns(t_rhs)) {
+    auto const d_equal = comparator.equal_to<true>(
+      nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)});
+    comparator_helper(d_equal);
+  } else {
+    auto const d_equal = comparator.equal_to<false>(
+      nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)});
+    comparator_helper(d_equal);
+  }

   auto const split_iter =
     make_counting_transform_iterator(1, [width = input.size()](auto i) { return i * width; });
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 053acafdd3d..83a1c14438b 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -508,6 +508,19 @@ ConfigureTest(
 # * bin tests ----------------------------------------------------------------------------------
 ConfigureTest(LABEL_BINS_TEST labeling/label_bins_tests.cpp)

+# ##################################################################################################
+# * stream identification tests -------------------------------------------------------------------
+ConfigureTest(
+  STREAM_IDENTIFICATION_TEST identify_stream_usage/test_default_stream_identification.cu
+)
+# Note that this only works when the test is invoked via ctest. At the moment CI is running all
+# tests by manually invoking the executable, so we'll have to manually pass this environment
+# variable in that setup.
+set_tests_properties( + STREAM_IDENTIFICATION_TEST PROPERTIES ENVIRONMENT + LD_PRELOAD=$ +) + # ################################################################################################## # enable testing ################################################################################ # ################################################################################################## diff --git a/cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu b/cpp/tests/identify_stream_usage/test_default_stream_identification.cu similarity index 93% rename from cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu rename to cpp/tests/identify_stream_usage/test_default_stream_identification.cu index 022244b148b..28bb47af40d 100644 --- a/cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu +++ b/cpp/tests/identify_stream_usage/test_default_stream_identification.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ void test_cudaLaunchKernel() try { kernel<<<1, 1>>>(); - } catch (std::runtime_error) { + } catch (std::runtime_error&) { return; } throw std::runtime_error("No exception raised for kernel on default stream!"); diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 997a5daa189..5a358687893 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
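The LD_PRELOAD property above works by symbol interposition: the preloaded library shadows a CUDA runtime entry point, inspects the stream argument, and forwards everything else to the real symbol found via RTLD_NEXT, throwing when the default stream sneaks through (which is also why the catch-by-reference fix above matters). A minimal sketch of the mechanism, with stand-in types and a hypothetical build line rather than the actual identify_stream_usage source:

// shadow.cpp -- build roughly as: g++ -std=c++17 -shared -fPIC shadow.cpp -o libshadow.so -ldl
// run the target under: LD_PRELOAD=./libshadow.so ./your_test
#include <dlfcn.h>  // dlsym, RTLD_NEXT (g++ defines _GNU_SOURCE for C++)
#include <stdexcept>

using fake_stream_t = void*;  // stand-in for cudaStream_t; names here are illustrative

extern "C" int cudaStreamSynchronize(fake_stream_t stream)
{
  // Stream 0 is the legacy default stream: flag any use of it.
  if (stream == nullptr) { throw std::runtime_error("default stream used!"); }

  // Forward everything else to the real CUDA runtime symbol.
  using fn_t       = int (*)(fake_stream_t);
  static fn_t real = reinterpret_cast<fn_t>(dlsym(RTLD_NEXT, "cudaStreamSynchronize"));
  return real(stream);
}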
@@ -294,7 +294,6 @@ TYPED_TEST(SumReductionTest, Sum) .second); } -using ReductionTypes = cudf::test::Types; TYPED_TEST_SUITE(ReductionTest, cudf::test::NumericTypes); TYPED_TEST(ReductionTest, Product) diff --git a/cpp/tests/sort/rank_test.cpp b/cpp/tests/sort/rank_test.cpp index 8461b0a1984..2722c1dfdad 100644 --- a/cpp/tests/sort/rank_test.cpp +++ b/cpp/tests/sort/rank_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,13 @@ #include #include +template +using lists_col = cudf::test::lists_column_wrapper; +using structs_col = cudf::test::structs_column_wrapper; + +using cudf::test::iterators::null_at; +using cudf::test::iterators::nulls_at; + namespace { void run_rank_test(cudf::table_view input, cudf::table_view expected, @@ -50,10 +58,9 @@ void run_rank_test(cudf::table_view input, } using input_arg_t = std::tuple; -input_arg_t asce_keep{cudf::order::ASCENDING, cudf::null_policy::EXCLUDE, cudf::null_order::AFTER}; -input_arg_t asce_top{cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::BEFORE}; -input_arg_t asce_bottom{ - cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::AFTER}; +input_arg_t asc_keep{cudf::order::ASCENDING, cudf::null_policy::EXCLUDE, cudf::null_order::AFTER}; +input_arg_t asc_top{cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::BEFORE}; +input_arg_t asc_bottom{cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::AFTER}; input_arg_t desc_keep{ cudf::order::DESCENDING, cudf::null_policy::EXCLUDE, cudf::null_order::BEFORE}; @@ -105,7 +112,7 @@ TYPED_TEST_SUITE(Rank, cudf::test::NumericTypes); // fixed_width_column_wrapper col1{{ 5, 4, 3, 5, 8, 5}}; // 3, 2, 1, 4, 6, 5 -TYPED_TEST(Rank, first_asce_keep) +TYPED_TEST(Rank, first_asc_keep) { // ASCENDING cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 4, 6, 5}}; @@ -113,25 +120,25 @@ TYPED_TEST(Rank, first_asce_keep) {1, 1, 0, 1, 1, 1}}; // KEEP cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 3, 6, 4}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::FIRST, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::FIRST, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, first_asce_top) +TYPED_TEST(Rank, first_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 4, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{ {3, 2, 1, 4, 6, 5}}; // BEFORE = TOP cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 3, 6, 4}}; - this->run_all_tests(cudf::rank_method::FIRST, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::FIRST, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, first_asce_bottom) +TYPED_TEST(Rank, first_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 4, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{ {2, 1, 6, 3, 5, 4}}; // AFTER = BOTTOM cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 3, 6, 4}}; - this->run_all_tests(cudf::rank_method::FIRST, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::FIRST, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, first_desc_keep) @@ -163,30 +170,30 @@ TYPED_TEST(Rank, first_desc_bottom) this->run_all_tests(cudf::rank_method::FIRST, desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, dense_asce_keep) +TYPED_TEST(Rank, dense_asc_keep) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 4, 3}}; 
cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, -1, 2, 3, 2}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 3, 1, 2, 4, 2}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::DENSE, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, dense_asce_top) +TYPED_TEST(Rank, dense_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 4, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 2, 1, 3, 4, 3}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 3, 1, 2, 4, 2}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, dense_asce_bottom) +TYPED_TEST(Rank, dense_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 4, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, 4, 2, 3, 2}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 3, 1, 2, 4, 2}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, dense_desc_keep) @@ -215,30 +222,30 @@ TYPED_TEST(Rank, dense_desc_bottom) this->run_all_tests(cudf::rank_method::DENSE, desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, min_asce_keep) +TYPED_TEST(Rank, min_asc_keep) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, -1, 2, 5, 2}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 2, 6, 2}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::MIN, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MIN, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, min_asce_top) +TYPED_TEST(Rank, min_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 2, 6, 2}}; - this->run_all_tests(cudf::rank_method::MIN, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MIN, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, min_asce_bottom) +TYPED_TEST(Rank, min_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, 6, 2, 5, 2}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 2, 6, 2}}; - this->run_all_tests(cudf::rank_method::MIN, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MIN, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, min_desc_keep) @@ -267,30 +274,30 @@ TYPED_TEST(Rank, min_desc_bottom) this->run_all_tests(cudf::rank_method::MIN, desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, max_asce_keep) +TYPED_TEST(Rank, max_asc_keep) { cudf::test::fixed_width_column_wrapper col1_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{{4, 1, -1, 4, 5, 4}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{4, 5, 1, 4, 6, 4}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::MAX, asce_keep, col1_rank, col2_rank, col3_rank); + 
this->run_all_tests(cudf::rank_method::MAX, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, max_asce_top) +TYPED_TEST(Rank, max_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col3_rank{{4, 5, 1, 4, 6, 4}}; - this->run_all_tests(cudf::rank_method::MAX, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MAX, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, max_asce_bottom) +TYPED_TEST(Rank, max_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{{4, 1, 6, 4, 5, 4}}; cudf::test::fixed_width_column_wrapper col3_rank{{4, 5, 1, 4, 6, 4}}; - this->run_all_tests(cudf::rank_method::MAX, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MAX, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, max_desc_keep) @@ -319,28 +326,28 @@ TYPED_TEST(Rank, max_desc_bottom) this->run_all_tests(cudf::rank_method::MAX, desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, average_asce_keep) +TYPED_TEST(Rank, average_asc_keep) { cudf::test::fixed_width_column_wrapper col1_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 1, -1, 3, 5, 3}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{3, 5, 1, 3, 6, 3}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::AVERAGE, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::AVERAGE, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, average_asce_top) +TYPED_TEST(Rank, average_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col2_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col3_rank{{3, 5, 1, 3, 6, 3}}; - this->run_all_tests(cudf::rank_method::AVERAGE, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::AVERAGE, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, average_asce_bottom) +TYPED_TEST(Rank, average_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 1, 6, 3, 5, 3}}; cudf::test::fixed_width_column_wrapper col3_rank{{3, 5, 1, 3, 6, 3}}; - this->run_all_tests(cudf::rank_method::AVERAGE, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::AVERAGE, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, average_desc_keep) @@ -368,30 +375,30 @@ TYPED_TEST(Rank, average_desc_bottom) } // percentage==true (dense, not-dense) -TYPED_TEST(Rank, dense_asce_keep_pct) +TYPED_TEST(Rank, dense_asc_keep_pct) { cudf::test::fixed_width_column_wrapper col1_rank{{0.75, 0.5, 0.25, 0.75, 1., 0.75}}; cudf::test::fixed_width_column_wrapper col2_rank{ {2.0 / 3.0, 1.0 / 3.0, -1., 2.0 / 3.0, 1., 2.0 / 3.0}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{0.5, 0.75, 0.25, 0.5, 1., 0.5}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_keep, col1_rank, col2_rank, col3_rank, true); + this->run_all_tests(cudf::rank_method::DENSE, asc_keep, col1_rank, col2_rank, col3_rank, true); } -TYPED_TEST(Rank, dense_asce_top_pct) +TYPED_TEST(Rank, dense_asc_top_pct) { cudf::test::fixed_width_column_wrapper col1_rank{{0.75, 0.5, 0.25, 0.75, 1., 
0.75}}; cudf::test::fixed_width_column_wrapper col2_rank{{0.75, 0.5, 0.25, 0.75, 1., 0.75}}; cudf::test::fixed_width_column_wrapper col3_rank{{0.5, 0.75, 0.25, 0.5, 1., 0.5}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_top, col1_rank, col2_rank, col3_rank, true); + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col1_rank, col2_rank, col3_rank, true); } -TYPED_TEST(Rank, dense_asce_bottom_pct) +TYPED_TEST(Rank, dense_asc_bottom_pct) { cudf::test::fixed_width_column_wrapper col1_rank{{0.75, 0.5, 0.25, 0.75, 1., 0.75}}; cudf::test::fixed_width_column_wrapper col2_rank{{0.5, 0.25, 1., 0.5, 0.75, 0.5}}; cudf::test::fixed_width_column_wrapper col3_rank{{0.5, 0.75, 0.25, 0.5, 1., 0.5}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_bottom, col1_rank, col2_rank, col3_rank, true); + this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col1_rank, col2_rank, col3_rank, true); } TYPED_TEST(Rank, min_desc_keep_pct) @@ -444,3 +451,472 @@ TEST_F(RankLarge, average_large) cudf::test::fixed_width_column_wrapper expected(iter + 1, iter + 10559); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); } + +template +struct RankListAndStruct : public cudf::test::BaseFixture { + void run_all_tests(cudf::rank_method method, + input_arg_t input_arg, + cudf::column_view const list_rank, + cudf::column_view const struct_rank, + bool percentage = false) + { + if constexpr (std::is_same_v) { return; } + /* + [ + [], + [1], + [2, 2], + [2, 3], + [2, 2], + [1], + [], + NULL + [2], + NULL, + [1] + ] + */ + auto list_col = + lists_col{{{}, {1}, {2, 2}, {2, 3}, {2, 2}, {1}, {}, {} /*NULL*/, {2}, {} /*NULL*/, {1}}, + nulls_at({7, 9})}; + + // clang-format off + /* + +------------+ + | s| + +------------+ + 0 | {0, null}| + 1 | {1, null}| + 2 | null| + 3 |{null, null}| + 4 | null| + 5 |{null, null}| + 6 | {null, 1}| + 7 | {null, 0}| + +------------+ + */ + std::vector struct_valids{1, 1, 0, 1, 0, 1, 1, 1}; + auto col1 = cudf::test::fixed_width_column_wrapper{{ 0, 1, 9, -1, 9, -1, -1, -1}, {1, 1, 1, 0, 1, 0, 0, 0}}; + auto col2 = cudf::test::fixed_width_column_wrapper{{-1, -1, 9, -1, 9, -1, 1, 0}, {0, 0, 1, 0, 1, 0, 1, 1}}; + auto struct_col = cudf::test::structs_column_wrapper{{col1, col2}, struct_valids}.release(); + // clang-format on + + for (auto const& test_case : { + // Non-null column + test_case_t{cudf::table_view{{list_col}}, cudf::table_view{{list_rank}}}, + // Null column + test_case_t{cudf::table_view{{struct_col->view()}}, cudf::table_view{{struct_rank}}}, + }) { + auto [input, output] = test_case; + + run_rank_test(input, + output, + method, + std::get<0>(input_arg), + std::get<1>(input_arg), + std::get<2>(input_arg), + percentage); + } + } +}; + +TYPED_TEST_SUITE(RankListAndStruct, cudf::test::NumericTypes); + +TYPED_TEST(RankListAndStruct, first_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper list_rank{ + {1, 3, 7, 9, 8, 4, 2, -1, 6, -1, 5}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 5, -1, 6, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::FIRST, asc_keep, list_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 3, 5, 9, 11, 10, 6, 4, 1, 8, 2, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{7, 8, 1, 3, 2, 4, 6, 5}; + this->run_all_tests(cudf::rank_method::FIRST, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_asc_bottom) +{ + 
// ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 1, 3, 7, 9, 8, 4, 2, 10, 6, 11, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 7, 5, 8, 6, 4, 3}; + this->run_all_tests(cudf::rank_method::FIRST, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {8, 5, 2, 1, 3, 6, 9, -1, 4, -1, 7}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 5, -1, 6, 3, 4}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::FIRST, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 10, 7, 4, 3, 5, 8, 11, 1, 6, 2, 9}; + cudf::test::fixed_width_column_wrapper struct_rank{8, 7, 1, 3, 2, 4, 5, 6}; + this->run_all_tests(cudf::rank_method::FIRST, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 8, 5, 2, 1, 3, 6, 9, 10, 4, 11, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 7, 5, 8, 6, 3, 4}; + this->run_all_tests(cudf::rank_method::FIRST, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {1, 2, 4, 5, 4, 2, 1, -1, 3, -1, 2}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 5, -1, 5, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::DENSE, asc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{2, 3, 5, 6, 5, 3, 2, 1, 4, 1, 3}; + cudf::test::fixed_width_column_wrapper struct_rank{5, 6, 1, 2, 1, 2, 4, 3}; + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{1, 2, 4, 5, 4, 2, 1, 6, 3, 6, 2}; + cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 6, 5, 6, 5, 4, 3}; + this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {5, 4, 2, 1, 2, 4, 5, -1, 3, -1, 4}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 5, -1, 5, 3, 4}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::DENSE, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{6, 5, 3, 2, 3, 5, 6, 1, 4, 1, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{6, 5, 1, 2, 1, 2, 3, 4}; + this->run_all_tests(cudf::rank_method::DENSE, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{5, 4, 2, 1, 2, 4, 5, 6, 3, 6, 4}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 6, 5, 6, 5, 3, 4}; + this->run_all_tests(cudf::rank_method::DENSE, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_asc_keep) +{ + // 
ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {1, 3, 7, 9, 7, 3, 1, -1, 6, -1, 3}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 5, -1, 5, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MIN, asc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 3, 5, 9, 11, 9, 5, 3, 1, 8, 1, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{7, 8, 1, 3, 1, 3, 6, 5}; + this->run_all_tests(cudf::rank_method::MIN, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 1, 3, 7, 9, 7, 3, 1, 10, 6, 10, 3}; + cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 7, 5, 7, 5, 4, 3}; + this->run_all_tests(cudf::rank_method::MIN, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {8, 5, 2, 1, 2, 5, 8, -1, 4, -1, 5}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 5, -1, 5, 3, 4}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MIN, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 10, 7, 4, 3, 4, 7, 10, 1, 6, 1, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{8, 7, 1, 3, 1, 3, 5, 6}; + this->run_all_tests(cudf::rank_method::MIN, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 8, 5, 2, 1, 2, 5, 8, 10, 4, 10, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 7, 5, 7, 5, 3, 4}; + this->run_all_tests(cudf::rank_method::MIN, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {2, 5, 8, 9, 8, 5, 2, -1, 6, -1, 5}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 6, -1, 6, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MAX, asc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 4, 7, 10, 11, 10, 7, 4, 2, 8, 2, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{7, 8, 2, 4, 2, 4, 6, 5}; + this->run_all_tests(cudf::rank_method::MAX, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 2, 5, 8, 9, 8, 5, 2, 11, 6, 11, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 8, 6, 8, 6, 4, 3}; + this->run_all_tests(cudf::rank_method::MAX, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {9, 7, 3, 1, 3, 7, 9, -1, 4, -1, 7}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 6, -1, 6, 3, 4}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MAX, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, 
max_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 11, 9, 5, 3, 5, 9, 11, 2, 6, 2, 9}; + cudf::test::fixed_width_column_wrapper struct_rank{8, 7, 2, 4, 2, 4, 5, 6}; + this->run_all_tests(cudf::rank_method::MAX, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 9, 7, 3, 1, 3, 7, 9, 11, 4, 11, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 8, 6, 8, 6, 3, 4}; + this->run_all_tests(cudf::rank_method::MAX, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {1.5, 4.0, 7.5, 9.0, 7.5, 4.0, 1.5, -1.0, 6.0, -1.0, 4.0}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{ + {1.0, 2.0, -1.0, 5.5, -1.0, 5.5, 4.0, 3.0}, nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::AVERAGE, asc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 3.5, 6.0, 9.5, 11.0, 9.5, 6.0, 3.5, 1.5, 8.0, 1.5, 6.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 7.0, 8.0, 1.5, 3.5, 1.5, 3.5, 6.0, 5.0}; + this->run_all_tests(cudf::rank_method::AVERAGE, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 1.5, 4.0, 7.5, 9.0, 7.5, 4.0, 1.5, 10.5, 6.0, 10.5, 4.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 1.0, 2.0, 7.5, 5.5, 7.5, 5.5, 4.0, 3.0}; + this->run_all_tests(cudf::rank_method::AVERAGE, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {8.5, 6.0, 2.5, 1.0, 2.5, 6.0, 8.5, -1.0, 4.0, -1.0, 6.0}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{ + {2.0, 1.0, -1.0, 5.5, -1.0, 5.5, 3.0, 4.0}, nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::AVERAGE, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 10.5, 8.0, 4.5, 3.0, 4.5, 8.0, 10.5, 1.5, 6.0, 1.5, 8.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 8.0, 7.0, 1.5, 3.5, 1.5, 3.5, 5.0, 6.0}; + this->run_all_tests(cudf::rank_method::AVERAGE, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 8.5, 6.0, 2.5, 1.0, 2.5, 6.0, 8.5, 10.5, 4.0, 10.5, 6.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 2.0, 1.0, 7.5, 5.5, 7.5, 5.5, 3.0, 4.0}; + this->run_all_tests(cudf::rank_method::AVERAGE, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_keep_pct) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{{1.0 / 5.0, + 2.0 / 5.0, + 4.0 / 5.0, + 1.0, + 4.0 / 5.0, + 2.0 / 5.0, + 1.0 / 5.0, + -1.0, + 3.0 / 5.0, + -1.0, + 2.0 / 5.0}, + nulls_at({7, 9})}; + + cudf::test::fixed_width_column_wrapper struct_rank{ + {1.0 / 5.0, 2.0 / 5.0, -1.0, 1.0, -1.0, 1.0, 4.0 / 5.0, 3.0 / 5.0}, nulls_at({2, 4})}; + + this->run_all_tests(cudf::rank_method::DENSE, 
asc_keep, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, dense_asc_top_pct) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{1.0 / 3.0, + 1.0 / 2.0, + 5.0 / 6.0, + 1.0, + 5.0 / 6.0, + 1.0 / 2.0, + 1.0 / 3.0, + 1.0 / 6.0, + 2.0 / 3.0, + 1.0 / 6.0, + 1.0 / 2.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 5.0 / 6.0, 1.0, 1.0 / 6.0, 2.0 / 6.0, 1.0 / 6.0, 2.0 / 6.0, 4.0 / 6.0, 3.0 / 6.0}; + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, dense_asc_bottom_pct) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{1.0 / 6.0, + 1.0 / 3.0, + 2.0 / 3.0, + 5.0 / 6.0, + 2.0 / 3.0, + 1.0 / 3.0, + 1.0 / 6.0, + 1.0, + 1.0 / 2.0, + 1.0, + 1.0 / 3.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 1.0 / 6.0, 2.0 / 6.0, 1.0, 5.0 / 6.0, 1.0, 5.0 / 6.0, 4.0 / 6.0, 3.0 / 6.0}; + this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, min_desc_keep_pct) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{{8.0 / 9.0, + 5.0 / 9.0, + 2.0 / 9.0, + 1.0 / 9.0, + 2.0 / 9.0, + 5.0 / 9.0, + 8.0 / 9.0, + -1.0, + 4.0 / 9.0, + -1.0, + 5.0 / 9.0}, + nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{ + {2.0 / 6.0, 1.0 / 6.0, -1.0, 5.0 / 6.0, -1.0, 5.0 / 6.0, 3.0 / 6.0, 4.0 / 6.0}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MIN, desc_keep, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, min_desc_top_pct) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{10.0 / 11.0, + 7.0 / 11.0, + 4.0 / 11.0, + 3.0 / 11.0, + 4.0 / 11.0, + 7.0 / 11.0, + 10.0 / 11.0, + 1.0 / 11.0, + 6.0 / 11.0, + 1.0 / 11.0, + 7.0 / 11.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 1.0, 7.0 / 8.0, 1.0 / 8.0, 3.0 / 8.0, 1.0 / 8.0, 3.0 / 8.0, 5.0 / 8.0, 6.0 / 8.0}; + this->run_all_tests(cudf::rank_method::MIN, desc_top, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, min_desc_bottom_pct) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{8.0 / 11.0, + 5.0 / 11.0, + 2.0 / 11.0, + 1.0 / 11.0, + 2.0 / 11.0, + 5.0 / 11.0, + 8.0 / 11.0, + 10.0 / 11.0, + 4.0 / 11.0, + 10.0 / 11.0, + 5.0 / 11.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 2.0 / 8.0, 1.0 / 8.0, 7.0 / 8.0, 5.0 / 8.0, 7.0 / 8.0, 5.0 / 8.0, 3.0 / 8.0, 4.0 / 8.0}; + this->run_all_tests(cudf::rank_method::MIN, desc_bottom, col_rank, struct_rank, true); +} diff --git a/cpp/tests/strings/repeat_strings_tests.cpp b/cpp/tests/strings/repeat_strings_tests.cpp index 69d0494c253..e75409d9f39 100644 --- a/cpp/tests/strings/repeat_strings_tests.cpp +++ b/cpp/tests/strings/repeat_strings_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -207,20 +207,6 @@ TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesInvalidInput) EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times), cudf::logic_error); } - // Sizes mismatched between strings column and output_strings_sizes column. 
- { - auto const repeat_times = int32s_col{1, 2}; - auto const sizes = int32s_col{1, 2, 3, 4, 5}; - EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times, sizes), cudf::logic_error); - } - - // output_strings_sizes column has nulls. - { - auto const repeat_times = int32s_col{1, 2}; - auto const sizes = int32s_col{{null, 2}, null_at(0)}; - EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times, sizes), cudf::logic_error); - } - // Invalid data type for repeat_times column. { auto const repeat_times = cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6}; @@ -243,11 +229,7 @@ TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesOverflowOutput) auto const repeat_times = int32s_col{half_max, half_max, half_max, half_max, half_max, half_max, half_max}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - (void)sizes; - auto const expected_bytes = static_cast(half_max) * int64_t{1 + 2 + 3 + 4 + 5 + 6 + 7}; - EXPECT_EQ(expected_bytes, total_bytes); + EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times), cudf::logic_error); } TYPED_TEST(RepeatStringsTypedTest, StringsColumnNoNullWithScalarRepeatTimes) @@ -301,15 +283,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnNoNullWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 12, 27, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(45, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // repeat_times column has nulls. @@ -320,15 +293,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnNoNullWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 27, 12, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(45, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } } @@ -377,15 +341,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnNoNullWithColumnRepeatTime auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 12, 27}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(45, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the middle of the column. 
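With the precomputed-sizes overload gone, repeat_strings itself now rejects outputs that would overflow the 32-bit offsets column, which is what the EXPECT_THROW overflow test above exercises. A host-side sketch of that rule, reusing the error message seen in the new make_strings_children path but otherwise illustrative:

#include <cstdint>
#include <limits>
#include <stdexcept>
#include <vector>

// Accumulate output sizes in 64 bits and reject anything a 32-bit
// offsets column cannot represent.
std::int64_t checked_total_bytes(std::vector<std::int32_t> const& sizes)
{
  std::int64_t total = 0;
  for (auto s : sizes) { total += s; }
  if (total > std::numeric_limits<std::int32_t>::max()) {
    throw std::logic_error("Size of output exceeds column size limit");
  }
  return total;
}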
@@ -397,15 +352,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnNoNullWithColumnRepeatTime auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{12, 27}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(39, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the second half of the column. @@ -417,15 +363,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnNoNullWithColumnRepeatTime auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{27, 12, 12}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(51, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } } @@ -520,15 +457,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnWithNullsWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 18, 0, 0, 0, 12, 12, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(48, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // repeat_times column has nulls. @@ -549,15 +477,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnWithNullsWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 0, 0, 0, 0, 12, 0, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(18, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } } @@ -631,15 +550,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(6, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the middle of the column. 
@@ -652,15 +562,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{0, 0, 0, 0, 12}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(12, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the second half of the column, output has nulls. @@ -672,15 +573,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{12, 0, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(12, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the second half of the column, output does not have null. @@ -693,14 +585,5 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(0, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_strs, *results, verbosity); } } diff --git a/cpp/tests/table/experimental_row_operator_tests.cu b/cpp/tests/table/experimental_row_operator_tests.cu index ae55275aaec..1f3f7eefe79 100644 --- a/cpp/tests/table/experimental_row_operator_tests.cu +++ b/cpp/tests/table/experimental_row_operator_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
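The test helpers below branch on cudf::detail::has_nested_columns. In spirit (assuming the obvious implementation rather than quoting it), the predicate asks whether any column of the table is list- or struct-typed; here is a stand-in sketch over plain type ids:

#include <algorithm>
#include <vector>

enum class type_id { INT32, FLOAT64, STRING, LIST, STRUCT };

// Nested means any column whose type is LIST or STRUCT. The real cudf
// utility works on a table_view; a vector of type ids stands in here.
bool has_nested_columns(std::vector<type_id> const& column_types)
{
  return std::any_of(column_types.begin(), column_types.end(), [](type_id t) {
    return t == type_id::LIST || t == type_id::STRUCT;
  });
}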
@@ -115,18 +115,32 @@ auto self_equality(cudf::table_view input, rmm::cuda_stream_view stream{cudf::get_default_stream()}; auto const table_comparator = cudf::experimental::row::equality::self_comparator{input, stream}; - auto const equal_comparator = - table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); auto output = cudf::make_numeric_column( cudf::data_type(cudf::type_id::BOOL8), input.num_rows(), cudf::mask_state::UNALLOCATED); - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - thrust::make_counting_iterator(0), - output->mutable_view().data<bool>(), - equal_comparator); + if (cudf::detail::has_nested_columns(input)) { + auto const equal_comparator = + table_comparator.equal_to<true>(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + thrust::make_counting_iterator(0), + output->mutable_view().data<bool>(), + equal_comparator); + } else { + auto const equal_comparator = + table_comparator.equal_to<false>(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + thrust::make_counting_iterator(0), + output->mutable_view().data<bool>(), + equal_comparator); + } + return output; } @@ -140,20 +154,34 @@ auto two_table_equality(cudf::table_view lhs, auto const table_comparator = cudf::experimental::row::equality::two_table_comparator{lhs, rhs, stream}; - auto const equal_comparator = - table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + auto const lhs_it = cudf::experimental::row::lhs_iterator(0); auto const rhs_it = cudf::experimental::row::rhs_iterator(0); auto output = cudf::make_numeric_column( cudf::data_type(cudf::type_id::BOOL8), lhs.num_rows(), cudf::mask_state::UNALLOCATED); - thrust::transform(rmm::exec_policy(stream), - lhs_it, - lhs_it + lhs.num_rows(), - rhs_it, - output->mutable_view().data<bool>(), - equal_comparator); + if (cudf::detail::has_nested_columns(lhs) or cudf::detail::has_nested_columns(rhs)) { + auto const equal_comparator = + table_comparator.equal_to<true>(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + lhs_it, + lhs_it + lhs.num_rows(), + rhs_it, + output->mutable_view().data<bool>(), + equal_comparator); + } else { + auto const equal_comparator = + table_comparator.equal_to<false>(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + lhs_it, + lhs_it + lhs.num_rows(), + rhs_it, + output->mutable_view().data<bool>(), + equal_comparator); + } return output; } diff --git a/cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp b/cpp/tests/utilities/identify_stream_usage.cpp similarity index 99% rename from cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp rename to cpp/tests/utilities/identify_stream_usage.cpp index 4a1a8f04791..87301a7d49d 100644 --- a/cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp +++ b/cpp/tests/utilities/identify_stream_usage.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
diff --git a/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt b/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt deleted file mode 100644 index 89f40303550..00000000000 --- a/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt +++ /dev/null @@ -1,60 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR) - -if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/RAPIDS.cmake - ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake - ) -endif() -include(${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake) - -project( - IDENTIFY_STREAM_USAGE - VERSION 0.0.1 - LANGUAGES CXX CUDA -) - -include(rapids-cpm) -include(${rapids-cmake-dir}/cpm/rmm.cmake) -rapids_cpm_init() -rapids_cpm_rmm() - -set(CMAKE_CUDA_RUNTIME_LIBRARY SHARED) -add_library(identify_stream_usage SHARED identify_stream_usage.cpp) - -find_package(CUDAToolkit REQUIRED) - -set_target_properties(identify_stream_usage PROPERTIES CUDA_RUNTIME_LIBRARY SHARED) -target_link_libraries(identify_stream_usage PUBLIC CUDA::cudart rmm::rmm) - -set_target_properties( - identify_stream_usage - PROPERTIES # set target compile options - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON -) - -# Add the test file. -include(CTest) - -add_executable(Tests test_default_stream_identification.cu) -add_test(NAME default_stream_identification COMMAND Tests) - -set_tests_properties( - default_stream_identification PROPERTIES ENVIRONMENT - LD_PRELOAD=$<TARGET_FILE:identify_stream_usage> -) diff --git a/dependencies.yaml b/dependencies.yaml index 126e2b0be29..ae8eac4ea30 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -28,6 +28,7 @@ files: test_java: output: none includes: + - build - cudatoolkit - test_java test_notebooks: @@ -225,8 +226,8 @@ dependencies: - output_types: [conda, requirements] packages: - cachetools - - dask>=2022.12.0 - - distributed>=2022.12.0 + - dask>=2023.1.1 + - distributed>=2023.1.1 - fsspec>=0.6.0 - numba>=0.56.2 - numpy diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index b3111cec77b..0cb9ed37d9f 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2531,12 +2531,34 @@ public final ColumnVector stringLocate(Scalar substring, int start, int end) { * regular expression pattern or just by a string literal delimiter. * @return list of strings columns as a table.
*/ + @Deprecated public final Table stringSplit(String pattern, int limit, boolean splitByRegex) { + if (splitByRegex) { + return stringSplit(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), limit); + } else { + return stringSplit(pattern, limit); + } + } + + /** + * Returns a list of columns by splitting each string using the specified regex program pattern. + * The number of rows in the output columns will be the same as the input column. Null entries + * are added for the rows where split results have been exhausted. Null input entries result in + * all nulls in the corresponding rows of the output columns. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + * @return list of strings columns as a table. + */ + public final Table stringSplit(RegexProgram regexProg, int limit) { assert type.equals(DType.STRING) : "column type must be a String"; - assert pattern != null : "pattern is null"; - assert pattern.length() > 0 : "empty pattern is not supported"; + assert regexProg != null : "regex program is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; - return new Table(stringSplit(this.getNativeView(), pattern, limit, splitByRegex)); + return new Table(stringSplitRe(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, limit)); } /** @@ -2550,6 +2572,7 @@ public final Table stringSplit(String pattern, int limit, boolean splitByRegex) * regular expression pattern or just by a string literal delimiter. * @return list of strings columns as a table. */ + @Deprecated public final Table stringSplit(String pattern, boolean splitByRegex) { return stringSplit(pattern, -1, splitByRegex); } @@ -2567,7 +2590,10 @@ public final Table stringSplit(String pattern, boolean splitByRegex) { * @return list of strings columns as a table. */ public final Table stringSplit(String delimiter, int limit) { - return stringSplit(delimiter, limit, false); + assert type.equals(DType.STRING) : "column type must be a String"; + assert delimiter != null : "delimiter is null"; + assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; + return new Table(stringSplit(this.getNativeView(), delimiter, limit)); } /** @@ -2580,7 +2606,21 @@ public final Table stringSplit(String delimiter, int limit) { * @return list of strings columns as a table. */ public final Table stringSplit(String delimiter) { - return stringSplit(delimiter, -1, false); + return stringSplit(delimiter, -1); + } + + /** + * Returns a list of columns by splitting each string using the specified regex program pattern. + * The number of rows in the output columns will be the same as the input column. Null entries + * are added for the rows where split results have been exhausted. Null input entries result in + * all nulls in the corresponding rows of the output columns. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @return list of strings columns as a table. 
+ */ + public final Table stringSplit(RegexProgram regexProg) { + return stringSplit(regexProg, -1); } /** @@ -2595,13 +2635,33 @@ public final Table stringSplit(String delimiter) { * regular expression pattern or just by a string literal delimiter. * @return a LIST column of string elements. */ + @Deprecated public final ColumnVector stringSplitRecord(String pattern, int limit, boolean splitByRegex) { + if (splitByRegex) { + return stringSplitRecord(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), limit); + } else { + return stringSplitRecord(pattern, limit); + } + } + + /** + * Returns a column of lists of strings in which each list is made by splitting the + * corresponding input string using the specified regex program pattern. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + * @return a LIST column of string elements. + */ + public final ColumnVector stringSplitRecord(RegexProgram regexProg, int limit) { assert type.equals(DType.STRING) : "column type must be String"; - assert pattern != null : "pattern is null"; - assert pattern.length() > 0 : "empty pattern is not supported"; + assert regexProg != null : "regex program is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; return new ColumnVector( - stringSplitRecord(this.getNativeView(), pattern, limit, splitByRegex)); + stringSplitRecordRe(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, limit)); } /** @@ -2613,6 +2673,7 @@ public final ColumnVector stringSplitRecord(String pattern, int limit, boolean s * regular expression pattern or just by a string literal delimiter. * @return a LIST column of string elements. */ + @Deprecated public final ColumnVector stringSplitRecord(String pattern, boolean splitByRegex) { return stringSplitRecord(pattern, -1, splitByRegex); } @@ -2628,7 +2689,10 @@ public final ColumnVector stringSplitRecord(String pattern, boolean splitByRegex * @return a LIST column of string elements. */ public final ColumnVector stringSplitRecord(String delimiter, int limit) { - return stringSplitRecord(delimiter, limit, false); + assert type.equals(DType.STRING) : "column type must be String"; + assert delimiter != null : "delimiter is null"; + assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; + return new ColumnVector(stringSplitRecord(this.getNativeView(), delimiter, limit)); } /** @@ -2639,7 +2703,19 @@ public final ColumnVector stringSplitRecord(String delimiter, int limit) { * @return a LIST column of string elements. */ public final ColumnVector stringSplitRecord(String delimiter) { - return stringSplitRecord(delimiter, -1, false); + return stringSplitRecord(delimiter, -1); + }
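The new overloads above take a compiled RegexProgram instead of a raw pattern plus a splitByRegex flag, so the regex-versus-literal decision is explicit at the call site and the program can be reused across calls. A minimal migration sketch, using only types introduced in this patch; the data is illustrative, and limit 0 and 1 stay unsupported because they cannot be mapped onto cudf's max_split parameter:

    try (ColumnVector v = ColumnVector.fromStrings("a_b c", "x_y z")) {
      // Deprecated form: v.stringSplit("[_ ]", 2, true)
      RegexProgram prog = new RegexProgram("[_ ]", CaptureGroups.NON_CAPTURE);
      try (Table twoCols = v.stringSplit(prog, 2);           // at most two output columns
           Table allCols = v.stringSplit(prog);              // limit -1: all possible splits
           ColumnVector lists = v.stringSplitRecord(prog)) { // one LIST column instead of a Table
        // use the results here
      }
    }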
+ + /** + * Returns a column of lists of strings in which each list is made by splitting the + * corresponding input string using the specified regex program pattern. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @return a LIST column of string elements. + */ + public final ColumnVector stringSplitRecord(RegexProgram regexProg) { + return stringSplitRecord(regexProg, -1); } /** @@ -2846,8 +2922,21 @@ public final ColumnVector stringReplace(Scalar target, Scalar replace) { * @param repl The string scalar to replace for each pattern match. * @return A new column vector containing the string results. */ + @Deprecated public final ColumnVector replaceRegex(String pattern, Scalar repl) { - return replaceRegex(pattern, repl, -1); + return replaceRegex(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), repl); + } + + /** + * For each string, replaces any character sequence matching the given regex program pattern + * using the replacement string scalar. + * + * @param regexProg The regex program with pattern to search within each string. + * @param repl The string scalar to replace for each pattern match. + * @return A new column vector containing the string results. + */ + public final ColumnVector replaceRegex(RegexProgram regexProg, Scalar repl) { + return replaceRegex(regexProg, repl, -1); } /** @@ -2859,12 +2948,27 @@ public final ColumnVector replaceRegex(String pattern, Scalar repl) { * @param maxRepl The maximum number of times a replacement should occur within each string. * @return A new column vector containing the string results. */ + @Deprecated public final ColumnVector replaceRegex(String pattern, Scalar repl, int maxRepl) { + return replaceRegex(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), repl, maxRepl); + } + + /** + * For each string, replaces any character sequence matching the given regex program pattern + * using the replacement string scalar. + * + * @param regexProg The regex program with pattern to search within each string. + * @param repl The string scalar to replace for each pattern match. + * @param maxRepl The maximum number of times a replacement should occur within each string. + * @return A new column vector containing the string results. + */ + public final ColumnVector replaceRegex(RegexProgram regexProg, Scalar repl, int maxRepl) { if (!repl.getType().equals(DType.STRING)) { throw new IllegalArgumentException("Replacement must be a string scalar"); } - return new ColumnVector(replaceRegex(getNativeView(), pattern, repl.getScalarHandle(), - maxRepl)); + assert regexProg != null : "regex program may not be null"; + return new ColumnVector(replaceRegex(getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, repl.getScalarHandle(), maxRepl)); }
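replaceRegex migrates the same way: the deprecated String overloads now wrap the pattern in a non-capturing RegexProgram, and maxRepl keeps its meaning (-1 replaces every match). A hedged sketch with illustrative data:

    try (ColumnVector v = ColumnVector.fromStrings("title and Title", null);
         Scalar repl = Scalar.fromString("Repl");
         ColumnVector all = v.replaceRegex(
             new RegexProgram("[tT]itle", CaptureGroups.NON_CAPTURE), repl);
         ColumnVector firstOnly = v.replaceRegex(
             new RegexProgram("[tT]itle", CaptureGroups.NON_CAPTURE), repl, 1)) {
      // all       -> ["Repl and Repl", null]
      // firstOnly -> ["Repl and Title", null]
    }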
/** @@ -2890,9 +2994,26 @@ public final ColumnVector replaceMultiRegex(String[] patterns, ColumnView repls) * @param replace The replacement template for creating the output string. * @return A new Java column vector containing the string results. */ + @Deprecated public final ColumnVector stringReplaceWithBackrefs(String pattern, String replace) { - return new ColumnVector(stringReplaceWithBackrefs(getNativeView(), pattern, - replace)); + return stringReplaceWithBackrefs(new RegexProgram(pattern), replace); + } + + /** + * For each string, replaces any character sequence matching the given regex program + * pattern using the replace template for back-references. + * + * Any null string entries return corresponding null output column entries. + * + * @param regexProg The regex program with pattern to search within each string. + * @param replace The replacement template for creating the output string. + * @return A new Java column vector containing the string results. + */ + public final ColumnVector stringReplaceWithBackrefs(RegexProgram regexProg, String replace) { + assert regexProg != null : "regex program may not be null"; + return new ColumnVector( + stringReplaceWithBackrefs(getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, replace)); } /** @@ -3252,10 +3373,28 @@ public final ColumnVector containsRe(RegexProgram regexProg) { * @throws CudfException if any error happens including if the RE does * not contain any capture groups. */ + @Deprecated public final Table extractRe(String pattern) throws CudfException { + return extractRe(new RegexProgram(pattern)); + } + + /** + * For each captured group specified in the given regex program + * return a column in the table. Null entries are added if the string + * does not match. Any null inputs also result in null output entries. + * + * For supported regex patterns refer to: + * @link https://docs.rapids.ai/api/libcudf/nightly/md_regex.html + * @param regexProg the regex program to use + * @return the table of extracted matches + * @throws CudfException if any error happens including if the regex + * program does not contain any capture groups. + */ + public final Table extractRe(RegexProgram regexProg) throws CudfException { assert type.equals(DType.STRING) : "column type must be a String"; - assert pattern != null : "pattern may not be null"; - return new Table(extractRe(this.getNativeView(), pattern)); + assert regexProg != null : "regex program may not be null"; + return new Table(extractRe(this.getNativeView(), regexProg.pattern(), + regexProg.combinedFlags(), regexProg.capture().nativeId)); } /** @@ -3940,36 +4079,64 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle private static native long substringLocate(long columnView, long substringScalar, int start, int end); /** - * Returns a list of columns by splitting each string using the specified pattern. The number of - * rows in the output columns will be the same as the input column. Null entries are added for a - * row where split results have been exhausted. Null input entries result in all nulls in the - * corresponding rows of the output columns. + * Returns a list of columns by splitting each string using the specified string literal + * delimiter. The number of rows in the output columns will be the same as the input column. + * Null entries are added for the rows where split results have been exhausted. Null input entries + * result in all nulls in the corresponding rows of the output columns. * * @param nativeHandle native handle of the input strings column that is being operated on. - * @param pattern UTF-8 encoded string identifying the split pattern for each input string. + * @param delimiter UTF-8 encoded string identifying the split delimiter for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + */ + private static native long[] stringSplit(long nativeHandle, String delimiter, int limit); + + /** + * Returns a list of columns by splitting each string using the specified regular expression + * pattern. The number of rows in the output columns will be the same as the input column. + * Null entries are added for the rows where split results have been exhausted.
Null input entries + * result in all nulls in the corresponding rows of the output columns. + * + * @param nativeHandle native handle of the input strings column that is being operated on. + * @param pattern UTF-8 encoded string identifying the split regular expression pattern for + * each input string. + * @param flags regex flags setting. + * @param capture capture groups setting. * @param limit the maximum size of the list resulting from splitting each input string, * or -1 for all possible splits. Note that limit = 0 (all possible splits without * trailing empty strings) and limit = 1 (no split at all) are not supported. - * @param splitByRegex a boolean flag indicating whether the input strings will be split by a - * regular expression pattern or just by a string literal delimiter. */ - private static native long[] stringSplit(long nativeHandle, String pattern, int limit, - boolean splitByRegex); + private static native long[] stringSplitRe(long nativeHandle, String pattern, int flags, + int capture, int limit); /** * Returns a column of lists of strings in which each list is made by splitting the * corresponding input string using the specified string literal delimiter. * * @param nativeHandle native handle of the input strings column that is being operated on. - * @param pattern UTF-8 encoded string identifying the split pattern for each input string. + * @param delimiter UTF-8 encoded string identifying the split delimiter for each input string. * @param limit the maximum size of the list resulting from splitting each input string, * or -1 for all possible splits. Note that limit = 0 (all possible splits without * trailing empty strings) and limit = 1 (no split at all) are not supported. - * @param splitByRegex a boolean flag indicating whether the input strings will be split by a - * regular expression pattern or just by a string literal delimiter. */ - private static native long stringSplitRecord(long nativeHandle, String pattern, int limit, - boolean splitByRegex); + private static native long stringSplitRecord(long nativeHandle, String delimiter, int limit); + + /** + * Returns a column of lists of strings in which each list is made by splitting the + * corresponding input string using the specified regular expression pattern. + * + * @param nativeHandle native handle of the input strings column that is being operated on. + * @param pattern UTF-8 encoded string identifying the split regular expression pattern for + * each input string. + * @param flags regex flags setting. + * @param capture capture groups setting. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + */ + private static native long stringSplitRecordRe(long nativeHandle, String pattern, int flags, + int capture, int limit); /** * Native method to calculate substring from a given string column. 0 indexing. @@ -4007,12 +4174,14 @@ private static native long substringColumn(long columnView, long startColumn, lo * Native method for replacing each regular expression pattern match with the specified * replacement string. * @param columnView native handle of the cudf::column_view being operated on. - * @param pattern The regular expression pattern to search within each string. + * @param pattern regular expression pattern to search within each string. + * @param flags regex flags setting.
+ * @param capture capture groups setting. * @param repl native handle of the cudf::scalar containing the replacement string. * @param maxRepl maximum number of times to replace the pattern within a string * @return native handle of the resulting cudf column containing the string results. */ - private static native long replaceRegex(long columnView, String pattern, + private static native long replaceRegex(long columnView, String pattern, int flags, int capture, long repl, long maxRepl) throws CudfException; /** @@ -4026,15 +4195,17 @@ private static native long replaceMultiRegex(long columnView, String[] patterns, long repls) throws CudfException; /** - * Native method for replacing any character sequence matching the given pattern - * using the replace template for back-references. + * Native method for replacing any character sequence matching the given regex program + * pattern using the replace template for back-references. * @param columnView native handle of the cudf::column_view being operated on. * @param pattern The regular expression pattern to search within each string. + * @param flags Regex flags setting. + * @param capture Capture groups setting. * @param replace The replacement template for creating the output string. * @return native handle of the resulting cudf column containing the string results. */ - private static native long stringReplaceWithBackrefs(long columnView, String pattern, - String replace) throws CudfException; + private static native long stringReplaceWithBackrefs(long columnView, String pattern, int flags, + int capture, String replace) throws CudfException; /** * Native method for checking if strings in a column start with a specified comparison string. @@ -4100,9 +4271,14 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat private static native long stringContains(long cudfViewHandle, long compString) throws CudfException; /** - * Native method for extracting results from an regular expressions. Returns a table handle. + * Native method for extracting results from a regex program pattern. Returns a table handle. + * + * @param cudfViewHandle Native handle of the cudf::column_view being operated on. + * @param pattern String regex pattern. + * @param flags Regex flags setting. + * @param capture Capture groups setting. */ - private static native long[] extractRe(long cudfViewHandle, String pattern) throws CudfException; + private static native long[] extractRe(long cudfViewHandle, String pattern, int flags, int capture) throws CudfException; /** * Native method for extracting all results corresponding to group idx from a regex program pattern. diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 3eed7e45eed..3ccab70ccda 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -194,6 +194,7 @@ private static native long[] hashPartition(long inputTable, int[] columnsToHash, int hashTypeId, int numberOfPartitions, + int seed, int[] outputOffsets) throws CudfException; private static native long[] roundRobinPartition(long inputTable, @@ -4253,12 +4254,27 @@ public PartitionedTable hashPartition(int numberOfPartitions) { * {@link Table} class */ public PartitionedTable hashPartition(HashType type, int numberOfPartitions) { + final int DEFAULT_HASH_SEED = 0; + return hashPartition(type, numberOfPartitions, DEFAULT_HASH_SEED); + } + + /** + * Hash partition a table into the specified number of partitions.
+ * @param type the type of hash to use. Depending on the type of hash, different restrictions + * on the hash column(s) may exist. Hash functions other than IDENTITY and + * MURMUR3 are not guaranteed to work. + * @param numberOfPartitions number of partitions to use + * @param seed the seed value for hashing + * @return Table that exposes a limited functionality of the {@link Table} class + */ + public PartitionedTable hashPartition(HashType type, int numberOfPartitions, int seed) { int[] partitionOffsets = new int[numberOfPartitions]; return new PartitionedTable(new Table(Table.hashPartition( operation.table.nativeHandle, operation.indices, type.nativeId, partitionOffsets.length, + seed, partitionOffsets)), partitionOffsets); } }
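The seed added here is forwarded to cudf::hash_partition in the native layer further below, so callers can pin or vary the murmur3 seed; seed 0 (DEFAULT_HASH_SEED) preserves the old behavior. A minimal sketch, assuming an existing Table named table keyed on column 0:

    // Partition on key column 0 into 4 partitions with an explicit seed.
    // The same keys and the same seed always yield the same row-to-partition
    // mapping; seed 0 matches the pre-existing two-argument overload.
    PartitionedTable parts = table.onColumns(0).hashPartition(HashType.MURMUR3, 4, 42);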
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index ff07a6786c1..c42cc430560 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -681,9 +681,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_reverseStringsOrLists(JNI JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv *env, jclass, jlong input_handle, - jstring pattern_obj, - jint limit, - jboolean split_by_regex) { + jstring delimiter_obj, + jint limit) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -697,21 +696,42 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv * try { cudf::jni::auto_set_device(env); - auto const input = reinterpret_cast<cudf::column_view *>(input_handle); - auto const strs_input = cudf::strings_column_view{*input}; + auto const input = reinterpret_cast<cudf::column_view const *>(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj); + auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes()); + auto const max_split = limit > 1 ? limit - 1 : limit; + auto result = cudf::strings::split(strings_column, cudf::string_scalar{delimiter}, max_split); + return cudf::jni::convert_table_for_return(env, std::move(result)); + } + CATCH_STD(env, 0); +} - auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); - if (pattern_jstr.is_empty()) { - // Java's split API produces different behaviors than cudf when splitting with empty - // pattern. - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Empty pattern is not supported", 0); - } +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRe( + JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags, + jint capture_groups, jint limit) { + JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); + + if (limit == 0 || limit == 1) { + // Cannot achieve the results of splitting with limit == 0 or limit == 1. + // This is because cudf operates on a different parameter (`max_split`) which is converted from + // limit. When limit == 0 or limit == 1, max_split will be non-positive and will result in an + // unlimited split. + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", + "limit == 0 and limit == 1 are not supported", 0); + } + try { + cudf::jni::auto_set_device(env); + auto const input = reinterpret_cast<cudf::column_view const *>(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; + auto const flags = static_cast<cudf::strings::regex_flags>(regex_flags); + auto const groups = static_cast<cudf::strings::capture_groups>(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); + auto result = cudf::strings::split_re(strings_column, *regex_prog, max_split); return cudf::jni::convert_table_for_return(env, std::move(result)); } CATCH_STD(env, 0); @@ -719,9 +739,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv *env, jclass, jlong input_handle, - jstring pattern_obj, - jint limit, - jboolean split_by_regex) { + jstring delimiter_obj, + jint limit) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -735,22 +754,43 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv try { cudf::jni::auto_set_device(env); - auto const input = reinterpret_cast<cudf::column_view *>(input_handle); - auto const strs_input = cudf::strings_column_view{*input}; + auto const input = reinterpret_cast<cudf::column_view const *>(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj); + auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes()); + auto const max_split = limit > 1 ? limit - 1 : limit; + auto result = + cudf::strings::split_record(strings_column, cudf::string_scalar{delimiter}, max_split); + return release_as_jlong(result); + } + CATCH_STD(env, 0); +} - auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); - if (pattern_jstr.is_empty()) { - // Java's split API produces different behaviors than cudf when splitting with empty - // pattern. - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Empty pattern is not supported", 0); - } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecordRe( + JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags, + jint capture_groups, jint limit) { + JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); + if (limit == 0 || limit == 1) { + // Cannot achieve the results of splitting with limit == 0 or limit == 1. + // This is because cudf operates on a different parameter (`max_split`) which is converted from + // limit. When limit == 0 or limit == 1, max_split will be non-positive and will result in an + // unlimited split. + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", + "limit == 0 and limit == 1 are not supported", 0); + } + + try { + cudf::jni::auto_set_device(env); + auto const input = reinterpret_cast<cudf::column_view const *>(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; - auto result = - split_by_regex ?
- cudf::strings::split_record_re(strs_input, pattern, max_split) : - cudf::strings::split_record(strs_input, cudf::string_scalar{pattern}, max_split); + auto const flags = static_cast<cudf::strings::regex_flags>(regex_flags); + auto const groups = static_cast<cudf::strings::capture_groups>(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); + auto result = cudf::strings::split_record_re(strings_column, *regex_prog, max_split); return release_as_jlong(result); } CATCH_STD(env, 0); @@ -1566,21 +1606,24 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapContains(JNIEnv *env, CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceRegex(JNIEnv *env, jclass, - jlong j_column_view, - jstring j_pattern, jlong j_repl, - jlong j_maxrepl) { +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceRegex( + JNIEnv *env, jclass, jlong j_column_view, jstring j_pattern, jint regex_flags, + jint capture_groups, jlong j_repl, jlong j_maxrepl) { JNI_NULL_CHECK(env, j_column_view, "column is null", 0); JNI_NULL_CHECK(env, j_pattern, "pattern string is null", 0); JNI_NULL_CHECK(env, j_repl, "replace scalar is null", 0); try { cudf::jni::auto_set_device(env); - auto cv = reinterpret_cast<cudf::column_view const *>(j_column_view); - cudf::strings_column_view scv(*cv); - cudf::jni::native_jstring pattern(env, j_pattern); - auto repl = reinterpret_cast<cudf::string_scalar const *>(j_repl); - return release_as_jlong(cudf::strings::replace_re(scv, pattern.get(), *repl, j_maxrepl)); + auto const cv = reinterpret_cast<cudf::column_view const *>(j_column_view); + auto const strings_column = cudf::strings_column_view{*cv}; + auto const pattern = cudf::jni::native_jstring(env, j_pattern); + auto const flags = static_cast<cudf::strings::regex_flags>(regex_flags); + auto const groups = static_cast<cudf::strings::capture_groups>(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern.get(), flags, groups); + auto const repl = reinterpret_cast<cudf::string_scalar const *>(j_repl); + return release_as_jlong( + cudf::strings::replace_re(strings_column, *regex_prog, *repl, j_maxrepl)); } CATCH_STD(env, 0); } @@ -1606,19 +1649,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceMultiRegex(JNIEnv } JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringReplaceWithBackrefs( - JNIEnv *env, jclass, jlong column_view, jstring patternObj, jstring replaceObj) { + JNIEnv *env, jclass, jlong j_column_view, jstring pattern_obj, jint regex_flags, + jint capture_groups, jstring replace_obj) { - JNI_NULL_CHECK(env, column_view, "column is null", 0); - JNI_NULL_CHECK(env, patternObj, "pattern string is null", 0); - JNI_NULL_CHECK(env, replaceObj, "replace string is null", 0); + JNI_NULL_CHECK(env, j_column_view, "column is null", 0); + JNI_NULL_CHECK(env, pattern_obj, "pattern string is null", 0); + JNI_NULL_CHECK(env, replace_obj, "replace string is null", 0); try { cudf::jni::auto_set_device(env); - cudf::column_view *cv = reinterpret_cast<cudf::column_view *>(column_view); - cudf::strings_column_view scv(*cv); - cudf::jni::native_jstring ss_pattern(env, patternObj); - cudf::jni::native_jstring ss_replace(env, replaceObj); + auto const cv = reinterpret_cast<cudf::column_view const *>(j_column_view); + auto const strings_column = cudf::strings_column_view{*cv}; + auto const pattern = cudf::jni::native_jstring(env, pattern_obj); + auto const flags = static_cast<cudf::strings::regex_flags>(regex_flags); + auto const groups = static_cast<cudf::strings::capture_groups>(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern.get(), flags, groups); + cudf::jni::native_jstring ss_replace(env, replace_obj); return release_as_jlong( - cudf::strings::replace_with_backrefs(scv, ss_pattern.get(), ss_replace.get())); + cudf::strings::replace_with_backrefs(strings_column, *regex_prog, ss_replace.get())); } CATCH_STD(env, 0); }
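Back-reference replacement and group extraction complete the RegexProgram migration; capture groups matter for both, so the programs below use the capturing default rather than CaptureGroups.NON_CAPTURE (the matching extractRe native is in the next hunk). A sketch with illustrative data:

    try (ColumnVector v = ColumnVector.fromStrings("2020-1-01", null);
         // \1 in the template refers to the program's first capture group.
         ColumnVector padded = v.stringReplaceWithBackrefs(
             new RegexProgram("-([0-9])-"), "-0\\1-");
         // extractRe returns one output column per capture group.
         Table parts = v.extractRe(new RegexProgram("(\\d+)-(\\d+)-(\\d+)"))) {
      // padded -> ["2020-01-01", null]; parts -> columns "2020" | "1" | "01"
    }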
@@ -1674,18 +1721,22 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringStrip(JNIEnv *env, JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_extractRe(JNIEnv *env, jclass, jlong j_view_handle, - jstring patternObj) { + jstring pattern_obj, + jint regex_flags, + jint capture_groups) { JNI_NULL_CHECK(env, j_view_handle, "column is null", nullptr); - JNI_NULL_CHECK(env, patternObj, "pattern is null", nullptr); + JNI_NULL_CHECK(env, pattern_obj, "pattern is null", nullptr); try { cudf::jni::auto_set_device(env); - cudf::strings_column_view const strings_column{ - *reinterpret_cast<cudf::column_view *>(j_view_handle)}; - cudf::jni::native_jstring pattern(env, patternObj); - - return cudf::jni::convert_table_for_return( - env, cudf::strings::extract(strings_column, pattern.get())); + auto const column_view = reinterpret_cast<cudf::column_view const *>(j_view_handle); + auto const strings_column = cudf::strings_column_view{*column_view}; + auto const pattern = cudf::jni::native_jstring(env, pattern_obj); + auto const flags = static_cast<cudf::strings::regex_flags>(regex_flags); + auto const groups = static_cast<cudf::strings::capture_groups>(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern.get(), flags, groups); + return cudf::jni::convert_table_for_return(env, cudf::strings::extract(strings_column, *regex_prog)); } CATCH_STD(env, 0); } diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 3d730ff61a1..ddcc8644a9c 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1334,8 +1334,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readAndInferJSON( cudf::io::json_reader_options_builder opts = cudf::io::json_reader_options::builder(source) .dayfirst(static_cast<bool>(day_first)) - .lines(static_cast<bool>(lines)) - .legacy(true); + .lines(static_cast<bool>(lines)); auto result = std::make_unique<cudf::io::table_with_metadata>(cudf::io::read_json(opts.build())); @@ -1441,8 +1440,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readJSON( cudf::io::json_reader_options_builder opts = cudf::io::json_reader_options::builder(source) .dayfirst(static_cast<bool>(day_first)) - .lines(static_cast<bool>(lines)) - .legacy(true); + .lines(static_cast<bool>(lines)); if (!n_col_names.is_null() && data_types.size() > 0) { if (n_col_names.size() != n_types.size()) { @@ -2655,7 +2653,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_partition(JNIEnv *env, jc JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition( JNIEnv *env, jclass, jlong input_table, jintArray columns_to_hash, jint hash_function, - jint number_of_partitions, jintArray output_offsets) { + jint number_of_partitions, jint seed, jintArray output_offsets) { JNI_NULL_CHECK(env, input_table, "input table is null", NULL); JNI_NULL_CHECK(env, columns_to_hash, "columns_to_hash is null", NULL); @@ -2665,6 +2663,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition( try { cudf::jni::auto_set_device(env); auto const hash_func = static_cast<cudf::hash_id>(hash_function); + auto const hash_seed = static_cast<uint32_t>(seed); auto const n_input_table = reinterpret_cast<cudf::table_view const *>(input_table); cudf::jni::native_jintArray n_columns_to_hash(env, columns_to_hash); JNI_ARG_CHECK(env, n_columns_to_hash.size() > 0, "columns_to_hash is zero", NULL); @@ -2672,8 +2671,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition( std::vector<cudf::size_type> columns_to_hash_vec(n_columns_to_hash.begin(), n_columns_to_hash.end()); - auto
[partitioned_table, partition_offsets] = - cudf::hash_partition(*n_input_table, columns_to_hash_vec, number_of_partitions, hash_func); + auto [partitioned_table, partition_offsets] = cudf::hash_partition( + *n_input_table, columns_to_hash_vec, number_of_partitions, hash_func, hash_seed); cudf::jni::native_jintArray n_output_offsets(env, output_offsets); std::copy(partition_offsets.begin(), partition_offsets.end(), n_output_offsets.begin()); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 26817281c2e..db64dcb08c7 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4040,14 +4040,18 @@ void testStringFindOperations() { @Test void testExtractRe() { - try (ColumnVector input = ColumnVector.fromStrings("a1", "b2", "c3", null); - Table expected = new Table.TestBuilder() - .column("a", "b", null, null) - .column("1", "2", null, null) - .build(); - Table found = input.extractRe("([ab])(\\d)")) { - assertTablesAreEqual(expected, found); + try (ColumnVector input = ColumnVector.fromStrings("a1", "b2", "c3", null); + Table expected = new Table.TestBuilder() + .column("a", "b", null, null) + .column("1", "2", null, null) + .build()) { + try (Table found = input.extractRe("([ab])(\\d)")) { + assertTablesAreEqual(expected, found); } + try (Table found = input.extractRe(new RegexProgram("([ab])(\\d)"))) { + assertTablesAreEqual(expected, found); + } + } } @Test @@ -4986,28 +4990,29 @@ void testReverseList() { void testStringSplit() { String pattern = " "; try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", - "ARé some things", "test strings here"); + "ARé some things", "test strings here"); Table expectedSplitLimit2 = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there all", null, null, null, "some things", "strings here") - .build(); + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there all", null, null, null, "some things", "strings here") + .build(); Table expectedSplitAll = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there", null, null, null, "some", "strings") - .column("all", null, null, null, "things", "here") - .build(); + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there", null, null, null, "some", "strings") + .column("all", null, null, null, "things", "here") + .build(); Table resultSplitLimit2 = v.stringSplit(pattern, 2); Table resultSplitAll = v.stringSplit(pattern)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); } } @Test void testStringSplitByRegularExpression() { String pattern = "[_ ]"; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", - "ARé some_things", "test_strings_here"); + "ARé some_things", "test_strings_here"); Table expectedSplitLimit2 = new Table.TestBuilder() .column("Héllo", "thésé", null, "", "ARé", "test") .column("there all", null, null, null, "some_things", "strings_here") @@ -5016,11 +5021,17 @@ void testStringSplitByRegularExpression() { .column("Héllo", "thésé", null, "", "ARé", "test") .column("there", null, null, null, "some", "strings") 
.column("all", null, null, null, "things", "here") - .build(); - Table resultSplitLimit2 = v.stringSplit(pattern, 2, true); - Table resultSplitAll = v.stringSplit(pattern, true)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); + .build()) { + try (Table resultSplitLimit2 = v.stringSplit(pattern, 2, true); + Table resultSplitAll = v.stringSplit(pattern, true)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } + try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2); + Table resultSplitAll = v.stringSplit(regexProg)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } } } @@ -5028,7 +5039,7 @@ void testStringSplitByRegularExpression() { void testStringSplitRecord() { String pattern = " "; try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", - "ARé some things", "test strings here"); + "ARé some things", "test strings here"); ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( new HostColumnVector.ListType(true, new HostColumnVector.BasicType(true, DType.STRING)), @@ -5057,8 +5068,9 @@ void testStringSplitRecord() { @Test void testStringSplitRecordByRegularExpression() { String pattern = "[_ ]"; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", - "ARé some_things", "test_strings_here"); + "ARé some_things", "test_strings_here"); ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( new HostColumnVector.ListType(true, new HostColumnVector.BasicType(true, DType.STRING)), @@ -5076,11 +5088,17 @@ void testStringSplitRecordByRegularExpression() { null, Arrays.asList(""), Arrays.asList("ARé", "some", "things"), - Arrays.asList("test", "strings", "here")); - ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true); - ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) { - assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + Arrays.asList("test", "strings", "here"))) { + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true); + ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(regexProg, 2); + ColumnVector resultSplitAll = v.stringSplitRecord(regexProg)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } } } @@ -5129,29 +5147,42 @@ void teststringReplaceThrowsException() { @Test void testReplaceRegex() { - try (ColumnVector v = - ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); - Scalar repl = Scalar.fromString("Repl"); - ColumnVector actual = v.replaceRegex("[tT]itle", repl); - ColumnVector expected = - ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { - assertColumnsAreEqual(expected, actual); - } + try (ColumnVector v = ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); + Scalar repl = Scalar.fromString("Repl")) { + String pattern = "[tT]itle"; + RegexProgram regexProg = new RegexProgram(pattern, 
CaptureGroups.NON_CAPTURE); + + try (ColumnVector actual = v.replaceRegex(pattern, repl); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } - try (ColumnVector v = - ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); - Scalar repl = Scalar.fromString("Repl"); - ColumnVector actual = v.replaceRegex("[tT]itle", repl, 0)) { - assertColumnsAreEqual(v, actual); - } + try (ColumnVector actual = v.replaceRegex(pattern, repl, 0)) { + assertColumnsAreEqual(v, actual); + } - try (ColumnVector v = - ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); - Scalar repl = Scalar.fromString("Repl"); - ColumnVector actual = v.replaceRegex("[tT]itle", repl, 1); - ColumnVector expected = - ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { - assertColumnsAreEqual(expected, actual); + try (ColumnVector actual = v.replaceRegex(pattern, repl, 1); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } + + try (ColumnVector actual = v.replaceRegex(regexProg, repl); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } + + try (ColumnVector actual = v.replaceRegex(regexProg, repl, 0)) { + assertColumnsAreEqual(v, actual); + } + + try (ColumnVector actual = v.replaceRegex(regexProg, repl, 1); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } } } @@ -5170,45 +5201,55 @@ void testReplaceMultiRegex() { @Test void testStringReplaceWithBackrefs() { - try (ColumnVector v = ColumnVector.fromStrings("<h1>title</h1>", "<h1>another title</h1>", - null); + try (ColumnVector v = ColumnVector.fromStrings("<h1>title</h1>", "<h1>another title</h1>", null); ColumnVector expected = ColumnVector.fromStrings("<h2>title</h2>", "<h2>another title</h2>", null); - ColumnVector actual = v.stringReplaceWithBackrefs("<h1>(.*)</h1>", "<h2>\\1</h2>")) { + ColumnVector actual = v.stringReplaceWithBackrefs("<h1>(.*)</h1>", "<h2>\\1</h2>"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("<h1>(.*)</h1>"), "<h2>\\1</h2>
")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } try (ColumnVector v = ColumnVector.fromStrings("2020-1-01", "2020-2-02", null); ColumnVector expected = ColumnVector.fromStrings("2020-01-01", "2020-02-02", null); - ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])-", "-0\\1-")) { + ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])-", "-0\\1-"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("-([0-9])-"), "-0\\1-")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } - try (ColumnVector v = ColumnVector.fromStrings("2020-01-1", "2020-02-2", - "2020-03-3invalid", null); + try (ColumnVector v = ColumnVector.fromStrings("2020-01-1", "2020-02-2", "2020-03-3invalid", null); ColumnVector expected = ColumnVector.fromStrings("2020-01-01", "2020-02-02", "2020-03-3invalid", null); - ColumnVector actual = v.stringReplaceWithBackrefs( - "-([0-9])$", "-0\\1")) { + ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])$", "-0\\1"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("-([0-9])$"), "-0\\1")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } try (ColumnVector v = ColumnVector.fromStrings("2020-01-1 random_text", "2020-02-2T12:34:56", - "2020-03-3invalid", null); + "2020-03-3invalid", null); ColumnVector expected = ColumnVector.fromStrings("2020-01-01 random_text", "2020-02-02T12:34:56", "2020-03-3invalid", null); - ColumnVector actual = v.stringReplaceWithBackrefs( - "-([0-9])([ T])", "-0\\1\\2")) { + ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])([ T])", "-0\\1\\2"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("-([0-9])([ T])"), "-0\\1\\2")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } // test zero as group index try (ColumnVector v = ColumnVector.fromStrings("aa-11 b2b-345", "aa-11a 1c-2b2 b2-c3", "11-aa", null); ColumnVector expected = ColumnVector.fromStrings("aa-11:aa:11; b2b-345:b:345;", "aa-11:aa:11;a 1c-2:c:2;b2 b2-c3", "11-aa", null); - ColumnVector actual = v.stringReplaceWithBackrefs( - "([a-z]+)-([0-9]+)", "${0}:${1}:${2};")) { + ColumnVector actual = v.stringReplaceWithBackrefs("([a-z]+)-([0-9]+)", "${0}:${1}:${2};"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("([a-z]+)-([0-9]+)"), "${0}:${1}:${2};")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } // group index exceeds group count @@ -5218,6 +5259,13 @@ void testStringReplaceWithBackrefs() { } }); + // group index exceeds group count + assertThrows(CudfException.class, () -> { + try (ColumnVector v = ColumnVector.fromStrings("ABC123defgh"); + ColumnVector r = + v.stringReplaceWithBackrefs(new RegexProgram("([A-Z]+)([0-9]+)([a-z]+)"), "\\4")) { + } + }); } @Test diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 1656d871c2d..4f00bc7493d 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -335,22 +335,38 @@ void testReadJSONBufferInferred() { JSONOptions opts = JSONOptions.builder() .withDayFirst(true) .build(); - byte[] data = ("[false,A,1,2,05/03/2001]\n" + - "[true,B,2,3,31/10/2010]'\n" + - "[false,C,3,4,20/10/1994]\n" + - "[true,D,4,5,18/10/1990]").getBytes(StandardCharsets.UTF_8); + byte[] data = ("[false,A,1,2]\n" + + "[true,B,2,3]\n" + + 
"[false,C,3,4]\n" + + "[true,D,4,5]").getBytes(StandardCharsets.UTF_8); try (Table expected = new Table.TestBuilder() .column(false, true, false, true) .column("A", "B", "C", "D") .column(1L, 2L, 3L, 4L) .column(2L, 3L, 4L, 5L) - .timestampMillisecondsColumn(983750400000L, 1288483200000L, 782611200000L, 656208000000L) .build(); Table table = Table.readJSON(Schema.INFERRED, opts, data)) { assertTablesAreEqual(expected, table); } } + @Test + void testReadJSONSubColumns() { + // JSON file has 2 columns, here only read 1 column + Schema schema = Schema.builder() + .column(DType.INT32, "age") + .build(); + JSONOptions opts = JSONOptions.builder() + .withLines(true) + .build(); + try (Table expected = new Table.TestBuilder() + .column(null, 30, 19) + .build(); + Table table = Table.readJSON(schema, opts, TEST_SIMPLE_JSON_FILE)) { + assertTablesAreEqual(expected, table); + } + } + @Test void testReadJSONBuffer() { // JSON reader will set the column according to the iterator if can't infer the name @@ -363,7 +379,7 @@ void testReadJSONBuffer() { JSONOptions opts = JSONOptions.builder() .build(); byte[] data = ("[A,1,2]\n" + - "[B,2,3]'\n" + + "[B,2,3]\n" + "[C,3,4]\n" + "[D,4,5]").getBytes(StandardCharsets.UTF_8); try (Table expected = new Table.TestBuilder() @@ -389,7 +405,7 @@ void testReadJSONBufferWithOffset() { .build(); int bytesToIgnore = 8; byte[] data = ("[A,1,2]\n" + - "[B,2,3]'\n" + + "[B,2,3]\n" + "[C,3,4]\n" + "[D,4,5]").getBytes(StandardCharsets.UTF_8); try (Table expected = new Table.TestBuilder() diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 5223bc0a5c7..7457b770b13 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -89,6 +89,7 @@ if(NOT cudf_FOUND) # We don't build C++ tests when building wheels, so we can also omit the test util and shrink # the wheel by avoiding embedding GTest. set(CUDF_BUILD_TESTUTIL OFF) + set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) # Statically link cudart if building wheels set(CUDA_STATIC_RUNTIME ON) diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index a40ba7862b2..2339b874ea0 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -156,7 +156,7 @@ def write_json( bool include_nulls=True, bool lines=False, bool index=False, - int rows_per_chunk=8, + int rows_per_chunk=1024*256, # 256K rows ): """ Cython function to call into libcudf API, see `write_json`. diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 56436ac141d..0c546168fe3 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. 
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 56436ac141d..0c546168fe3 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.

 from __future__ import annotations

@@ -261,6 +261,11 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike:
                 return cudf.Scalar(None, dtype=other.dtype)

             return cudf.Scalar(other)
+        elif isinstance(other, str):
+            try:
+                return cudf.Scalar(other, dtype=self.dtype)
+            except ValueError:
+                pass

         return NotImplemented
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 9c30585a541..ce8bc3da08b 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5665,7 +5665,7 @@ def normalize_binop_value(
             and other.dtype == "object"
         ):
             return other
-        if isinstance(other, str):
+        if is_scalar(other):
             return cudf.Scalar(other)
         return NotImplemented

@@ -5701,6 +5701,17 @@ def _binaryop(
             return NotImplemented

         if isinstance(other, (StringColumn, str, cudf.Scalar)):
+            if isinstance(other, cudf.Scalar) and other.dtype != "O":
+                if op in {
+                    "__eq__",
+                    "__ne__",
+                }:
+                    return column.full(
+                        len(self), op == "__ne__", dtype="bool"
+                    ).set_mask(self.mask)
+                else:
+                    return NotImplemented
+
             if op == "__add__":
                 if isinstance(other, cudf.Scalar):
                     other = cast(
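The `_binaryop` branch above defines what a string column compared against a non-string scalar returns: `==` is all-False and `!=` all-True instead of an error, with the column's null mask re-applied so nulls stay null, while other comparisons still fall through to `NotImplemented`. A rough behavioral sketch (hedged; exact null propagation follows the `set_mask` call above):

    import cudf

    s = cudf.Series(["a", "b", None])

    # Strings never equal a numeric scalar, but the comparison is no
    # longer an error, and the null element stays null.
    print((s == 1).to_pandas(nullable=True).tolist())    # [False, False, <NA>]
    print((s != 1.5).to_pandas(nullable=True).tolist())  # [True, True, <NA>]

    # Ordering comparisons against non-strings remain unsupported.
    try:
        s < 1
    except TypeError as err:
        print("unsupported:", err)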
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 1ebf59ba6e4..535fe2352aa 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -4614,10 +4614,13 @@ def partition_by_hash(self, columns, nparts, keep_index=True):
             self._column_names,
             self._index_names if keep_index else None,
         )
-        # Slice into partition
-        ret = [outdf[s:e] for s, e in zip(offsets, offsets[1:] + [None])]
-        if not keep_index:
-            ret = [df.reset_index(drop=True) for df in ret]
+        # Slice into partitions. Notice, `hash_partition` returns the start
+        # offset of each partition thus we skip the first offset
+        ret = outdf._split(offsets[1:], keep_index=keep_index)
+
+        # Calling `_split()` on an empty dataframe returns an empty list
+        # so we add empty partitions here
+        ret += [self._empty_like(keep_index) for _ in range(nparts - len(ret))]
         return ret

     def info(
diff --git a/python/cudf/cudf/core/groupby/__init__.py b/python/cudf/cudf/core/groupby/__init__.py
index bb21dd1729d..4375ed3e3da 100644
--- a/python/cudf/cudf/core/groupby/__init__.py
+++ b/python/cudf/cudf/core/groupby/__init__.py
@@ -1,3 +1,8 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.

 from cudf.core.groupby.groupby import GroupBy, Grouper
+
+__all__ = [
+    "GroupBy",
+    "Grouper",
+]
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 91e00eb43f3..8ff3e17d6ff 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -111,7 +111,7 @@ def _quantile_75(x):
     ...     'Max Speed': [380., 370., 24., 26.],
     ... }})
     >>> df
-      Animal Max Speed
+       Animal  Max Speed
     0  Falcon      380.0
     1  Falcon      370.0
     2  Parrot       24.0
     3  Parrot       26.0
@@ -275,6 +275,37 @@ def __iter__(self):
         for i, name in enumerate(group_names):
             yield name, grouped_values[offsets[i] : offsets[i + 1]]

+    @property
+    def dtypes(self):
+        """
+        Return the dtypes in this group.
+
+        Returns
+        -------
+        pandas.DataFrame
+            The data type of each column of the group.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> df = cudf.DataFrame({'a': [1, 2, 3, 3], 'b': ['x', 'y', 'z', 'a'],
+        ...                      'c':[10, 11, 12, 12]})
+        >>> df.groupby("a").dtypes
+                b      c
+        a
+        1  object  int64
+        2  object  int64
+        3  object  int64
+        """
+        index = self.grouping.keys.unique().to_pandas()
+        return pd.DataFrame(
+            {
+                name: [self.obj._dtypes[name]] * len(index)
+                for name in self.grouping.values._column_names
+            },
+            index=index,
+        )
+
     @cached_property
     def groups(self):
         """
@@ -420,8 +451,11 @@ def agg(self, func):
         Examples
         --------
         >>> import cudf
-        >>> a = cudf.DataFrame(
-            {'a': [1, 1, 2], 'b': [1, 2, 3], 'c': [2, 2, 1]})
+        >>> a = cudf.DataFrame({
+        ...     'a': [1, 1, 2],
+        ...     'b': [1, 2, 3],
+        ...     'c': [2, 2, 1]
+        ... })
         >>> a.groupby('a').agg('sum')
            b  c
         a
@@ -430,6 +464,12 @@ def agg(self, func):

         Specifying a list of aggregations to perform on each column.

+        >>> import cudf
+        >>> a = cudf.DataFrame({
+        ...     'a': [1, 1, 2],
+        ...     'b': [1, 2, 3],
+        ...     'c': [2, 2, 1]
+        ... })
         >>> a.groupby('a').agg(['sum', 'min'])
             b        c
           sum min  sum min
@@ -439,6 +479,12 @@ def agg(self, func):

         Using a dict to specify aggregations to perform per column.

+        >>> import cudf
+        >>> a = cudf.DataFrame({
+        ...     'a': [1, 1, 2],
+        ...     'b': [1, 2, 3],
+        ...     'c': [2, 2, 1]
+        ... })
         >>> a.groupby('a').agg({'a': 'max', 'b': ['min', 'mean']})
             a    b
           max  min mean
@@ -448,6 +494,12 @@ def agg(self, func):

         Using lambdas/callables to specify aggregations taking parameters.

+        >>> import cudf
+        >>> a = cudf.DataFrame({
+        ...     'a': [1, 1, 2],
+        ...     'b': [1, 2, 3],
+        ...     'c': [2, 2, 1]
+        ... })
         >>> f1 = lambda x: x.quantile(0.5); f1.__name__ = "q0.5"
         >>> f2 = lambda x: x.quantile(0.75); f2.__name__ = "q0.75"
         >>> a.groupby('a').agg([f1, f2])
@@ -905,6 +957,7 @@ def mult(df):

         .. code-block::

+          >>> import pandas as pd
           >>> df = pd.DataFrame({
           ...     'a': [1, 1, 2, 2],
           ...     'b': [1, 2, 1, 2],
@@ -1218,10 +1271,12 @@ def describe(self, include=None, exclude=None):
         Examples
         --------
         >>> import cudf
-        >>> gdf = cudf.DataFrame({"Speed": [380.0, 370.0, 24.0, 26.0],
-                                  "Score": [50, 30, 90, 80]})
+        >>> gdf = cudf.DataFrame({
+        ...     "Speed": [380.0, 370.0, 24.0, 26.0],
+        ...     "Score": [50, 30, 90, 80],
+        ... })
         >>> gdf
-          Speed  Score
+           Speed  Score
         0  380.0     50
         1  370.0     30
         2   24.0     90
@@ -1290,7 +1345,7 @@ def corr(self, method="pearson", min_periods=1):
         ...                     "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1],
         ...                     "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1]})
         >>> gdf
-          id  val1  val2  val3
+           id  val1  val2  val3
         0  a     5     4     4
         1  a     4     5     5
         2  a     6     6     6
@@ -1652,28 +1707,6 @@ def fillna(
         Returns
         -------
         DataFrame or Series
-
-        .. pandas-compat::
-            **groupby.fillna**
-
-            This function may return result in different format to the method
-            Pandas supports. For example:
-
-            .. code-block::
-
-                >>> df = pd.DataFrame({'k': [1, 1, 2], 'v': [2, None, 4]})
-                >>> gdf = cudf.from_pandas(df)
-                >>> df.groupby('k').fillna({'v': 4})  # pandas
-                       v
-                k
-                1 0  2.0
-                  1  4.0
-                2 2  4.0
-                >>> gdf.groupby('k').fillna({'v': 4})  # cudf
-                     v
-                0  2.0
-                1  4.0
-                2  4.0
         """
         if inplace:
             raise NotImplementedError("Does not support inplace yet.")
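One step worth spelling out in the `partition_by_hash` rewrite above: libcudf's `hash_partition` returns the start offset of every partition, including the leading 0, while `_split` wants the cut points between partitions — hence `offsets[1:]`. A plain-Python sketch of the same arithmetic, with hypothetical offsets:

    # Hypothetical hash_partition output for nparts=3 over 8 rows:
    rows = list(range(8))
    offsets = [0, 3, 5]  # start offset of each partition

    # Dropping the leading 0 turns start offsets into split points.
    cuts = offsets[1:]
    parts, prev = [], 0
    for cut in cuts + [len(rows)]:
        parts.append(rows[prev:cut])
        prev = cut

    assert parts == [[0, 1, 2], [3, 4], [5, 6, 7]]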
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index e5ade1326c9..7d01f89eada 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.

 import decimal
 import operator
@@ -320,29 +320,68 @@ def test_series_compare_nulls(cmpop, dtypes):
     utils.assert_eq(expect, got)


-@pytest.mark.parametrize(
-    "obj", [pd.Series(["a", "b", None, "d", "e", None], dtype="string"), "a"]
-)
-@pytest.mark.parametrize("cmpop", _cmpops)
-@pytest.mark.parametrize(
-    "cmp_obj",
-    [pd.Series(["b", "a", None, "d", "f", None], dtype="string"), "a"],
-)
-def test_string_series_compare(obj, cmpop, cmp_obj):
+@pytest.fixture
+def str_series_cmp_data():
+    return pd.Series(["a", "b", None, "d", "e", None], dtype="string")

-    g_obj = obj
-    if isinstance(g_obj, pd.Series):
-        g_obj = Series.from_pandas(g_obj)
-    g_cmp_obj = cmp_obj
-    if isinstance(g_cmp_obj, pd.Series):
-        g_cmp_obj = Series.from_pandas(g_cmp_obj)
-    got = cmpop(g_obj, g_cmp_obj)
-    expected = cmpop(obj, cmp_obj)
+@pytest.fixture(ids=[op.__name__ for op in _cmpops], params=_cmpops)
+def str_series_compare_str_cmpop(request):
+    return request.param

-    if isinstance(expected, pd.Series):
-        expected = cudf.from_pandas(expected)
+
-    utils.assert_eq(expected, got)
+@pytest.fixture(ids=["eq", "ne"], params=[operator.eq, operator.ne])
+def str_series_compare_num_cmpop(request):
+    return request.param
+
+
+@pytest.fixture(ids=["int", "float", "bool"], params=[1, 1.5, True])
+def cmp_scalar(request):
+    return request.param
+
+
+def test_str_series_compare_str(
+    str_series_cmp_data, str_series_compare_str_cmpop
+):
+    expect = str_series_compare_str_cmpop(str_series_cmp_data, "a")
+    got = str_series_compare_str_cmpop(
+        Series.from_pandas(str_series_cmp_data), "a"
+    )
+
+    utils.assert_eq(expect, got.to_pandas(nullable=True))
+
+
+def test_str_series_compare_str_reflected(
+    str_series_cmp_data, str_series_compare_str_cmpop
+):
+    expect = str_series_compare_str_cmpop("a", str_series_cmp_data)
+    got = str_series_compare_str_cmpop(
+        "a", Series.from_pandas(str_series_cmp_data)
+    )
+
+    utils.assert_eq(expect, got.to_pandas(nullable=True))
+
+
+def test_str_series_compare_num(
+    str_series_cmp_data, str_series_compare_num_cmpop, cmp_scalar
+):
+    expect = str_series_compare_num_cmpop(str_series_cmp_data, cmp_scalar)
+    got = str_series_compare_num_cmpop(
+        Series.from_pandas(str_series_cmp_data), cmp_scalar
+    )
+
+    utils.assert_eq(expect, got.to_pandas(nullable=True))
+
+
+def test_str_series_compare_num_reflected(
+    str_series_cmp_data, str_series_compare_num_cmpop, cmp_scalar
+):
+    expect = str_series_compare_num_cmpop(cmp_scalar, str_series_cmp_data)
+    got = str_series_compare_num_cmpop(
+        cmp_scalar, Series.from_pandas(str_series_cmp_data)
+    )
+
+    utils.assert_eq(expect, got.to_pandas(nullable=True))


 @pytest.mark.parametrize("obj_class", ["Series", "Index"])
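The `test_datetime_series_cmpops_with_scalars` test below is the user-visible side of the `normalize_binop_value` change to datetime columns earlier in this diff: a date-like string scalar is parsed to the column's dtype before the comparison runs. A short sketch (assuming the compact `%Y%m%d` form used by the tests parses cleanly):

    import cudf
    import pandas as pd

    psr = pd.Series(pd.to_datetime(["20110101", "20120101", "20130101"]))
    gsr = cudf.from_pandas(psr)

    # The string is promoted to a datetime64 scalar, then compared on the GPU.
    print((gsr > "20120101").values_host)   # [False False  True]
    print((gsr == "20120101").values_host)  # [False  True False]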
test_datetime_series_cmpops_with_scalars(data, other_scalars, dtype, op):
+    gsr = cudf.Series(data=data, dtype=dtype)
+    psr = gsr.to_pandas()
+
+    expect = op(psr, other_scalars)
+    got = op(gsr, other_scalars)
+
+    assert_eq(expect, got)
+
+
 @pytest.mark.parametrize(
     "data",
     [
diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index dbb5c548166..0da5c6b04d6 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -1,8 +1,9 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 import contextlib
 import doctest
 import inspect
 import io
+import itertools
 import os

 import numpy as np
@@ -12,6 +13,9 @@

 pytestmark = pytest.mark.filterwarnings("ignore::FutureWarning")

+# modules that will be searched for doctests
+tests = [cudf, cudf.core.groupby]
+

 def _name_in_all(parent, name):
     return name in getattr(parent, "__all__", [])
@@ -78,7 +82,7 @@ def chdir_to_tmp_path(cls, tmp_path):

     @pytest.mark.parametrize(
         "docstring",
-        _find_doctests_in_obj(cudf),
+        itertools.chain(*[_find_doctests_in_obj(mod) for mod in tests]),
         ids=lambda docstring: docstring.name,
     )
     def test_docstring(self, docstring):
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index c5b330fd89c..97700779a89 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -519,6 +519,23 @@ def test_groupby_apply_jit_args(func, args, groupby_jit_data):
     run_groupby_apply_jit_test(groupby_jit_data, func, ["key1", "key2"], *args)


+def test_groupby_apply_jit_block_divergence():
+    # https://github.com/rapidsai/cudf/issues/12686
+    df = cudf.DataFrame(
+        {
+            "a": [0, 0, 0, 1, 1, 1],
+            "b": [1, 1, 1, 2, 3, 4],
+        }
+    )
+
+    def diverging_block(grp_df):
+        if grp_df["a"].mean() > 0:
+            return grp_df["b"].mean()
+        return 0
+
+    run_groupby_apply_jit_test(df, diverging_block, ["a"])
+
+
 @pytest.mark.parametrize("nelem", [2, 3, 100, 500, 1000])
 @pytest.mark.parametrize(
     "func",
@@ -2943,3 +2960,15 @@ def test_groupby_ngroup(by, ascending, df_ngroup):
     expected = df_ngroup.to_pandas().groupby(by).ngroup(ascending=ascending)
     actual = df_ngroup.groupby(by).ngroup(ascending=ascending)
     assert_eq(expected, actual, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    "groups", ["a", "b", "c", ["a", "c"], ["a", "b", "c"]]
+)
+def test_groupby_dtypes(groups):
+    df = cudf.DataFrame(
+        {"a": [1, 2, 3, 3], "b": ["x", "y", "z", "a"], "c": [10, 11, 12, 12]}
+    )
+    pdf = df.to_pandas()
+
+    assert_eq(pdf.groupby(groups).dtypes, df.groupby(groups).dtypes)
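`test_groupby_apply_jit_block_divergence` above distills issue 12686: with the JIT groupby engine each thread block evaluates one group, so a UDF branch taken by some groups and not others must not leave threads stranded at a block-wide barrier (see the `function.cu` change further down). End to end, the same UDF runs via the JIT opt-in — hedged sketch, assuming `engine="jit"` is the spelling this cudf series accepts in `GroupBy.apply`:

    import cudf

    df = cudf.DataFrame({"a": [0, 0, 0, 1, 1, 1], "b": [1, 1, 1, 2, 3, 4]})

    def diverging_block(grp_df):
        # Only groups with a positive key mean take the reduction branch.
        if grp_df["a"].mean() > 0:
            return grp_df["b"].mean()
        return 0

    print(df.groupby("a").apply(diverging_block, engine="jit"))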
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 81acb43ee7d..b778db4465f 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -187,14 +187,23 @@ def test_json_writer(tmpdir, pdf, gdf):
     assert_eq(pdf_string, gdf_string)


-def test_cudf_json_writer(pdf):
+@pytest.mark.parametrize(
+    "lines", [True, False], ids=["lines=True", "lines=False"]
+)
+def test_cudf_json_writer(pdf, lines):
     # removing datetime column because pandas doesn't support it
     for col_name in pdf.columns:
         if "datetime" in col_name:
             pdf.drop(col_name, axis=1, inplace=True)
     gdf = cudf.DataFrame.from_pandas(pdf)
-    pdf_string = pdf.to_json(orient="records", lines=True)
-    gdf_string = gdf.to_json(orient="records", lines=True, engine="cudf")
+    pdf_string = pdf.to_json(orient="records", lines=lines)
+    gdf_string = gdf.to_json(orient="records", lines=lines, engine="cudf")
+
+    assert_eq(pdf_string, gdf_string)
+
+    gdf_string = gdf.to_json(
+        orient="records", lines=lines, engine="cudf", rows_per_chunk=8
+    )

     assert_eq(pdf_string, gdf_string)
diff --git a/python/cudf/setup.py b/python/cudf/setup.py
index 48199d83478..88bc2cfae28 100644
--- a/python/cudf/setup.py
+++ b/python/cudf/setup.py
@@ -21,7 +21,7 @@
     "typing_extensions",
     # Allow floating minor versions for Arrow.
    "pyarrow==10",
-    f"rmm{cuda_suffix}==23.2.*",
+    f"rmm{cuda_suffix}==23.4.*",
     f"ptxcompiler{cuda_suffix}",
     f"cubinlinker{cuda_suffix}",
     "cupy-cuda11x",
diff --git a/python/cudf/udf_cpp/groupby/function.cu b/python/cudf/udf_cpp/groupby/function.cu
index f94f99c4b49..782371b8a44 100644
--- a/python/cudf/udf_cpp/groupby/function.cu
+++ b/python/cudf/udf_cpp/groupby/function.cu
@@ -284,7 +284,7 @@ extern "C" {
   __device__ int name##_##cname(return_type* numba_return_value, type* const data, int64_t size) \
   {                                                                                              \
     return_type const res = name(data, size);                                                    \
-    if (threadIdx.x == 0) { *numba_return_value = res; }                                         \
+    *numba_return_value = res;                                                                   \
     __syncthreads();                                                                             \
     return 0;                                                                                    \
   }
@@ -309,8 +309,8 @@ extern "C" {
   __device__ int name##_##cname(                                                      \
     int64_t* numba_return_value, type* const data, int64_t* index, int64_t size)      \
   {                                                                                   \
-    auto const res = name(data, index, size);                                         \
-    if (threadIdx.x == 0) { *numba_return_value = res; }                              \
+    auto const res = name(data, index, size);                                         \
+    *numba_return_value = res;                                                        \
     __syncthreads();                                                                  \
     return 0;                                                                         \
   }
diff --git a/python/cudf_kafka/setup.py b/python/cudf_kafka/setup.py
index 6d8f954f337..499d1d4a84b 100644
--- a/python/cudf_kafka/setup.py
+++ b/python/cudf_kafka/setup.py
@@ -43,7 +43,7 @@
     ),
 )
 CUDF_KAFKA_ROOT = os.environ.get(
-    "CUDF_KAFKA_ROOT", "../../libcudf_kafka/build"
+    "CUDF_KAFKA_ROOT", "../../cpp/libcudf_kafka/build"
 )

 try:
@@ -72,8 +72,14 @@
             pa.get_include(),
             cuda_include_dir,
         ],
-        library_dirs=([get_python_lib(), os.path.join(os.sys.prefix, "lib")]),
-        libraries=["cudf", "cudf_kafka"],
+        library_dirs=(
+            [
+                get_python_lib(),
+                os.path.join(os.sys.prefix, "lib"),
+                CUDF_KAFKA_ROOT,
+            ]
+        ),
+        libraries=["cudf", "cudf_kafka", "fmt"],
         language="c++",
         extra_compile_args=["-std=c++17"],
     )
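On the `function.cu` hunks above: with the removed `threadIdx.x == 0` guard, only one thread stored the reduction result while the rest headed for `__syncthreads()`, a divergence hazard for JIT-compiled group UDFs; since every thread holds the identical value, letting all of them store it is a benign race that keeps the block converged. A toy numba-cuda analogue of the all-threads-store pattern (illustration only, not the cudf kernel):

    import numpy as np
    from numba import cuda

    @cuda.jit
    def all_threads_store(out):
        # Every thread computes the same value; all of them store it, so no
        # thread waits at the barrier while another is stuck behind a guard.
        val = cuda.blockDim.x
        out[0] = val  # benign race: identical value from every thread
        cuda.syncthreads()

    out = cuda.device_array(1, dtype=np.int32)
    all_threads_store[1, 64](out)
    print(out.copy_to_host())  # [64]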
diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index b6be5ade6ba..821ec103204 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -11,6 +11,10 @@

 import dask.dataframe as dd
 from dask import config
+from dask.dataframe.backends import (
+    DataFrameBackendEntrypoint,
+    PandasBackendEntrypoint,
+)
 from dask.dataframe.core import get_parallel_type, meta_nonempty
 from dask.dataframe.dispatch import (
     categorical_dtype_dispatch,
@@ -30,7 +34,7 @@
     make_meta_obj,
 )
 from dask.sizeof import sizeof as sizeof_dispatch
-from dask.utils import is_arraylike
+from dask.utils import Dispatch, is_arraylike

 import cudf
 from cudf.api.types import is_string_dtype
@@ -446,91 +450,127 @@ def _default_backend(func, *args, **kwargs):
     return func(*args, **kwargs)


-try:
+def _unsupported_kwargs(old, new, kwargs):
+    # Utility to raise a meaningful error when
+    # unsupported kwargs are encountered within
+    # ``to_backend_dispatch``
+    if kwargs:
+        raise ValueError(
+            f"Unsupported key-word arguments used in `to_backend` "
+            f"for {old}-to-{new} conversion: {kwargs}"
+        )

-    # Define "cudf" backend engine to be registered with Dask
-    from dask.dataframe.backends import DataFrameBackendEntrypoint
-
-    class CudfBackendEntrypoint(DataFrameBackendEntrypoint):
-        """Backend-entrypoint class for Dask-DataFrame
-
-        This class is registered under the name "cudf" for the
-        ``dask.dataframe.backends`` entrypoint in ``setup.cfg``.
-        Dask-DataFrame will use the methods defined in this class
-        in place of ``dask.dataframe.<creation-method>`` when the
-        "dataframe.backend" configuration is set to "cudf":
-
-        Examples
-        --------
-        >>> import dask
-        >>> import dask.dataframe as dd
-        >>> with dask.config.set({"dataframe.backend": "cudf"}):
-        ...     ddf = dd.from_dict({"a": range(10)})
-        >>> type(ddf)
-        <class 'dask_cudf.core.DataFrame'>
-        """
-
-        @staticmethod
-        def from_dict(
-            data,
-            npartitions,
-            orient="columns",
-            dtype=None,
-            columns=None,
-            constructor=cudf.DataFrame,
-        ):
-
-            return _default_backend(
-                dd.from_dict,
-                data,
-                npartitions=npartitions,
-                orient=orient,
-                dtype=dtype,
-                columns=columns,
-                constructor=constructor,
-            )

-        @staticmethod
-        def read_parquet(*args, engine=None, **kwargs):
-            from dask_cudf.io.parquet import CudfEngine
+# Register cudf->pandas
+to_pandas_dispatch = PandasBackendEntrypoint.to_backend_dispatch()

-            return _default_backend(
-                dd.read_parquet,
-                *args,
-                engine=CudfEngine,
-                **kwargs,
-            )

-        @staticmethod
-        def read_json(*args, **kwargs):
-            from dask_cudf.io.json import read_json
+@to_pandas_dispatch.register((cudf.DataFrame, cudf.Series, cudf.Index))
+def to_pandas_dispatch_from_cudf(data, nullable=False, **kwargs):
+    _unsupported_kwargs("cudf", "pandas", kwargs)
+    return data.to_pandas(nullable=nullable)

-            return read_json(*args, **kwargs)

-        @staticmethod
-        def read_orc(*args, **kwargs):
-            from dask_cudf.io import read_orc
+# Register pandas->cudf
+to_cudf_dispatch = Dispatch("to_cudf_dispatch")

-            return read_orc(*args, **kwargs)

-        @staticmethod
-        def read_csv(*args, **kwargs):
-            from dask_cudf.io import read_csv
+@to_cudf_dispatch.register((pd.DataFrame, pd.Series, pd.Index))
+def to_cudf_dispatch_from_pandas(data, nan_as_null=None, **kwargs):
+    _unsupported_kwargs("pandas", "cudf", kwargs)
+    return cudf.from_pandas(data, nan_as_null=nan_as_null)

-            return read_csv(*args, **kwargs)

-        @staticmethod
-        def read_hdf(*args, **kwargs):
-            from dask_cudf import from_dask_dataframe
+# Define "cudf" backend engine to be registered with Dask
+class CudfBackendEntrypoint(DataFrameBackendEntrypoint):
+    """Backend-entrypoint class for Dask-DataFrame

-            # HDF5 reader not yet implemented in cudf
-            warnings.warn(
-                "read_hdf is not yet implemented in cudf/dask_cudf. "
-                "Moving to cudf from pandas. Expect poor performance!"
-            )
-            return from_dask_dataframe(
-                _default_backend(dd.read_hdf, *args, **kwargs)
-            )
+    This class is registered under the name "cudf" for the
+    ``dask.dataframe.backends`` entrypoint in ``setup.cfg``.
+    Dask-DataFrame will use the methods defined in this class
+    in place of ``dask.dataframe.<creation-method>`` when the
+    "dataframe.backend" configuration is set to "cudf":

-except ImportError:
-    pass
+    Examples
+    --------
+    >>> import dask
+    >>> import dask.dataframe as dd
+    >>> with dask.config.set({"dataframe.backend": "cudf"}):
+    ...     ddf = dd.from_dict({"a": range(10)})
+    >>> type(ddf)
+    <class 'dask_cudf.core.DataFrame'>
+    """
+
+    @classmethod
+    def to_backend_dispatch(cls):
+        return to_cudf_dispatch
+
+    @classmethod
+    def to_backend(cls, data: dd.core._Frame, **kwargs):
+        if isinstance(data._meta, (cudf.DataFrame, cudf.Series, cudf.Index)):
+            # Already a cudf-backed collection
+            _unsupported_kwargs("cudf", "cudf", kwargs)
+            return data
+        return data.map_partitions(cls.to_backend_dispatch(), **kwargs)
+
+    @staticmethod
+    def from_dict(
+        data,
+        npartitions,
+        orient="columns",
+        dtype=None,
+        columns=None,
+        constructor=cudf.DataFrame,
+    ):
+
+        return _default_backend(
+            dd.from_dict,
+            data,
+            npartitions=npartitions,
+            orient=orient,
+            dtype=dtype,
+            columns=columns,
+            constructor=constructor,
+        )
+
+    @staticmethod
+    def read_parquet(*args, engine=None, **kwargs):
+        from dask_cudf.io.parquet import CudfEngine
+
+        return _default_backend(
+            dd.read_parquet,
+            *args,
+            engine=CudfEngine,
+            **kwargs,
+        )
+
+    @staticmethod
+    def read_json(*args, **kwargs):
+        from dask_cudf.io.json import read_json
+
+        return read_json(*args, **kwargs)
+
+    @staticmethod
+    def read_orc(*args, **kwargs):
+        from dask_cudf.io import read_orc
+
+        return read_orc(*args, **kwargs)
+
+    @staticmethod
+    def read_csv(*args, **kwargs):
+        from dask_cudf.io import read_csv
+
+        return read_csv(*args, **kwargs)
+
+    @staticmethod
+    def read_hdf(*args, **kwargs):
+        from dask_cudf import from_dask_dataframe
+
+        # HDF5 reader not yet implemented in cudf
+        warnings.warn(
+            "read_hdf is not yet implemented in cudf/dask_cudf. "
+            "Moving to cudf from pandas. Expect poor performance!"
+        )
+        return from_dask_dataframe(
+            _default_backend(dd.read_hdf, *args, **kwargs)
+        )
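With the entrypoint above in place, moving a collection between backends becomes a one-liner; the tests that follow pin this down. A condensed usage sketch (requires dask>=2023.1.1, where `to_backend` exists, matching the dask_cudf setup.py bump at the end of this diff):

    import dask
    import dask.dataframe as dd

    with dask.config.set({"dataframe.backend": "pandas"}):
        ddf = dd.from_dict({"a": range(10)}, npartitions=2)

    gddf = ddf.to_backend("cudf")                     # pandas -> cudf via to_cudf_dispatch
    pddf = gddf.to_backend("pandas", nullable=False)  # cudf -> pandas via to_pandas_dispatch

    print(type(gddf._meta))  # cudf-backed partitions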
diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py
index ee8229bc7e8..7f8876c8564 100644
--- a/python/dask_cudf/dask_cudf/tests/test_core.py
+++ b/python/dask_cudf/dask_cudf/tests/test_core.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+from packaging import version

 import dask
 from dask import dataframe as dd
@@ -31,6 +32,58 @@ def test_from_dict_backend_dispatch():
     dd.assert_eq(expect, ddf)


+def test_to_backend():
+    np.random.seed(0)
+    data = {
+        "x": np.random.randint(0, 5, size=10000),
+        "y": np.random.normal(size=10000),
+    }
+    with dask.config.set({"dataframe.backend": "pandas"}):
+        ddf = dd.from_dict(data, npartitions=2)
+        assert isinstance(ddf._meta, pd.DataFrame)
+
+        gdf = ddf.to_backend("cudf")
+        assert isinstance(gdf, dgd.DataFrame)
+        dd.assert_eq(cudf.DataFrame(data), ddf)
+
+        assert isinstance(gdf.to_backend()._meta, pd.DataFrame)
+
+
+def test_to_backend_kwargs():
+    data = {"x": [0, 2, np.nan, 3, 4, 5]}
+    with dask.config.set({"dataframe.backend": "pandas"}):
+        dser = dd.from_dict(data, npartitions=2)["x"]
+        assert isinstance(dser._meta, pd.Series)
+
+        # Using `nan_as_null=False` will result in a cudf-backed
+        # Series with a NaN element (rather than <NA>)
+        gser_nan = dser.to_backend("cudf", nan_as_null=False)
+        assert isinstance(gser_nan, dgd.Series)
+        assert np.isnan(gser_nan.compute()).sum() == 1
+
+        # Using `nan_as_null=True` will result in a cudf-backed
+        # Series with a <NA> element (rather than NaN)
+        gser_null = dser.to_backend("cudf", nan_as_null=True)
+        assert isinstance(gser_null, dgd.Series)
+        assert np.isnan(gser_null.compute()).sum() == 0
+
+        # Check `nullable` argument for `cudf.Series.to_pandas`
+        dser_null = gser_null.to_backend("pandas", nullable=False)
+        assert dser_null.compute().dtype == "float"
+        dser_null = gser_null.to_backend("pandas", nullable=True)
+        assert isinstance(dser_null.compute().dtype, pd.Float64Dtype)
+
+        # Check unsupported arguments
+        with pytest.raises(ValueError, match="pandas-to-cudf"):
+            dser.to_backend("cudf", bad_arg=True)
+
+        with pytest.raises(ValueError, match="cudf-to-cudf"):
+            gser_null.to_backend("cudf", bad_arg=True)
+
+        with pytest.raises(ValueError, match="cudf-to-pandas"):
+            gser_null.to_backend("pandas", bad_arg=True)
+
+
 def test_from_cudf():
     np.random.seed(0)

@@ -547,8 +600,6 @@ def test_unary_ops(func, gdf, gddf):

     # Fixed in https://github.com/dask/dask/pull/4657
     if isinstance(p, cudf.Index):
-        from packaging import version
-
         if version.parse(dask.__version__) < version.parse("1.1.6"):
             pytest.skip(
                 "dask.dataframe assert_eq index check hardcoded to "
gser_null.to_backend("pandas", nullable=True) + assert isinstance(dser_null.compute().dtype, pd.Float64Dtype) + + # Check unsupported arguments + with pytest.raises(ValueError, match="pandas-to-cudf"): + dser.to_backend("cudf", bad_arg=True) + + with pytest.raises(ValueError, match="cudf-to-cudf"): + gser_null.to_backend("cudf", bad_arg=True) + + with pytest.raises(ValueError, match="cudf-to-pandas"): + gser_null.to_backend("pandas", bad_arg=True) + + def test_from_cudf(): np.random.seed(0) @@ -547,8 +600,6 @@ def test_unary_ops(func, gdf, gddf): # Fixed in https://github.com/dask/dask/pull/4657 if isinstance(p, cudf.Index): - from packaging import version - if version.parse(dask.__version__) < version.parse("1.1.6"): pytest.skip( "dask.dataframe assert_eq index check hardcoded to " diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 1108da91d03..04145d23978 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -8,12 +8,12 @@ cuda_suffix = os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default="") install_requires = [ - "dask>=2022.12.0", - "distributed>=2022.12.0", + "dask>=2023.1.1", + "distributed>=2023.1.1", "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.6.0dev0", - f"cudf{cuda_suffix}==23.2.*", + f"cudf{cuda_suffix}==23.4.*", "cupy-cuda11x", ]