diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 3366554db30..26d07515f70 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
jobs:
cpp-build:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -37,7 +37,7 @@ jobs:
python-build:
needs: [cpp-build]
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -46,7 +46,7 @@ jobs:
upload-conda:
needs: [cpp-build, python-build]
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -55,7 +55,7 @@ jobs:
skip_upload_pkgs: libcudf-example
wheel-build-cudf:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -67,7 +67,7 @@ jobs:
wheel-publish-cudf:
needs: wheel-build-cudf
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -77,7 +77,7 @@ jobs:
wheel-build-dask-cudf:
needs: wheel-publish-cudf
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -88,7 +88,7 @@ jobs:
wheel-publish-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-publish.yml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-publish.yml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index cf20b0006a2..f33fc15c52f 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -25,32 +25,32 @@ jobs:
- wheel-build-dask-cudf
- wheel-tests-dask-cudf
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.04
checks:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04
conda-cpp-build:
needs: checks
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04
with:
build_type: pull-request
conda-cpp-tests:
needs: conda-cpp-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04
with:
build_type: pull-request
conda-python-build:
needs: conda-cpp-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04
with:
build_type: pull-request
conda-python-cudf-tests:
needs: conda-python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
with:
build_type: pull-request
test_script: "ci/test_python_cudf.sh"
@@ -58,14 +58,14 @@ jobs:
# Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
needs: conda-python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
with:
build_type: pull-request
test_script: "ci/test_python_other.sh"
conda-java-tests:
needs: conda-cpp-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
with:
build_type: pull-request
node_type: "gpu-latest-1"
@@ -75,7 +75,7 @@ jobs:
conda-notebook-tests:
needs: conda-python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
with:
build_type: pull-request
node_type: "gpu-latest-1"
@@ -85,7 +85,7 @@ jobs:
wheel-build-cudf:
needs: checks
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04
with:
build_type: pull-request
package-name: cudf
@@ -94,7 +94,7 @@ jobs:
wheel-tests-cudf:
needs: wheel-build-cudf
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04
with:
build_type: pull-request
package-name: cudf
@@ -106,7 +106,7 @@ jobs:
wheel-build-dask-cudf:
needs: wheel-tests-cudf
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.04
with:
build_type: pull-request
package-name: dask_cudf
@@ -115,7 +115,7 @@ jobs:
wheel-tests-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04
with:
build_type: pull-request
package-name: dask_cudf
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 1b117bb2f4f..ff19d51f8ef 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
jobs:
conda-cpp-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -24,7 +24,7 @@ jobs:
sha: ${{ inputs.sha }}
conda-python-cudf-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -34,7 +34,7 @@ jobs:
conda-python-other-tests:
# Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -43,7 +43,7 @@ jobs:
test_script: "ci/test_python_other.sh"
conda-java-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -55,7 +55,7 @@ jobs:
run_script: "ci/test_java.sh"
conda-notebook-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -67,7 +67,7 @@ jobs:
run_script: "ci/test_notebooks.sh"
wheel-tests-cudf:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -78,7 +78,7 @@ jobs:
test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests"
wheel-tests-dask-cudf:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.02
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d653d503a1e..4acad48eabf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,244 @@
-# cuDF 23.02.00 (Date TBD)
+# cuDF 23.02.00 (9 Feb 2023)
-Please see https://github.com/rapidsai/cudf/releases/tag/v23.02.00a for the latest changes to this development branch.
+## 🚨 Breaking Changes
+
+- Pin `dask` and `distributed` for release ([#12695](https://github.com/rapidsai/cudf/pull/12695)) [@galipremsagar](https://github.com/galipremsagar)
+- Change ways to access `ptr` in `Buffer` ([#12587](https://github.com/rapidsai/cudf/pull/12587)) [@galipremsagar](https://github.com/galipremsagar)
+- Remove column names ([#12578](https://github.com/rapidsai/cudf/pull/12578)) [@vuule](https://github.com/vuule)
+- Default `cudf::io::read_json` to nested JSON parser ([#12544](https://github.com/rapidsai/cudf/pull/12544)) [@vuule](https://github.com/vuule)
+- Switch `engine=cudf` to the new `JSON` reader ([#12509](https://github.com/rapidsai/cudf/pull/12509)) [@galipremsagar](https://github.com/galipremsagar)
+- Add trailing comma support for nested JSON reader ([#12448](https://github.com/rapidsai/cudf/pull/12448)) [@karthikeyann](https://github.com/karthikeyann)
+- Upgrade to `arrow-10.0.1` ([#12327](https://github.com/rapidsai/cudf/pull/12327)) [@galipremsagar](https://github.com/galipremsagar)
+- Fail loudly to avoid data corruption with unsupported input in `read_orc` ([#12325](https://github.com/rapidsai/cudf/pull/12325)) [@vuule](https://github.com/vuule)
+- CSV, JSON reader to infer integer column with nulls as int64 instead of float64 ([#12309](https://github.com/rapidsai/cudf/pull/12309)) [@karthikeyann](https://github.com/karthikeyann)
+- Remove deprecated code for 23.02 ([#12281](https://github.com/rapidsai/cudf/pull/12281)) [@vyasr](https://github.com/vyasr)
+- Null element for parsing error in numeric types in JSON, CSV reader ([#12272](https://github.com/rapidsai/cudf/pull/12272)) [@karthikeyann](https://github.com/karthikeyann)
+- Purge non-empty nulls for `superimpose_nulls` and `push_down_nulls` ([#12239](https://github.com/rapidsai/cudf/pull/12239)) [@ttnghia](https://github.com/ttnghia)
+- Rename `cudf::structs::detail::superimpose_parent_nulls` APIs ([#12230](https://github.com/rapidsai/cudf/pull/12230)) [@ttnghia](https://github.com/ttnghia)
+- Remove JIT type names, refactor id_to_type. ([#12158](https://github.com/rapidsai/cudf/pull/12158)) [@bdice](https://github.com/bdice)
+- Floor division uses integer division for integral arguments ([#12131](https://github.com/rapidsai/cudf/pull/12131)) [@wence-](https://github.com/wence-)
+
+## 🐛 Bug Fixes
+
+- Fix a mask data corruption in UDF ([#12647](https://github.com/rapidsai/cudf/pull/12647)) [@galipremsagar](https://github.com/galipremsagar)
+- pre-commit: Update isort version to 5.12.0 ([#12645](https://github.com/rapidsai/cudf/pull/12645)) [@wence-](https://github.com/wence-)
+- tests: Skip cuInit tests if cuda-gdb is not found or not working ([#12644](https://github.com/rapidsai/cudf/pull/12644)) [@wence-](https://github.com/wence-)
+- Revert regex program java APIs and tests ([#12639](https://github.com/rapidsai/cudf/pull/12639)) [@cindyyuanjiang](https://github.com/cindyyuanjiang)
+- Fix leaks in ColumnVectorTest ([#12625](https://github.com/rapidsai/cudf/pull/12625)) [@jlowe](https://github.com/jlowe)
+- Handle when spillable buffers own each other ([#12607](https://github.com/rapidsai/cudf/pull/12607)) [@madsbk](https://github.com/madsbk)
+- Fix incorrect null counts for sliced columns in JCudfSerialization ([#12589](https://github.com/rapidsai/cudf/pull/12589)) [@jlowe](https://github.com/jlowe)
+- lists: Transfer dtypes correctly through list.get ([#12586](https://github.com/rapidsai/cudf/pull/12586)) [@wence-](https://github.com/wence-)
+- timedelta: Don't go via float intermediates for floordiv ([#12585](https://github.com/rapidsai/cudf/pull/12585)) [@wence-](https://github.com/wence-)
+- Fixing BUG, `get_next_chunk()` should use the blocking function `device_read()` ([#12584](https://github.com/rapidsai/cudf/pull/12584)) [@madsbk](https://github.com/madsbk)
+- Make JNI QuoteStyle accessible outside ai.rapids.cudf ([#12572](https://github.com/rapidsai/cudf/pull/12572)) [@mythrocks](https://github.com/mythrocks)
+- `partition_by_hash()`: support index ([#12554](https://github.com/rapidsai/cudf/pull/12554)) [@madsbk](https://github.com/madsbk)
+- Mixed Join benchmark bug due to wrong conditional column ([#12553](https://github.com/rapidsai/cudf/pull/12553)) [@divyegala](https://github.com/divyegala)
+- Update List Lexicographical Comparator ([#12538](https://github.com/rapidsai/cudf/pull/12538)) [@divyegala](https://github.com/divyegala)
+- Dynamically read PTX version ([#12534](https://github.com/rapidsai/cudf/pull/12534)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- build.sh switch to use `RAPIDS` magic value ([#12525](https://github.com/rapidsai/cudf/pull/12525)) [@robertmaynard](https://github.com/robertmaynard)
+- Loosen runtime arrow pinning ([#12522](https://github.com/rapidsai/cudf/pull/12522)) [@vyasr](https://github.com/vyasr)
+- Enable metadata transfer for complex types in transpose ([#12491](https://github.com/rapidsai/cudf/pull/12491)) [@galipremsagar](https://github.com/galipremsagar)
+- Fix issues with parquet chunked reader ([#12488](https://github.com/rapidsai/cudf/pull/12488)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Fix missing metadata transfer in concat for `ListColumn` ([#12487](https://github.com/rapidsai/cudf/pull/12487)) [@galipremsagar](https://github.com/galipremsagar)
+- Rename libcudf substring source files to slice ([#12484](https://github.com/rapidsai/cudf/pull/12484)) [@davidwendt](https://github.com/davidwendt)
+- Fix compile issue with arrow 10 ([#12465](https://github.com/rapidsai/cudf/pull/12465)) [@ttnghia](https://github.com/ttnghia)
+- Fix List offsets bug in mixed type list column in nested JSON reader ([#12447](https://github.com/rapidsai/cudf/pull/12447)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix xfail incompatibilities ([#12423](https://github.com/rapidsai/cudf/pull/12423)) [@vyasr](https://github.com/vyasr)
+- Fix bug in Parquet column index encoding ([#12404](https://github.com/rapidsai/cudf/pull/12404)) [@etseidl](https://github.com/etseidl)
+- When building Arrow shared look for a shared OpenSSL ([#12396](https://github.com/rapidsai/cudf/pull/12396)) [@robertmaynard](https://github.com/robertmaynard)
+- Fix get_json_object to return empty column on empty input ([#12384](https://github.com/rapidsai/cudf/pull/12384)) [@davidwendt](https://github.com/davidwendt)
+- Pin arrow 9 in testing dependencies to prevent conda solve issues ([#12377](https://github.com/rapidsai/cudf/pull/12377)) [@vyasr](https://github.com/vyasr)
+- Fix reductions any/all return value for empty input ([#12374](https://github.com/rapidsai/cudf/pull/12374)) [@davidwendt](https://github.com/davidwendt)
+- Fix debug compile errors in parquet.hpp ([#12372](https://github.com/rapidsai/cudf/pull/12372)) [@davidwendt](https://github.com/davidwendt)
+- Purge non-empty nulls in `cudf::make_lists_column` ([#12370](https://github.com/rapidsai/cudf/pull/12370)) [@ttnghia](https://github.com/ttnghia)
+- Use correct memory resource in io::make_column ([#12364](https://github.com/rapidsai/cudf/pull/12364)) [@vyasr](https://github.com/vyasr)
+- Add code to detect possible malformed page data in parquet files. ([#12360](https://github.com/rapidsai/cudf/pull/12360)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Fail loudly to avoid data corruption with unsupported input in `read_orc` ([#12325](https://github.com/rapidsai/cudf/pull/12325)) [@vuule](https://github.com/vuule)
+- Fix NumericPairIteratorTest for float values ([#12306](https://github.com/rapidsai/cudf/pull/12306)) [@davidwendt](https://github.com/davidwendt)
+- Fixes memory allocation in nested JSON tokenizer ([#12300](https://github.com/rapidsai/cudf/pull/12300)) [@elstehle](https://github.com/elstehle)
+- Reconstruct dtypes correctly for list aggs of struct columns ([#12290](https://github.com/rapidsai/cudf/pull/12290)) [@wence-](https://github.com/wence-)
+- Fix regex \A and \Z to strictly match string begin/end ([#12282](https://github.com/rapidsai/cudf/pull/12282)) [@davidwendt](https://github.com/davidwendt)
+- Fix compile issue in `json_chunked_reader.cpp` ([#12280](https://github.com/rapidsai/cudf/pull/12280)) [@ttnghia](https://github.com/ttnghia)
+- Change reductions any/all to return valid values for empty input ([#12279](https://github.com/rapidsai/cudf/pull/12279)) [@davidwendt](https://github.com/davidwendt)
+- Only exclude join keys that are indices from key columns ([#12271](https://github.com/rapidsai/cudf/pull/12271)) [@wence-](https://github.com/wence-)
+- Fix spill to device limit ([#12252](https://github.com/rapidsai/cudf/pull/12252)) [@madsbk](https://github.com/madsbk)
+- Correct behaviour of sort in `concat` for singleton concatenations ([#12247](https://github.com/rapidsai/cudf/pull/12247)) [@wence-](https://github.com/wence-)
+- Purge non-empty nulls for `superimpose_nulls` and `push_down_nulls` ([#12239](https://github.com/rapidsai/cudf/pull/12239)) [@ttnghia](https://github.com/ttnghia)
+- Patch CUB DeviceSegmentedSort and remove workaround ([#12234](https://github.com/rapidsai/cudf/pull/12234)) [@davidwendt](https://github.com/davidwendt)
+- Fix memory leak in udf_string::assign(&&) function ([#12206](https://github.com/rapidsai/cudf/pull/12206)) [@davidwendt](https://github.com/davidwendt)
+- Workaround thrust-copy-if limit in json get_tree_representation ([#12190](https://github.com/rapidsai/cudf/pull/12190)) [@davidwendt](https://github.com/davidwendt)
+- Fix page size calculation in Parquet writer ([#12182](https://github.com/rapidsai/cudf/pull/12182)) [@etseidl](https://github.com/etseidl)
+- Add cudf::detail::sizes_to_offsets_iterator to allow checking overflow in offsets ([#12180](https://github.com/rapidsai/cudf/pull/12180)) [@davidwendt](https://github.com/davidwendt)
+- Workaround thrust-copy-if limit in wordpiece-tokenizer ([#12168](https://github.com/rapidsai/cudf/pull/12168)) [@davidwendt](https://github.com/davidwendt)
+- Floor division uses integer division for integral arguments ([#12131](https://github.com/rapidsai/cudf/pull/12131)) [@wence-](https://github.com/wence-)
+
+## 📖 Documentation
+
+- Fix link to NVTX ([#12598](https://github.com/rapidsai/cudf/pull/12598)) [@sameerz](https://github.com/sameerz)
+- Include missing groupby functions in documentation ([#12580](https://github.com/rapidsai/cudf/pull/12580)) [@quasiben](https://github.com/quasiben)
+- Fix documentation author ([#12527](https://github.com/rapidsai/cudf/pull/12527)) [@bdice](https://github.com/bdice)
+- Update libcudf reduction docs for casting output types ([#12526](https://github.com/rapidsai/cudf/pull/12526)) [@davidwendt](https://github.com/davidwendt)
+- Add JSON reader page in user guide ([#12499](https://github.com/rapidsai/cudf/pull/12499)) [@GregoryKimball](https://github.com/GregoryKimball)
+- Link unsupported iteration API docstrings ([#12482](https://github.com/rapidsai/cudf/pull/12482)) [@galipremsagar](https://github.com/galipremsagar)
+- `strings_udf` doc update ([#12469](https://github.com/rapidsai/cudf/pull/12469)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Update cudf_assert docs with correct NDEBUG behavior ([#12464](https://github.com/rapidsai/cudf/pull/12464)) [@robertmaynard](https://github.com/robertmaynard)
+- Update pre-commit hooks guide ([#12395](https://github.com/rapidsai/cudf/pull/12395)) [@bdice](https://github.com/bdice)
+- Update test docs to not use detail comparison utilities ([#12332](https://github.com/rapidsai/cudf/pull/12332)) [@PointKernel](https://github.com/PointKernel)
+- Fix doxygen description for regex_program::compute_working_memory_size ([#12329](https://github.com/rapidsai/cudf/pull/12329)) [@davidwendt](https://github.com/davidwendt)
+- Add eval to docs. ([#12322](https://github.com/rapidsai/cudf/pull/12322)) [@vyasr](https://github.com/vyasr)
+- Turn on xfail_strict=true ([#12244](https://github.com/rapidsai/cudf/pull/12244)) [@wence-](https://github.com/wence-)
+- Update 10 minutes to cuDF ([#12114](https://github.com/rapidsai/cudf/pull/12114)) [@wence-](https://github.com/wence-)
+
+## 🚀 New Features
+
+- Use kvikIO as the default IO backend ([#12574](https://github.com/rapidsai/cudf/pull/12574)) [@vuule](https://github.com/vuule)
+- Use `has_nonempty_nulls` instead of `may_contain_non_empty_nulls` in `superimpose_nulls` and `push_down_nulls` ([#12560](https://github.com/rapidsai/cudf/pull/12560)) [@ttnghia](https://github.com/ttnghia)
+- Add strings methods removeprefix and removesuffix ([#12557](https://github.com/rapidsai/cudf/pull/12557)) [@davidwendt](https://github.com/davidwendt)
+- Add `regex_program` java APIs and unit tests ([#12548](https://github.com/rapidsai/cudf/pull/12548)) [@cindyyuanjiang](https://github.com/cindyyuanjiang)
+- Default `cudf::io::read_json` to nested JSON parser ([#12544](https://github.com/rapidsai/cudf/pull/12544)) [@vuule](https://github.com/vuule)
+- Make string quoting optional on CSV write ([#12539](https://github.com/rapidsai/cudf/pull/12539)) [@mythrocks](https://github.com/mythrocks)
+- Use new nvCOMP API to optimize the compression temp memory size ([#12533](https://github.com/rapidsai/cudf/pull/12533)) [@vuule](https://github.com/vuule)
+- Support "values" orient (array of arrays) in Nested JSON reader ([#12498](https://github.com/rapidsai/cudf/pull/12498)) [@karthikeyann](https://github.com/karthikeyann)
+- `one_hot_encode` to use experimental row comparators ([#12478](https://github.com/rapidsai/cudf/pull/12478)) [@divyegala](https://github.com/divyegala)
+- Support %W and %w format specifiers in cudf::strings::to_timestamps ([#12475](https://github.com/rapidsai/cudf/pull/12475)) [@davidwendt](https://github.com/davidwendt)
+- Add JSON Writer ([#12474](https://github.com/rapidsai/cudf/pull/12474)) [@karthikeyann](https://github.com/karthikeyann)
+- Refactor `thrust_copy_if` into `cudf::detail::copy_if_safe` ([#12455](https://github.com/rapidsai/cudf/pull/12455)) [@ttnghia](https://github.com/ttnghia)
+- Add trailing comma support for nested JSON reader ([#12448](https://github.com/rapidsai/cudf/pull/12448)) [@karthikeyann](https://github.com/karthikeyann)
+- Extract `tokenize_json.hpp` detail header from `src/io/json/nested_json.hpp` ([#12432](https://github.com/rapidsai/cudf/pull/12432)) [@ttnghia](https://github.com/ttnghia)
+- JNI bindings to write CSV ([#12425](https://github.com/rapidsai/cudf/pull/12425)) [@mythrocks](https://github.com/mythrocks)
+- Nested JSON depth benchmark ([#12371](https://github.com/rapidsai/cudf/pull/12371)) [@karthikeyann](https://github.com/karthikeyann)
+- Implement `lists::reverse` ([#12336](https://github.com/rapidsai/cudf/pull/12336)) [@ttnghia](https://github.com/ttnghia)
+- Use `device_read` in experimental `read_json` ([#12314](https://github.com/rapidsai/cudf/pull/12314)) [@vuule](https://github.com/vuule)
+- Implement JNI for `strings::reverse` ([#12283](https://github.com/rapidsai/cudf/pull/12283)) [@ttnghia](https://github.com/ttnghia)
+- Null element for parsing error in numeric types in JSON, CSV reader ([#12272](https://github.com/rapidsai/cudf/pull/12272)) [@karthikeyann](https://github.com/karthikeyann)
+- Add cudf::strings:like function with multiple patterns ([#12269](https://github.com/rapidsai/cudf/pull/12269)) [@davidwendt](https://github.com/davidwendt)
+- Add environment variable to control host memory allocation in `hostdevice_vector` ([#12251](https://github.com/rapidsai/cudf/pull/12251)) [@vuule](https://github.com/vuule)
+- Add cudf::strings::reverse function ([#12227](https://github.com/rapidsai/cudf/pull/12227)) [@davidwendt](https://github.com/davidwendt)
+- Selectively use dictionary encoding in Parquet writer ([#12211](https://github.com/rapidsai/cudf/pull/12211)) [@etseidl](https://github.com/etseidl)
+- Support `replace` in `strings_udf` ([#12207](https://github.com/rapidsai/cudf/pull/12207)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Add support to read binary encoded decimals in parquet ([#12205](https://github.com/rapidsai/cudf/pull/12205)) [@PointKernel](https://github.com/PointKernel)
+- Support regex EOL where the string ends with a new-line character ([#12181](https://github.com/rapidsai/cudf/pull/12181)) [@davidwendt](https://github.com/davidwendt)
+- Updating `stream_compaction/unique` to use new row comparators ([#12159](https://github.com/rapidsai/cudf/pull/12159)) [@divyegala](https://github.com/divyegala)
+- Add device buffer datasource ([#12024](https://github.com/rapidsai/cudf/pull/12024)) [@PointKernel](https://github.com/PointKernel)
+- Implement groupby apply with JIT ([#11452](https://github.com/rapidsai/cudf/pull/11452)) [@bwyogatama](https://github.com/bwyogatama)
+
+## 🛠️ Improvements
+
+- Update shared workflow branches ([#12696](https://github.com/rapidsai/cudf/pull/12696)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Pin `dask` and `distributed` for release ([#12695](https://github.com/rapidsai/cudf/pull/12695)) [@galipremsagar](https://github.com/galipremsagar)
+- Don't upload `libcudf-example` to Anaconda.org ([#12671](https://github.com/rapidsai/cudf/pull/12671)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Pin wheel dependencies to same RAPIDS release ([#12659](https://github.com/rapidsai/cudf/pull/12659)) [@sevagh](https://github.com/sevagh)
+- Use CTK 118/cp310 branch of wheel workflows ([#12602](https://github.com/rapidsai/cudf/pull/12602)) [@sevagh](https://github.com/sevagh)
+- Change ways to access `ptr` in `Buffer` ([#12587](https://github.com/rapidsai/cudf/pull/12587)) [@galipremsagar](https://github.com/galipremsagar)
+- Version a parquet writer xfail ([#12579](https://github.com/rapidsai/cudf/pull/12579)) [@galipremsagar](https://github.com/galipremsagar)
+- Remove column names ([#12578](https://github.com/rapidsai/cudf/pull/12578)) [@vuule](https://github.com/vuule)
+- Parquet reader optimization to address V100 regression. ([#12577](https://github.com/rapidsai/cudf/pull/12577)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Add support for `category` dtypes in CSV reader ([#12571](https://github.com/rapidsai/cudf/pull/12571)) [@galipremsagar](https://github.com/galipremsagar)
+- Remove `spill_lock` parameter from `SpillableBuffer.get_ptr()` ([#12564](https://github.com/rapidsai/cudf/pull/12564)) [@madsbk](https://github.com/madsbk)
+- Optimize `cudf::make_lists_column` ([#12547](https://github.com/rapidsai/cudf/pull/12547)) [@ttnghia](https://github.com/ttnghia)
+- Remove `cudf::strings::repeat_strings_output_sizes` from Java and JNI ([#12546](https://github.com/rapidsai/cudf/pull/12546)) [@ttnghia](https://github.com/ttnghia)
+- Test that cuInit is not called when RAPIDS_NO_INITIALIZE is set ([#12545](https://github.com/rapidsai/cudf/pull/12545)) [@wence-](https://github.com/wence-)
+- Rework repeat_strings to use sizes-to-offsets utility ([#12543](https://github.com/rapidsai/cudf/pull/12543)) [@davidwendt](https://github.com/davidwendt)
+- Replace exclusive_scan with sizes_to_offsets in cudf::lists::sequences ([#12541](https://github.com/rapidsai/cudf/pull/12541)) [@davidwendt](https://github.com/davidwendt)
+- Rework nvtext::ngrams_tokenize to use sizes-to-offsets utility ([#12540](https://github.com/rapidsai/cudf/pull/12540)) [@davidwendt](https://github.com/davidwendt)
+- Fix binary-ops gtests coded in namespace cudf::test ([#12536](https://github.com/rapidsai/cudf/pull/12536)) [@davidwendt](https://github.com/davidwendt)
+- More `acquire_spill_lock()` and `as_buffer(..., exposed=False)` ([#12535](https://github.com/rapidsai/cudf/pull/12535)) [@madsbk](https://github.com/madsbk)
+- Guard CUDA runtime APIs with error checking ([#12531](https://github.com/rapidsai/cudf/pull/12531)) [@PointKernel](https://github.com/PointKernel)
+- Update TODOs from issue 10432. ([#12528](https://github.com/rapidsai/cudf/pull/12528)) [@bdice](https://github.com/bdice)
+- Update rapids-cmake definitions version in GitHub Actions style checks. ([#12511](https://github.com/rapidsai/cudf/pull/12511)) [@bdice](https://github.com/bdice)
+- Switch `engine=cudf` to the new `JSON` reader ([#12509](https://github.com/rapidsai/cudf/pull/12509)) [@galipremsagar](https://github.com/galipremsagar)
+- Fix SUM/MEAN aggregation type support. ([#12503](https://github.com/rapidsai/cudf/pull/12503)) [@bdice](https://github.com/bdice)
+- Stop using pandas._testing ([#12492](https://github.com/rapidsai/cudf/pull/12492)) [@vyasr](https://github.com/vyasr)
+- Fix ROLLING_TEST gtests coded in namespace cudf::test ([#12490](https://github.com/rapidsai/cudf/pull/12490)) [@davidwendt](https://github.com/davidwendt)
+- Fix erroneously skipped ORC ZSTD test ([#12486](https://github.com/rapidsai/cudf/pull/12486)) [@vuule](https://github.com/vuule)
+- Rework nvtext::generate_character_ngrams to use make_strings_children ([#12480](https://github.com/rapidsai/cudf/pull/12480)) [@davidwendt](https://github.com/davidwendt)
+- Raise warnings as errors in the test suite ([#12468](https://github.com/rapidsai/cudf/pull/12468)) [@vyasr](https://github.com/vyasr)
+- Remove `int32` hard-coding in python ([#12467](https://github.com/rapidsai/cudf/pull/12467)) [@galipremsagar](https://github.com/galipremsagar)
+- Use cudaMemcpyDefault. ([#12466](https://github.com/rapidsai/cudf/pull/12466)) [@bdice](https://github.com/bdice)
+- Update workflows for nightly tests ([#12462](https://github.com/rapidsai/cudf/pull/12462)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Build CUDA `11.8` and Python `3.10` Packages ([#12457](https://github.com/rapidsai/cudf/pull/12457)) [@ajschmidt8](https://github.com/ajschmidt8)
+- JNI build image default as cuda11.8 ([#12441](https://github.com/rapidsai/cudf/pull/12441)) [@pxLi](https://github.com/pxLi)
+- Re-enable `Recently Updated` Check ([#12435](https://github.com/rapidsai/cudf/pull/12435)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Rework remaining cudf::strings::from_xyz functions to use make_strings_children ([#12434](https://github.com/rapidsai/cudf/pull/12434)) [@vuule](https://github.com/vuule)
+- Build wheels alongside conda CI ([#12427](https://github.com/rapidsai/cudf/pull/12427)) [@sevagh](https://github.com/sevagh)
+- Remove arguments for checking exception messages in Python ([#12424](https://github.com/rapidsai/cudf/pull/12424)) [@vyasr](https://github.com/vyasr)
+- Clean up cuco usage ([#12421](https://github.com/rapidsai/cudf/pull/12421)) [@PointKernel](https://github.com/PointKernel)
+- Fix warnings in remaining modules ([#12406](https://github.com/rapidsai/cudf/pull/12406)) [@vyasr](https://github.com/vyasr)
+- Update `ops-bot.yaml` ([#12402](https://github.com/rapidsai/cudf/pull/12402)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Rework cudf::strings::integers_to_ipv4 to use make_strings_children utility ([#12401](https://github.com/rapidsai/cudf/pull/12401)) [@davidwendt](https://github.com/davidwendt)
+- Use `numpy.empty()` instead of `bytearray` to allocate host memory for spilling ([#12399](https://github.com/rapidsai/cudf/pull/12399)) [@madsbk](https://github.com/madsbk)
+- Deprecate chunksize from dask_cudf.read_csv ([#12394](https://github.com/rapidsai/cudf/pull/12394)) [@rjzamora](https://github.com/rjzamora)
+- Expose the RMM pool size in JNI ([#12390](https://github.com/rapidsai/cudf/pull/12390)) [@revans2](https://github.com/revans2)
+- Fix COPYING_TEST: gtests coded in namespace cudf::test ([#12387](https://github.com/rapidsai/cudf/pull/12387)) [@davidwendt](https://github.com/davidwendt)
+- Rework cudf::strings::url_encode to use make_strings_children utility ([#12385](https://github.com/rapidsai/cudf/pull/12385)) [@davidwendt](https://github.com/davidwendt)
+- Use make_strings_children in parse_data nested json reader ([#12382](https://github.com/rapidsai/cudf/pull/12382)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix warnings in test_datetime.py ([#12381](https://github.com/rapidsai/cudf/pull/12381)) [@vyasr](https://github.com/vyasr)
+- Mixed Join Benchmarks ([#12375](https://github.com/rapidsai/cudf/pull/12375)) [@divyegala](https://github.com/divyegala)
+- Fix warnings in dataframe.py ([#12369](https://github.com/rapidsai/cudf/pull/12369)) [@vyasr](https://github.com/vyasr)
+- Update conda recipes. ([#12368](https://github.com/rapidsai/cudf/pull/12368)) [@bdice](https://github.com/bdice)
+- Use gpu-latest-1 runner tag ([#12366](https://github.com/rapidsai/cudf/pull/12366)) [@bdice](https://github.com/bdice)
+- Rework cudf::strings::from_booleans to use make_strings_children ([#12365](https://github.com/rapidsai/cudf/pull/12365)) [@vuule](https://github.com/vuule)
+- Fix warnings in test modules up to test_dataframe.py ([#12355](https://github.com/rapidsai/cudf/pull/12355)) [@vyasr](https://github.com/vyasr)
+- JSON column performance optimization - struct column nulls ([#12354](https://github.com/rapidsai/cudf/pull/12354)) [@karthikeyann](https://github.com/karthikeyann)
+- Accelerate stable-segmented-sort with CUB segmented sort ([#12347](https://github.com/rapidsai/cudf/pull/12347)) [@davidwendt](https://github.com/davidwendt)
+- Add size check to make_offsets_child_column utility ([#12345](https://github.com/rapidsai/cudf/pull/12345)) [@davidwendt](https://github.com/davidwendt)
+- Enable max compression ratio small block optimization for ZSTD ([#12338](https://github.com/rapidsai/cudf/pull/12338)) [@vuule](https://github.com/vuule)
+- Fix warnings in test_monotonic.py ([#12334](https://github.com/rapidsai/cudf/pull/12334)) [@vyasr](https://github.com/vyasr)
+- Improve JSON column creation performance (list offsets) ([#12330](https://github.com/rapidsai/cudf/pull/12330)) [@karthikeyann](https://github.com/karthikeyann)
+- Upgrade to `arrow-10.0.1` ([#12327](https://github.com/rapidsai/cudf/pull/12327)) [@galipremsagar](https://github.com/galipremsagar)
+- Fix warnings in test_orc.py ([#12326](https://github.com/rapidsai/cudf/pull/12326)) [@vyasr](https://github.com/vyasr)
+- Fix warnings in test_groupby.py ([#12324](https://github.com/rapidsai/cudf/pull/12324)) [@vyasr](https://github.com/vyasr)
+- Fix `test_notebooks.sh` ([#12323](https://github.com/rapidsai/cudf/pull/12323)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Fix transform gtests coded in namespace cudf::test ([#12321](https://github.com/rapidsai/cudf/pull/12321)) [@davidwendt](https://github.com/davidwendt)
+- Fix `check_style.sh` script ([#12320](https://github.com/rapidsai/cudf/pull/12320)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Rework cudf::strings::from_timestamps to use make_strings_children ([#12317](https://github.com/rapidsai/cudf/pull/12317)) [@davidwendt](https://github.com/davidwendt)
+- Fix warnings in test_index.py ([#12313](https://github.com/rapidsai/cudf/pull/12313)) [@vyasr](https://github.com/vyasr)
+- Fix warnings in test_multiindex.py ([#12310](https://github.com/rapidsai/cudf/pull/12310)) [@vyasr](https://github.com/vyasr)
+- CSV, JSON reader to infer integer column with nulls as int64 instead of float64 ([#12309](https://github.com/rapidsai/cudf/pull/12309)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix warnings in test_indexing.py ([#12305](https://github.com/rapidsai/cudf/pull/12305)) [@vyasr](https://github.com/vyasr)
+- Fix warnings in test_joining.py ([#12304](https://github.com/rapidsai/cudf/pull/12304)) [@vyasr](https://github.com/vyasr)
+- Unpin `dask` and `distributed` for development ([#12302](https://github.com/rapidsai/cudf/pull/12302)) [@galipremsagar](https://github.com/galipremsagar)
+- Re-enable `sccache` for Jenkins builds ([#12297](https://github.com/rapidsai/cudf/pull/12297)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Define needs for pr-builder workflow. ([#12296](https://github.com/rapidsai/cudf/pull/12296)) [@bdice](https://github.com/bdice)
+- Forward merge 22.12 into 23.02 ([#12294](https://github.com/rapidsai/cudf/pull/12294)) [@vyasr](https://github.com/vyasr)
+- Fix warnings in test_stats.py ([#12293](https://github.com/rapidsai/cudf/pull/12293)) [@vyasr](https://github.com/vyasr)
+- Fix table gtests coded in namespace cudf::test ([#12292](https://github.com/rapidsai/cudf/pull/12292)) [@davidwendt](https://github.com/davidwendt)
+- Change cython for regex calls to use cudf::strings::regex_program ([#12289](https://github.com/rapidsai/cudf/pull/12289)) [@davidwendt](https://github.com/davidwendt)
+- Improved error reporting when reading multiple JSON files ([#12285](https://github.com/rapidsai/cudf/pull/12285)) [@vuule](https://github.com/vuule)
+- Deprecate Frame.sum_of_squares ([#12284](https://github.com/rapidsai/cudf/pull/12284)) [@vyasr](https://github.com/vyasr)
+- Remove deprecated code for 23.02 ([#12281](https://github.com/rapidsai/cudf/pull/12281)) [@vyasr](https://github.com/vyasr)
+- Clean up handling of max_page_size_bytes in Parquet writer ([#12277](https://github.com/rapidsai/cudf/pull/12277)) [@etseidl](https://github.com/etseidl)
+- Fix replace gtests coded in namespace cudf::test ([#12270](https://github.com/rapidsai/cudf/pull/12270)) [@davidwendt](https://github.com/davidwendt)
+- Add pandas nullable type support in `Index.to_pandas` ([#12268](https://github.com/rapidsai/cudf/pull/12268)) [@galipremsagar](https://github.com/galipremsagar)
+- Rework nvtext::detokenize to use indexalator for row indices ([#12267](https://github.com/rapidsai/cudf/pull/12267)) [@davidwendt](https://github.com/davidwendt)
+- Fix reduction gtests coded in namespace cudf::test ([#12257](https://github.com/rapidsai/cudf/pull/12257)) [@davidwendt](https://github.com/davidwendt)
+- Remove default parameters from cudf::detail::sort function declarations ([#12254](https://github.com/rapidsai/cudf/pull/12254)) [@davidwendt](https://github.com/davidwendt)
+- Add `duplicated` support for `Series`, `DataFrame` and `Index` ([#12246](https://github.com/rapidsai/cudf/pull/12246)) [@galipremsagar](https://github.com/galipremsagar)
+- Replace column/table test utilities with macros ([#12242](https://github.com/rapidsai/cudf/pull/12242)) [@PointKernel](https://github.com/PointKernel)
+- Rework cudf::strings::pad and zfill to use make_strings_children ([#12238](https://github.com/rapidsai/cudf/pull/12238)) [@davidwendt](https://github.com/davidwendt)
+- Fix sort gtests coded in namespace cudf::test ([#12237](https://github.com/rapidsai/cudf/pull/12237)) [@davidwendt](https://github.com/davidwendt)
+- Wrapping concat and file writes in `@acquire_spill_lock()` ([#12232](https://github.com/rapidsai/cudf/pull/12232)) [@madsbk](https://github.com/madsbk)
+- Rename `cudf::structs::detail::superimpose_parent_nulls` APIs ([#12230](https://github.com/rapidsai/cudf/pull/12230)) [@ttnghia](https://github.com/ttnghia)
+- Cover parsing to decimal types in `read_json` tests ([#12229](https://github.com/rapidsai/cudf/pull/12229)) [@vuule](https://github.com/vuule)
+- Spill Statistics ([#12223](https://github.com/rapidsai/cudf/pull/12223)) [@madsbk](https://github.com/madsbk)
+- Use CUDF_JNI_ENABLE_PROFILING to conditionally enable profiling support. ([#12221](https://github.com/rapidsai/cudf/pull/12221)) [@bdice](https://github.com/bdice)
+- Clean up of `test_spilling.py` ([#12220](https://github.com/rapidsai/cudf/pull/12220)) [@madsbk](https://github.com/madsbk)
+- Simplify repetitive boolean logic ([#12218](https://github.com/rapidsai/cudf/pull/12218)) [@vuule](https://github.com/vuule)
+- Add `Series.hasnans` and `Index.hasnans` ([#12214](https://github.com/rapidsai/cudf/pull/12214)) [@galipremsagar](https://github.com/galipremsagar)
+- Add cudf::strings:udf::replace function ([#12210](https://github.com/rapidsai/cudf/pull/12210)) [@davidwendt](https://github.com/davidwendt)
+- Adds in new java APIs for appending byte arrays to host columnar data ([#12208](https://github.com/rapidsai/cudf/pull/12208)) [@revans2](https://github.com/revans2)
+- Remove Python dependencies from Java CI. ([#12193](https://github.com/rapidsai/cudf/pull/12193)) [@bdice](https://github.com/bdice)
+- Fix null order in sort-based groupby and improve groupby tests ([#12191](https://github.com/rapidsai/cudf/pull/12191)) [@divyegala](https://github.com/divyegala)
+- Move strings children functions from cudf/strings/detail/utilities.cuh to new header ([#12185](https://github.com/rapidsai/cudf/pull/12185)) [@davidwendt](https://github.com/davidwendt)
+- Clean up existing JNI scalar to column code ([#12173](https://github.com/rapidsai/cudf/pull/12173)) [@revans2](https://github.com/revans2)
+- Remove JIT type names, refactor id_to_type. ([#12158](https://github.com/rapidsai/cudf/pull/12158)) [@bdice](https://github.com/bdice)
+- Update JNI version to 23.02.0-SNAPSHOT ([#12129](https://github.com/rapidsai/cudf/pull/12129)) [@pxLi](https://github.com/pxLi)
+- Minor refactor of cpp/src/io/parquet/page_data.cu ([#12126](https://github.com/rapidsai/cudf/pull/12126)) [@etseidl](https://github.com/etseidl)
+- Add codespell as a linter ([#12097](https://github.com/rapidsai/cudf/pull/12097)) [@benfred](https://github.com/benfred)
+- Enable specifying exceptions in error macros ([#12078](https://github.com/rapidsai/cudf/pull/12078)) [@vyasr](https://github.com/vyasr)
+- Move `_label_encoding` from Series to Column ([#12040](https://github.com/rapidsai/cudf/pull/12040)) [@shwina](https://github.com/shwina)
+- Add GitHub Actions Workflows ([#12002](https://github.com/rapidsai/cudf/pull/12002)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Consolidate dask-cudf `groupby_agg` calls in one place ([#10835](https://github.com/rapidsai/cudf/pull/10835)) [@charlesbluca](https://github.com/charlesbluca)
# cuDF 22.12.00 (8 Dec 2022)
diff --git a/README.md b/README.md
index 68c2d4f6276..36c1ff1d1fa 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,5 @@
#
cuDF - GPU DataFrames
-[![Build Status](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/branches/job/cudf-branch-pipeline/badge/icon)](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/branches/job/cudf-branch-pipeline/)
-
**NOTE:** For the latest stable [README.md](https://github.com/rapidsai/cudf/blob/main/README.md) ensure you are on the `main` branch.
## Resources
diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
deleted file mode 100755
index ec4f8d55372..00000000000
--- a/ci/benchmark/build.sh
+++ /dev/null
@@ -1,196 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
-#########################################
-# cuDF GPU build and test script for CI #
-#########################################
-set -e
-NUMARGS=$#
-ARGS=$*
-
-# Logger function for build status output
-function logger() {
- echo -e "\n>>>> $@\n"
-}
-
-# Arg parsing function
-function hasArg {
- (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
-}
-
-# Set path and build parallel level
-export PATH=/conda/bin:/usr/local/cuda/bin:$PATH
-export PARALLEL_LEVEL=4
-export CUDA_REL=${CUDA_VERSION%.*}
-export HOME="$WORKSPACE"
-
-# Parse git describe
-cd "$WORKSPACE"
-export GIT_DESCRIBE_TAG=`git describe --tags`
-export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
-
-# Set Benchmark Vars
-export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
-
-# Set `LIBCUDF_KERNEL_CACHE_PATH` environment variable to $HOME/.jitify-cache because
-# it's local to the container's virtual file system, and not shared with other CI jobs
-# like `/tmp` is.
-export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
-
-# Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=1
-
-# Dask version to install when `INSTALL_DASK_MAIN=0`
-export DASK_STABLE_VERSION="2022.12.0"
-
-function remove_libcudf_kernel_cache_dir {
- EXITCODE=$?
- logger "removing kernel cache dir: $LIBCUDF_KERNEL_CACHE_PATH"
- rm -rf "$LIBCUDF_KERNEL_CACHE_PATH" || logger "could not rm -rf $LIBCUDF_KERNEL_CACHE_PATH"
- exit $EXITCODE
-}
-
-trap remove_libcudf_kernel_cache_dir EXIT
-
-mkdir -p "$LIBCUDF_KERNEL_CACHE_PATH" || logger "could not mkdir -p $LIBCUDF_KERNEL_CACHE_PATH"
-
-################################################################################
-# SETUP - Check environment
-################################################################################
-
-logger "Check environment..."
-env
-
-logger "Check GPU usage..."
-nvidia-smi
-
-logger "Activate conda env..."
-. /opt/conda/etc/profile.d/conda.sh
-conda activate rapids
-
-# Enter dependencies to be shown in ASV tooltips.
-CUDF_DEPS=(librmm)
-LIBCUDF_DEPS=(librmm)
-
-conda install "rmm=$MINOR_VERSION.*" "cudatoolkit=$CUDA_REL" \
- "rapids-build-env=$MINOR_VERSION.*" \
- "rapids-notebook-env=$MINOR_VERSION.*" \
- rapids-pytest-benchmark
-
-# https://docs.rapids.ai/maintainers/depmgmt/
-# conda remove -f rapids-build-env rapids-notebook-env
-# conda install "your-pkg=1.0.0"
-
-# Install the conda-forge or nightly version of dask and distributed
-if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
- gpuci_logger "gpuci_mamba_retry install -c dask/label/dev 'dask/label/dev::dask' 'dask/label/dev::distributed'"
- gpuci_mamba_retry install -c dask/label/dev "dask/label/dev::dask" "dask/label/dev::distributed"
-else
- gpuci_logger "gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall"
- gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall
-fi
-
-# Install the master version of streamz
-logger "pip install git+https://github.com/python-streamz/streamz.git@master --upgrade --no-deps"
-pip install "git+https://github.com/python-streamz/streamz.git@master" --upgrade --no-deps
-
-logger "Check versions..."
-python --version
-
-conda info
-conda config --show-sources
-conda list --show-channel-urls
-
-################################################################################
-# BUILD - Build libcudf, cuDF and dask_cudf from source
-################################################################################
-
-logger "Build libcudf..."
-"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests --ptds
-
-################################################################################
-# BENCHMARK - Run and parse libcudf and cuDF benchmarks
-################################################################################
-
-logger "Running benchmarks..."
-
-#Download GBench results Parser
-curl -L https://raw.githubusercontent.com/rapidsai/benchmark/main/parser/GBenchToASV.py --output GBenchToASV.py
-
-###
-# Generate Metadata for dependencies
-###
-
-# Concatenate dependency arrays, convert to JSON array,
-# and remove duplicates.
-X=("${CUDF_DEPS[@]}" "${LIBCUDF_DEPS[@]}")
-DEPS=$(printf '%s\n' "${X[@]}" | jq -R . | jq -s 'unique')
-
-# Build object with k/v pairs of "dependency:version"
-DEP_VER_DICT=$(jq -n '{}')
-for DEP in $(echo "${DEPS}" | jq -r '.[]'); do
- VER=$(conda list | grep "^${DEP}" | awk '{print $2"-"$3}')
- DEP_VER_DICT=$(echo "${DEP_VER_DICT}" | jq -c --arg DEP "${DEP}" --arg VER "${VER}" '. + { ($DEP): $VER }')
-done
-
-# Pass in an array of dependencies to get a dict of "dependency:version"
-function getReqs() {
- local DEPS_ARR=("$@")
- local REQS="{}"
- for DEP in "${DEPS_ARR[@]}"; do
- VER=$(echo "${DEP_VER_DICT}" | jq -r --arg DEP "${DEP}" '.[$DEP]')
- REQS=$(echo "${REQS}" | jq -c --arg DEP "${DEP}" --arg VER "${VER}" '. + { ($DEP): $VER }')
- done
-
- echo "${REQS}"
-}
-
-###
-# Run LIBCUDF Benchmarks
-###
-
-REQS=$(getReqs "${LIBCUDF_DEPS[@]}")
-
-mkdir -p "$WORKSPACE/tmp/benchmark"
-touch "$WORKSPACE/tmp/benchmark/benchmarks.txt"
-ls ${GBENCH_BENCHMARKS_DIR} > "$WORKSPACE/tmp/benchmark/benchmarks.txt"
-
-#Disable error aborting while tests run, failed tests will not generate data
-logger "Running libcudf GBenchmarks..."
-cd ${GBENCH_BENCHMARKS_DIR}
-set +e
-while read BENCH;
-do
- nvidia-smi
- ./${BENCH} --benchmark_out=${BENCH}.json --benchmark_out_format=json
- EXITCODE=$?
- if [[ ${EXITCODE} != 0 ]]; then
- rm ./${BENCH}.json
- JOBEXITCODE=1
- fi
-done < "$WORKSPACE/tmp/benchmark/benchmarks.txt"
-set -e
-
-rm "$WORKSPACE/tmp/benchmark/benchmarks.txt"
-cd "$WORKSPACE"
-mv ${GBENCH_BENCHMARKS_DIR}/*.json "$WORKSPACE/tmp/benchmark/"
-python GBenchToASV.py -d "$WORKSPACE/tmp/benchmark/" -t ${S3_ASV_DIR} -n libcudf -b branch-${MINOR_VERSION} -r "${REQS}"
-
-###
-# Run Python Benchmarks
-###
-
-#REQS=$(getReqs "${CUDF_DEPS[@]}")
-
-#BENCHMARK_META=$(jq -n \
-# --arg NODE "${NODE_NAME}" \
-# --arg BRANCH "branch-${MINOR_VERSION}" \
-# --argjson REQS "${REQS}" '
-# {
-# "machineName": $NODE,
-# "commitBranch": $BRANCH,
-# "requirements": $REQS
-# }
-#')
-
-#echo "Benchmark meta:"
-#echo "${BENCHMARK_META}" | jq "."
diff --git a/ci/checks/changelog.sh b/ci/checks/changelog.sh
deleted file mode 100755
index 0dfcf27298e..00000000000
--- a/ci/checks/changelog.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2018, NVIDIA CORPORATION.
-#########################
-# cuDF CHANGELOG Tester #
-#########################
-
-# Checkout main for comparison
-git checkout --force --quiet main
-
-# Switch back to tip of PR branch
-git checkout --force --quiet current-pr-branch
-
-# Ignore errors during searching
-set +e
-
-# Get list of modified files between main and PR branch
-CHANGELOG=`git diff --name-only main...current-pr-branch | grep CHANGELOG.md`
-# Check if CHANGELOG has PR ID
-PRNUM=`cat CHANGELOG.md | grep "$PR_ID"`
-RETVAL=0
-
-# Return status of check result
-if [ "$CHANGELOG" != "" -a "$PRNUM" != "" ] ; then
- echo -e "\n\n>>>> PASSED: CHANGELOG.md has been updated with current PR information.\n\nPlease ensure the update meets the following criteria.\n"
-else
- echo -e "\n\n>>>> FAILED: CHANGELOG.md has not been updated!\n\nPlease add a line describing this PR to CHANGELOG.md in the repository root directory. The line should meet the following criteria.\n"
- RETVAL=1
-fi
-
-cat << EOF
- It should be placed under the section for the appropriate release.
- It should be placed under "New Features", "Improvements", or "Bug Fixes" as appropriate.
- It should be formatted as '- PR # '
- Example format for #491 '- PR #491 Add CI test script to check for updates to CHANGELOG.md in PRs'
-
-
-EOF
-
-exit $RETVAL
diff --git a/ci/checks/style.sh b/ci/checks/style.sh
deleted file mode 100755
index d32d88f5574..00000000000
--- a/ci/checks/style.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
-#####################
-# cuDF Style Tester #
-#####################
-
-# Ignore errors and set path
-set +e
-PATH=/conda/bin:$PATH
-LC_ALL=C.UTF-8
-LANG=C.UTF-8
-
-# Activate common conda env
-. /opt/conda/etc/profile.d/conda.sh
-conda activate rapids
-
-FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.04/cmake-format-rapids-cmake.json
-export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
-mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
-wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
-
-# Run pre-commit checks
-pre-commit run --hook-stage manual --all-files --show-diff-on-failure
diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
deleted file mode 100755
index 7ffd032bce0..00000000000
--- a/ci/cpu/build.sh
+++ /dev/null
@@ -1,150 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
-##############################################
-# cuDF CPU conda build script for CI #
-##############################################
-set -e
-
-# Set path and build parallel level
-# FIXME: PATH variable shouldn't be necessary.
-# This should be removed once we either stop using the `remote-docker-plugin`
-# or the following issue is addressed: https://github.com/gpuopenanalytics/remote-docker-plugin/issues/47
-export PATH=/usr/local/gcc9/bin:/opt/conda/bin:/usr/local/cuda/bin:$PATH
-export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
-
-# Set home to the job's workspace
-export HOME="$WORKSPACE"
-
-# Determine CUDA release version
-export CUDA_REL=${CUDA_VERSION%.*}
-
-# Setup 'gpuci_conda_retry' for build retries (results in 2 total attempts)
-export GPUCI_CONDA_RETRY_MAX=1
-export GPUCI_CONDA_RETRY_SLEEP=30
-
-# Workaround to keep Jenkins builds working
-# until we migrate fully to GitHub Actions
-export RAPIDS_CUDA_VERSION="${CUDA}"
-export SCCACHE_BUCKET=rapids-sccache
-export SCCACHE_REGION=us-west-2
-export SCCACHE_IDLE_TIMEOUT=32768
-
-# Use Ninja to build, setup Conda Build Dir
-export CMAKE_GENERATOR="Ninja"
-export CONDA_BLD_DIR="$WORKSPACE/.conda-bld"
-
-# Whether to keep `dask/label/dev` channel in the env. If INSTALL_DASK_MAIN=0,
-# `dask/label/dev` channel is removed.
-export INSTALL_DASK_MAIN=1
-
-# Switch to project root; also root of repo checkout
-cd "$WORKSPACE"
-
-# If nightly build, append current YYMMDD to version
-if [[ "$BUILD_MODE" = "branch" && "$SOURCE_BRANCH" = branch-* ]] ; then
- export VERSION_SUFFIX=`date +%y%m%d`
-fi
-
-################################################################################
-# SETUP - Check environment
-################################################################################
-
-gpuci_logger "Check environment variables"
-env
-
-gpuci_logger "Activate conda env"
-. /opt/conda/etc/profile.d/conda.sh
-conda activate rapids
-
-# Remove `rapidsai-nightly` & `dask/label/dev` channel if we are building main branch
-if [ "$SOURCE_BRANCH" = "main" ]; then
- conda config --system --remove channels rapidsai-nightly
- conda config --system --remove channels dask/label/dev
-elif [[ "${INSTALL_DASK_MAIN}" == 0 ]]; then
- # Remove `dask/label/dev` channel if INSTALL_DASK_MAIN=0
- conda config --system --remove channels dask/label/dev
-fi
-
-gpuci_logger "Check compiler versions"
-python --version
-
-gpuci_logger "Check conda environment"
-conda info
-conda config --show-sources
-conda list --show-channel-urls
-
-# FIX Added to deal with Anancoda SSL verification issues during conda builds
-conda config --set ssl_verify False
-
-# TODO: Move boa install to gpuci/rapidsai
-gpuci_mamba_retry install boa
-################################################################################
-# BUILD - Conda package builds
-################################################################################
-
-if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
- CONDA_BUILD_ARGS=""
- CONDA_CHANNEL=""
-else
- CONDA_BUILD_ARGS="--dirty --no-remove-work-dir"
- CONDA_CHANNEL="-c $WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/"
-fi
-
-if [ "$BUILD_LIBCUDF" == '1' ]; then
- gpuci_logger "Build conda pkg for libcudf"
- gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcudf $CONDA_BUILD_ARGS
-
- # BUILD_LIBCUDF == 1 means this job is being run on the cpu_build jobs
- # that is where we must also build the strings_udf package
- mkdir -p ${CONDA_BLD_DIR}/strings_udf/work
- STRINGS_UDF_BUILD_DIR=${CONDA_BLD_DIR}/strings_udf/work
- gpuci_logger "Build conda pkg for cudf (python 3.8), for strings_udf"
- gpuci_conda_retry mambabuild --no-build-id --croot ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/cudf ${CONDA_BUILD_ARGS} --python=3.8
- gpuci_logger "Build conda pkg for cudf (python 3.9), for strings_udf"
- gpuci_conda_retry mambabuild --no-build-id --croot ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/cudf ${CONDA_BUILD_ARGS} --python=3.9
-
- gpuci_logger "Build conda pkg for strings_udf (python 3.8)"
- gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} -c ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS --python=3.8
- gpuci_logger "Build conda pkg for strings_udf (python 3.9)"
- gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} -c ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS --python=3.9
-
- mkdir -p ${CONDA_BLD_DIR}/libcudf/work
- cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf/work
- gpuci_logger "sccache stats"
- sccache --show-stats
-
- # Copy libcudf build metrics results
- LIBCUDF_BUILD_DIR=$CONDA_BLD_DIR/libcudf/work/cpp/build
- echo "Checking for build metrics log $LIBCUDF_BUILD_DIR/ninja_log.html"
- if [[ -f "$LIBCUDF_BUILD_DIR/ninja_log.html" ]]; then
- gpuci_logger "Copying build metrics results"
- mkdir -p "$WORKSPACE/build-metrics"
- cp "$LIBCUDF_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
- cp "$LIBCUDF_BUILD_DIR/ninja.log" "$WORKSPACE/build-metrics/ninja.log"
- fi
-fi
-
-if [ "$BUILD_CUDF" == '1' ]; then
- gpuci_logger "Build conda pkg for cudf"
- gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
-
- gpuci_logger "Build conda pkg for dask-cudf"
- gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
-
- gpuci_logger "Build conda pkg for cudf_kafka"
- gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
-
- gpuci_logger "Build conda pkg for custreamz"
- gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
-
- gpuci_logger "Build conda pkg for strings_udf"
- gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/strings_udf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
-
-fi
-################################################################################
-# UPLOAD - Conda packages
-################################################################################
-
-# Uploads disabled due to new GH Actions implementation
-# gpuci_logger "Upload conda pkgs"
-# source ci/cpu/upload.sh
diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh
deleted file mode 100755
index 32589042f7f..00000000000
--- a/ci/cpu/prebuild.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
-set -e
-
-#Always upload cudf packages
-export UPLOAD_CUDF=1
-export UPLOAD_LIBCUDF=1
-export UPLOAD_CUDF_KAFKA=1
-
-if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
- #If project flash is not activate, always build both
- export BUILD_LIBCUDF=1
- export BUILD_CUDF=1
-fi
diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh
deleted file mode 100755
index 82c58673605..00000000000
--- a/ci/cpu/upload.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
-# Adopted from https://github.com/tmcdonell/travis-scripts/blob/dfaac280ac2082cd6bcaba3217428347899f2975/update-accelerate-buildbot.sh
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
-
-set -e
-
-# Setup 'gpuci_retry' for upload retries (results in 4 total attempts)
-export GPUCI_RETRY_MAX=3
-export GPUCI_RETRY_SLEEP=30
-
-# Set default label options if they are not defined elsewhere
-export LABEL_OPTION=${LABEL_OPTION:-"--label main"}
-
-# Skip uploads unless BUILD_MODE == "branch"
-if [ "${BUILD_MODE}" != "branch" ]; then
- echo "Skipping upload"
- return 0
-fi
-
-# Skip uploads if there is no upload key
-if [ -z "$MY_UPLOAD_KEY" ]; then
- echo "No upload key"
- return 0
-fi
-
-################################################################################
-# UPLOAD - Conda packages
-################################################################################
-
-gpuci_logger "Starting conda uploads"
-if [[ "$BUILD_LIBCUDF" == "1" && "$UPLOAD_LIBCUDF" == "1" ]]; then
- export LIBCUDF_FILES=$(conda build --no-build-id --croot "${CONDA_BLD_DIR}" conda/recipes/libcudf --output)
- LIBCUDF_FILES=$(echo "$LIBCUDF_FILES" | sed 's/.*libcudf-example.*//') # skip libcudf-example pkg upload
- gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing --no-progress $LIBCUDF_FILES
-fi
-
-if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF" == "1" ]]; then
- export CUDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/cudf --python=$PYTHON --output)
- test -e ${CUDF_FILE}
- echo "Upload cudf: ${CUDF_FILE}"
- gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUDF_FILE} --no-progress
-
- export STRINGS_UDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/strings_udf --python=$PYTHON --output -c "${CONDA_BLD_DIR}")
- test -e ${STRINGS_UDF_FILE}
- echo "Upload strings_udf: ${STRINGS_UDF_FILE}"
- gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${STRINGS_UDF_FILE} --no-progress
-
- export DASK_CUDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/dask-cudf --python=$PYTHON --output)
- test -e ${DASK_CUDF_FILE}
- echo "Upload dask-cudf: ${DASK_CUDF_FILE}"
- gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${DASK_CUDF_FILE} --no-progress
-
- export CUSTREAMZ_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/custreamz --python=$PYTHON --output)
- test -e ${CUSTREAMZ_FILE}
- echo "Upload custreamz: ${CUSTREAMZ_FILE}"
- gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUSTREAMZ_FILE} --no-progress
-fi
-
-if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF_KAFKA" == "1" ]]; then
- export CUDF_KAFKA_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/cudf_kafka --python=$PYTHON --output)
- test -e ${CUDF_KAFKA_FILE}
- echo "Upload cudf_kafka: ${CUDF_KAFKA_FILE}"
- gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUDF_KAFKA_FILE} --no-progress
-fi
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
deleted file mode 100755
index 51a2d9ab170..00000000000
--- a/ci/gpu/build.sh
+++ /dev/null
@@ -1,324 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
-##############################################
-# cuDF GPU build and test script for CI #
-##############################################
-set -e
-NUMARGS=$#
-ARGS=$*
-
-# Arg parsing function
-function hasArg {
- (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
-}
-
-# Set path and build parallel level
-export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
-export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
-
-# Set home to the job's workspace
-export HOME="$WORKSPACE"
-
-# Switch to project root; also root of repo checkout
-cd "$WORKSPACE"
-
-# Determine CUDA release version
-export CUDA_REL=${CUDA_VERSION%.*}
-export CONDA_ARTIFACT_PATH="$WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/"
-
-# Workaround to keep Jenkins builds working
-# until we migrate fully to GitHub Actions
-export RAPIDS_CUDA_VERSION="${CUDA}"
-export SCCACHE_BUCKET=rapids-sccache
-export SCCACHE_REGION=us-west-2
-export SCCACHE_IDLE_TIMEOUT=32768
-
-# Parse git describe
-export GIT_DESCRIBE_TAG=`git describe --tags`
-export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
-unset GIT_DESCRIBE_TAG
-
-# Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=1
-
-# Dask version to install when `INSTALL_DASK_MAIN=0`
-export DASK_STABLE_VERSION="2022.12.0"
-
-# ucx-py version
-export UCX_PY_VERSION='0.31.*'
-
-################################################################################
-# TRAP - Setup trap for removing jitify cache
-################################################################################
-
-# Set `LIBCUDF_KERNEL_CACHE_PATH` environment variable to $HOME/.jitify-cache
-# because it's local to the container's virtual file system, and not shared with
-# other CI jobs like `/tmp` is
-export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
-
-function remove_libcudf_kernel_cache_dir {
- EXITCODE=$?
- gpuci_logger "TRAP: Removing kernel cache dir: $LIBCUDF_KERNEL_CACHE_PATH"
- rm -rf "$LIBCUDF_KERNEL_CACHE_PATH" \
- || gpuci_logger "[ERROR] TRAP: Could not rm -rf $LIBCUDF_KERNEL_CACHE_PATH"
- exit $EXITCODE
-}
-
-# Set trap to run on exit
-gpuci_logger "TRAP: Set trap to remove jitify cache on exit"
-trap remove_libcudf_kernel_cache_dir EXIT
-
-mkdir -p "$LIBCUDF_KERNEL_CACHE_PATH" \
- || gpuci_logger "[ERROR] TRAP: Could not mkdir -p $LIBCUDF_KERNEL_CACHE_PATH"
-
-################################################################################
-# SETUP - Check environment
-################################################################################
-
-gpuci_logger "Check environment variables"
-env
-
-gpuci_logger "Check GPU usage"
-nvidia-smi
-
-gpuci_logger "Activate conda env"
-. /opt/conda/etc/profile.d/conda.sh
-conda activate rapids
-
-# Remove `dask/label/dev` channel if INSTALL_DASK_MAIN=0
-if [ "$SOURCE_BRANCH" != "main" ] && [[ "${INSTALL_DASK_MAIN}" == 0 ]]; then
- conda config --system --remove channels dask/label/dev
- gpuci_mamba_retry install conda-forge::dask==$DASK_STABLE_VERSION conda-forge::distributed==$DASK_STABLE_VERSION conda-forge::dask-core==$DASK_STABLE_VERSION --force-reinstall
-fi
-
-gpuci_logger "Check conda environment"
-conda info
-conda config --show-sources
-conda list --show-channel-urls
-gpuci_logger "Check compiler versions"
-python --version
-
-function install_dask {
- # Install the conda-forge or nightly version of dask and distributed
- gpuci_logger "Install the conda-forge or nightly version of dask and distributed"
- set -x
- if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
- gpuci_logger "gpuci_mamba_retry install -c dask/label/dev 'dask/label/dev::dask' 'dask/label/dev::distributed'"
- gpuci_mamba_retry install -c dask/label/dev "dask/label/dev::dask" "dask/label/dev::distributed"
- conda list
- else
- gpuci_logger "gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall"
- gpuci_mamba_retry install conda-forge::dask==$DASK_STABLE_VERSION conda-forge::distributed==$DASK_STABLE_VERSION conda-forge::dask-core==$DASK_STABLE_VERSION --force-reinstall
- fi
- # Install the main version of streamz
- gpuci_logger "Install the main version of streamz"
- # Need to uninstall streamz that is already in the env.
- pip uninstall -y streamz
- pip install "git+https://github.com/python-streamz/streamz.git@master" --upgrade --no-deps
- set +x
-}
-
-install_dask
-
-if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
-
- gpuci_logger "Install dependencies"
- gpuci_mamba_retry install -y \
- "cudatoolkit=$CUDA_REL" \
- "rapids-build-env=$MINOR_VERSION.*" \
- "rapids-notebook-env=$MINOR_VERSION.*" \
- "dask-cuda=${MINOR_VERSION}" \
- "rmm=$MINOR_VERSION.*" \
- "ucx-py=${UCX_PY_VERSION}"
-
- # https://docs.rapids.ai/maintainers/depmgmt/
- # gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
- # gpuci_mamba_retry install -y "your-pkg=1.0.0"
-
- ################################################################################
- # BUILD - Build libcudf, cuDF, libcudf_kafka, dask_cudf, and strings_udf from source
- ################################################################################
-
- gpuci_logger "Build from source"
- "$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka strings_udf benchmarks tests --ptds
-
- ################################################################################
- # TEST - Run GoogleTest
- ################################################################################
-
- set +e -Eo pipefail
- EXITCODE=0
- trap "EXITCODE=1" ERR
-
-
- if hasArg --skip-tests; then
- gpuci_logger "Skipping Tests"
- exit 0
- else
- gpuci_logger "Check GPU usage"
- nvidia-smi
-
- gpuci_logger "GoogleTests"
- set -x
- cd "$WORKSPACE/cpp/build"
-
- for gt in "$WORKSPACE/cpp/build/gtests/"* ; do
- test_name=$(basename ${gt})
- echo "Running GoogleTest $test_name"
- ${gt} --gtest_output=xml:"$WORKSPACE/test-results/"
- done
- fi
-else
- #Project Flash
-
- if hasArg --skip-tests; then
- gpuci_logger "Skipping Tests"
- exit 0
- fi
-
- gpuci_logger "Check GPU usage"
- nvidia-smi
-
- gpuci_logger "Installing libcudf, libcudf_kafka and libcudf-tests"
- gpuci_mamba_retry install -y -c ${CONDA_ARTIFACT_PATH} libcudf libcudf_kafka libcudf-tests
-
- # TODO: Move boa install to gpuci/rapidsai
- gpuci_mamba_retry install boa
- gpuci_logger "Building cudf, dask-cudf, cudf_kafka and custreamz"
- export CONDA_BLD_DIR="$WORKSPACE/.conda-bld"
- gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
- gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
- gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
- gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
-
- # the CUDA component of strings_udf must be built on cuda 11.5 just like libcudf
- # but because there is no separate python package, we must also build the python on the 11.5 jobs
- # this means that at this point (on the GPU test jobs) the whole package is already built and has been
- # copied by CI from the upstream 11.5 jobs into $CONDA_ARTIFACT_PATH
- gpuci_logger "Installing cudf, dask-cudf, cudf_kafka, and custreamz"
- gpuci_mamba_retry install cudf dask-cudf cudf_kafka custreamz -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"
-
- gpuci_logger "Check current conda environment"
- conda list --show-channel-urls
-
- gpuci_logger "GoogleTests"
-
- # Set up library for finding incorrect default stream usage.
- cd "$WORKSPACE/cpp/tests/utilities/identify_stream_usage/"
- mkdir build && cd build && cmake .. -GNinja && ninja && ninja test
- STREAM_IDENTIFY_LIB="$WORKSPACE/cpp/tests/utilities/identify_stream_usage/build/libidentify_stream_usage.so"
-
- # Run libcudf and libcudf_kafka gtests from libcudf-tests package
- for gt in "$CONDA_PREFIX/bin/gtests/libcudf"*/* ; do
- test_name=$(basename ${gt})
-
- echo "Running GoogleTest $test_name"
- if [[ ${test_name} == "SPAN_TEST" ]]; then
- # This one test is specifically designed to test using a thrust device
- # vector, so we expect and allow it to include default stream usage.
- gtest_filter="SpanTest.CanConstructFromDeviceContainers"
- GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" --gtest_filter="-${gtest_filter}"
- ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" --gtest_filter="${gtest_filter}"
- else
- GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:"$WORKSPACE/test-results/"
- fi
- done
-
- export LIB_BUILD_DIR="$WORKSPACE/ci/artifacts/cudf/cpu/libcudf_work/cpp/build"
- # Copy libcudf build time results
- echo "Checking for build time log $LIB_BUILD_DIR/ninja_log.xml"
- if [[ -f "$LIB_BUILD_DIR/ninja_log.xml" ]]; then
- gpuci_logger "Copying build time results"
- cp "$LIB_BUILD_DIR/ninja_log.xml" "$WORKSPACE/test-results/buildtimes-junit.xml"
- fi
-
- ################################################################################
- # MEMCHECK - Run compute-sanitizer on GoogleTest (only in nightly builds)
- ################################################################################
- if [[ "$BUILD_MODE" == "branch" && "$BUILD_TYPE" == "gpu" ]]; then
- if [[ "$COMPUTE_SANITIZER_ENABLE" == "true" ]]; then
- gpuci_logger "Memcheck on GoogleTests with rmm_mode=cuda"
- export GTEST_CUDF_RMM_MODE=cuda
- COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck"
- mkdir -p "$WORKSPACE/test-results/"
- for gt in "$CONDA_PREFIX/bin/gtests/libcudf"*/* ; do
- test_name=$(basename ${gt})
- if [[ "$test_name" == "ERROR_TEST" ]]; then
- continue
- fi
- echo "Running GoogleTest $test_name"
- ${COMPUTE_SANITIZER_CMD} ${gt} | tee "$WORKSPACE/test-results/${test_name}.cs.log"
- done
- unset GTEST_CUDF_RMM_MODE
- # test-results/*.cs.log are processed in gpuci
- fi
- fi
-fi
-
-# Both regular and Project Flash proceed here
-
-# set environment variable for numpy 1.16
-# will be enabled for later versions by default
-np_ver=$(python -c "import numpy; print('.'.join(numpy.__version__.split('.')[:-1]))")
-if [ "$np_ver" == "1.16" ];then
- export NUMPY_EXPERIMENTAL_ARRAY_FUNCTION=1
-fi
-
-################################################################################
-# TEST - Run py.test, notebooks
-################################################################################
-
-cd "$WORKSPACE/python/cudf/cudf"
-# It is essential to cd into $WORKSPACE/python/cudf/cudf as `pytest-xdist` + `coverage` seem to work only at this directory level.
-gpuci_logger "Check conda packages"
-conda list
-gpuci_logger "Python py.test for cuDF"
-py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope tests
-
-gpuci_logger "Python py.tests for cuDF with spilling (CUDF_SPILL_DEVICE_LIMIT=1)"
-# Due to time concerns, we only run tests marked "spilling"
-CUDF_SPILL=on CUDF_SPILL_DEVICE_LIMIT=1 py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov-append --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope -m spilling tests
-
-cd "$WORKSPACE/python/dask_cudf"
-gpuci_logger "Python py.test for dask-cudf"
-py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term dask_cudf
-
-cd "$WORKSPACE/python/custreamz"
-gpuci_logger "Python py.test for cuStreamz"
-py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term custreamz
-
-
-# only install strings_udf after cuDF is finished testing without its presence
-gpuci_logger "Installing strings_udf"
-gpuci_mamba_retry install strings_udf -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"
-
-cd "$WORKSPACE/python/strings_udf/strings_udf"
-gpuci_logger "Python py.test for strings_udf"
-py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests
-
-# retest cuDF UDFs
-cd "$WORKSPACE/python/cudf/cudf"
-gpuci_logger "Python py.test retest cuDF UDFs"
-py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-strings-udf-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf-strings-udf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-strings-udf-coverage.xml" --cov-report term --dist=loadscope tests/test_udf_masked_ops.py
-
-
-# Run benchmarks with both cudf and pandas to ensure compatibility is maintained.
-# Benchmarks are run in DEBUG_ONLY mode, meaning that only small data sizes are used.
-# Therefore, these runs only verify that benchmarks are valid.
-# They do not generate meaningful performance measurements.
-cd "$WORKSPACE/python/cudf"
-gpuci_logger "Python pytest for cuDF benchmarks"
-CUDF_BENCHMARKS_DEBUG_ONLY=ON pytest -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" -v --dist=loadscope benchmarks
-
-gpuci_logger "Python pytest for cuDF benchmarks using pandas"
-CUDF_BENCHMARKS_USE_PANDAS=ON CUDF_BENCHMARKS_DEBUG_ONLY=ON pytest -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" -v --dist=loadscope benchmarks
-
-gpuci_logger "Test notebooks"
-"$WORKSPACE/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log
-python "$WORKSPACE/ci/utils/nbtestlog2junitxml.py" nbtest.log
-
-if [ -n "${CODECOV_TOKEN}" ]; then
- codecov -t $CODECOV_TOKEN
-fi
-
-return ${EXITCODE}
diff --git a/ci/gpu/java.sh b/ci/gpu/java.sh
deleted file mode 100755
index 2db9cd57eb8..00000000000
--- a/ci/gpu/java.sh
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
-##############################################
-# cuDF GPU build and test script for CI #
-##############################################
-set -e
-NUMARGS=$#
-ARGS=$*
-
-# Arg parsing function
-function hasArg {
- (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
-}
-
-# Set path and build parallel level
-export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
-export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
-
-# Set home to the job's workspace
-export HOME="$WORKSPACE"
-
-# Switch to project root; also root of repo checkout
-cd "$WORKSPACE"
-
-# Determine CUDA release version
-export CUDA_REL=${CUDA_VERSION%.*}
-export CONDA_ARTIFACT_PATH="$WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/"
-
-# Parse git describe
-export GIT_DESCRIBE_TAG=`git describe --tags`
-export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
-
-################################################################################
-# TRAP - Setup trap for removing jitify cache
-################################################################################
-
-# Set `LIBCUDF_KERNEL_CACHE_PATH` environment variable to $HOME/.jitify-cache
-# because it's local to the container's virtual file system, and not shared with
-# other CI jobs like `/tmp` is
-export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
-
-function remove_libcudf_kernel_cache_dir {
- EXITCODE=$?
- gpuci_logger "TRAP: Removing kernel cache dir: $LIBCUDF_KERNEL_CACHE_PATH"
- rm -rf "$LIBCUDF_KERNEL_CACHE_PATH" \
- || gpuci_logger "[ERROR] TRAP: Could not rm -rf $LIBCUDF_KERNEL_CACHE_PATH"
- exit $EXITCODE
-}
-
-# Set trap to run on exit
-gpuci_logger "TRAP: Set trap to remove jitify cache on exit"
-trap remove_libcudf_kernel_cache_dir EXIT
-
-mkdir -p "$LIBCUDF_KERNEL_CACHE_PATH" \
- || gpuci_logger "[ERROR] TRAP: Could not mkdir -p $LIBCUDF_KERNEL_CACHE_PATH"
-
-################################################################################
-# SETUP - Check environment
-################################################################################
-
-gpuci_logger "Check environment variables"
-env
-
-gpuci_logger "Check GPU usage"
-nvidia-smi
-
-gpuci_logger "Activate conda env"
-. /opt/conda/etc/profile.d/conda.sh
-conda activate rapids
-
-gpuci_logger "Check conda environment"
-conda info
-conda config --show-sources
-conda list --show-channel-urls
-
-gpuci_logger "Install dependencies"
-gpuci_mamba_retry install -y \
- "cudatoolkit=$CUDA_REL" \
- "rapids-build-env=$MINOR_VERSION.*" \
- "rmm=$MINOR_VERSION.*" \
- "openjdk=8.*" \
- "maven"
-# "mamba install openjdk" adds an activation script to set JAVA_HOME but this is
-# not triggered on installation. Re-activating the conda environment will set
-# this environment variable so that CMake can find JNI.
-conda activate rapids
-
-# https://docs.rapids.ai/maintainers/depmgmt/
-# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
-# gpuci_mamba_retry install -y "your-pkg=1.0.0"
-
-gpuci_logger "Check conda environment"
-conda info
-conda config --show-sources
-conda list --show-channel-urls
-
-################################################################################
-# INSTALL - Install libcudf artifacts
-################################################################################
-
-gpuci_logger "Installing libcudf"
-gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} libcudf
-
-################################################################################
-# TEST - Run java tests
-################################################################################
-
-gpuci_logger "Check GPU usage"
-nvidia-smi
-
-gpuci_logger "Running Java Tests"
-cd ${WORKSPACE}/java
-mvn test -B -DCUDF_JNI_ARROW_STATIC=OFF
-
-return ${EXITCODE}
diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh
deleted file mode 100755
index 36d093d0d28..00000000000
--- a/ci/gpu/test-notebooks.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
-
-NOTEBOOKS_DIR="$WORKSPACE/notebooks"
-NBTEST="$WORKSPACE/ci/utils/nbtest.sh"
-LIBCUDF_KERNEL_CACHE_PATH="$WORKSPACE/.jitcache"
-
-cd ${NOTEBOOKS_DIR}
-TOPLEVEL_NB_FOLDERS=$(find . -name *.ipynb |cut -d'/' -f2|sort -u)
-
-# Add notebooks that should be skipped here
-# (space-separated list of filenames without paths)
-
-SKIPNBS=""
-
-## Check env
-env
-
-EXITCODE=0
-
-# Always run nbtest in all TOPLEVEL_NB_FOLDERS, set EXITCODE to failure
-# if any run fails
-
-cd ${NOTEBOOKS_DIR}
-for nb in $(find . -name "*.ipynb"); do
- nbBasename=$(basename ${nb})
- # Skip all NBs that use dask (in the code or even in their name)
- if ((echo ${nb}|grep -qi dask) || \
- (grep -q dask ${nb})); then
- echo "--------------------------------------------------------------------------------"
- echo "SKIPPING: ${nb} (suspected Dask usage, not currently automatable)"
- echo "--------------------------------------------------------------------------------"
- elif (echo " ${SKIPNBS} " | grep -q " ${nbBasename} "); then
- echo "--------------------------------------------------------------------------------"
- echo "SKIPPING: ${nb} (listed in skip list)"
- echo "--------------------------------------------------------------------------------"
- else
- nvidia-smi
- ${NBTEST} ${nbBasename}
- EXITCODE=$((EXITCODE | $?))
- rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/*
- fi
-done
-
-
-nvidia-smi
-
-exit ${EXITCODE}
diff --git a/ci/local/README.md b/ci/local/README.md
deleted file mode 100644
index 7754bcaf647..00000000000
--- a/ci/local/README.md
+++ /dev/null
@@ -1,57 +0,0 @@
-## Purpose
-
-This script is designed for developer and contributor use. This tool mimics the actions of gpuCI on your local machine. This allows you to test and even debug your code inside a gpuCI base container before pushing your code as a GitHub commit.
-The script can be helpful in locally triaging and debugging RAPIDS continuous integration failures.
-
-## Requirements
-
-```
-nvidia-docker
-```
-
-## Usage
-
-```
-bash build.sh [-h] [-H] [-s] [-r ] [-i ]
-Build and test your local repository using a base gpuCI Docker image
-
-where:
- -H Show this help text
- -r Path to repository (defaults to working directory)
- -i Use Docker image (default is gpuci/rapidsai:${NIGHTLY_VERSION}-cuda11.5-devel-ubuntu20.04-py3.8)
- -s Skip building and testing and start an interactive shell in a container of the Docker image
-```
-
-Example Usage:
-`bash build.sh -r ~/rapids/cudf -i gpuci/rapidsai:22.02-cuda11.5-devel-ubuntu20.04-py3.8`
-
-For a full list of available gpuCI docker images, visit our [DockerHub](https://hub.docker.com/r/gpuci/rapidsai/tags) page.
-
-Style Check:
-```bash
-$ bash ci/local/build.sh -r ~/rapids/cudf -s
-$ source activate rapids # Activate gpuCI conda environment
-$ cd rapids
-$ flake8 python
-```
-
-## Information
-
-There are some caveats to be aware of when using this script, especially if you plan on developing from within the container itself.
-
-
-### Docker Image Build Repository
-
-The docker image will generate build artifacts in a folder on your machine located in the `root` directory of the repository you passed to the script. For the above example, the directory is named `~/rapids/cudf/build_rapidsai_cuda11.5-ubuntu20.04-py3.8/`. Feel free to remove this directory after the script is finished.
-
-*Note*: The script *will not* override your local build repository. Your local environment stays in tact.
-
-
-### Where The User is Dumped
-
-The script will build your repository and run all tests. If any tests fail, it dumps the user into the docker container itself to allow you to debug from within the container. If all the tests pass as expected the container exits and is automatically removed. Remember to exit the container if tests fail and you do not wish to debug within the container itself.
-
-
-### Container File Structure
-
-Your repository will be located in the `/rapids/` folder of the container. This folder is volume mounted from the local machine. Any changes to the code in this repository are replicated onto the local machine. The `cpp/build` and `python/build` directories within your repository is on a separate mount to avoid conflicting with your local build artifacts.
diff --git a/ci/local/build.sh b/ci/local/build.sh
deleted file mode 100755
index f6479cd76cc..00000000000
--- a/ci/local/build.sh
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
-
-GIT_DESCRIBE_TAG=`git describe --tags`
-MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
-
-DOCKER_IMAGE="gpuci/rapidsai:${MINOR_VERSION}-cuda11.5-devel-ubuntu20.04-py3.8"
-REPO_PATH=${PWD}
-RAPIDS_DIR_IN_CONTAINER="/rapids"
-CPP_BUILD_DIR="cpp/build"
-PYTHON_BUILD_DIR="python/build"
-CONTAINER_SHELL_ONLY=0
-
-SHORTHELP="$(basename "$0") [-h] [-H] [-s] [-r ] [-i ]"
-LONGHELP="${SHORTHELP}
-Build and test your local repository using a base gpuCI Docker image
-
-where:
- -H Show this help text
- -r Path to repository (defaults to working directory)
- -i Use Docker image (default is ${DOCKER_IMAGE})
- -s Skip building and testing and start an interactive shell in a container of the Docker image
-"
-
-# Limit GPUs available to container based on CUDA_VISIBLE_DEVICES
-if [[ -z "${CUDA_VISIBLE_DEVICES}" ]]; then
- NVIDIA_VISIBLE_DEVICES="all"
-else
- NVIDIA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
-fi
-
-while getopts ":hHr:i:s" option; do
- case ${option} in
- r)
- REPO_PATH=${OPTARG}
- ;;
- i)
- DOCKER_IMAGE=${OPTARG}
- ;;
- s)
- CONTAINER_SHELL_ONLY=1
- ;;
- h)
- echo "${SHORTHELP}"
- exit 0
- ;;
- H)
- echo "${LONGHELP}"
- exit 0
- ;;
- *)
- echo "ERROR: Invalid flag"
- echo "${SHORTHELP}"
- exit 1
- ;;
- esac
-done
-
-REPO_PATH_IN_CONTAINER="${RAPIDS_DIR_IN_CONTAINER}/$(basename "${REPO_PATH}")"
-CPP_BUILD_DIR_IN_CONTAINER="${RAPIDS_DIR_IN_CONTAINER}/$(basename "${REPO_PATH}")/${CPP_BUILD_DIR}"
-PYTHON_BUILD_DIR_IN_CONTAINER="${RAPIDS_DIR_IN_CONTAINER}/$(basename "${REPO_PATH}")/${PYTHON_BUILD_DIR}"
-
-
-# BASE_CONTAINER_BUILD_DIR is named after the image name, allowing for
-# multiple image builds to coexist on the local filesystem. This will
-# be mapped to the typical BUILD_DIR inside of the container. Builds
-# running in the container generate build artifacts just as they would
-# in a bare-metal environment, and the host filesystem is able to
-# maintain the host build in BUILD_DIR as well.
-# shellcheck disable=SC2001,SC2005,SC2046
-BASE_CONTAINER_BUILD_DIR=${REPO_PATH}/build_$(echo $(basename "${DOCKER_IMAGE}")|sed -e 's/:/_/g')
-CPP_CONTAINER_BUILD_DIR=${BASE_CONTAINER_BUILD_DIR}/cpp
-PYTHON_CONTAINER_BUILD_DIR=${BASE_CONTAINER_BUILD_DIR}/python
-
-
-BUILD_SCRIPT="#!/bin/bash
-set -e
-WORKSPACE=${REPO_PATH_IN_CONTAINER}
-PREBUILD_SCRIPT=${REPO_PATH_IN_CONTAINER}/ci/gpu/prebuild.sh
-BUILD_SCRIPT=${REPO_PATH_IN_CONTAINER}/ci/gpu/build.sh
-if [ -f \${PREBUILD_SCRIPT} ]; then
- source \${PREBUILD_SCRIPT}
-fi
-yes | source \${BUILD_SCRIPT}
-"
-
-if (( CONTAINER_SHELL_ONLY == 0 )); then
- COMMAND="${CPP_BUILD_DIR_IN_CONTAINER}/build.sh || bash"
-else
- COMMAND="bash"
-fi
-
-# Create the build dir for the container to mount, generate the build script inside of it
-mkdir -p "${BASE_CONTAINER_BUILD_DIR}"
-mkdir -p "${CPP_CONTAINER_BUILD_DIR}"
-mkdir -p "${PYTHON_CONTAINER_BUILD_DIR}"
-# Create build directories. This is to ensure correct owner for directories. If
-# directories don't exist there is side effect from docker volume mounting creating build
-# directories owned by root(volume mount point(s))
-mkdir -p "${REPO_PATH}/${CPP_BUILD_DIR}"
-mkdir -p "${REPO_PATH}/${PYTHON_BUILD_DIR}"
-
-echo "${BUILD_SCRIPT}" > "${CPP_CONTAINER_BUILD_DIR}/build.sh"
-chmod ugo+x "${CPP_CONTAINER_BUILD_DIR}/build.sh"
-
-# Mount passwd and group files to docker. This allows docker to resolve username and group
-# avoiding these nags:
-# * groups: cannot find name for group ID ID
-# * I have no name!@id:/$
-# For ldap user user information is not present in system /etc/passwd and /etc/group files.
-# Hence we generate dummy files for ldap users which docker uses to resolve username and group
-
-PASSWD_FILE="/etc/passwd"
-GROUP_FILE="/etc/group"
-
-USER_FOUND=$(grep -wc "$(whoami)" < "$PASSWD_FILE")
-if [ "$USER_FOUND" == 0 ]; then
- echo "Local User not found, LDAP WAR for docker mounts activated. Creating dummy passwd and group"
- echo "files to allow docker resolve username and group"
- cp "$PASSWD_FILE" /tmp/passwd
- PASSWD_FILE="/tmp/passwd"
- cp "$GROUP_FILE" /tmp/group
- GROUP_FILE="/tmp/group"
- echo "$(whoami):x:$(id -u):$(id -g):$(whoami),,,:$HOME:$SHELL" >> "$PASSWD_FILE"
- echo "$(whoami):x:$(id -g):" >> "$GROUP_FILE"
-fi
-
-# Run the generated build script in a container
-docker pull "${DOCKER_IMAGE}"
-
-DOCKER_MAJOR=$(docker -v|sed 's/[^[0-9]*\([0-9]*\).*/\1/')
-GPU_OPTS="--gpus device=${NVIDIA_VISIBLE_DEVICES}"
-if [ "$DOCKER_MAJOR" -lt 19 ]
-then
- GPU_OPTS="--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES='${NVIDIA_VISIBLE_DEVICES}'"
-fi
-
-docker run --rm -it ${GPU_OPTS} \
- -u "$(id -u)":"$(id -g)" \
- -v "${REPO_PATH}":"${REPO_PATH_IN_CONTAINER}" \
- -v "${CPP_CONTAINER_BUILD_DIR}":"${CPP_BUILD_DIR_IN_CONTAINER}" \
- -v "${PYTHON_CONTAINER_BUILD_DIR}":"${PYTHON_BUILD_DIR_IN_CONTAINER}" \
- -v "$PASSWD_FILE":/etc/passwd:ro \
- -v "$GROUP_FILE":/etc/group:ro \
- --cap-add=SYS_PTRACE \
- "${DOCKER_IMAGE}" bash -c "${COMMAND}"
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 555a67d9cd6..d2be7d5f222 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -43,9 +43,6 @@ sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/
# Strings UDF update
sed_runner 's/'"strings_udf_version .*)"'/'"strings_udf_version ${NEXT_FULL_TAG})"'/g' python/strings_udf/CMakeLists.txt
-# Groupby UDF update
-sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' python/cudf/udf_cpp/CMakeLists.txt
-
# cpp libcudf_kafka update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt
@@ -56,7 +53,6 @@ sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g'
sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake
# cmake-format rapids-cmake definitions
-sed_runner 's/'"branch-.*\/cmake-format-rapids-cmake.json"'/'"branch-${NEXT_SHORT_TAG}\/cmake-format-rapids-cmake.json"'/g' ci/checks/style.sh
sed_runner 's/'"branch-.*\/cmake-format-rapids-cmake.json"'/'"branch-${NEXT_SHORT_TAG}\/cmake-format-rapids-cmake.json"'/g' ci/check_style.sh
# doxyfile update
@@ -84,10 +80,6 @@ sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md
sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/basic/CMakeLists.txt
sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/strings/CMakeLists.txt
-# ucx-py version update
-sed_runner "s/export UCX_PY_VERSION=.*/export UCX_PY_VERSION='${NEXT_UCX_PY_VERSION}'/g" ci/gpu/build.sh
-sed_runner "s/export UCX_PY_VERSION=.*/export UCX_PY_VERSION='${NEXT_UCX_PY_VERSION}'/g" ci/gpu/java.sh
-
# Need to distutils-normalize the original version
NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh
index 5b1e8aa398c..0be72486319 100755
--- a/ci/test_cpp.sh
+++ b/ci/test_cpp.sh
@@ -21,7 +21,6 @@ set -u
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"
-SUITEERROR=0
rapids-print-env
@@ -32,15 +31,14 @@ rapids-mamba-retry install \
rapids-logger "Check GPU usage"
nvidia-smi
+EXITCODE=0
+trap "EXITCODE=1" ERR
set +e
-# TODO: Disabling stream identification for now.
-# Set up library for finding incorrect default stream usage.
-#pushd "cpp/tests/utilities/identify_stream_usage/"
-#mkdir build && cd build && cmake .. -GNinja && ninja && ninja test
-#STREAM_IDENTIFY_LIB="$(realpath build/libidentify_stream_usage.so)"
-#echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB}"
-#popd
+# Get library for finding incorrect default stream usage.
+STREAM_IDENTIFY_LIB="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage.so"
+
+echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB}"
# Run libcudf and libcudf_kafka gtests from libcudf-tests package
rapids-logger "Run gtests"
@@ -50,22 +48,21 @@ rapids-logger "Run gtests"
for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do
test_name=$(basename ${gt})
echo "Running gtest $test_name"
- ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR}
- # TODO: Disabling stream identification for now.
- #if [[ ${test_name} == "SPAN_TEST" ]]; then
- # # This one test is specifically designed to test using a thrust device
- # # vector, so we expect and allow it to include default stream usage.
- # gtest_filter="SpanTest.CanConstructFromDeviceContainers"
- # GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="-${gtest_filter}" && \
- # ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="${gtest_filter}"
- #else
- # GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR}
- #fi
-
- exitcode=$?
- if (( ${exitcode} != 0 )); then
- SUITEERROR=${exitcode}
- echo "FAILED: GTest ${gt}"
+
+ # TODO: This strategy for using the stream lib will need to change when we
+ # switch to invoking ctest. For one, we will want to set the test
+ # properties to use the lib (which means that the decision will be made at
+ # CMake-configure time instead of runtime). We may also need to leverage
+ # something like gtest_discover_tests to be able to filter on the
+ # underlying test names.
+ if [[ ${test_name} == "SPAN_TEST" ]]; then
+ # This one test is specifically designed to test using a thrust device
+ # vector, so we expect and allow it to include default stream usage.
+ gtest_filter="SpanTest.CanConstructFromDeviceContainers"
+ GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="-${gtest_filter}" && \
+ ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="${gtest_filter}"
+ else
+ GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR}
fi
done
@@ -82,7 +79,8 @@ if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then
${COMPUTE_SANITIZER_CMD} ${gt} | tee "${RAPIDS_TESTS_DIR}${test_name}.cs.log"
done
unset GTEST_CUDF_RMM_MODE
- # TODO: test-results/*.cs.log are processed in gpuci
+ # TODO: test-results/*.cs.log are processed in CI
fi
-exit ${SUITEERROR}
+rapids-logger "Test script exiting with value: $EXITCODE"
+exit ${EXITCODE}
diff --git a/ci/test_java.sh b/ci/test_java.sh
index a0ba7c41607..f905aaa1178 100755
--- a/ci/test_java.sh
+++ b/ci/test_java.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
set -euo pipefail
@@ -13,6 +13,8 @@ rapids-dependency-file-generator \
rapids-mamba-retry env create --force -f env.yaml -n test
+export CMAKE_GENERATOR=Ninja
+
# Temporarily allow unbound variables for conda activation.
set +u
conda activate test
@@ -27,22 +29,17 @@ rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
libcudf
-SUITEERROR=0
-
rapids-logger "Check GPU usage"
nvidia-smi
+EXITCODE=0
+trap "EXITCODE=1" ERR
set +e
rapids-logger "Run Java tests"
pushd java
mvn test -B -DCUDF_JNI_ARROW_STATIC=OFF -DCUDF_JNI_ENABLE_PROFILING=OFF
-exitcode=$?
-
-if (( ${exitcode} != 0 )); then
- SUITEERROR=${exitcode}
- echo "FAILED: 1 or more tests in cudf Java"
-fi
popd
-exit ${SUITEERROR}
+rapids-logger "Test script exiting with value: $EXITCODE"
+exit ${EXITCODE}
diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh
index f1e17162195..7f5f35219b0 100755
--- a/ci/test_notebooks.sh
+++ b/ci/test_notebooks.sh
@@ -36,9 +36,8 @@ pushd notebooks
# (space-separated list of filenames without paths)
SKIPNBS=""
-# Set SUITEERROR to failure if any run fails
-SUITEERROR=0
-
+EXITCODE=0
+trap "EXITCODE=1" ERR
set +e
for nb in $(find . -name "*.ipynb"); do
nbBasename=$(basename ${nb})
@@ -55,8 +54,8 @@ for nb in $(find . -name "*.ipynb"); do
else
nvidia-smi
${NBTEST} ${nbBasename}
- SUITEERROR=$((SUITEERROR | $?))
fi
done
-exit ${SUITEERROR}
+rapids-logger "Test script exiting with value: $EXITCODE"
+exit ${EXITCODE}
diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh
index 107540c0192..0e922c105dd 100755
--- a/ci/test_python_common.sh
+++ b/ci/test_python_common.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Common setup steps shared by Python test jobs
@@ -27,7 +27,6 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"}
mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}"
-SUITEERROR=0
rapids-print-env
diff --git a/ci/test_python_cudf.sh b/ci/test_python_cudf.sh
index bea162a9318..bb33d8473ce 100755
--- a/ci/test_python_cudf.sh
+++ b/ci/test_python_cudf.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Common setup steps shared by Python test jobs
source "$(dirname "$0")/test_python_common.sh"
@@ -7,11 +7,12 @@ source "$(dirname "$0")/test_python_common.sh"
rapids-logger "Check GPU usage"
nvidia-smi
+EXITCODE=0
+trap "EXITCODE=1" ERR
set +e
rapids-logger "pytest cudf"
pushd python/cudf/cudf
-# (TODO: Copied the comment below from gpuCI, need to verify on GitHub Actions)
# It is essential to cd into python/cudf/cudf as `pytest-xdist` + `coverage` seem to work only at this directory level.
pytest \
--cache-clear \
@@ -24,12 +25,6 @@ pytest \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-coverage.xml" \
--cov-report=term \
tests
-exitcode=$?
-
-if (( ${exitcode} != 0 )); then
- SUITEERROR=${exitcode}
- echo "FAILED: 1 or more tests in cudf"
-fi
popd
# Run benchmarks with both cudf and pandas to ensure compatibility is maintained.
@@ -48,12 +43,6 @@ pytest \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-benchmark-coverage.xml" \
--cov-report=term \
benchmarks
-exitcode=$?
-
-if (( ${exitcode} != 0 )); then
- SUITEERROR=${exitcode}
- echo "FAILED: 1 or more tests in cudf"
-fi
rapids-logger "pytest for cudf benchmarks using pandas"
CUDF_BENCHMARKS_USE_PANDAS=ON \
@@ -67,12 +56,7 @@ pytest \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-benchmark-pandas-coverage.xml" \
--cov-report=term \
benchmarks
-exitcode=$?
-
-if (( ${exitcode} != 0 )); then
- SUITEERROR=${exitcode}
- echo "FAILED: 1 or more tests in cudf"
-fi
popd
-exit ${SUITEERROR}
+rapids-logger "Test script exiting with value: $EXITCODE"
+exit ${EXITCODE}
diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh
index d7a5e288193..b79cd44cdbe 100755
--- a/ci/test_python_other.sh
+++ b/ci/test_python_other.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Common setup steps shared by Python test jobs
source "$(dirname "$0")/test_python_common.sh"
@@ -12,6 +12,8 @@ rapids-mamba-retry install \
rapids-logger "Check GPU usage"
nvidia-smi
+EXITCODE=0
+trap "EXITCODE=1" ERR
set +e
rapids-logger "pytest dask_cudf"
@@ -26,12 +28,6 @@ pytest \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \
--cov-report=term \
dask_cudf
-exitcode=$?
-
-if (( ${exitcode} != 0 )); then
- SUITEERROR=${exitcode}
- echo "FAILED: 1 or more tests in dask-cudf"
-fi
popd
rapids-logger "pytest custreamz"
@@ -46,12 +42,6 @@ pytest \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \
--cov-report=term \
custreamz
-exitcode=$?
-
-if (( ${exitcode} != 0 )); then
- SUITEERROR=${exitcode}
- echo "FAILED: 1 or more tests in custreamz"
-fi
popd
set -e
@@ -73,12 +63,6 @@ pytest \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/strings-udf-coverage.xml" \
--cov-report=term \
tests
-exitcode=$?
-
-if (( ${exitcode} != 0 )); then
- SUITEERROR=${exitcode}
- echo "FAILED: 1 or more tests in strings_udf"
-fi
popd
rapids-logger "pytest cudf with strings_udf"
@@ -94,12 +78,7 @@ pytest \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-strings-udf-coverage.xml" \
--cov-report=term \
tests/test_udf_masked_ops.py
-exitcode=$?
-
-if (( ${exitcode} != 0 )); then
- SUITEERROR=${exitcode}
- echo "FAILED: 1 or more tests in cudf with strings_udf"
-fi
popd
-exit ${SUITEERROR}
+rapids-logger "Test script exiting with value: $EXITCODE"
+exit ${EXITCODE}
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index df013c492e8..675df3891c3 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -21,8 +21,8 @@ dependencies:
- cxx-compiler
- cython>=0.29,<0.30
- dask-cuda=23.04.*
-- dask>=2022.12.0
-- distributed>=2022.12.0
+- dask>=2023.1.1
+- distributed>=2023.1.1
- dlpack>=0.5,<0.6.0a0
- doxygen=1.8.20
- fastavro>=0.22.9
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 205ca2a995c..0d5b5d16e08 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -1,10 +1,11 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
+{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
{% set cuda_major = cuda_version.split('.')[0] %}
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
package:
name: cudf
@@ -15,7 +16,7 @@ source:
build:
number: {{ GIT_DESCRIBE_NUMBER }}
- string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
script_env:
- PARALLEL_LEVEL
- CMAKE_GENERATOR
diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml
index 5fa0411803b..5cbea78e82b 100644
--- a/conda/recipes/cudf_kafka/meta.yaml
+++ b/conda/recipes/cudf_kafka/meta.yaml
@@ -1,9 +1,10 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
+{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
package:
name: cudf_kafka
@@ -14,7 +15,7 @@ source:
build:
number: {{ GIT_DESCRIBE_NUMBER }}
- string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
script_env:
- PARALLEL_LEVEL
- CMAKE_GENERATOR
diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml
index 13d54011e02..af5705341e6 100644
--- a/conda/recipes/custreamz/meta.yaml
+++ b/conda/recipes/custreamz/meta.yaml
@@ -1,9 +1,10 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
+{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
package:
name: custreamz
@@ -14,7 +15,7 @@ source:
build:
number: {{ GIT_DESCRIBE_NUMBER }}
- string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
script_env:
- PARALLEL_LEVEL
- CMAKE_GENERATOR
@@ -38,8 +39,8 @@ requirements:
- python
- streamz
- cudf ={{ version }}
- - dask >=2022.12.0
- - distributed >=2022.12.0
+ - dask >=2023.1.1
+ - distributed >=2023.1.1
- python-confluent-kafka >=1.7.0,<1.8.0a0
- cudf_kafka ={{ version }}
diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml
index feb9e53e37f..3ee3d4d3952 100644
--- a/conda/recipes/dask-cudf/meta.yaml
+++ b/conda/recipes/dask-cudf/meta.yaml
@@ -1,10 +1,11 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
+{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
{% set cuda_major = cuda_version.split('.')[0] %}
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
package:
name: dask-cudf
@@ -15,7 +16,7 @@ source:
build:
number: {{ GIT_DESCRIBE_NUMBER }}
- string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
script_env:
- PARALLEL_LEVEL
- CMAKE_GENERATOR
@@ -34,14 +35,14 @@ requirements:
host:
- python
- cudf ={{ version }}
- - dask >=2022.12.0
- - distributed >=2022.12.0
+ - dask >=2023.1.1
+ - distributed >=2023.1.1
- cudatoolkit ={{ cuda_version }}
run:
- python
- cudf ={{ version }}
- - dask >=2022.12.0
- - distributed >=2022.12.0
+ - dask >=2023.1.1
+ - distributed >=2023.1.1
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
test:
diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh
index f56610bea86..0c2f628dcf2 100644
--- a/conda/recipes/dask-cudf/run_test.sh
+++ b/conda/recipes/dask-cudf/run_test.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
set -e
@@ -17,12 +17,20 @@ if [ "${ARCH}" = "aarch64" ]; then
exit 0
fi
-# Install the latest version of dask and distributed
-logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
+# Dask & Distributed option to install main(nightly) or `conda-forge` packages.
+export INSTALL_DASK_MAIN=1
-logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
+# Dask version to install when `INSTALL_DASK_MAIN=0`
+export DASK_STABLE_VERSION="2023.1.1"
+
+# Install the conda-forge or nightly version of dask and distributed
+if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
+ rapids-logger "rapids-mamba-retry install -c dask/label/dev 'dask/label/dev::dask' 'dask/label/dev::distributed'"
+ rapids-mamba-retry install -c dask/label/dev "dask/label/dev::dask" "dask/label/dev::distributed"
+else
+ rapids-logger "rapids-mamba-retry install conda-forge::dask==${DASK_STABLE_VERSION} conda-forge::distributed==${DASK_STABLE_VERSION} conda-forge::dask-core==${DASK_STABLE_VERSION} --force-reinstall"
+ rapids-mamba-retry install conda-forge::dask==${DASK_STABLE_VERSION} conda-forge::distributed==${DASK_STABLE_VERSION} conda-forge::dask-core==${DASK_STABLE_VERSION} --force-reinstall
+fi
logger "python -c 'import dask_cudf'"
python -c "import dask_cudf"
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 911080ebdb6..b0b86b427b7 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -1,10 +1,11 @@
# Copyright (c) 2018-2023, NVIDIA CORPORATION.
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
+{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
{% set cuda_major = cuda_version.split('.')[0] %}
{% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
package:
name: libcudf-split
@@ -52,7 +53,7 @@ outputs:
script: install_libcudf.sh
build:
number: {{ GIT_DESCRIBE_NUMBER }}
- string: cuda{{ cuda_major }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
run_exports:
- {{ pin_subpackage("libcudf", max_pin="x.x") }}
ignore_run_exports_from:
@@ -308,7 +309,7 @@ outputs:
script: install_libcudf_kafka.sh
build:
number: {{ GIT_DESCRIBE_NUMBER }}
- string: {{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ string: {{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
- {{ compiler('cuda') }}
requirements:
@@ -331,7 +332,7 @@ outputs:
script: install_libcudf_example.sh
build:
number: {{ GIT_DESCRIBE_NUMBER }}
- string: {{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ string: {{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
- {{ compiler('cuda') }}
requirements:
@@ -358,7 +359,7 @@ outputs:
script: install_libcudf_tests.sh
build:
number: {{ GIT_DESCRIBE_NUMBER }}
- string: cuda{{ cuda_major }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
- {{ compiler('cuda') }}
requirements:
diff --git a/conda/recipes/strings_udf/meta.yaml b/conda/recipes/strings_udf/meta.yaml
index 0928c5d3315..93316a92c22 100644
--- a/conda/recipes/strings_udf/meta.yaml
+++ b/conda/recipes/strings_udf/meta.yaml
@@ -1,10 +1,11 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
+{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
{% set cuda_major = cuda_version.split('.')[0] %}
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
package:
name: strings_udf
@@ -15,7 +16,7 @@ source:
build:
number: {{ GIT_DESCRIBE_NUMBER }}
- string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
script_env:
- PARALLEL_LEVEL
- CMAKE_GENERATOR
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 19c118016bf..d402a47628c 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -71,6 +71,18 @@ option(CUDA_ENABLE_LINEINFO
option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
# cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
+
+set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
+if(${CUDA_STATIC_RUNTIME})
+ set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL OFF)
+endif()
+option(
+ CUDF_BUILD_STREAMS_TEST_UTIL
+ "Whether to build the utilities for stream testing contained in libcudf"
+ ${DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL}
+)
+mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)
+
option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
@@ -754,10 +766,34 @@ if(CUDF_BUILD_TESTUTIL)
cudftestutil PUBLIC "$"
"$"
)
-
add_library(cudf::cudftestutil ALIAS cudftestutil)
endif()
+
+# * build cudf_identify_stream_usage --------------------------------------------------------------
+
+if(CUDF_BUILD_STREAMS_TEST_UTIL)
+ if(CUDA_STATIC_RUNTIME)
+ message(
+ FATAL_ERROR
+ "Stream identification cannot be used with a static CUDA runtime. Please set CUDA_STATIC_RUNTIME=OFF or CUDF_BUILD_STREAMS_TEST_UTIL=OFF."
+ )
+ endif()
+
+ # Libraries for stream-related testing.
+ add_library(cudf_identify_stream_usage SHARED tests/utilities/identify_stream_usage.cpp)
+
+ set_target_properties(
+ cudf_identify_stream_usage
+ PROPERTIES # set target compile options
+ CXX_STANDARD 17
+ CXX_STANDARD_REQUIRED ON
+ POSITION_INDEPENDENT_CODE ON
+ )
+ target_link_libraries(cudf_identify_stream_usage PUBLIC CUDA::cudart rmm::rmm)
+ add_library(cudf::cudf_identify_stream_usage ALIAS cudf_identify_stream_usage)
+endif()
+
# ##################################################################################################
# * add tests -------------------------------------------------------------------------------------
@@ -784,12 +820,9 @@ if(CUDF_BUILD_BENCHMARKS)
include(${rapids-cmake-dir}/cpm/gbench.cmake)
rapids_cpm_gbench()
- # Find or install NVBench Temporarily force downloading of fmt because current versions of nvbench
- # do not support the latest version of fmt, which is automatically pulled into our conda
- # environments by mamba.
- set(CPM_DOWNLOAD_fmt TRUE)
- include(${rapids-cmake-dir}/cpm/nvbench.cmake)
- rapids_cpm_nvbench()
+ # Find or install nvbench
+ include(cmake/thirdparty/get_nvbench.cmake)
+
add_subdirectory(benchmarks)
endif()
@@ -833,6 +866,10 @@ if(CUDF_BUILD_TESTUTIL)
)
endif()
+if(CUDF_BUILD_STREAMS_TEST_UTIL)
+ install(TARGETS cudf_identify_stream_usage DESTINATION ${lib_dir})
+endif()
+
set(doc_string
[=[
Provide targets for the cudf library.
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 6f67cb32b0a..c5ae3345da5 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -169,7 +169,10 @@ ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp)
# ##################################################################################################
# * sort benchmark --------------------------------------------------------------------------------
ConfigureBench(SORT_BENCH sort/rank.cpp sort/sort.cpp sort/sort_strings.cpp)
-ConfigureNVBench(SORT_NVBENCH sort/segmented_sort.cpp sort/sort_lists.cpp sort/sort_structs.cpp)
+ConfigureNVBench(
+ SORT_NVBENCH sort/rank_lists.cpp sort/rank_structs.cpp sort/segmented_sort.cpp
+ sort/sort_lists.cpp sort/sort_structs.cpp
+)
# ##################################################################################################
# * quantiles benchmark
diff --git a/cpp/benchmarks/fixture/rmm_pool_raii.hpp b/cpp/benchmarks/fixture/rmm_pool_raii.hpp
index 60586ef878b..465c53a91ea 100644
--- a/cpp/benchmarks/fixture/rmm_pool_raii.hpp
+++ b/cpp/benchmarks/fixture/rmm_pool_raii.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -69,4 +69,15 @@ class rmm_pool_raii {
std::shared_ptr mr;
};
+/**
+ * Base fixture for cudf benchmarks using nvbench.
+ *
+ * Initializes the default memory resource to use the RMM pool device resource.
+ */
+struct nvbench_base_fixture {
+ rmm_pool_raii _mr;
+};
+
} // namespace cudf
+
+#define NVBENCH_ENVIRONMENT cudf::nvbench_base_fixture
diff --git a/cpp/benchmarks/groupby/group_max.cpp b/cpp/benchmarks/groupby/group_max.cpp
index 4956cce0daf..077558f8709 100644
--- a/cpp/benchmarks/groupby/group_max.cpp
+++ b/cpp/benchmarks/groupby/group_max.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -24,7 +24,6 @@
template
void bench_groupby_max(nvbench::state& state, nvbench::type_list)
{
- cudf::rmm_pool_raii pool_raii;
const auto size = static_cast(state.get_int64("num_rows"));
auto const keys = [&] {
diff --git a/cpp/benchmarks/groupby/group_nunique.cpp b/cpp/benchmarks/groupby/group_nunique.cpp
index 05698c04058..f74ed95200e 100644
--- a/cpp/benchmarks/groupby/group_nunique.cpp
+++ b/cpp/benchmarks/groupby/group_nunique.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -40,7 +40,6 @@ auto make_aggregation_request_vector(cudf::column_view const& values, Args&&...
template
void bench_groupby_nunique(nvbench::state& state, nvbench::type_list)
{
- cudf::rmm_pool_raii pool_raii;
const auto size = static_cast(state.get_int64("num_rows"));
auto const keys = [&] {
diff --git a/cpp/benchmarks/groupby/group_rank.cpp b/cpp/benchmarks/groupby/group_rank.cpp
index f573b63a75d..2a70b95890b 100644
--- a/cpp/benchmarks/groupby/group_rank.cpp
+++ b/cpp/benchmarks/groupby/group_rank.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -30,7 +30,6 @@ static void nvbench_groupby_rank(nvbench::state& state,
{
using namespace cudf;
constexpr auto dtype = type_to_id();
- cudf::rmm_pool_raii pool_raii;
bool const is_sorted = state.get_int64("is_sorted");
cudf::size_type const column_size = state.get_int64("data_size");
diff --git a/cpp/benchmarks/groupby/group_struct_keys.cpp b/cpp/benchmarks/groupby/group_struct_keys.cpp
index cc6f0faaf41..53ef12ffeaa 100644
--- a/cpp/benchmarks/groupby/group_struct_keys.cpp
+++ b/cpp/benchmarks/groupby/group_struct_keys.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -29,8 +29,6 @@
void bench_groupby_struct_keys(nvbench::state& state)
{
- cudf::rmm_pool_raii pool_raii;
-
using Type = int;
using column_wrapper = cudf::test::fixed_width_column_wrapper;
std::default_random_engine generator;
diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp
index 27fea856332..026045acee7 100644
--- a/cpp/benchmarks/io/csv/csv_reader_input.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -64,29 +64,26 @@ void csv_read_common(DataType const& data_types,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}
-template
-void BM_csv_read_input(nvbench::state& state, nvbench::type_list>)
+template
+void BM_csv_read_input(nvbench::state& state,
+ nvbench::type_list, nvbench::enum_type>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const d_type = get_type_or_group(static_cast(DataType));
- auto const source_type = io_type::FILEPATH;
+ auto const source_type = IOType;
csv_read_common(d_type, source_type, state);
}
-template
-void BM_csv_read_io(nvbench::state& state, nvbench::type_list>)
+template
+void BM_csv_read_io(nvbench::state& state, nvbench::type_list>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL),
static_cast(data_type::FLOAT),
static_cast(data_type::DECIMAL),
static_cast(data_type::TIMESTAMP),
static_cast(data_type::DURATION),
static_cast(data_type::STRING)});
- auto const source_type = IO;
+ auto const source_type = IOType;
csv_read_common(d_type, source_type, state);
}
@@ -101,9 +98,11 @@ using d_type_list = nvbench::enum_type_list;
-NVBENCH_BENCH_TYPES(BM_csv_read_input, NVBENCH_TYPE_AXES(d_type_list))
+NVBENCH_BENCH_TYPES(BM_csv_read_input,
+ NVBENCH_TYPE_AXES(d_type_list,
+ nvbench::enum_type_list))
.set_name("csv_read_data_type")
- .set_type_axes_names({"data_type"})
+ .set_type_axes_names({"data_type", "io"})
.set_min_samples(4);
NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list))
diff --git a/cpp/benchmarks/io/csv/csv_reader_options.cpp b/cpp/benchmarks/io/csv/csv_reader_options.cpp
index 04522c16d5c..2d0e0e5754e 100644
--- a/cpp/benchmarks/io/csv/csv_reader_options.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_options.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -32,8 +32,6 @@ void BM_csv_read_varying_options(
nvbench::state& state,
nvbench::type_list, nvbench::enum_type>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const data_types =
dtypes_for_column_selection(get_type_or_group({static_cast(data_type::INTEGRAL),
static_cast(data_type::FLOAT),
diff --git a/cpp/benchmarks/io/fst.cu b/cpp/benchmarks/io/fst.cu
index 6d318db12de..7acf69e9d8e 100644
--- a/cpp/benchmarks/io/fst.cu
+++ b/cpp/benchmarks/io/fst.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -78,9 +78,6 @@ constexpr std::size_t single_item = 1;
void BM_FST_JSON(nvbench::state& state)
{
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii rmm_pool;
-
CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(),
"Benchmarks only support up to size_type's maximum number of items");
auto const string_size{size_type(state.get_int64("string_size"))};
@@ -116,9 +113,6 @@ void BM_FST_JSON(nvbench::state& state)
void BM_FST_JSON_no_outidx(nvbench::state& state)
{
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii rmm_pool;
-
CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(),
"Benchmarks only support up to size_type's maximum number of items");
auto const string_size{size_type(state.get_int64("string_size"))};
@@ -154,9 +148,6 @@ void BM_FST_JSON_no_outidx(nvbench::state& state)
void BM_FST_JSON_no_out(nvbench::state& state)
{
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii rmm_pool;
-
CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(),
"Benchmarks only support up to size_type's maximum number of items");
auto const string_size{size_type(state.get_int64("string_size"))};
@@ -190,9 +181,6 @@ void BM_FST_JSON_no_out(nvbench::state& state)
void BM_FST_JSON_no_str(nvbench::state& state)
{
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii rmm_pool;
-
CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(),
"Benchmarks only support up to size_type's maximum number of items");
auto const string_size{size_type(state.get_int64("string_size"))};
diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp
index 2abae88dca3..416cf403671 100644
--- a/cpp/benchmarks/io/json/nested_json.cpp
+++ b/cpp/benchmarks/io/json/nested_json.cpp
@@ -157,9 +157,6 @@ auto make_test_json_data(cudf::size_type string_size, rmm::cuda_stream_view stre
void BM_NESTED_JSON(nvbench::state& state)
{
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii rmm_pool;
-
auto const string_size{cudf::size_type(state.get_int64("string_size"))};
auto const default_options = cudf::io::json_reader_options{};
@@ -189,9 +186,6 @@ NVBENCH_BENCH(BM_NESTED_JSON)
void BM_NESTED_JSON_DEPTH(nvbench::state& state)
{
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii rmm_pool;
-
auto const string_size{cudf::size_type(state.get_int64("string_size"))};
auto const depth{cudf::size_type(state.get_int64("depth"))};
diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index 3f8c096140e..4705c083c02 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -57,11 +57,10 @@ void orc_read_common(cudf::io::orc_writer_options const& opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}
-template
-void BM_orc_read_data(nvbench::state& state, nvbench::type_list>)
+template
+void BM_orc_read_data(nvbench::state& state,
+ nvbench::type_list, nvbench::enum_type>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const d_type = get_type_or_group(static_cast(DataType));
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
@@ -72,20 +71,18 @@ void BM_orc_read_data(nvbench::state& state, nvbench::type_listview();
- cuio_source_sink_pair source_sink(io_type::HOST_BUFFER);
+ cuio_source_sink_pair source_sink(IOType);
cudf::io::orc_writer_options opts =
cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view);
orc_read_common(opts, source_sink, state);
}
-template
+template
void BM_orc_read_io_compression(
nvbench::state& state,
- nvbench::type_list, nvbench::enum_type>)
+ nvbench::type_list, nvbench::enum_type>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED),
static_cast(data_type::FLOAT),
static_cast(data_type::DECIMAL),
@@ -103,7 +100,7 @@ void BM_orc_read_io_compression(
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();
- cuio_source_sink_pair source_sink(IO);
+ cuio_source_sink_pair source_sink(IOType);
cudf::io::orc_writer_options opts =
cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view)
.compression(Compression);
@@ -126,9 +123,11 @@ using io_list = nvbench::enum_type_list;
-NVBENCH_BENCH_TYPES(BM_orc_read_data, NVBENCH_TYPE_AXES(d_type_list))
+NVBENCH_BENCH_TYPES(BM_orc_read_data,
+ NVBENCH_TYPE_AXES(d_type_list,
+ nvbench::enum_type_list))
.set_name("orc_read_decode")
- .set_type_axes_names({"data_type"})
+ .set_type_axes_names({"data_type", "io"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});
diff --git a/cpp/benchmarks/io/orc/orc_reader_options.cpp b/cpp/benchmarks/io/orc/orc_reader_options.cpp
index 1b7d33ccd19..1e841f744ae 100644
--- a/cpp/benchmarks/io/orc/orc_reader_options.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_options.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -56,8 +56,6 @@ void BM_orc_read_varying_options(nvbench::state& state,
nvbench::enum_type,
nvbench::enum_type>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const num_chunks = RowSelection == row_selection::ALL ? 1 : chunked_read_num_chunks;
auto const use_index = UsesIndex == uses_index::YES;
diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp
index 545f8d10122..67bf4cb750b 100644
--- a/cpp/benchmarks/io/orc/orc_writer.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -46,8 +46,6 @@ constexpr cudf::size_type num_cols = 64;
template
void BM_orc_write_encode(nvbench::state& state, nvbench::type_list>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const d_type = get_type_or_group(static_cast(DataType));
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
@@ -90,8 +88,6 @@ void BM_orc_write_io_compression(
nvbench::state& state,
nvbench::type_list, nvbench::enum_type>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED),
static_cast(data_type::FLOAT),
static_cast(data_type::DECIMAL),
@@ -141,8 +137,6 @@ void BM_orc_write_statistics(
nvbench::state& state,
nvbench::type_list, nvbench::enum_type>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED),
static_cast(data_type::FLOAT),
static_cast(data_type::DECIMAL),
diff --git a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp
index 592eae96362..eda70bc05e6 100644
--- a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -35,8 +35,6 @@ constexpr int64_t data_size = 512 << 20;
void nvbench_orc_write(nvbench::state& state)
{
- cudf::rmm_pool_raii rmm_pool;
-
cudf::size_type num_cols = state.get_int64("num_columns");
auto tbl = create_random_table(
@@ -79,8 +77,6 @@ void nvbench_orc_write(nvbench::state& state)
void nvbench_orc_chunked_write(nvbench::state& state)
{
- cudf::rmm_pool_raii rmm_pool;
-
cudf::size_type num_cols = state.get_int64("num_columns");
cudf::size_type num_tables = state.get_int64("num_chunks");
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
index 36a62903f31..e04dfbbc799 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
@@ -57,16 +57,15 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}
-template
-void BM_parquet_read_data(nvbench::state& state, nvbench::type_list>)
+template
+void BM_parquet_read_data(
+ nvbench::state& state,
+ nvbench::type_list, nvbench::enum_type>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const d_type = get_type_or_group(static_cast(DataType));
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = cudf::io::compression_type::SNAPPY;
- auto const source_type = io_type::FILEPATH;
auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
@@ -74,7 +73,7 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_listview();
- cuio_source_sink_pair source_sink(source_type);
+ cuio_source_sink_pair source_sink(IOType);
cudf::io::parquet_writer_options write_opts =
cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
.compression(compression);
@@ -82,13 +81,11 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_list
+template
void BM_parquet_read_io_compression(
nvbench::state& state,
- nvbench::type_list, nvbench::enum_type>)
+ nvbench::type_list, nvbench::enum_type>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL),
static_cast(data_type::FLOAT),
static_cast(data_type::DECIMAL),
@@ -101,7 +98,7 @@ void BM_parquet_read_io_compression(
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = Compression;
- auto const source_type = IO;
+ auto const source_type = IOType;
auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
@@ -133,9 +130,11 @@ using io_list = nvbench::enum_type_list;
-NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list))
+NVBENCH_BENCH_TYPES(BM_parquet_read_data,
+ NVBENCH_TYPE_AXES(d_type_list,
+ nvbench::enum_type_list))
.set_name("parquet_read_decode")
- .set_type_axes_names({"data_type"})
+ .set_type_axes_names({"data_type", "io"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
index 6e187afd6ab..3fd46fa08f2 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
@@ -57,8 +57,6 @@ void BM_parquet_read_options(nvbench::state& state,
nvbench::enum_type,
nvbench::enum_type>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto constexpr str_to_categories = ConvertsStrings == converts_strings::YES;
auto constexpr uses_pd_metadata = UsesPandasMetadata == uses_pandas_metadata::YES;
diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp
index a0b076abfda..d3d22e06086 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -46,8 +46,6 @@ constexpr cudf::size_type num_cols = 64;
template
void BM_parq_write_encode(nvbench::state& state, nvbench::type_list>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const data_types = get_type_or_group(static_cast(DataType));
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
@@ -90,8 +88,6 @@ void BM_parq_write_io_compression(
nvbench::state& state,
nvbench::type_list, nvbench::enum_type>)
{
- cudf::rmm_pool_raii rmm_pool;
-
auto const data_types = get_type_or_group({static_cast(data_type::INTEGRAL),
static_cast(data_type::FLOAT),
static_cast(data_type::DECIMAL),
diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
index 11b29cc2297..ed70f53cad8 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -33,8 +33,6 @@ constexpr int64_t data_size = 512 << 20;
void PQ_write(nvbench::state& state)
{
- cudf::rmm_pool_raii rmm_pool;
-
cudf::size_type const num_cols = state.get_int64("num_cols");
auto const tbl = create_random_table(cycle_dtypes({cudf::type_id::INT32}, num_cols),
@@ -67,8 +65,6 @@ void PQ_write(nvbench::state& state)
void PQ_write_chunked(nvbench::state& state)
{
- cudf::rmm_pool_raii rmm_pool;
-
cudf::size_type const num_cols = state.get_int64("num_cols");
cudf::size_type const num_tables = state.get_int64("num_chunks");
diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp
index c3b7c585055..41b5ddb567e 100644
--- a/cpp/benchmarks/io/text/multibyte_split.cpp
+++ b/cpp/benchmarks/io/text/multibyte_split.cpp
@@ -116,8 +116,6 @@ template
static void bench_multibyte_split(nvbench::state& state,
nvbench::type_list>)
{
- cudf::rmm_pool_raii pool_raii;
-
auto const delim_size = state.get_int64("delim_size");
auto const delim_percent = state.get_int64("delim_percent");
auto const file_size_approx = state.get_int64("size_approx");
@@ -209,10 +207,21 @@ using source_type_list = nvbench::enum_type_list;
-NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_TYPE_AXES(source_type_list))
- .set_name("multibyte_split")
+NVBENCH_BENCH_TYPES(bench_multibyte_split,
+ NVBENCH_TYPE_AXES(nvbench::enum_type_list))
+ .set_name("multibyte_split_delimiters")
+ .set_min_samples(4)
.add_int64_axis("strip_delimiters", {0, 1})
.add_int64_axis("delim_size", {1, 4, 7})
.add_int64_axis("delim_percent", {1, 25})
+ .add_int64_power_of_two_axis("size_approx", {15})
+ .add_int64_axis("byte_range_percent", {50});
+
+NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_TYPE_AXES(source_type_list))
+ .set_name("multibyte_split_source")
+ .set_min_samples(4)
+ .add_int64_axis("strip_delimiters", {1})
+ .add_int64_axis("delim_size", {1})
+ .add_int64_axis("delim_percent", {1})
.add_int64_power_of_two_axis("size_approx", {15, 30})
- .add_int64_axis("byte_range_percent", {1, 5, 25, 50, 100});
+ .add_int64_axis("byte_range_percent", {10, 100});
diff --git a/cpp/benchmarks/join/join.cu b/cpp/benchmarks/join/join.cu
index 053eb6c2852..647e37aa97d 100644
--- a/cpp/benchmarks/join/join.cu
+++ b/cpp/benchmarks/join/join.cu
@@ -23,9 +23,6 @@ void nvbench_inner_join(nvbench::state& state,
{
skip_helper(state);
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii pool_raii;
-
auto join = [](cudf::table_view const& left_input,
cudf::table_view const& right_input,
cudf::null_equality compare_nulls,
@@ -43,9 +40,6 @@ void nvbench_left_join(nvbench::state& state,
{
skip_helper(state);
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii pool_raii;
-
auto join = [](cudf::table_view const& left_input,
cudf::table_view const& right_input,
cudf::null_equality compare_nulls,
@@ -63,9 +57,6 @@ void nvbench_full_join(nvbench::state& state,
{
skip_helper(state);
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii pool_raii;
-
auto join = [](cudf::table_view const& left_input,
cudf::table_view const& right_input,
cudf::null_equality compare_nulls,
diff --git a/cpp/benchmarks/join/mixed_join.cu b/cpp/benchmarks/join/mixed_join.cu
index b7da5e2c0b3..1420625bbcd 100644
--- a/cpp/benchmarks/join/mixed_join.cu
+++ b/cpp/benchmarks/join/mixed_join.cu
@@ -23,9 +23,6 @@ void nvbench_mixed_inner_join(
{
skip_helper(state);
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii pool_raii;
-
auto join = [](cudf::table_view const& left_equality_input,
cudf::table_view const& right_equality_input,
cudf::table_view const& left_conditional_input,
@@ -50,9 +47,6 @@ void nvbench_mixed_left_join(
{
skip_helper(state);
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii pool_raii;
-
auto join = [](cudf::table_view const& left_equality_input,
cudf::table_view const& right_equality_input,
cudf::table_view const& left_conditional_input,
@@ -77,9 +71,6 @@ void nvbench_mixed_full_join(
{
skip_helper(state);
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii pool_raii;
-
auto join = [](cudf::table_view const& left_equality_input,
cudf::table_view const& right_equality_input,
cudf::table_view const& left_conditional_input,
@@ -104,9 +95,6 @@ void nvbench_mixed_left_semi_join(
{
skip_helper(state);
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii pool_raii;
-
auto join = [](cudf::table_view const& left_equality_input,
cudf::table_view const& right_equality_input,
cudf::table_view const& left_conditional_input,
@@ -131,9 +119,6 @@ void nvbench_mixed_left_anti_join(
{
skip_helper(state);
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii pool_raii;
-
auto join = [](cudf::table_view const& left_equality_input,
cudf::table_view const& right_equality_input,
cudf::table_view const& left_conditional_input,
diff --git a/cpp/benchmarks/reduction/distinct_count.cpp b/cpp/benchmarks/reduction/distinct_count.cpp
index 489d7935809..d2218c270a8 100644
--- a/cpp/benchmarks/reduction/distinct_count.cpp
+++ b/cpp/benchmarks/reduction/distinct_count.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -24,8 +24,6 @@
template
static void bench_reduction_distinct_count(nvbench::state& state, nvbench::type_list)
{
- cudf::rmm_pool_raii pool_raii;
-
auto const dtype = cudf::type_to_id();
auto const size = static_cast(state.get_int64("num_rows"));
auto const null_probability = state.get_float64("null_probability");
diff --git a/cpp/benchmarks/reduction/rank.cpp b/cpp/benchmarks/reduction/rank.cpp
index 5022e029d97..41295f787fc 100644
--- a/cpp/benchmarks/reduction/rank.cpp
+++ b/cpp/benchmarks/reduction/rank.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -26,8 +26,6 @@
template
static void nvbench_reduction_scan(nvbench::state& state, nvbench::type_list)
{
- cudf::rmm_pool_raii pool_raii;
-
auto const dtype = cudf::type_to_id();
double const null_probability = state.get_float64("null_probability");
diff --git a/cpp/benchmarks/reduction/scan_structs.cpp b/cpp/benchmarks/reduction/scan_structs.cpp
index 92016041c9a..d5b19faf773 100644
--- a/cpp/benchmarks/reduction/scan_structs.cpp
+++ b/cpp/benchmarks/reduction/scan_structs.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,8 +28,6 @@ static constexpr cudf::size_type max_str_length = 32;
static void nvbench_structs_scan(nvbench::state& state)
{
- cudf::rmm_pool_raii pool_raii;
-
auto const null_probability = [&] {
auto const null_prob_val = state.get_float64("null_probability");
return null_prob_val > 0 ? std::optional{null_prob_val} : std::nullopt;
diff --git a/cpp/benchmarks/reduction/segment_reduce.cu b/cpp/benchmarks/reduction/segment_reduce.cu
index e063adb25f9..127b3598dae 100644
--- a/cpp/benchmarks/reduction/segment_reduce.cu
+++ b/cpp/benchmarks/reduction/segment_reduce.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -86,9 +86,6 @@ template
void BM_Simple_Segmented_Reduction(nvbench::state& state,
nvbench::type_list>)
{
- // TODO: to be replaced by nvbench fixture once it's ready
- cudf::rmm_pool_raii rmm_pool;
-
auto const column_size{cudf::size_type(state.get_int64("column_size"))};
auto const num_segments{cudf::size_type(state.get_int64("num_segments"))};
diff --git a/cpp/benchmarks/search/contains.cpp b/cpp/benchmarks/search/contains.cpp
index 8daa975d4ed..01a0a37b21a 100644
--- a/cpp/benchmarks/search/contains.cpp
+++ b/cpp/benchmarks/search/contains.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -38,7 +38,6 @@ std::unique_ptr create_column_data(cudf::size_type n_rows, bool ha
static void nvbench_contains_scalar(nvbench::state& state)
{
- cudf::rmm_pool_raii pool_raii;
using Type = int;
auto const has_nulls = static_cast(state.get_int64("has_nulls"));
diff --git a/cpp/benchmarks/sort/nested_types_common.hpp b/cpp/benchmarks/sort/nested_types_common.hpp
new file mode 100644
index 00000000000..c4851823534
--- /dev/null
+++ b/cpp/benchmarks/sort/nested_types_common.hpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+#include
+
+#include
+
+#include
+
+#include
+
+inline std::unique_ptr create_lists_data(nvbench::state& state)
+{
+ const size_t size_bytes(state.get_int64("size_bytes"));
+ const cudf::size_type depth{static_cast(state.get_int64("depth"))};
+ auto const null_frequency{state.get_float64("null_frequency")};
+
+ data_profile table_profile;
+ table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5);
+ table_profile.set_list_depth(depth);
+ table_profile.set_null_probability(null_frequency);
+ return create_random_table({cudf::type_id::LIST}, table_size_bytes{size_bytes}, table_profile);
+}
+
+inline std::unique_ptr create_structs_data(nvbench::state& state,
+ cudf::size_type const n_cols = 1)
+{
+ using Type = int;
+ using column_wrapper = cudf::test::fixed_width_column_wrapper;
+ std::default_random_engine generator;
+ std::uniform_int_distribution distribution(0, 100);
+
+ const cudf::size_type n_rows{static_cast(state.get_int64("NumRows"))};
+ const cudf::size_type depth{static_cast(state.get_int64("Depth"))};
+ const bool nulls{static_cast(state.get_int64("Nulls"))};
+
+ // Create columns with values in the range [0,100)
+ std::vector columns;
+ columns.reserve(n_cols);
+ std::generate_n(std::back_inserter(columns), n_cols, [&]() {
+ auto const elements = cudf::detail::make_counting_transform_iterator(
+ 0, [&](auto row) { return distribution(generator); });
+ if (!nulls) return column_wrapper(elements, elements + n_rows);
+ auto valids =
+ cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; });
+ return column_wrapper(elements, elements + n_rows, valids);
+ });
+
+ std::vector> cols;
+ std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) {
+ return col.release();
+ });
+
+ std::vector> child_cols = std::move(cols);
+ // Nest the child columns in a struct, then nest that struct column inside another
+ // struct column up to the desired depth
+ for (int i = 0; i < depth; i++) {
+ std::vector struct_validity;
+ std::uniform_int_distribution bool_distribution(0, 100 * (i + 1));
+ std::generate_n(
+ std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); });
+ cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity);
+ child_cols = std::vector>{};
+ child_cols.push_back(struct_col.release());
+ }
+
+ // Create table view
+ return std::make_unique(std::move(child_cols));
+}
diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp
index 2c26f4fa15d..6d0a8e5aedd 100644
--- a/cpp/benchmarks/sort/rank.cpp
+++ b/cpp/benchmarks/sort/rank.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -33,7 +33,7 @@ static void BM_rank(benchmark::State& state, bool nulls)
// Create columns with values in the range [0,100)
data_profile profile = data_profile_builder().cardinality(0).distribution(
cudf::type_to_id(), distribution_id::UNIFORM, 0, 100);
- profile.set_null_probability(nulls ? std::optional{0.01} : std::nullopt);
+ profile.set_null_probability(nulls ? std::optional{0.2} : std::nullopt);
auto keys = create_random_column(cudf::type_to_id(), row_count{n_rows}, profile);
for (auto _ : state) {
diff --git a/cpp/benchmarks/sort/rank_lists.cpp b/cpp/benchmarks/sort/rank_lists.cpp
new file mode 100644
index 00000000000..49dc409ebfc
--- /dev/null
+++ b/cpp/benchmarks/sort/rank_lists.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nested_types_common.hpp"
+#include "rank_types_common.hpp"
+
+#include
+
+#include
+
+#include
+
+template
+void nvbench_rank_lists(nvbench::state& state, nvbench::type_list>)
+{
+ auto const table = create_lists_data(state);
+
+ auto const null_frequency{state.get_float64("null_frequency")};
+
+ state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+ cudf::rank(table->view().column(0),
+ method,
+ cudf::order::ASCENDING,
+ null_frequency ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE,
+ cudf::null_order::AFTER,
+ rmm::mr::get_current_device_resource());
+ });
+}
+
+NVBENCH_BENCH_TYPES(nvbench_rank_lists, NVBENCH_TYPE_AXES(methods))
+ .set_name("rank_lists")
+ .add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28})
+ .add_int64_axis("depth", {1, 4})
+ .add_float64_axis("null_frequency", {0, 0.2});
diff --git a/cpp/benchmarks/sort/rank_structs.cpp b/cpp/benchmarks/sort/rank_structs.cpp
new file mode 100644
index 00000000000..c0227e85191
--- /dev/null
+++ b/cpp/benchmarks/sort/rank_structs.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nested_types_common.hpp"
+#include "rank_types_common.hpp"
+
+#include
+
+#include
+
+template
+void nvbench_rank_structs(nvbench::state& state, nvbench::type_list>)
+{
+ auto const table = create_structs_data(state);
+
+ const bool nulls{static_cast(state.get_int64("Nulls"))};
+
+ state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+ cudf::rank(table->view().column(0),
+ method,
+ cudf::order::ASCENDING,
+ nulls ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE,
+ cudf::null_order::AFTER,
+ rmm::mr::get_current_device_resource());
+ });
+}
+
+NVBENCH_BENCH_TYPES(nvbench_rank_structs, NVBENCH_TYPE_AXES(methods))
+ .set_name("rank_structs")
+ .add_int64_power_of_two_axis("NumRows", {10, 18, 26})
+ .add_int64_axis("Depth", {0, 1, 8})
+ .add_int64_axis("Nulls", {0, 1});
diff --git a/cpp/benchmarks/sort/rank_types_common.hpp b/cpp/benchmarks/sort/rank_types_common.hpp
new file mode 100644
index 00000000000..adb58606c42
--- /dev/null
+++ b/cpp/benchmarks/sort/rank_types_common.hpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+
+#include
+
+enum class rank_method : int32_t {};
+
+NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
+ cudf::rank_method,
+ [](cudf::rank_method value) {
+ switch (value) {
+ case cudf::rank_method::FIRST: return "FIRST";
+ case cudf::rank_method::AVERAGE: return "AVERAGE";
+ case cudf::rank_method::MIN: return "MIN";
+ case cudf::rank_method::MAX: return "MAX";
+ case cudf::rank_method::DENSE: return "DENSE";
+ default: return "unknown";
+ }
+ },
+ [](cudf::rank_method value) {
+ switch (value) {
+ case cudf::rank_method::FIRST: return "cudf::rank_method::FIRST";
+ case cudf::rank_method::AVERAGE: return "cudf::rank_method::AVERAGE";
+ case cudf::rank_method::MIN: return "cudf::rank_method::MIN";
+ case cudf::rank_method::MAX: return "cudf::rank_method::MAX";
+ case cudf::rank_method::DENSE: return "cudf::rank_method::DENSE";
+ default: return "unknown";
+ }
+ })
+
+using methods = nvbench::enum_type_list;
diff --git a/cpp/benchmarks/sort/segmented_sort.cpp b/cpp/benchmarks/sort/segmented_sort.cpp
index e3459291caf..22d2b1c4029 100644
--- a/cpp/benchmarks/sort/segmented_sort.cpp
+++ b/cpp/benchmarks/sort/segmented_sort.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -26,8 +26,6 @@
void nvbench_segmented_sort(nvbench::state& state)
{
- cudf::rmm_pool_raii pool_raii;
-
auto const stable = static_cast(state.get_int64("stable"));
auto const dtype = cudf::type_to_id();
auto const size_bytes = static_cast<size_t>(state.get_int64("size_bytes"));
diff --git a/cpp/benchmarks/sort/sort_lists.cpp b/cpp/benchmarks/sort/sort_lists.cpp
index dac865de479..b55b60f5ec9 100644
--- a/cpp/benchmarks/sort/sort_lists.cpp
+++ b/cpp/benchmarks/sort/sort_lists.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,7 @@
* limitations under the License.
*/
-#include
-#include
+#include "nested_types_common.hpp"
#include
@@ -23,18 +22,7 @@
void nvbench_sort_lists(nvbench::state& state)
{
- cudf::rmm_pool_raii pool_raii;
-
- const size_t size_bytes(state.get_int64("size_bytes"));
- const cudf::size_type depth{static_cast(state.get_int64("depth"))};
- auto const null_frequency{state.get_float64("null_frequency")};
-
- data_profile table_profile;
- table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5);
- table_profile.set_list_depth(depth);
- table_profile.set_null_probability(null_frequency);
- auto const table =
- create_random_table({cudf::type_id::LIST}, table_size_bytes{size_bytes}, table_profile);
+ auto const table = create_lists_data(state);
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
rmm::cuda_stream_view stream_view{launch.get_stream()};
diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp
index 9b6c32940f5..1d54fa42f6f 100644
--- a/cpp/benchmarks/sort/sort_structs.cpp
+++ b/cpp/benchmarks/sort/sort_structs.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,65 +14,19 @@
* limitations under the License.
*/
-#include
-
-#include
+#include "nested_types_common.hpp"
#include
#include
-#include
-
void nvbench_sort_struct(nvbench::state& state)
{
- cudf::rmm_pool_raii pool_raii;
-
- using Type = int;
- using column_wrapper = cudf::test::fixed_width_column_wrapper;
- std::default_random_engine generator;
- std::uniform_int_distribution distribution(0, 100);
-
- const cudf::size_type n_rows{static_cast(state.get_int64("NumRows"))};
- const cudf::size_type n_cols{1};
- const cudf::size_type depth{static_cast(state.get_int64("Depth"))};
- const bool nulls{static_cast(state.get_int64("Nulls"))};
-
- // Create columns with values in the range [0,100)
- std::vector columns;
- columns.reserve(n_cols);
- std::generate_n(std::back_inserter(columns), n_cols, [&]() {
- auto const elements = cudf::detail::make_counting_transform_iterator(
- 0, [&](auto row) { return distribution(generator); });
- if (!nulls) return column_wrapper(elements, elements + n_rows);
- auto valids =
- cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; });
- return column_wrapper(elements, elements + n_rows, valids);
- });
-
- std::vector> cols;
- std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) {
- return col.release();
- });
-
- std::vector> child_cols = std::move(cols);
- // Lets add some layers
- for (int i = 0; i < depth; i++) {
- std::vector struct_validity;
- std::uniform_int_distribution bool_distribution(0, 100 * (i + 1));
- std::generate_n(
- std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); });
- cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity);
- child_cols = std::vector>{};
- child_cols.push_back(struct_col.release());
- }
-
- // Create table view
- auto const input = cudf::table(std::move(child_cols));
+ auto const input = create_structs_data(state);
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
rmm::cuda_stream_view stream_view{launch.get_stream()};
- cudf::detail::sorted_order(input, {}, {}, stream_view, rmm::mr::get_current_device_resource());
+ cudf::detail::sorted_order(*input, {}, {}, stream_view, rmm::mr::get_current_device_resource());
});
}
diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp
index 512554ff1bc..81eafa3044f 100644
--- a/cpp/benchmarks/stream_compaction/distinct.cpp
+++ b/cpp/benchmarks/stream_compaction/distinct.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -29,8 +29,6 @@ NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::ti
template <typename Type>
void nvbench_distinct(nvbench::state& state, nvbench::type_list<Type>)
{
- cudf::rmm_pool_raii pool_raii;
-
cudf::size_type const num_rows = state.get_int64("NumRows");
data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution(
@@ -61,8 +59,6 @@ NVBENCH_BENCH_TYPES(nvbench_distinct, NVBENCH_TYPE_AXES(data_type))
template <typename Type>
void nvbench_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
{
- cudf::rmm_pool_raii pool_raii;
-
auto const size = state.get_int64("ColumnSize");
auto const dtype = cudf::type_to_id<Type>();
double const null_probability = state.get_float64("null_probability");
diff --git a/cpp/benchmarks/stream_compaction/unique.cpp b/cpp/benchmarks/stream_compaction/unique.cpp
index 9a0f4c3b743..dafb9d506c7 100644
--- a/cpp/benchmarks/stream_compaction/unique.cpp
+++ b/cpp/benchmarks/stream_compaction/unique.cpp
@@ -54,8 +54,6 @@ void nvbench_unique(nvbench::state& state, nvbench::type_list<Type, nvbench::enum_type<Keep>>)
 {
-  cudf::rmm_pool_raii pool_raii;
-
   auto const dtype = cudf::type_to_id<Type>();
   double const null_probability = state.get_float64("null_probability");
diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp
index de7382f5a75..d86c31480dd 100644
--- a/cpp/benchmarks/string/like.cpp
+++ b/cpp/benchmarks/string/like.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -71,7 +71,6 @@ std::unique_ptr build_input_column(cudf::size_type n_rows, int32_t
static void bench_like(nvbench::state& state)
{
- cudf::rmm_pool_raii pool_raii;
auto const n_rows = static_cast(state.get_int64("num_rows"));
auto const hit_rate = static_cast(state.get_int64("hit_rate"));
diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp
index 1844e93bc53..fe015b27f13 100644
--- a/cpp/benchmarks/string/repeat_strings.cpp
+++ b/cpp/benchmarks/string/repeat_strings.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -79,42 +79,6 @@ static void BM_repeat_strings_column_times(benchmark::State& state)
(strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t)));
}
-static void BM_compute_output_strings_sizes(benchmark::State& state)
-{
- auto const n_rows = static_cast(state.range(0));
- auto const max_str_length = static_cast(state.range(1));
- auto const table = create_data_table(2, n_rows, max_str_length);
- auto const strings_col = cudf::strings_column_view(table->view().column(0));
- auto const repeat_times_col = table->view().column(1);
-
- for ([[maybe_unused]] auto _ : state) {
- [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream());
- cudf::strings::repeat_strings_output_sizes(strings_col, repeat_times_col);
- }
-
- state.SetBytesProcessed(state.iterations() *
- (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t)));
-}
-
-static void BM_repeat_strings_column_times_precomputed_sizes(benchmark::State& state)
-{
- auto const n_rows = static_cast(state.range(0));
- auto const max_str_length = static_cast(state.range(1));
- auto const table = create_data_table(2, n_rows, max_str_length);
- auto const strings_col = cudf::strings_column_view(table->view().column(0));
- auto const repeat_times_col = table->view().column(1);
- [[maybe_unused]] auto const [sizes, total_bytes] =
- cudf::strings::repeat_strings_output_sizes(strings_col, repeat_times_col);
-
- for ([[maybe_unused]] auto _ : state) {
- [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream());
- cudf::strings::repeat_strings(strings_col, repeat_times_col, *sizes);
- }
-
- state.SetBytesProcessed(state.iterations() *
- (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t)));
-}
-
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
int const min_rows = 1 << 8;
@@ -145,23 +109,5 @@ class RepeatStrings : public cudf::benchmark {
->UseManualTime() \
->Unit(benchmark::kMillisecond);
-#define COMPUTE_OUTPUT_STRINGS_SIZES_BENCHMARK_DEFINE(name) \
- BENCHMARK_DEFINE_F(RepeatStrings, name) \
- (::benchmark::State & st) { BM_compute_output_strings_sizes(st); } \
- BENCHMARK_REGISTER_F(RepeatStrings, name) \
- ->Apply(generate_bench_args) \
- ->UseManualTime() \
- ->Unit(benchmark::kMillisecond);
-
-#define REPEAT_STRINGS_COLUMN_TIMES_PRECOMPUTED_SIZES_BENCHMARK_DEFINE(name) \
- BENCHMARK_DEFINE_F(RepeatStrings, name) \
- (::benchmark::State & st) { BM_repeat_strings_column_times_precomputed_sizes(st); } \
- BENCHMARK_REGISTER_F(RepeatStrings, name) \
- ->Apply(generate_bench_args) \
- ->UseManualTime() \
- ->Unit(benchmark::kMillisecond);
-
REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(scalar_times)
REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(column_times)
-COMPUTE_OUTPUT_STRINGS_SIZES_BENCHMARK_DEFINE(compute_output_strings_sizes)
-REPEAT_STRINGS_COLUMN_TIMES_PRECOMPUTED_SIZES_BENCHMARK_DEFINE(precomputed_sizes)
diff --git a/cpp/benchmarks/string/reverse.cpp b/cpp/benchmarks/string/reverse.cpp
index 7b08897079b..4c3846c79bb 100644
--- a/cpp/benchmarks/string/reverse.cpp
+++ b/cpp/benchmarks/string/reverse.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -25,7 +25,6 @@
static void bench_reverse(nvbench::state& state)
{
- cudf::rmm_pool_raii pool_raii;
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
diff --git a/cpp/cmake/thirdparty/get_nvbench.cmake b/cpp/cmake/thirdparty/get_nvbench.cmake
new file mode 100644
index 00000000000..f0642145fa0
--- /dev/null
+++ b/cpp/cmake/thirdparty/get_nvbench.cmake
@@ -0,0 +1,28 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+# This function finds nvbench and applies any needed patches.
+function(find_and_configure_nvbench)
+
+ include(${rapids-cmake-dir}/cpm/nvbench.cmake)
+ include(${rapids-cmake-dir}/cpm/package_override.cmake)
+
+ set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
+ rapids_cpm_package_override("${cudf_patch_dir}/nvbench_override.json")
+
+ rapids_cpm_nvbench()
+
+endfunction()
+
+find_and_configure_nvbench()
diff --git a/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff b/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff
new file mode 100644
index 00000000000..0487b0a1ac3
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff
@@ -0,0 +1,27 @@
+diff --git a/nvbench/main.cuh b/nvbench/main.cuh
+index 0ba82d7..7ab02c1 100644
+--- a/nvbench/main.cuh
++++ b/nvbench/main.cuh
+@@ -54,6 +54,14 @@
+ // clang-format on
+ #endif
+
++#ifndef NVBENCH_ENVIRONMENT
++namespace nvbench {
++struct no_environment
++{};
++}
++#define NVBENCH_ENVIRONMENT nvbench::no_environment
++#endif
++
+ #define NVBENCH_MAIN_PARSE(argc, argv) \
+ nvbench::option_parser parser; \
+ parser.parse(argc, argv)
+@@ -77,6 +85,7 @@
+ printer.set_total_state_count(total_states); \
+ \
+ printer.set_completed_state_count(0); \
++ NVBENCH_ENVIRONMENT(); \
+ for (auto &bench_ptr : benchmarks) \
+ { \
+ bench_ptr->set_printer(printer); \
diff --git a/cpp/cmake/thirdparty/patches/nvbench_override.json b/cpp/cmake/thirdparty/patches/nvbench_override.json
new file mode 100644
index 00000000000..7be868081b6
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/nvbench_override.json
@@ -0,0 +1,19 @@
+
+{
+ "packages" : {
+ "nvbench" : {
+ "patches" : [
+ {
+ "file" : "${current_json_dir}/nvbench_global_setup.diff",
+ "issue" : "Fix add support for global setup to initialize RMM in nvbench [https://github.com/NVIDIA/nvbench/pull/123]",
+ "fixed_in" : ""
+ },
+ {
+ "file" : "nvbench/use_existing_fmt.diff",
+ "issue" : "Fix add support for using an existing fmt [https://github.com/NVIDIA/nvbench/pull/125]",
+ "fixed_in" : ""
+ }
+ ]
+ }
+ }
+}
diff --git a/cpp/include/cudf/detail/segmented_reduction.cuh b/cpp/include/cudf/detail/segmented_reduction.cuh
index 9a49c1abe38..1c39d5eab1e 100644
--- a/cpp/include/cudf/detail/segmented_reduction.cuh
+++ b/cpp/include/cudf/detail/segmented_reduction.cuh
@@ -145,10 +145,10 @@ void segmented_reduce(InputIterator d_in,
size_type* d_valid_counts,
rmm::cuda_stream_view stream)
{
- using OutputType = typename thrust::iterator_value::type;
- using IntermediateType = typename thrust::iterator_value::type;
- auto num_segments = static_cast(std::distance(d_offset_begin, d_offset_end));
- auto const binary_op = op.get_binary_op();
+  using OutputType       = typename thrust::iterator_value<OutputIterator>::type;
+  using IntermediateType = typename thrust::iterator_value<InputIterator>::type;
+  auto num_segments      = static_cast<size_type>(std::distance(d_offset_begin, d_offset_end)) - 1;
+  auto const binary_op   = op.get_binary_op();
   auto const initial_value = op.template get_identity<OutputType>();
   rmm::device_uvector<IntermediateType> intermediate_result{static_cast<std::size_t>(num_segments),
diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp
index 0e6ee2126d3..26fe5f95983 100644
--- a/cpp/include/cudf/strings/repeat_strings.hpp
+++ b/cpp/include/cudf/strings/repeat_strings.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -32,15 +32,15 @@ namespace strings {
*/
/**
- * @brief Repeat the given string scalar by a given number of times.
+ * @brief Repeat the given string scalar a given number of times
*
* An output string scalar is generated by repeating the input string by a number of times given by
- * the @p `repeat_times` parameter.
+ * the `repeat_times` parameter.
*
* In special cases:
- * - If @p `repeat_times` is not a positive value, an empty (valid) string scalar will be returned.
+ * - If `repeat_times` is not a positive value, an empty (valid) string scalar will be returned.
* - An invalid input scalar will always result in an invalid output scalar regardless of the
- * value of @p `repeat_times` parameter.
+ * value of `repeat_times` parameter.
*
* @code{.pseudo}
* Example:
@@ -50,13 +50,13 @@ namespace strings {
* @endcode
*
* @throw cudf::logic_error if the size of the output string scalar exceeds the maximum value that
- * can be stored by the index type
- * (i.e., @code input.size() * repeat_times > numeric_limits::max() @endcode).
+ * can be stored by the index type:
+ * `input.size() * repeat_times > max of size_type`
*
- * @param input The scalar containing the string to repeat.
- * @param repeat_times The number of times the input string is repeated.
- * @param mr Device memory resource used to allocate the returned string scalar.
- * @return New string scalar in which the input string is repeated.
+ * @param input The scalar containing the string to repeat
+ * @param repeat_times The number of times the input string is repeated
+ * @param mr Device memory resource used to allocate the returned string scalar
+ * @return New string scalar in which the input string is repeated
*/
std::unique_ptr repeat_string(
string_scalar const& input,
@@ -64,19 +64,16 @@ std::unique_ptr repeat_string(
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/**
- * @brief Repeat each string in the given strings column by a given number of times.
+ * @brief Repeat each string in the given strings column a given number of times
*
- * An output strings column is generated by repeating each string from the input strings column by a
- * number of times given by the @p `repeat_times` parameter.
+ * An output strings column is generated by repeating each string from the input strings column by
+ * the number of times given by the `repeat_times` parameter.
*
* In special cases:
- * - If @p `repeat_times` is not a positive number, a non-null input string will always result in
+ * - If `repeat_times` is not a positive number, a non-null input string will always result in
* an empty output string.
* - A null input string will always result in a null output string regardless of the value of the
- * @p `repeat_times` parameter.
- *
- * The caller is responsible for checking the output column size will not exceed the maximum size of
- * a strings column (number of total characters is less than the max size_type value).
+ * `repeat_times` parameter.
*
* @code{.pseudo}
* Example:
@@ -85,10 +82,10 @@ std::unique_ptr repeat_string(
* out is ['aaaaaa', null, '', 'bbcbbcbbc']
* @endcode
*
- * @param input The column containing strings to repeat.
- * @param repeat_times The number of times each input string is repeated.
- * @param mr Device memory resource used to allocate the returned strings column.
- * @return New column containing the repeated strings.
+ * @param input The column containing strings to repeat
+ * @param repeat_times The number of times each input string is repeated
+ * @param mr Device memory resource used to allocate the returned strings column
+ * @return New column containing the repeated strings
*/
std::unique_ptr repeat_strings(
strings_column_view const& input,
@@ -97,11 +94,10 @@ std::unique_ptr repeat_strings(
/**
* @brief Repeat each string in the given strings column by the numbers of times given in another
- * numeric column.
+ * numeric column
*
* An output strings column is generated by repeating each of the input string by a number of times
- * given by the corresponding row in a @p `repeat_times` numeric column. The computational time can
- * be reduced if sizes of the output strings are known and provided.
+ * given by the corresponding row in a `repeat_times` numeric column.
*
* In special cases:
* - Any null row (from either the input strings column or the `repeat_times` column) will always
@@ -109,9 +105,6 @@ std::unique_ptr repeat_strings(
* - If any value in the `repeat_times` column is not a positive number and its corresponding input
* string is not null, the output string will be an empty string.
*
- * The caller is responsible for checking the output column size will not exceed the maximum size of
- * a strings column (number of total characters is less than the max size_type value).
- *
* @code{.pseudo}
* Example:
* strs = ['aa', null, '', 'bbc-']
@@ -120,51 +113,16 @@ std::unique_ptr repeat_strings(
* out is ['aa', null, '', 'bbc-bbc-bbc-bbc-']
* @endcode
*
- * @throw cudf::logic_error if the input `repeat_times` column has data type other than integer.
+ * @throw cudf::logic_error if the input `repeat_times` is not an integer type
* @throw cudf::logic_error if the input columns have different sizes.
*
- * @param input The column containing strings to repeat.
+ * @param input The column containing strings to repeat
* @param repeat_times The column containing numbers of times that the corresponding input strings
- * are repeated.
- * @param output_strings_sizes The optional column containing pre-computed sizes of the output
- * strings.
- * @param mr Device memory resource used to allocate the returned strings column.
+ * are repeated
+ * @param mr Device memory resource used to allocate the returned strings column
* @return New column containing the repeated strings.
*/
std::unique_ptr repeat_strings(
- strings_column_view const& input,
- column_view const& repeat_times,
- std::optional output_strings_sizes = std::nullopt,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Compute sizes of the output strings if each string in the input strings column
- * is repeated by the numbers of times given in another numeric column.
- *
- * The output column storing string output sizes is not nullable. These string sizes are
- * also summed up and returned (in an `int64_t` value), which can be used to detect if the input
- * strings column can be safely repeated without data corruption due to overflow in string indexing.
- *
- * @code{.pseudo}
- * Example:
- * strs = ['aa', null, '', 'bbc-']
- * repeat_times = [ 1, 2, 3, 4 ]
- * [output_sizes, total_size] = repeat_strings_output_sizes(strs, repeat_times)
- * out is [2, 0, 0, 16], and total_size = 18
- * @endcode
- *
- * @throw cudf::logic_error if the input `repeat_times` column has data type other than integer.
- * @throw cudf::logic_error if the input columns have different sizes.
- *
- * @param input The column containing strings to repeat.
- * @param repeat_times The column containing numbers of times that the corresponding input strings
- * are repeated.
- * @param mr Device memory resource used to allocate the returned strings column.
- * @return A pair with the first item is an int32_t column containing sizes of the output strings,
- * and the second item is an int64_t number containing the total sizes (in bytes) of the
- * output strings column.
- */
-std::pair, int64_t> repeat_strings_output_sizes(
strings_column_view const& input,
column_view const& repeat_times,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh
index 0dc0f4e5315..f9ffbfcdf7b 100644
--- a/cpp/include/cudf/table/experimental/row_operators.cuh
+++ b/cpp/include/cudf/table/experimental/row_operators.cuh
@@ -245,6 +245,16 @@ using optional_dremel_view = thrust::optional;
* second letter in both words is the first non-equal letter, and `a < b`, thus
* `aac < abb`.
*
+ * @note The operator overloads in sub-class `element_comparator` are templated via the
+ * `type_dispatcher` to help select an overload instance for each column in a table.
+ * So, `cudf::is_nested` will return `true` if the table has nested-type columns,
+ * but it will be a runtime error if template parameter `has_nested_columns != true`.
+ *
+ * @tparam has_nested_columns compile-time optimization for primitive types.
+ * This template parameter is to be used by the developer by querying
+ * `cudf::detail::has_nested_columns(input)`. `true` compiles operator
+ * overloads for nested types, while `false` only compiles operator
+ * overloads for primitive types.
* @tparam Nullate A cudf::nullate type describing whether to check for nulls.
* @tparam PhysicalElementComparator A relational comparator functor that compares individual values
* rather than logical elements, defaults to `NaN` aware relational comparator that evaluates `NaN`
@@ -857,6 +867,16 @@ class self_comparator {
*
* `F(i,j)` returns true if and only if row `i` compares lexicographically less than row `j`.
*
+ * @note The operator overloads in sub-class `element_comparator` are templated via the
+ * `type_dispatcher` to help select an overload instance for each column in a table.
+ * So, `cudf::is_nested` will return `true` if the table has nested-type columns,
+ * but it will be a runtime error if template parameter `has_nested_columns != true`.
+ *
+ * @tparam has_nested_columns compile-time optimization for primitive types.
+ * This template parameter is to be used by the developer by querying
+ * `cudf::detail::has_nested_columns(input)`. `true` compiles operator
+ * overloads for nested types, while `false` only compiles operator
+ * overloads for primitive types.
* @tparam Nullate A cudf::nullate type describing whether to check for nulls.
* @tparam PhysicalElementComparator A relational comparator functor that compares individual
* values rather than logical elements, defaults to `NaN` aware relational comparator that
@@ -1009,6 +1029,16 @@ class two_table_comparator {
* only if row `i` of the right table compares lexicographically less than row
* `j` of the left table.
*
+ * @note The operator overloads in sub-class `element_comparator` are templated via the
+ * `type_dispatcher` to help select an overload instance for each column in a table.
+ * So, `cudf::is_nested` will return `true` if the table has nested-type columns,
+ * but it will be a runtime error if template parameter `has_nested_columns != true`.
+ *
+ * @tparam has_nested_columns compile-time optimization for primitive types.
+ * This template parameter is to be used by the developer by querying
+ * `cudf::detail::has_nested_columns(input)`. `true` compiles operator
+ * overloads for nested types, while `false` only compiles operator
+ * overloads for primitive types.
* @tparam Nullate A cudf::nullate type describing whether to check for nulls.
* @tparam PhysicalElementComparator A relational comparator functor that compares individual
* values rather than logical elements, defaults to `NaN` aware relational comparator that
@@ -1131,11 +1161,22 @@ struct nan_equal_physical_equality_comparator {
* returns false, representing unequal rows. If the rows are compared without mismatched elements,
* the rows are equal.
*
+ * @note The operator overloads in sub-class `element_comparator` are templated via the
+ * `type_dispatcher` to help select an overload instance for each column in a table.
+ * So, `cudf::is_nested` will return `true` if the table has nested-type columns,
+ * but it will be a runtime error if template parameter `has_nested_columns != true`.
+ *
+ * @tparam has_nested_columns compile-time optimization for primitive types.
+ * This template parameter is to be used by the developer by querying
+ * `cudf::detail::has_nested_columns(input)`. `true` compiles operator
+ * overloads for nested types, while `false` only compiles operator
+ * overloads for primitive types.
* @tparam Nullate A cudf::nullate type describing whether to check for nulls.
* @tparam PhysicalEqualityComparator A equality comparator functor that compares individual values
* rather than logical elements, defaults to a comparator for which `NaN == NaN`.
*/
-template
class device_row_comparator {
friend class self_comparator; ///< Allow self_comparator to access private members
@@ -1246,14 +1287,14 @@ class device_row_comparator {
template () and
- not cudf::is_nested()),
+ (not has_nested_columns or not cudf::is_nested())),
typename... Args>
__device__ bool operator()(Args...)
{
CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
}
- template ())>
+ template ())>
__device__ bool operator()(size_type const lhs_element_index,
size_type const rhs_element_index) const noexcept
{
@@ -1437,6 +1478,16 @@ class self_comparator {
*
* `F(i,j)` returns true if and only if row `i` compares equal to row `j`.
*
+ * @note The operator overloads in sub-class `element_comparator` are templated via the
+ * `type_dispatcher` to help select an overload instance for each column in a table.
+ * So, `cudf::is_nested` will return `true` if the table has nested-type columns,
+ * but it will be a runtime error if template parameter `has_nested_columns != true`.
+ *
+ * @tparam has_nested_columns compile-time optimization for primitive types.
+ * This template parameter is to be used by the developer by querying
+ * `cudf::detail::has_nested_columns(input)`. `true` compiles operator
+ * overloads for nested types, while `false` only compiles operator
+ * overloads for primitive types.
* @tparam Nullate A cudf::nullate type describing whether to check for nulls.
* @tparam PhysicalEqualityComparator A equality comparator functor that compares individual
* values rather than logical elements, defaults to a comparator for which `NaN == NaN`.
@@ -1445,13 +1496,15 @@ class self_comparator {
* @param comparator Physical element equality comparison functor.
* @return A binary callable object
*/
- template
auto equal_to(Nullate nullate = {},
null_equality nulls_are_equal = null_equality::EQUAL,
PhysicalEqualityComparator comparator = {}) const noexcept
{
- return device_row_comparator{nullate, *d_t, *d_t, nulls_are_equal, comparator};
+ return device_row_comparator{
+ nullate, *d_t, *d_t, nulls_are_equal, comparator};
}
private:
@@ -1539,6 +1592,16 @@ class two_table_comparator {
* Similarly, `F(rhs_index_type i, lhs_index_type j)` returns true if and only if row `i` of the
* right table compares equal to row `j` of the left table.
*
+ * @note The operator overloads in sub-class `element_comparator` are templated via the
+ * `type_dispatcher` to help select an overload instance for each column in a table.
+ * So, `cudf::is_nested` will return `true` if the table has nested-type columns,
+ * but it will be a runtime error if template parameter `has_nested_columns != true`.
+ *
+ * @tparam has_nested_columns compile-time optimization for primitive types.
+ * This template parameter is to be used by the developer by querying
+ * `cudf::detail::has_nested_columns(input)`. `true` compiles operator
+ * overloads for nested types, while `false` only compiles operator
+ * overloads for primitive types.
* @tparam Nullate A cudf::nullate type describing whether to check for nulls.
* @tparam PhysicalEqualityComparator A equality comparator functor that compares individual
* values rather than logical elements, defaults to a `NaN == NaN` equality comparator.
@@ -1547,14 +1610,16 @@ class two_table_comparator {
* @param comparator Physical element equality comparison functor.
* @return A binary callable object
*/
- template
auto equal_to(Nullate nullate = {},
null_equality nulls_are_equal = null_equality::EQUAL,
PhysicalEqualityComparator comparator = {}) const noexcept
{
return strong_index_comparator_adapter{
- device_row_comparator(nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
+ device_row_comparator(
+ nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
}
private:
diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh
index 2fcf1ce4e32..d167f0fe3c5 100644
--- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh
+++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -106,6 +106,36 @@ void apply_struct_binary_op(mutable_column_view& out,
}
}
+template <typename OptionalIteratorType, typename DeviceComparatorType>
+struct struct_equality_functor {
+ struct_equality_functor(OptionalIteratorType optional_iter,
+ DeviceComparatorType device_comparator,
+ bool is_lhs_scalar,
+ bool is_rhs_scalar,
+ bool preserve_output)
+ : _optional_iter(optional_iter),
+ _device_comparator(device_comparator),
+ _is_lhs_scalar(is_lhs_scalar),
+ _is_rhs_scalar(is_rhs_scalar),
+ _preserve_output(preserve_output)
+ {
+ }
+
+ auto __device__ operator()(size_type i) const noexcept
+ {
+ auto const lhs = cudf::experimental::row::lhs_index_type{_is_lhs_scalar ? 0 : i};
+ auto const rhs = cudf::experimental::row::rhs_index_type{_is_rhs_scalar ? 0 : i};
+ return _optional_iter[i].has_value() and (_device_comparator(lhs, rhs) == _preserve_output);
+ }
+
+ private:
+ OptionalIteratorType _optional_iter;
+ DeviceComparatorType _device_comparator;
+ bool _is_lhs_scalar;
+ bool _is_rhs_scalar;
+ bool _preserve_output;
+};
+
template <typename PhysicalEqualityComparator>
void apply_struct_equality_op(mutable_column_view& out,
@@ -125,26 +155,37 @@ void apply_struct_equality_op(mutable_column_view& out,
auto trhs = table_view{{rhs}};
auto table_comparator =
cudf::experimental::row::equality::two_table_comparator{tlhs, trhs, stream};
- auto device_comparator =
- table_comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)},
- null_equality::EQUAL,
- comparator);
auto outd = column_device_view::create(out, stream);
auto optional_iter =
cudf::detail::make_optional_iterator(*outd, nullate::DYNAMIC{out.has_nulls()});
- thrust::tabulate(rmm::exec_policy(stream),
- out.begin<bool>(),
- out.end<bool>(),
- [optional_iter,
- is_lhs_scalar,
- is_rhs_scalar,
- preserve_output = (op != binary_operator::NOT_EQUAL),
- device_comparator] __device__(size_type i) {
- auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i};
- auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i};
- return optional_iter[i].has_value() and
- (device_comparator(lhs, rhs) == preserve_output);
- });
+
+ auto const comparator_helper = [&](auto const device_comparator) {
+ thrust::tabulate(rmm::exec_policy(stream),
+ out.begin<bool>(),
+ out.end<bool>(),
+ struct_equality_functor(
+ optional_iter,
+ device_comparator,
+ is_lhs_scalar,
+ is_rhs_scalar,
+ op != binary_operator::NOT_EQUAL));
+ };
+
+ if (cudf::detail::has_nested_columns(tlhs) or cudf::detail::has_nested_columns(trhs)) {
+ auto device_comparator = table_comparator.equal_to<true>(
+ nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)},
+ null_equality::EQUAL,
+ comparator);
+
+ comparator_helper(device_comparator);
+ } else {
+ auto device_comparator = table_comparator.equal_to<false>(
+ nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)},
+ null_equality::EQUAL,
+ comparator);
+
+ comparator_helper(device_comparator);
+ }
}
} // namespace cudf::binops::compiled::detail
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 50173d6a987..72ac6255549 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -68,12 +68,13 @@ namespace {
// TODO: replace it with `cuco::static_map`
// https://github.com/rapidsai/cudf/issues/10401
-using map_type = concurrent_unordered_map<
- cudf::size_type,
- cudf::size_type,
- cudf::experimental::row::hash::device_row_hasher<cudf::detail::default_hash, cudf::nullate::DYNAMIC>,
- cudf::experimental::row::equality::device_row_comparator<cudf::nullate::DYNAMIC>>;
+template <typename ComparatorType>
+using map_type =
+ concurrent_unordered_map<cudf::size_type,
+                          cudf::size_type,
+                          cudf::experimental::row::hash::device_row_hasher<cudf::detail::default_hash, cudf::nullate::DYNAMIC>,
+                          ComparatorType>;
/**
* @brief List of aggregation operations that can be computed with a hash-based
@@ -189,13 +190,14 @@ class groupby_simple_aggregations_collector final
}
};
+template <typename ComparatorType>
class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer {
column_view col;
data_type result_type;
cudf::detail::result_cache* sparse_results;
cudf::detail::result_cache* dense_results;
device_span<size_type const> gather_map;
- map_type const& map;
+ map_type<ComparatorType> const& map;
bitmask_type const* __restrict__ row_bitmask;
rmm::cuda_stream_view stream;
rmm::mr::device_memory_resource* mr;
@@ -207,7 +209,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final
cudf::detail::result_cache* sparse_results,
cudf::detail::result_cache* dense_results,
device_span<size_type const> gather_map,
- map_type const& map,
+ map_type<ComparatorType> const& map,
bitmask_type const* row_bitmask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
@@ -336,7 +338,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final
rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
col.size(),
- ::cudf::detail::var_hash_functor{
+ ::cudf::detail::var_hash_functor<map_type<ComparatorType>>{
map, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof});
sparse_results->add_result(col, agg, std::move(var_result));
dense_results->add_result(col, agg, to_dense_agg_result(agg));
@@ -394,12 +396,13 @@ flatten_single_pass_aggs(host_span requests)
*
* @see groupby_null_templated()
*/
+template <typename ComparatorType>
void sparse_to_dense_results(table_view const& keys,
host_span requests,
cudf::detail::result_cache* sparse_results,
cudf::detail::result_cache* dense_results,
device_span