diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 59e2ea224f6..573fecca003 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -7,8 +7,9 @@ notebooks/ @rapidsai/cudf-python-codeowners python/dask_cudf/ @rapidsai/cudf-dask-codeowners #cmake code owners -**/CMakeLists.txt @rapidsai/cudf-cmake-codeowners -**/cmake/ @rapidsai/cudf-cmake-codeowners +cpp/CMakeLists.txt @rapidsai/cudf-cmake-codeowners +cpp/libcudf_kafka/CMakeLists.txt @rapidsai/cudf-cmake-codeowners +**/cmake/ @rapidsai/cudf-cmake-codeowners #java code owners java/ @rapidsai/cudf-java-codeowners diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ae895daf28a..2c5ecf68690 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,15 +19,18 @@ Here are some guidelines to help the review process go smoothly. noted here: https://help.github.com/articles/closing-issues-using-keywords/ 5. If your pull request is not ready for review but you want to make use of the - continuous integration testing facilities please label it with `[WIP]`. + continuous integration testing facilities please mark your pull request as Draft. + https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request#converting-a-pull-request-to-a-draft 6. If your pull request is ready to be reviewed without requiring additional - work on top of it, then remove the `[WIP]` label (if present) and replace - it with `[REVIEW]`. If assistance is required to complete the functionality, - for example when the C/C++ code of a feature is complete but Python bindings - are still required, then add the label `[HELP-REQ]` so that others can triage - and assist. The additional changes then can be implemented on top of the - same PR. If the assistance is done by members of the rapidsAI team, then no + work on top of it, then remove it from "Draft" and make it "Ready for Review". 
+ https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request#marking-a-pull-request-as-ready-for-review + + If assistance is required to complete the functionality, for example when the + C/C++ code of a feature is complete but Python bindings are still required, + then add the label `help wanted` so that others can triage and assist. + The additional changes then can be implemented on top of the same PR. + If the assistance is done by members of the rapidsAI team, then no additional actions are required by the creator of the original PR for this, otherwise the original author of the PR needs to give permission to the person(s) assisting to commit to their personal fork of the project. If that @@ -39,10 +42,10 @@ Here are some guidelines to help the review process go smoothly. features or make changes out of the scope of those requested by the reviewer (doing this just add delays as already reviewed code ends up having to be re-reviewed/it is hard to tell what is new etc!). Further, please do not - rebase your branch on main/force push/rewrite history, doing any of these - causes the context of any comments made by reviewers to be lost. If - conflicts occur against main they should be resolved by merging main - into the branch used for making the pull request. + rebase your branch on the target branch, force push, or rewrite history. + Doing any of these causes the context of any comments made by reviewers to be lost. + If conflicts occur against the target branch they should be resolved by + merging the target branch into the branch used for making the pull request. Many thanks in advance for your cooperation! 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 69f6634b5c2..f82fa9ef361 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -41,6 +41,12 @@ repos: entry: mypy --config-file=python/cudf/setup.cfg python/cudf/cudf language: system types: [python] + - repo: https://github.com/pycqa/pydocstyle + rev: 6.0.0 + hooks: + - id: pydocstyle + args: ["--config=python/.flake8"] + default_language_version: python: python3 diff --git a/CHANGELOG.md b/CHANGELOG.md index 21ab8ed3274..08a34a07ba3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,417 @@ -# cuDF 0.19.0 (Date TBD) - -Please see https://github.com/rapidsai/cudf/releases/tag/v0.19.0a for the latest changes to this development branch. +# cuDF 21.06.00 (9 Jun 2021) + +## 🚨 Breaking Changes + +- Add support for `make_meta_obj` dispatch in `dask-cudf` ([#8342](https://github.com/rapidsai/cudf/pull/8342)) [@galipremsagar](https://github.com/galipremsagar) +- Add separator-on-null parameter to strings concatenate APIs ([#8282](https://github.com/rapidsai/cudf/pull/8282)) [@davidwendt](https://github.com/davidwendt) +- Introduce a common parent class for NumericalColumn and DecimalColumn ([#8278](https://github.com/rapidsai/cudf/pull/8278)) [@vyasr](https://github.com/vyasr) +- Update ORC statistics API to use C++17 standard library ([#8241](https://github.com/rapidsai/cudf/pull/8241)) [@vuule](https://github.com/vuule) +- Preserve column hierarchy when getting NULL row from `LIST` column ([#8206](https://github.com/rapidsai/cudf/pull/8206)) [@isVoid](https://github.com/isVoid) +- `Groupby.shift` c++ API refactor and python binding ([#8131](https://github.com/rapidsai/cudf/pull/8131)) [@isVoid](https://github.com/isVoid) + +## 🐛 Bug Fixes + +- Fix struct flattening to add a validity column only when the input column has null element ([#8374](https://github.com/rapidsai/cudf/pull/8374)) 
[@ttnghia](https://github.com/ttnghia) +- Compilation fix: Remove redefinition for `std::is_same_v()` ([#8369](https://github.com/rapidsai/cudf/pull/8369)) [@mythrocks](https://github.com/mythrocks) +- Add backward compatibility for `dask-cudf` to work with other versions of `dask` ([#8368](https://github.com/rapidsai/cudf/pull/8368)) [@galipremsagar](https://github.com/galipremsagar) +- Handle empty results with nested types in copy_if_else ([#8359](https://github.com/rapidsai/cudf/pull/8359)) [@nvdbaranec](https://github.com/nvdbaranec) +- Handle nested column types properly for empty parquet files. ([#8350](https://github.com/rapidsai/cudf/pull/8350)) [@nvdbaranec](https://github.com/nvdbaranec) +- Raise error when unsupported arguments are passed to `dask_cudf.DataFrame.sort_values` ([#8349](https://github.com/rapidsai/cudf/pull/8349)) [@galipremsagar](https://github.com/galipremsagar) +- Raise `NotImplementedError` for axis=1 in `rank` ([#8347](https://github.com/rapidsai/cudf/pull/8347)) [@galipremsagar](https://github.com/galipremsagar) +- Add support for `make_meta_obj` dispatch in `dask-cudf` ([#8342](https://github.com/rapidsai/cudf/pull/8342)) [@galipremsagar](https://github.com/galipremsagar) +- Update Java string concatenate test for single column ([#8330](https://github.com/rapidsai/cudf/pull/8330)) [@tgravescs](https://github.com/tgravescs) +- Use empty_like in scatter ([#8314](https://github.com/rapidsai/cudf/pull/8314)) [@revans2](https://github.com/revans2) +- Fix concatenate_lists_ignore_null on rows of all_nulls ([#8312](https://github.com/rapidsai/cudf/pull/8312)) [@sperlingxx](https://github.com/sperlingxx) +- Add separator-on-null parameter to strings concatenate APIs ([#8282](https://github.com/rapidsai/cudf/pull/8282)) [@davidwendt](https://github.com/davidwendt) +- COLLECT_LIST support returning empty output columns. 
([#8279](https://github.com/rapidsai/cudf/pull/8279)) [@mythrocks](https://github.com/mythrocks) +- Update io util to convert path like object to string ([#8275](https://github.com/rapidsai/cudf/pull/8275)) [@ayushdg](https://github.com/ayushdg) +- Fix result column types for empty inputs to rolling window ([#8274](https://github.com/rapidsai/cudf/pull/8274)) [@mythrocks](https://github.com/mythrocks) +- Actually test equality in assert_groupby_results_equal ([#8272](https://github.com/rapidsai/cudf/pull/8272)) [@shwina](https://github.com/shwina) +- CMake always explicitly specify a source files extension ([#8270](https://github.com/rapidsai/cudf/pull/8270)) [@robertmaynard](https://github.com/robertmaynard) +- Fix struct binary search and struct flattening ([#8268](https://github.com/rapidsai/cudf/pull/8268)) [@ttnghia](https://github.com/ttnghia) +- Revert "patch thrust to fix intmax num elements limitation in scan_by_key" ([#8263](https://github.com/rapidsai/cudf/pull/8263)) [@cwharris](https://github.com/cwharris) +- upgrade dlpack to 0.5 ([#8262](https://github.com/rapidsai/cudf/pull/8262)) [@cwharris](https://github.com/cwharris) +- Fixes CSV-reader type inference for thousands separator and decimal point ([#8261](https://github.com/rapidsai/cudf/pull/8261)) [@elstehle](https://github.com/elstehle) +- Fix incorrect assertion in Java concat ([#8258](https://github.com/rapidsai/cudf/pull/8258)) [@sperlingxx](https://github.com/sperlingxx) +- Copy nested types upon construction ([#8244](https://github.com/rapidsai/cudf/pull/8244)) [@isVoid](https://github.com/isVoid) +- Preserve column hierarchy when getting NULL row from `LIST` column ([#8206](https://github.com/rapidsai/cudf/pull/8206)) [@isVoid](https://github.com/isVoid) +- Clip decimal binary op precision at max precision 
([#8194](https://github.com/rapidsai/cudf/pull/8194)) [@ChrisJar](https://github.com/ChrisJar) + +## 📖 Documentation + +- Add docstring for `dask_cudf.read_csv` ([#8355](https://github.com/rapidsai/cudf/pull/8355)) [@galipremsagar](https://github.com/galipremsagar) +- Fix cudf release version in readme ([#8331](https://github.com/rapidsai/cudf/pull/8331)) [@galipremsagar](https://github.com/galipremsagar) +- Fix structs column description in dev docs ([#8318](https://github.com/rapidsai/cudf/pull/8318)) [@isVoid](https://github.com/isVoid) +- Update readme with correct CUDA versions ([#8315](https://github.com/rapidsai/cudf/pull/8315)) [@raydouglass](https://github.com/raydouglass) +- Add description of the cuIO GDS integration ([#8293](https://github.com/rapidsai/cudf/pull/8293)) [@vuule](https://github.com/vuule) +- Remove unused parameter from copy_partition kernel documentation ([#8283](https://github.com/rapidsai/cudf/pull/8283)) [@robertmaynard](https://github.com/robertmaynard) + +## 🚀 New Features + +- Add support merging b/w categorical data ([#8332](https://github.com/rapidsai/cudf/pull/8332)) [@galipremsagar](https://github.com/galipremsagar) +- Java: Support struct scalar ([#8327](https://github.com/rapidsai/cudf/pull/8327)) [@sperlingxx](https://github.com/sperlingxx) +- added _is_homogeneous property ([#8299](https://github.com/rapidsai/cudf/pull/8299)) [@shaneding](https://github.com/shaneding) +- Added decimal writing for CSV writer ([#8296](https://github.com/rapidsai/cudf/pull/8296)) [@kaatish](https://github.com/kaatish) +- Java: Support creating a scalar from utf8 string ([#8294](https://github.com/rapidsai/cudf/pull/8294)) [@firestarman](https://github.com/firestarman) +- Add Java API for Concatenate strings with separator ([#8289](https://github.com/rapidsai/cudf/pull/8289)) 
[@tgravescs](https://github.com/tgravescs) +- `strings::join_list_elements` options for empty list inputs ([#8285](https://github.com/rapidsai/cudf/pull/8285)) [@ttnghia](https://github.com/ttnghia) +- Return python lists for __getitem__ calls to list type series ([#8265](https://github.com/rapidsai/cudf/pull/8265)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- add unit tests for lead/lag on list for row window ([#8259](https://github.com/rapidsai/cudf/pull/8259)) [@wbo4958](https://github.com/wbo4958) +- Create a String column from UTF8 String byte arrays ([#8257](https://github.com/rapidsai/cudf/pull/8257)) [@firestarman](https://github.com/firestarman) +- Support scattering `list_scalar` ([#8256](https://github.com/rapidsai/cudf/pull/8256)) [@isVoid](https://github.com/isVoid) +- Implement `lists::concatenate_list_elements` ([#8231](https://github.com/rapidsai/cudf/pull/8231)) [@ttnghia](https://github.com/ttnghia) +- Support for struct scalars. 
([#8220](https://github.com/rapidsai/cudf/pull/8220)) [@nvdbaranec](https://github.com/nvdbaranec) +- Add support for decimal types in ORC writer ([#8198](https://github.com/rapidsai/cudf/pull/8198)) [@vuule](https://github.com/vuule) +- Support create lists column from a `list_scalar` ([#8185](https://github.com/rapidsai/cudf/pull/8185)) [@isVoid](https://github.com/isVoid) +- `Groupby.shift` c++ API refactor and python binding ([#8131](https://github.com/rapidsai/cudf/pull/8131)) [@isVoid](https://github.com/isVoid) +- Add `groupby::replace_nulls(replace_policy)` api ([#7118](https://github.com/rapidsai/cudf/pull/7118)) [@isVoid](https://github.com/isVoid) + +## 🛠️ Improvements + +- Support Dask + Distributed 2021.05.1 ([#8392](https://github.com/rapidsai/cudf/pull/8392)) [@jakirkham](https://github.com/jakirkham) +- Add aliases for string methods ([#8353](https://github.com/rapidsai/cudf/pull/8353)) [@shwina](https://github.com/shwina) +- Update environment variable used to determine `cuda_version` ([#8321](https://github.com/rapidsai/cudf/pull/8321)) [@ajschmidt8](https://github.com/ajschmidt8) +- JNI: Refactor the code of making column from scalar ([#8310](https://github.com/rapidsai/cudf/pull/8310)) [@firestarman](https://github.com/firestarman) +- Update `CHANGELOG.md` links for calver ([#8303](https://github.com/rapidsai/cudf/pull/8303)) [@ajschmidt8](https://github.com/ajschmidt8) +- Merge `branch-0.19` into `branch-21.06` ([#8302](https://github.com/rapidsai/cudf/pull/8302)) [@ajschmidt8](https://github.com/ajschmidt8) +- use address and length for GDS reads/writes ([#8301](https://github.com/rapidsai/cudf/pull/8301)) [@rongou](https://github.com/rongou) +- Update cudfjni version to 21.06.0 ([#8292](https://github.com/rapidsai/cudf/pull/8292)) [@pxLi](https://github.com/pxLi) +- Update docs build 
script ([#8284](https://github.com/rapidsai/cudf/pull/8284)) [@ajschmidt8](https://github.com/ajschmidt8) +- Make device_buffer streams explicit and enforce move construction ([#8280](https://github.com/rapidsai/cudf/pull/8280)) [@harrism](https://github.com/harrism) +- Introduce a common parent class for NumericalColumn and DecimalColumn ([#8278](https://github.com/rapidsai/cudf/pull/8278)) [@vyasr](https://github.com/vyasr) +- Do not add nulls to the hash table when null_equality::NOT_EQUAL is passed to left_semi_join and left_anti_join ([#8277](https://github.com/rapidsai/cudf/pull/8277)) [@nvdbaranec](https://github.com/nvdbaranec) +- Enable implicit casting when concatenating mixed types ([#8276](https://github.com/rapidsai/cudf/pull/8276)) [@ChrisJar](https://github.com/ChrisJar) +- Fix CMake FindPackage rmm, pin dev envs' dlpack to v0.3 ([#8271](https://github.com/rapidsai/cudf/pull/8271)) [@trxcllnt](https://github.com/trxcllnt) +- Update cudfjni version to 21.06 ([#8267](https://github.com/rapidsai/cudf/pull/8267)) [@pxLi](https://github.com/pxLi) +- support RMM aligned resource adapter in JNI ([#8266](https://github.com/rapidsai/cudf/pull/8266)) [@rongou](https://github.com/rongou) +- Pass compiler environment variables to conda python build ([#8260](https://github.com/rapidsai/cudf/pull/8260)) [@Ethyling](https://github.com/Ethyling) +- Remove abc inheritance from Serializable ([#8254](https://github.com/rapidsai/cudf/pull/8254)) [@vyasr](https://github.com/vyasr) +- Move more methods into SingleColumnFrame ([#8253](https://github.com/rapidsai/cudf/pull/8253)) [@vyasr](https://github.com/vyasr) +- Update ORC statistics API to use C++17 standard library ([#8241](https://github.com/rapidsai/cudf/pull/8241)) [@vuule](https://github.com/vuule) +- Correct unused parameter warnings in dictonary algorithms 
([#8239](https://github.com/rapidsai/cudf/pull/8239)) [@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameters in the copying algorithms ([#8232](https://github.com/rapidsai/cudf/pull/8232)) [@robertmaynard](https://github.com/robertmaynard) +- IO statistics cleanup ([#8191](https://github.com/rapidsai/cudf/pull/8191)) [@kaatish](https://github.com/kaatish) +- Refactor of rolling_window implementation. ([#8158](https://github.com/rapidsai/cudf/pull/8158)) [@nvdbaranec](https://github.com/nvdbaranec) +- Add a flag for allowing single quotes in JSON strings. ([#8144](https://github.com/rapidsai/cudf/pull/8144)) [@nvdbaranec](https://github.com/nvdbaranec) +- Column refactoring 2 ([#8130](https://github.com/rapidsai/cudf/pull/8130)) [@vyasr](https://github.com/vyasr) +- support space in workspace ([#7956](https://github.com/rapidsai/cudf/pull/7956)) [@jolorunyomi](https://github.com/jolorunyomi) +- Support collect_set on rolling window ([#7881](https://github.com/rapidsai/cudf/pull/7881)) [@sperlingxx](https://github.com/sperlingxx) + +# cuDF 0.19.0 (21 Apr 2021) + +## 🚨 Breaking Changes + +- Allow hash_partition to take a seed value ([#7771](https://github.com/rapidsai/cudf/pull/7771)) [@magnatelee](https://github.com/magnatelee) +- Allow merging index column with data column using keyword "on" ([#7736](https://github.com/rapidsai/cudf/pull/7736)) [@skirui-source](https://github.com/skirui-source) +- Change JNI API to avoid loading native dependencies when creating sort order classes. ([#7729](https://github.com/rapidsai/cudf/pull/7729)) [@revans2](https://github.com/revans2) +- Replace device_vector with device_uvector in null_mask ([#7715](https://github.com/rapidsai/cudf/pull/7715)) [@harrism](https://github.com/harrism) +- Don't identify decimals as strings. 
([#7710](https://github.com/rapidsai/cudf/pull/7710)) [@vyasr](https://github.com/vyasr) +- Fix Java Parquet write after writer API changes ([#7655](https://github.com/rapidsai/cudf/pull/7655)) [@revans2](https://github.com/revans2) +- Convert cudf::concatenate APIs to use spans and device_uvector ([#7621](https://github.com/rapidsai/cudf/pull/7621)) [@harrism](https://github.com/harrism) +- Update missing docstring examples in python public APIs ([#7546](https://github.com/rapidsai/cudf/pull/7546)) [@galipremsagar](https://github.com/galipremsagar) +- Remove unneeded step parameter from strings::detail::copy_slice ([#7525](https://github.com/rapidsai/cudf/pull/7525)) [@davidwendt](https://github.com/davidwendt) +- Rename ARROW_STATIC_LIB because it conflicts with one in FindArrow.cmake ([#7518](https://github.com/rapidsai/cudf/pull/7518)) [@trxcllnt](https://github.com/trxcllnt) +- Match Pandas logic for comparing two objects with nulls ([#7490](https://github.com/rapidsai/cudf/pull/7490)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add struct support to parquet writer ([#7461](https://github.com/rapidsai/cudf/pull/7461)) [@devavret](https://github.com/devavret) +- Join APIs that return gathermaps ([#7454](https://github.com/rapidsai/cudf/pull/7454)) [@shwina](https://github.com/shwina) +- `fixed_point` + `cudf::binary_operation` API Changes ([#7435](https://github.com/rapidsai/cudf/pull/7435)) [@codereport](https://github.com/codereport) +- Fix BUG: Exception when PYTHONOPTIMIZE=2 ([#7434](https://github.com/rapidsai/cudf/pull/7434)) [@skirui-source](https://github.com/skirui-source) +- Change nvtext::load_vocabulary_file to return a unique ptr ([#7424](https://github.com/rapidsai/cudf/pull/7424)) [@davidwendt](https://github.com/davidwendt) +- Refactor 
strings column factories ([#7397](https://github.com/rapidsai/cudf/pull/7397)) [@harrism](https://github.com/harrism) +- Use CMAKE_CUDA_ARCHITECTURES ([#7391](https://github.com/rapidsai/cudf/pull/7391)) [@robertmaynard](https://github.com/robertmaynard) +- Upgrade pandas to 1.2 ([#7375](https://github.com/rapidsai/cudf/pull/7375)) [@galipremsagar](https://github.com/galipremsagar) +- Rename `logical_cast` to `bit_cast` and allow additional conversions ([#7373](https://github.com/rapidsai/cudf/pull/7373)) [@ttnghia](https://github.com/ttnghia) +- Rework libcudf CMakeLists.txt to export targets for CPM ([#7107](https://github.com/rapidsai/cudf/pull/7107)) [@trxcllnt](https://github.com/trxcllnt) + +## 🐛 Bug Fixes + +- Fix a `NameError` in meta dispatch API ([#7996](https://github.com/rapidsai/cudf/pull/7996)) [@galipremsagar](https://github.com/galipremsagar) +- Reindex in `DataFrame.__setitem__` ([#7957](https://github.com/rapidsai/cudf/pull/7957)) [@galipremsagar](https://github.com/galipremsagar) +- jitify direct-to-cubin compilation and caching. 
([#7919](https://github.com/rapidsai/cudf/pull/7919)) [@cwharris](https://github.com/cwharris) +- Use dynamic cudart for nvcomp in java build ([#7896](https://github.com/rapidsai/cudf/pull/7896)) [@abellina](https://github.com/abellina) +- fix "incompatible redefinition" warnings ([#7894](https://github.com/rapidsai/cudf/pull/7894)) [@cwharris](https://github.com/cwharris) +- cudf consistently specifies the cuda runtime ([#7887](https://github.com/rapidsai/cudf/pull/7887)) [@robertmaynard](https://github.com/robertmaynard) +- disable verbose output for jitify_preprocess ([#7886](https://github.com/rapidsai/cudf/pull/7886)) [@cwharris](https://github.com/cwharris) +- CMake jit_preprocess_files function only runs when needed ([#7872](https://github.com/rapidsai/cudf/pull/7872)) [@robertmaynard](https://github.com/robertmaynard) +- Push DeviceScalar construction into cython for list.contains ([#7864](https://github.com/rapidsai/cudf/pull/7864)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- cudf now sets an install rpath of $ORIGIN ([#7863](https://github.com/rapidsai/cudf/pull/7863)) [@robertmaynard](https://github.com/robertmaynard) +- Don't install Thrust examples, tests, docs, and python files ([#7811](https://github.com/rapidsai/cudf/pull/7811)) [@robertmaynard](https://github.com/robertmaynard) +- Sort by index in groupby tests more consistently ([#7802](https://github.com/rapidsai/cudf/pull/7802)) [@shwina](https://github.com/shwina) +- Revert "Update conda recipes pinning of repo dependencies ([#7743)" (#7793](https://github.com/rapidsai/cudf/pull/7743)" (#7793)) [@raydouglass](https://github.com/raydouglass) +- Add decimal column handling in copy_type_metadata ([#7788](https://github.com/rapidsai/cudf/pull/7788)) [@shwina](https://github.com/shwina) +- Add 
column names validation in parquet writer ([#7786](https://github.com/rapidsai/cudf/pull/7786)) [@galipremsagar](https://github.com/galipremsagar) +- Fix Java explode outer unit tests ([#7782](https://github.com/rapidsai/cudf/pull/7782)) [@jlowe](https://github.com/jlowe) +- Fix compiler warning about non-POD types passed through ellipsis ([#7781](https://github.com/rapidsai/cudf/pull/7781)) [@jrhemstad](https://github.com/jrhemstad) +- User resource fix for replace_nulls ([#7769](https://github.com/rapidsai/cudf/pull/7769)) [@magnatelee](https://github.com/magnatelee) +- Fix type dispatch for columnar replace_nulls ([#7768](https://github.com/rapidsai/cudf/pull/7768)) [@jlowe](https://github.com/jlowe) +- Add `ignore_order` parameter to dask-cudf concat dispatch ([#7765](https://github.com/rapidsai/cudf/pull/7765)) [@galipremsagar](https://github.com/galipremsagar) +- Fix slicing and arrow representations of decimal columns ([#7755](https://github.com/rapidsai/cudf/pull/7755)) [@vyasr](https://github.com/vyasr) +- Fixing issue with explode_outer position not nulling position entries of null rows ([#7754](https://github.com/rapidsai/cudf/pull/7754)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Implement scatter for struct columns ([#7752](https://github.com/rapidsai/cudf/pull/7752)) [@ttnghia](https://github.com/ttnghia) +- Fix data corruption in string columns ([#7746](https://github.com/rapidsai/cudf/pull/7746)) [@galipremsagar](https://github.com/galipremsagar) +- Fix string length in stripe dictionary building ([#7744](https://github.com/rapidsai/cudf/pull/7744)) [@kaatish](https://github.com/kaatish) +- Update conda recipes pinning of repo dependencies ([#7743](https://github.com/rapidsai/cudf/pull/7743)) [@mike-wendt](https://github.com/mike-wendt) +- Enable 
dask dispatch to cuDF's `is_categorical_dtype` for cuDF objects ([#7740](https://github.com/rapidsai/cudf/pull/7740)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix dictionary size computation in ORC writer ([#7737](https://github.com/rapidsai/cudf/pull/7737)) [@vuule](https://github.com/vuule) +- Fix `cudf::cast` overflow for `decimal64` to `int32_t` or smaller in certain cases ([#7733](https://github.com/rapidsai/cudf/pull/7733)) [@codereport](https://github.com/codereport) +- Change JNI API to avoid loading native dependencies when creating sort order classes. ([#7729](https://github.com/rapidsai/cudf/pull/7729)) [@revans2](https://github.com/revans2) +- Disable column_view data accessors for unsupported types ([#7725](https://github.com/rapidsai/cudf/pull/7725)) [@jrhemstad](https://github.com/jrhemstad) +- Materialize `RangeIndex` when `index=True` in parquet writer ([#7711](https://github.com/rapidsai/cudf/pull/7711)) [@galipremsagar](https://github.com/galipremsagar) +- Don't identify decimals as strings. 
([#7710](https://github.com/rapidsai/cudf/pull/7710)) [@vyasr](https://github.com/vyasr) +- Fix return type of `DataFrame.argsort` ([#7706](https://github.com/rapidsai/cudf/pull/7706)) [@galipremsagar](https://github.com/galipremsagar) +- Fix/correct cudf installed package requirements ([#7688](https://github.com/rapidsai/cudf/pull/7688)) [@robertmaynard](https://github.com/robertmaynard) +- Fix SparkMurmurHash3_32 hash inconsistencies with Apache Spark ([#7672](https://github.com/rapidsai/cudf/pull/7672)) [@jlowe](https://github.com/jlowe) +- Fix ORC reader issue with reading empty string columns ([#7656](https://github.com/rapidsai/cudf/pull/7656)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Fix Java Parquet write after writer API changes ([#7655](https://github.com/rapidsai/cudf/pull/7655)) [@revans2](https://github.com/revans2) +- Fixing empty null lists throwing explode_outer for a loop. 
([#7649](https://github.com/rapidsai/cudf/pull/7649)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Fix internal compiler error during JNI Docker build ([#7645](https://github.com/rapidsai/cudf/pull/7645)) [@jlowe](https://github.com/jlowe) +- Fix Debug build break with device_uvectors in grouped_rolling.cu ([#7633](https://github.com/rapidsai/cudf/pull/7633)) [@mythrocks](https://github.com/mythrocks) +- Parquet reader: Fix issue when using skip_rows on non-nested columns containing nulls ([#7627](https://github.com/rapidsai/cudf/pull/7627)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix ORC reader for empty DataFrame/Table ([#7624](https://github.com/rapidsai/cudf/pull/7624)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Fix specifying GPU architecture in JNI build ([#7612](https://github.com/rapidsai/cudf/pull/7612)) [@jlowe](https://github.com/jlowe) +- Fix ORC writer OOM issue ([#7605](https://github.com/rapidsai/cudf/pull/7605)) [@vuule](https://github.com/vuule) +- Fix 0.18 --> 0.19 automerge ([#7589](https://github.com/rapidsai/cudf/pull/7589)) [@kkraus14](https://github.com/kkraus14) +- Fix ORC issue with incorrect timestamp nanosecond values ([#7581](https://github.com/rapidsai/cudf/pull/7581)) [@vuule](https://github.com/vuule) +- Fix missing Dask imports ([#7580](https://github.com/rapidsai/cudf/pull/7580)) [@kkraus14](https://github.com/kkraus14) +- CMAKE_CUDA_ARCHITECTURES doesn't change when build-system invokes cmake ([#7579](https://github.com/rapidsai/cudf/pull/7579)) [@robertmaynard](https://github.com/robertmaynard) +- Another fix for offsets_end() iterator in lists_column_view ([#7575](https://github.com/rapidsai/cudf/pull/7575)) [@ttnghia](https://github.com/ttnghia) +- Fix ORC writer output corruption with string columns 
([#7565](https://github.com/rapidsai/cudf/pull/7565)) [@vuule](https://github.com/vuule) +- Fix cudf::lists::sort_lists failing for sliced column ([#7564](https://github.com/rapidsai/cudf/pull/7564)) [@ttnghia](https://github.com/ttnghia) +- FIX Fix Anaconda upload args ([#7558](https://github.com/rapidsai/cudf/pull/7558)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- Fix index mismatch issue in equality related APIs ([#7555](https://github.com/rapidsai/cudf/pull/7555)) [@galipremsagar](https://github.com/galipremsagar) +- FIX Revert gpuci_conda_retry on conda file output locations ([#7552](https://github.com/rapidsai/cudf/pull/7552)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- Fix offset_end iterator for lists_column_view, which was not correctl… ([#7551](https://github.com/rapidsai/cudf/pull/7551)) [@ttnghia](https://github.com/ttnghia) +- Fix no such file dlpack.h error when build libcudf ([#7549](https://github.com/rapidsai/cudf/pull/7549)) [@chenrui17](https://github.com/chenrui17) +- Update missing docstring examples in python public APIs ([#7546](https://github.com/rapidsai/cudf/pull/7546)) [@galipremsagar](https://github.com/galipremsagar) +- Decimal32 Build Fix ([#7544](https://github.com/rapidsai/cudf/pull/7544)) [@razajafri](https://github.com/razajafri) +- FIX Retry conda output location ([#7540](https://github.com/rapidsai/cudf/pull/7540)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- fix missing renames of dask git branches from master to main ([#7535](https://github.com/rapidsai/cudf/pull/7535)) [@kkraus14](https://github.com/kkraus14) +- Remove detail from device_span ([#7533](https://github.com/rapidsai/cudf/pull/7533)) [@rwlee](https://github.com/rwlee) +- Change dask and distributed branch to main 
([#7532](https://github.com/rapidsai/cudf/pull/7532)) [@dantegd](https://github.com/dantegd) +- Update JNI build to use CUDF_USE_ARROW_STATIC ([#7526](https://github.com/rapidsai/cudf/pull/7526)) [@jlowe](https://github.com/jlowe) +- Make sure rmm::rmm CMake target is visibile to cudf users ([#7524](https://github.com/rapidsai/cudf/pull/7524)) [@robertmaynard](https://github.com/robertmaynard) +- Fix contiguous_split not properly handling output partitions > 2 GB. ([#7515](https://github.com/rapidsai/cudf/pull/7515)) [@nvdbaranec](https://github.com/nvdbaranec) +- Change jit launch to safe_launch ([#7510](https://github.com/rapidsai/cudf/pull/7510)) [@devavret](https://github.com/devavret) +- Fix comparison between Datetime/Timedelta columns and NULL scalars ([#7504](https://github.com/rapidsai/cudf/pull/7504)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix off-by-one error in char-parallel string scalar replace ([#7502](https://github.com/rapidsai/cudf/pull/7502)) [@jlowe](https://github.com/jlowe) +- Fix JNI deprecation of all, put it on the wrong version before ([#7501](https://github.com/rapidsai/cudf/pull/7501)) [@revans2](https://github.com/revans2) +- Fix Series/Dataframe Mixed Arithmetic ([#7491](https://github.com/rapidsai/cudf/pull/7491)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix JNI build after removal of libcudf sub-libraries ([#7486](https://github.com/rapidsai/cudf/pull/7486)) [@jlowe](https://github.com/jlowe) +- Correctly compile benchmarks ([#7485](https://github.com/rapidsai/cudf/pull/7485)) [@robertmaynard](https://github.com/robertmaynard) +- Fix bool column corruption with ORC Reader ([#7483](https://github.com/rapidsai/cudf/pull/7483)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Fix `__repr__` for 
categorical dtype ([#7476](https://github.com/rapidsai/cudf/pull/7476)) [@galipremsagar](https://github.com/galipremsagar) +- Java cleaner synchronization ([#7474](https://github.com/rapidsai/cudf/pull/7474)) [@abellina](https://github.com/abellina) +- Fix java float/double parsing tests ([#7473](https://github.com/rapidsai/cudf/pull/7473)) [@revans2](https://github.com/revans2) +- Pass stream and user resource to make_default_constructed_scalar ([#7469](https://github.com/rapidsai/cudf/pull/7469)) [@magnatelee](https://github.com/magnatelee) +- Improve stability of dask_cudf.DataFrame.var and dask_cudf.DataFrame.std ([#7453](https://github.com/rapidsai/cudf/pull/7453)) [@rjzamora](https://github.com/rjzamora) +- Missing `device_storage_dispatch` change affecting `cudf::gather` ([#7449](https://github.com/rapidsai/cudf/pull/7449)) [@codereport](https://github.com/codereport) +- fix cuFile JNI compile errors ([#7445](https://github.com/rapidsai/cudf/pull/7445)) [@rongou](https://github.com/rongou) +- Support `Series.__setitem__` with key to a new row ([#7443](https://github.com/rapidsai/cudf/pull/7443)) [@isVoid](https://github.com/isVoid) +- Fix BUG: Exception when PYTHONOPTIMIZE=2 ([#7434](https://github.com/rapidsai/cudf/pull/7434)) [@skirui-source](https://github.com/skirui-source) +- Make inclusive scan safe for cases with leading nulls ([#7432](https://github.com/rapidsai/cudf/pull/7432)) [@magnatelee](https://github.com/magnatelee) +- Fix typo in list_device_view::pair_rep_end() ([#7423](https://github.com/rapidsai/cudf/pull/7423)) [@mythrocks](https://github.com/mythrocks) +- Fix string to double conversion and row equivalent comparison ([#7410](https://github.com/rapidsai/cudf/pull/7410)) [@ttnghia](https://github.com/ttnghia) +- Fix thrust failure when transferring 
data from device_vector to host_vector with vectors of size 1 ([#7382](https://github.com/rapidsai/cudf/pull/7382)) [@ttnghia](https://github.com/ttnghia) +- Fix std::exception catch-by-reference gcc9 compile error ([#7380](https://github.com/rapidsai/cudf/pull/7380)) [@davidwendt](https://github.com/davidwendt) +- Fix skiprows issue with ORC Reader ([#7359](https://github.com/rapidsai/cudf/pull/7359)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- fix Arrow CMake file ([#7358](https://github.com/rapidsai/cudf/pull/7358)) [@rongou](https://github.com/rongou) +- Fix lists::contains() for NaN and Decimals ([#7349](https://github.com/rapidsai/cudf/pull/7349)) [@mythrocks](https://github.com/mythrocks) +- Handle cupy array in `Dataframe.__setitem__` ([#7340](https://github.com/rapidsai/cudf/pull/7340)) [@galipremsagar](https://github.com/galipremsagar) +- Fix invalid-device-fn error in cudf::strings::replace_re with multiple regex's ([#7336](https://github.com/rapidsai/cudf/pull/7336)) [@davidwendt](https://github.com/davidwendt) +- FIX Add codecov upload block to gpu script ([#6860](https://github.com/rapidsai/cudf/pull/6860)) [@dillon-cullinan](https://github.com/dillon-cullinan) + +## 📖 Documentation + +- Fix join API doxygen ([#7890](https://github.com/rapidsai/cudf/pull/7890)) [@shwina](https://github.com/shwina) +- Add Resources to README. 
([#7697](https://github.com/rapidsai/cudf/pull/7697)) [@bdice](https://github.com/bdice) +- Add `isin` examples in Docstring ([#7479](https://github.com/rapidsai/cudf/pull/7479)) [@galipremsagar](https://github.com/galipremsagar) +- Resolving unlinked type shorthands in cudf doc ([#7416](https://github.com/rapidsai/cudf/pull/7416)) [@isVoid](https://github.com/isVoid) +- Fix typo in regex.md doc page ([#7363](https://github.com/rapidsai/cudf/pull/7363)) [@davidwendt](https://github.com/davidwendt) +- Fix incorrect strings_column_view::chars_size documentation ([#7360](https://github.com/rapidsai/cudf/pull/7360)) [@jlowe](https://github.com/jlowe) + +## 🚀 New Features + +- Enable basic reductions for decimal columns ([#7776](https://github.com/rapidsai/cudf/pull/7776)) [@ChrisJar](https://github.com/ChrisJar) +- Enable join on decimal columns ([#7764](https://github.com/rapidsai/cudf/pull/7764)) [@ChrisJar](https://github.com/ChrisJar) +- Allow merging index column with data column using keyword "on" ([#7736](https://github.com/rapidsai/cudf/pull/7736)) [@skirui-source](https://github.com/skirui-source) +- Implement DecimalColumn + Scalar and add cudf.Scalars of Decimal64Dtype ([#7732](https://github.com/rapidsai/cudf/pull/7732)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add support for `unique` groupby aggregation ([#7726](https://github.com/rapidsai/cudf/pull/7726)) [@shwina](https://github.com/shwina) +- Expose libcudf's label_bins function to cudf ([#7724](https://github.com/rapidsai/cudf/pull/7724)) [@vyasr](https://github.com/vyasr) +- Adding support for equi-join on struct ([#7720](https://github.com/rapidsai/cudf/pull/7720)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add decimal column comparison operations 
([#7716](https://github.com/rapidsai/cudf/pull/7716)) [@isVoid](https://github.com/isVoid) +- Implement scan operations for decimal columns ([#7707](https://github.com/rapidsai/cudf/pull/7707)) [@ChrisJar](https://github.com/ChrisJar) +- Enable typecasting between decimal and int ([#7691](https://github.com/rapidsai/cudf/pull/7691)) [@ChrisJar](https://github.com/ChrisJar) +- Enable decimal support in parquet writer ([#7673](https://github.com/rapidsai/cudf/pull/7673)) [@devavret](https://github.com/devavret) +- Adds `list.unique` API ([#7664](https://github.com/rapidsai/cudf/pull/7664)) [@isVoid](https://github.com/isVoid) +- Fix NaN handling in drop_list_duplicates ([#7662](https://github.com/rapidsai/cudf/pull/7662)) [@ttnghia](https://github.com/ttnghia) +- Add `lists.sort_values` API ([#7657](https://github.com/rapidsai/cudf/pull/7657)) [@isVoid](https://github.com/isVoid) +- Add is_integer API that can check for the validity of a string-to-integer conversion ([#7642](https://github.com/rapidsai/cudf/pull/7642)) [@ttnghia](https://github.com/ttnghia) +- Adds `explode` API ([#7607](https://github.com/rapidsai/cudf/pull/7607)) [@isVoid](https://github.com/isVoid) +- Adds `list.take`, python binding for `cudf::lists::segmented_gather` ([#7591](https://github.com/rapidsai/cudf/pull/7591)) [@isVoid](https://github.com/isVoid) +- Implement cudf::label_bins() ([#7554](https://github.com/rapidsai/cudf/pull/7554)) [@vyasr](https://github.com/vyasr) +- Add Python bindings for `lists::contains` ([#7547](https://github.com/rapidsai/cudf/pull/7547)) [@skirui-source](https://github.com/skirui-source) +- cudf::row_bit_count() support. 
([#7534](https://github.com/rapidsai/cudf/pull/7534)) [@nvdbaranec](https://github.com/nvdbaranec) +- Implement drop_list_duplicates ([#7528](https://github.com/rapidsai/cudf/pull/7528)) [@ttnghia](https://github.com/ttnghia) +- Add Python bindings for `lists::extract_lists_element` ([#7505](https://github.com/rapidsai/cudf/pull/7505)) [@skirui-source](https://github.com/skirui-source) +- Add explode_outer and explode_outer_position ([#7499](https://github.com/rapidsai/cudf/pull/7499)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Match Pandas logic for comparing two objects with nulls ([#7490](https://github.com/rapidsai/cudf/pull/7490)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add struct support to parquet writer ([#7461](https://github.com/rapidsai/cudf/pull/7461)) [@devavret](https://github.com/devavret) +- Enable type conversion from float to decimal type ([#7450](https://github.com/rapidsai/cudf/pull/7450)) [@ChrisJar](https://github.com/ChrisJar) +- Add cython for converting strings/fixed-point functions ([#7429](https://github.com/rapidsai/cudf/pull/7429)) [@davidwendt](https://github.com/davidwendt) +- Add struct column support to cudf::sort and cudf::sorted_order ([#7422](https://github.com/rapidsai/cudf/pull/7422)) [@karthikeyann](https://github.com/karthikeyann) +- Implement groupby collect_set ([#7420](https://github.com/rapidsai/cudf/pull/7420)) [@ttnghia](https://github.com/ttnghia) +- Merge branch-0.18 into branch-0.19 ([#7411](https://github.com/rapidsai/cudf/pull/7411)) [@raydouglass](https://github.com/raydouglass) +- Refactor strings column factories ([#7397](https://github.com/rapidsai/cudf/pull/7397)) [@harrism](https://github.com/harrism) +- Add groupby scan operations (sort groupby) 
([#7387](https://github.com/rapidsai/cudf/pull/7387)) [@karthikeyann](https://github.com/karthikeyann) +- Add cudf::explode_position ([#7376](https://github.com/rapidsai/cudf/pull/7376)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add string conversion to/from decimal values libcudf APIs ([#7364](https://github.com/rapidsai/cudf/pull/7364)) [@davidwendt](https://github.com/davidwendt) +- Add groupby SUM_OF_SQUARES support ([#7362](https://github.com/rapidsai/cudf/pull/7362)) [@karthikeyann](https://github.com/karthikeyann) +- Add `Series.drop` api ([#7304](https://github.com/rapidsai/cudf/pull/7304)) [@isVoid](https://github.com/isVoid) +- get_json_object() implementation ([#7286](https://github.com/rapidsai/cudf/pull/7286)) [@nvdbaranec](https://github.com/nvdbaranec) +- Python API for `ListMethods.len()` ([#7283](https://github.com/rapidsai/cudf/pull/7283)) [@isVoid](https://github.com/isVoid) +- Support null_policy::EXCLUDE for COLLECT rolling aggregation ([#7264](https://github.com/rapidsai/cudf/pull/7264)) [@mythrocks](https://github.com/mythrocks) +- Add support for special tokens in nvtext::subword_tokenizer ([#7254](https://github.com/rapidsai/cudf/pull/7254)) [@davidwendt](https://github.com/davidwendt) +- Fix inplace update of data and add Series.update ([#7201](https://github.com/rapidsai/cudf/pull/7201)) [@galipremsagar](https://github.com/galipremsagar) +- Implement `cudf::group_by` (hash) for `decimal32` and `decimal64` ([#7190](https://github.com/rapidsai/cudf/pull/7190)) [@codereport](https://github.com/codereport) +- Adding support to specify "level" parameter for `Dataframe.rename` ([#7135](https://github.com/rapidsai/cudf/pull/7135)) [@skirui-source](https://github.com/skirui-source) + +## 🛠️ Improvements + +- fix GDS include path for version 
0.95 ([#7877](https://github.com/rapidsai/cudf/pull/7877)) [@rongou](https://github.com/rongou) +- Update `dask` + `distributed` to `2021.4.0` ([#7858](https://github.com/rapidsai/cudf/pull/7858)) [@jakirkham](https://github.com/jakirkham) +- Add ability to extract include dirs from `CUDF_HOME` ([#7848](https://github.com/rapidsai/cudf/pull/7848)) [@galipremsagar](https://github.com/galipremsagar) +- Add USE_GDS as an option in build script ([#7833](https://github.com/rapidsai/cudf/pull/7833)) [@pxLi](https://github.com/pxLi) +- add an allocate method with stream in java DeviceMemoryBuffer ([#7826](https://github.com/rapidsai/cudf/pull/7826)) [@rongou](https://github.com/rongou) +- Constrain dask and distributed versions to 2021.3.1 ([#7825](https://github.com/rapidsai/cudf/pull/7825)) [@shwina](https://github.com/shwina) +- Revert dask versioning of concat dispatch ([#7823](https://github.com/rapidsai/cudf/pull/7823)) [@galipremsagar](https://github.com/galipremsagar) +- add copy methods in Java memory buffer ([#7791](https://github.com/rapidsai/cudf/pull/7791)) [@rongou](https://github.com/rongou) +- Update README and CONTRIBUTING for 0.19 ([#7778](https://github.com/rapidsai/cudf/pull/7778)) [@robertmaynard](https://github.com/robertmaynard) +- Allow hash_partition to take a seed value ([#7771](https://github.com/rapidsai/cudf/pull/7771)) [@magnatelee](https://github.com/magnatelee) +- Turn on NVTX by default in java build ([#7761](https://github.com/rapidsai/cudf/pull/7761)) [@tgravescs](https://github.com/tgravescs) +- Add Java bindings to join gather map APIs ([#7751](https://github.com/rapidsai/cudf/pull/7751)) [@jlowe](https://github.com/jlowe) +- Add replacements column support for Java replaceNulls ([#7750](https://github.com/rapidsai/cudf/pull/7750)) 
[@jlowe](https://github.com/jlowe) +- Add Java bindings for row_bit_count ([#7749](https://github.com/rapidsai/cudf/pull/7749)) [@jlowe](https://github.com/jlowe) +- Remove unused JVM array creation ([#7748](https://github.com/rapidsai/cudf/pull/7748)) [@jlowe](https://github.com/jlowe) +- Added JNI support for new is_integer ([#7739](https://github.com/rapidsai/cudf/pull/7739)) [@revans2](https://github.com/revans2) +- Create and promote library aliases in libcudf installations ([#7734](https://github.com/rapidsai/cudf/pull/7734)) [@trxcllnt](https://github.com/trxcllnt) +- Support groupby operations for decimal dtypes ([#7731](https://github.com/rapidsai/cudf/pull/7731)) [@vyasr](https://github.com/vyasr) +- Memory map the input file only when GDS compatibility mode is not used ([#7717](https://github.com/rapidsai/cudf/pull/7717)) [@vuule](https://github.com/vuule) +- Replace device_vector with device_uvector in null_mask ([#7715](https://github.com/rapidsai/cudf/pull/7715)) [@harrism](https://github.com/harrism) +- Struct hashing support for SerialMurmur3 and SparkMurmur3 ([#7714](https://github.com/rapidsai/cudf/pull/7714)) [@jlowe](https://github.com/jlowe) +- Add gbenchmark for nvtext replace-tokens function ([#7708](https://github.com/rapidsai/cudf/pull/7708)) [@davidwendt](https://github.com/davidwendt) +- Use stream in groupby calls ([#7705](https://github.com/rapidsai/cudf/pull/7705)) [@karthikeyann](https://github.com/karthikeyann) +- Update codeowners file ([#7701](https://github.com/rapidsai/cudf/pull/7701)) [@ajschmidt8](https://github.com/ajschmidt8) +- Cleanup groupby to use host_span, device_span, device_uvector ([#7698](https://github.com/rapidsai/cudf/pull/7698)) [@karthikeyann](https://github.com/karthikeyann) +- Add gbenchmark for nvtext ngrams 
functions ([#7693](https://github.com/rapidsai/cudf/pull/7693)) [@davidwendt](https://github.com/davidwendt) +- Misc Python/Cython optimizations ([#7686](https://github.com/rapidsai/cudf/pull/7686)) [@shwina](https://github.com/shwina) +- Add gbenchmark for nvtext tokenize functions ([#7684](https://github.com/rapidsai/cudf/pull/7684)) [@davidwendt](https://github.com/davidwendt) +- Add column_device_view to orc writer ([#7676](https://github.com/rapidsai/cudf/pull/7676)) [@kaatish](https://github.com/kaatish) +- cudf_kafka now uses cuDF CMake export targets (CPM) ([#7674](https://github.com/rapidsai/cudf/pull/7674)) [@robertmaynard](https://github.com/robertmaynard) +- Add gbenchmark for nvtext normalize functions ([#7668](https://github.com/rapidsai/cudf/pull/7668)) [@davidwendt](https://github.com/davidwendt) +- Resolve unnecessary import of thrust/optional.hpp in types.hpp ([#7667](https://github.com/rapidsai/cudf/pull/7667)) [@vyasr](https://github.com/vyasr) +- Feature/optimize accessor copy ([#7660](https://github.com/rapidsai/cudf/pull/7660)) [@vyasr](https://github.com/vyasr) +- Fix `find_package(cudf)` ([#7658](https://github.com/rapidsai/cudf/pull/7658)) [@trxcllnt](https://github.com/trxcllnt) +- Work-around for gcc7 compile error on Centos7 ([#7652](https://github.com/rapidsai/cudf/pull/7652)) [@davidwendt](https://github.com/davidwendt) +- Add in JNI support for count_elements ([#7651](https://github.com/rapidsai/cudf/pull/7651)) [@revans2](https://github.com/revans2) +- Fix issues with building cudf in a non-conda environment ([#7647](https://github.com/rapidsai/cudf/pull/7647)) [@galipremsagar](https://github.com/galipremsagar) +- Refactor ConfigureCUDA to not conditionally insert compiler flags ([#7643](https://github.com/rapidsai/cudf/pull/7643)) 
[@robertmaynard](https://github.com/robertmaynard) +- Add gbenchmark for converting strings to/from timestamps ([#7641](https://github.com/rapidsai/cudf/pull/7641)) [@davidwendt](https://github.com/davidwendt) +- Handle constructing a `cudf.Scalar` from a `cudf.Scalar` ([#7639](https://github.com/rapidsai/cudf/pull/7639)) [@shwina](https://github.com/shwina) +- Add in JNI support for table partition ([#7637](https://github.com/rapidsai/cudf/pull/7637)) [@revans2](https://github.com/revans2) +- Add explicit fixed_point merge test ([#7635](https://github.com/rapidsai/cudf/pull/7635)) [@codereport](https://github.com/codereport) +- Add JNI support for IDENTITY hash partitioning ([#7626](https://github.com/rapidsai/cudf/pull/7626)) [@revans2](https://github.com/revans2) +- Java support on explode_outer ([#7625](https://github.com/rapidsai/cudf/pull/7625)) [@sperlingxx](https://github.com/sperlingxx) +- Java support of casting string from/to decimal ([#7623](https://github.com/rapidsai/cudf/pull/7623)) [@sperlingxx](https://github.com/sperlingxx) +- Convert cudf::concatenate APIs to use spans and device_uvector ([#7621](https://github.com/rapidsai/cudf/pull/7621)) [@harrism](https://github.com/harrism) +- Add gbenchmark for cudf::strings::translate function ([#7617](https://github.com/rapidsai/cudf/pull/7617)) [@davidwendt](https://github.com/davidwendt) +- Use file(COPY ) over file(INSTALL ) so cmake output is reduced ([#7616](https://github.com/rapidsai/cudf/pull/7616)) [@robertmaynard](https://github.com/robertmaynard) +- Use rmm::device_uvector in place of rmm::device_vector for ORC reader/writer and cudf::io::column_buffer ([#7614](https://github.com/rapidsai/cudf/pull/7614)) [@vuule](https://github.com/vuule) +- Refactor Java host-side buffer concatenation to expose separate steps 
([#7610](https://github.com/rapidsai/cudf/pull/7610)) [@jlowe](https://github.com/jlowe) +- Add gbenchmarks for string substrings functions ([#7603](https://github.com/rapidsai/cudf/pull/7603)) [@davidwendt](https://github.com/davidwendt) +- Refactor string conversion check ([#7599](https://github.com/rapidsai/cudf/pull/7599)) [@ttnghia](https://github.com/ttnghia) +- JNI: Pass names of children struct columns to native Arrow IPC writer ([#7598](https://github.com/rapidsai/cudf/pull/7598)) [@firestarman](https://github.com/firestarman) +- Revert "ENH Fix stale GHA and prevent duplicates " ([#7595](https://github.com/rapidsai/cudf/pull/7595)) [@mike-wendt](https://github.com/mike-wendt) +- ENH Fix stale GHA and prevent duplicates ([#7594](https://github.com/rapidsai/cudf/pull/7594)) [@mike-wendt](https://github.com/mike-wendt) +- Fix auto-detecting GPU architectures ([#7593](https://github.com/rapidsai/cudf/pull/7593)) [@trxcllnt](https://github.com/trxcllnt) +- Reduce cudf library size ([#7583](https://github.com/rapidsai/cudf/pull/7583)) [@robertmaynard](https://github.com/robertmaynard) +- Optimize cudf::make_strings_column for long strings ([#7576](https://github.com/rapidsai/cudf/pull/7576)) [@davidwendt](https://github.com/davidwendt) +- Always build and export the cudf::cudftestutil target ([#7574](https://github.com/rapidsai/cudf/pull/7574)) [@trxcllnt](https://github.com/trxcllnt) +- Eliminate literal parameters to uvector::set_element_async and device_scalar::set_value ([#7563](https://github.com/rapidsai/cudf/pull/7563)) [@harrism](https://github.com/harrism) +- Add gbenchmark for strings::concatenate ([#7560](https://github.com/rapidsai/cudf/pull/7560)) [@davidwendt](https://github.com/davidwendt) +- Update Changelog Link 
([#7550](https://github.com/rapidsai/cudf/pull/7550)) [@ajschmidt8](https://github.com/ajschmidt8) +- Add gbenchmarks for strings replace regex functions ([#7541](https://github.com/rapidsai/cudf/pull/7541)) [@davidwendt](https://github.com/davidwendt) +- Add `__repr__` for Column and ColumnAccessor ([#7531](https://github.com/rapidsai/cudf/pull/7531)) [@shwina](https://github.com/shwina) +- Support Decimal DIV changes in cudf ([#7527](https://github.com/rapidsai/cudf/pull/7527)) [@razajafri](https://github.com/razajafri) +- Remove unneeded step parameter from strings::detail::copy_slice ([#7525](https://github.com/rapidsai/cudf/pull/7525)) [@davidwendt](https://github.com/davidwendt) +- Use device_uvector, device_span in sort groupby ([#7523](https://github.com/rapidsai/cudf/pull/7523)) [@karthikeyann](https://github.com/karthikeyann) +- Add gbenchmarks for strings extract function ([#7522](https://github.com/rapidsai/cudf/pull/7522)) [@davidwendt](https://github.com/davidwendt) +- Rename ARROW_STATIC_LIB because it conflicts with one in FindArrow.cmake ([#7518](https://github.com/rapidsai/cudf/pull/7518)) [@trxcllnt](https://github.com/trxcllnt) +- Reduce compile time/size for scan.cu ([#7516](https://github.com/rapidsai/cudf/pull/7516)) [@davidwendt](https://github.com/davidwendt) +- Change device_vector to device_uvector in nvtext source files ([#7512](https://github.com/rapidsai/cudf/pull/7512)) [@davidwendt](https://github.com/davidwendt) +- Removed unneeded includes from traits.hpp ([#7509](https://github.com/rapidsai/cudf/pull/7509)) [@davidwendt](https://github.com/davidwendt) +- FIX Remove random build directory generation for ccache ([#7508](https://github.com/rapidsai/cudf/pull/7508)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- xfail failing pytest in 
pandas 1.2.3 ([#7507](https://github.com/rapidsai/cudf/pull/7507)) [@galipremsagar](https://github.com/galipremsagar) +- JNI bit cast ([#7493](https://github.com/rapidsai/cudf/pull/7493)) [@revans2](https://github.com/revans2) +- Combine rolling window function tests ([#7480](https://github.com/rapidsai/cudf/pull/7480)) [@mythrocks](https://github.com/mythrocks) +- Prepare Changelog for Automation ([#7477](https://github.com/rapidsai/cudf/pull/7477)) [@ajschmidt8](https://github.com/ajschmidt8) +- Java support for explode position ([#7471](https://github.com/rapidsai/cudf/pull/7471)) [@sperlingxx](https://github.com/sperlingxx) +- Update 0.18 changelog entry ([#7463](https://github.com/rapidsai/cudf/pull/7463)) [@ajschmidt8](https://github.com/ajschmidt8) +- JNI: Support skipping nulls for collect aggregation ([#7457](https://github.com/rapidsai/cudf/pull/7457)) [@firestarman](https://github.com/firestarman) +- Join APIs that return gathermaps ([#7454](https://github.com/rapidsai/cudf/pull/7454)) [@shwina](https://github.com/shwina) +- Remove dependence on managed memory for multimap test ([#7451](https://github.com/rapidsai/cudf/pull/7451)) [@jrhemstad](https://github.com/jrhemstad) +- Use cuFile for Parquet IO when available ([#7444](https://github.com/rapidsai/cudf/pull/7444)) [@vuule](https://github.com/vuule) +- Statistics cleanup ([#7439](https://github.com/rapidsai/cudf/pull/7439)) [@kaatish](https://github.com/kaatish) +- Add gbenchmarks for strings filter functions ([#7438](https://github.com/rapidsai/cudf/pull/7438)) [@davidwendt](https://github.com/davidwendt) +- `fixed_point` + `cudf::binary_operation` API Changes ([#7435](https://github.com/rapidsai/cudf/pull/7435)) [@codereport](https://github.com/codereport) +- Improve string gather performance 
([#7433](https://github.com/rapidsai/cudf/pull/7433)) [@jlowe](https://github.com/jlowe) +- Don't use user resource for a temporary allocation in sort_by_key ([#7431](https://github.com/rapidsai/cudf/pull/7431)) [@magnatelee](https://github.com/magnatelee) +- Detail APIs for datetime functions ([#7430](https://github.com/rapidsai/cudf/pull/7430)) [@magnatelee](https://github.com/magnatelee) +- Replace thrust::max_element with thrust::reduce in strings findall_re ([#7428](https://github.com/rapidsai/cudf/pull/7428)) [@davidwendt](https://github.com/davidwendt) +- Add gbenchmark for strings split/split_record functions ([#7427](https://github.com/rapidsai/cudf/pull/7427)) [@davidwendt](https://github.com/davidwendt) +- Update JNI build to use CMAKE_CUDA_ARCHITECTURES ([#7425](https://github.com/rapidsai/cudf/pull/7425)) [@jlowe](https://github.com/jlowe) +- Change nvtext::load_vocabulary_file to return a unique ptr ([#7424](https://github.com/rapidsai/cudf/pull/7424)) [@davidwendt](https://github.com/davidwendt) +- Simplify type dispatch with `device_storage_dispatch` ([#7419](https://github.com/rapidsai/cudf/pull/7419)) [@codereport](https://github.com/codereport) +- Java support for casting of nested child columns ([#7417](https://github.com/rapidsai/cudf/pull/7417)) [@razajafri](https://github.com/razajafri) +- Improve scalar string replace performance for long strings ([#7415](https://github.com/rapidsai/cudf/pull/7415)) [@jlowe](https://github.com/jlowe) +- Remove unneeded temporary device vector for strings scatter specialization ([#7409](https://github.com/rapidsai/cudf/pull/7409)) [@davidwendt](https://github.com/davidwendt) +- bitmask_or implementation with bitmask refactor ([#7406](https://github.com/rapidsai/cudf/pull/7406)) [@rwlee](https://github.com/rwlee) +- Add 
other cudf::strings::replace functions to current strings replace gbenchmark ([#7403](https://github.com/rapidsai/cudf/pull/7403)) [@davidwendt](https://github.com/davidwendt) +- Clean up included headers in `device_operators.cuh` ([#7401](https://github.com/rapidsai/cudf/pull/7401)) [@codereport](https://github.com/codereport) +- Move nullable index iterator to indexalator factory ([#7399](https://github.com/rapidsai/cudf/pull/7399)) [@davidwendt](https://github.com/davidwendt) +- ENH Pass ccache variables to conda recipe & use Ninja in CI ([#7398](https://github.com/rapidsai/cudf/pull/7398)) [@Ethyling](https://github.com/Ethyling) +- upgrade maven-antrun-plugin to support maven parallel builds ([#7393](https://github.com/rapidsai/cudf/pull/7393)) [@rongou](https://github.com/rongou) +- Add gbenchmark for strings find/contains functions ([#7392](https://github.com/rapidsai/cudf/pull/7392)) [@davidwendt](https://github.com/davidwendt) +- Use CMAKE_CUDA_ARCHITECTURES ([#7391](https://github.com/rapidsai/cudf/pull/7391)) [@robertmaynard](https://github.com/robertmaynard) +- Refactor libcudf strings::replace to use make_strings_children utility ([#7384](https://github.com/rapidsai/cudf/pull/7384)) [@davidwendt](https://github.com/davidwendt) +- Added in JNI support for out of core sort algorithm ([#7381](https://github.com/rapidsai/cudf/pull/7381)) [@revans2](https://github.com/revans2) +- Upgrade pandas to 1.2 ([#7375](https://github.com/rapidsai/cudf/pull/7375)) [@galipremsagar](https://github.com/galipremsagar) +- Rename `logical_cast` to `bit_cast` and allow additional conversions ([#7373](https://github.com/rapidsai/cudf/pull/7373)) [@ttnghia](https://github.com/ttnghia) +- jitify 2 support ([#7372](https://github.com/rapidsai/cudf/pull/7372)) 
[@cwharris](https://github.com/cwharris) +- compile_udf: Cache PTX for similar functions ([#7371](https://github.com/rapidsai/cudf/pull/7371)) [@gmarkall](https://github.com/gmarkall) +- Add string scalar replace benchmark ([#7369](https://github.com/rapidsai/cudf/pull/7369)) [@jlowe](https://github.com/jlowe) +- Add gbenchmark for strings contains_re/count_re functions ([#7366](https://github.com/rapidsai/cudf/pull/7366)) [@davidwendt](https://github.com/davidwendt) +- Update orc reader and writer fuzz tests ([#7357](https://github.com/rapidsai/cudf/pull/7357)) [@galipremsagar](https://github.com/galipremsagar) +- Improve url_decode performance for long strings ([#7353](https://github.com/rapidsai/cudf/pull/7353)) [@jlowe](https://github.com/jlowe) +- `cudf::ast` Small Refactorings ([#7352](https://github.com/rapidsai/cudf/pull/7352)) [@codereport](https://github.com/codereport) +- Remove std::cout and print in the scatter test function EmptyListsOfNullableStrings. ([#7342](https://github.com/rapidsai/cudf/pull/7342)) [@ttnghia](https://github.com/ttnghia) +- Use `cudf::detail::make_counting_transform_iterator` ([#7338](https://github.com/rapidsai/cudf/pull/7338)) [@codereport](https://github.com/codereport) +- Change block size parameter from a global to a template param. 
([#7333](https://github.com/rapidsai/cudf/pull/7333)) [@nvdbaranec](https://github.com/nvdbaranec) +- Partial clean up of ORC writer ([#7324](https://github.com/rapidsai/cudf/pull/7324)) [@vuule](https://github.com/vuule) +- Add gbenchmark for cudf::strings::to_lower ([#7316](https://github.com/rapidsai/cudf/pull/7316)) [@davidwendt](https://github.com/davidwendt) +- Update Java bindings version to 0.19-SNAPSHOT ([#7307](https://github.com/rapidsai/cudf/pull/7307)) [@pxLi](https://github.com/pxLi) +- Move `cudf::test::make_counting_transform_iterator` to `cudf/detail/iterator.cuh` ([#7306](https://github.com/rapidsai/cudf/pull/7306)) [@codereport](https://github.com/codereport) +- Use string literals in `fixed_point` `release_assert`s ([#7303](https://github.com/rapidsai/cudf/pull/7303)) [@codereport](https://github.com/codereport) +- Fix merge conflicts for #7295 ([#7297](https://github.com/rapidsai/cudf/pull/7297)) [@ajschmidt8](https://github.com/ajschmidt8) +- Add UTF-8 chars to create_random_column<string_view> benchmark utility ([#7292](https://github.com/rapidsai/cudf/pull/7292)) [@davidwendt](https://github.com/davidwendt) +- Abstracting block reduce and block scan from cuIO kernels with `cub` apis ([#7278](https://github.com/rapidsai/cudf/pull/7278)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Build.sh use cmake --build to drive build system invocation ([#7270](https://github.com/rapidsai/cudf/pull/7270)) [@robertmaynard](https://github.com/robertmaynard) +- Refactor dictionary support for reductions any/all ([#7242](https://github.com/rapidsai/cudf/pull/7242)) [@davidwendt](https://github.com/davidwendt) +- Replace stream.value() with stream for stream_view args ([#7236](https://github.com/rapidsai/cudf/pull/7236)) 
[@karthikeyann](https://github.com/karthikeyann) +- Interval index and interval_range ([#7182](https://github.com/rapidsai/cudf/pull/7182)) [@marlenezw](https://github.com/marlenezw) +- avro reader integration tests ([#7156](https://github.com/rapidsai/cudf/pull/7156)) [@cwharris](https://github.com/cwharris) +- Rework libcudf CMakeLists.txt to export targets for CPM ([#7107](https://github.com/rapidsai/cudf/pull/7107)) [@trxcllnt](https://github.com/trxcllnt) +- Adding Interval Dtype ([#6984](https://github.com/rapidsai/cudf/pull/6984)) [@marlenezw](https://github.com/marlenezw) +- Cleaning up `for` loops with `make_(counting_)transform_iterator` ([#6546](https://github.com/rapidsai/cudf/pull/6546)) [@codereport](https://github.com/codereport) # cuDF 0.18.0 (24 Feb 2021) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cfed2e1a692..dde3e2107cf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -131,14 +131,14 @@ run each time you commit changes. Compiler requirements: -* `gcc` version 7.1+ -* `nvcc` version 10.1+ +* `gcc` version 9.3+ +* `nvcc` version 11.0+ * `cmake` version 3.18.0+ CUDA/GPU requirements: -* CUDA 10.1+ -* NVIDIA driver 410.48+ +* CUDA 11.0+ +* NVIDIA driver 450.80.02+ * Pascal architecture or better You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). 
@@ -160,7 +160,7 @@ git submodule update --init --remote --recursive ```bash # create the conda environment (assuming in base `cudf` directory) # note: RAPIDS currently doesn't support `channel_priority: strict`; use `channel_priority: flexible` instead -conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda10.0.yml +conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda11.0.yml # activate the environment conda activate cudf_dev ``` @@ -281,8 +281,8 @@ A Dockerfile is provided with a preconfigured conda environment for building and ### Prerequisites * Install [nvidia-docker2](https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) for Docker + GPU support -* Verify NVIDIA driver is `410.48` or higher -* Ensure CUDA 10.0+ is installed +* Verify NVIDIA driver is `450.80.02` or higher +* Ensure CUDA 11.0+ is installed ### Usage @@ -309,16 +309,16 @@ flag. Below is a list of the available arguments and their purpose: | Build Argument | Default Value | Other Value(s) | Purpose | | --- | --- | --- | --- | -| `CUDA_VERSION` | 10.0 | 10.1, 10.2 | set CUDA version | -| `LINUX_VERSION` | ubuntu16.04 | ubuntu18.04 | set Ubuntu version | -| `CC` & `CXX` | 5 | 7 | set gcc/g++ version; **NOTE:** gcc7 requires Ubuntu 18.04 | +| `CUDA_VERSION` | 11.0 | 11.2.2 | set CUDA version | +| `LINUX_VERSION` | ubuntu18.04 | ubuntu20.04 | set Ubuntu version | +| `CC` & `CXX` | 9 | 10 | set gcc/g++ version | | `CUDF_REPO` | This repo | Forks of cuDF | set git URL to use for `git clone` | | `CUDF_BRANCH` | main | Any branch name | set git branch to checkout of `CUDF_REPO` | | `NUMBA_VERSION` | newest | >=0.40.0 | set numba version | | `NUMPY_VERSION` | newest | >=1.14.3 | set numpy version | | `PANDAS_VERSION` | newest | >=0.23.4 | set pandas version | | `PYARROW_VERSION` | 1.0.1 | Not supported | set pyarrow version | -| `CMAKE_VERSION` | newest | >=3.14 | set cmake version | +| `CMAKE_VERSION` | newest | >=3.18 | set cmake version 
| | `CYTHON_VERSION` | 0.29 | Not supported | set Cython version | | `PYTHON_VERSION` | 3.7 | 3.8 | set python version | diff --git a/Dockerfile b/Dockerfile index f48ed3646f4..eef8a04067d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,23 +1,24 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + # An integration test & dev container which builds and installs cuDF from main -ARG CUDA_VERSION=10.1 +ARG CUDA_VERSION=11.0 ARG CUDA_SHORT_VERSION=${CUDA_VERSION} -ARG LINUX_VERSION=ubuntu16.04 +ARG LINUX_VERSION=ubuntu18.04 FROM nvidia/cuda:${CUDA_VERSION}-devel-${LINUX_VERSION} ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib -# Needed for cudf.concat(), avoids "OSError: library nvvm not found" -ENV NUMBAPRO_NVVM=/usr/local/cuda/nvvm/lib64/libnvvm.so -ENV NUMBAPRO_LIBDEVICE=/usr/local/cuda/nvvm/libdevice/ ENV DEBIAN_FRONTEND=noninteractive -ARG CC=5 -ARG CXX=5 +ARG CC=9 +ARG CXX=9 RUN apt update -y --fix-missing && \ apt upgrade -y && \ + apt install -y --no-install-recommends software-properties-common && \ + add-apt-repository ppa:ubuntu-toolchain-r/test && \ + apt update -y --fix-missing && \ apt install -y --no-install-recommends \ git \ gcc-${CC} \ g++-${CXX} \ - libboost-all-dev \ tzdata && \ apt-get autoremove -y && \ apt-get clean && \ @@ -66,18 +67,10 @@ RUN if [ -f /cudf/docker/package_versions.sh ]; \ conda env create --name cudf --file /cudf/conda/environments/cudf_dev_cuda${CUDA_SHORT_VERSION}.yml ; \ fi -# libcudf build/install -ENV CC=/usr/bin/gcc-${CC} -ENV CXX=/usr/bin/g++-${CXX} -RUN source activate cudf && \ - mkdir -p /cudf/cpp/build && \ - cd /cudf/cpp/build && \ - cmake .. 
-DCMAKE_INSTALL_PREFIX=${CONDA_PREFIX} && \ - make -j"$(nproc)" install +ENV CC=/opts/conda/envs/rapids/bin/gcc-${CC} +ENV CXX=/opts/conda/envs/rapids/bin/g++-${CXX} -# cuDF build/install +# libcudf & cudf build/install RUN source activate cudf && \ - cd /cudf/python/cudf && \ - python setup.py build_ext --inplace && \ - python setup.py install && \ - python setup.py install + cd /cudf/ && \ + ./build.sh libcudf cudf diff --git a/README.md b/README.md index 6d67251b845..545e3331681 100644 --- a/README.md +++ b/README.md @@ -57,15 +57,15 @@ Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapids ### CUDA/GPU requirements -* CUDA 10.1+ -* NVIDIA driver 418.39+ +* CUDA 11.0+ +* NVIDIA driver 450.80.02+ * Pascal architecture or better (Compute Capability >=6.0) ### Conda cuDF can be installed with conda ([miniconda](https://conda.io/miniconda.html), or the full [Anaconda distribution](https://www.anaconda.com/download)) from the `rapidsai` channel: -For `cudf version == 0.19` : +For `cudf version == 0.19.2` : ```bash # for CUDA 10.1 conda install -c rapidsai -c nvidia -c numba -c conda-forge \ @@ -79,13 +79,13 @@ conda install -c rapidsai -c nvidia -c numba -c conda-forge \ For the nightly version of `cudf` : ```bash -# for CUDA 10.1 +# for CUDA 11.0 conda install -c rapidsai-nightly -c nvidia -c numba -c conda-forge \ - cudf python=3.7 cudatoolkit=10.1 + cudf python=3.7 cudatoolkit=11.0 -# or, for CUDA 10.2 +# or, for CUDA 11.2 conda install -c rapidsai-nightly -c nvidia -c numba -c conda-forge \ - cudf python=3.7 cudatoolkit=10.2 + cudf python=3.7 cudatoolkit=11.2 ``` Note: cuDF is supported only on Linux, and with Python versions 3.7 and later. 
diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index 8dd133c8fa3..b2426e22605 100755 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -21,15 +21,15 @@ function hasArg { export PATH=/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=4 export CUDA_REL=${CUDA_VERSION%.*} -export HOME=$WORKSPACE +export HOME="$WORKSPACE" # Parse git describe -cd $WORKSPACE +cd "$WORKSPACE" export GIT_DESCRIBE_TAG=`git describe --tags` export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` # Set Benchmark Vars -export GBENCH_BENCHMARKS_DIR=${WORKSPACE}/cpp/build/gbenchmarks/ +export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/" # Set `LIBCUDF_KERNEL_CACHE_PATH` environment variable to $HOME/.jitify-cache because # it's local to the container's virtual file system, and not shared with other CI jobs @@ -77,8 +77,8 @@ conda install "rmm=$MINOR_VERSION.*" "cudatoolkit=$CUDA_REL" \ # Install the master version of dask, distributed, and streamz logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps" pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps -logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps" -pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps +logger "pip install git+https://github.com/dask/dask.git@2021.05.1 --upgrade --no-deps" +pip install "git+https://github.com/dask/dask.git@2021.05.1" --upgrade --no-deps logger "pip install git+https://github.com/python-streamz/streamz.git --upgrade --no-deps" pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps @@ -96,9 +96,9 @@ conda list --show-channel-urls logger "Build libcudf..." 
if [[ ${BUILD_MODE} == "pull-request" ]]; then - $WORKSPACE/build.sh clean libcudf cudf dask_cudf benchmarks tests --ptds + "$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests --ptds else - $WORKSPACE/build.sh clean libcudf cudf dask_cudf benchmarks tests -l --ptds + "$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests -l --ptds fi ################################################################################ @@ -144,9 +144,9 @@ function getReqs() { REQS=$(getReqs "${LIBCUDF_DEPS[@]}") -mkdir -p ${WORKSPACE}/tmp/benchmark -touch ${WORKSPACE}/tmp/benchmark/benchmarks.txt -ls ${GBENCH_BENCHMARKS_DIR} > ${WORKSPACE}/tmp/benchmark/benchmarks.txt +mkdir -p "$WORKSPACE/tmp/benchmark" +touch "$WORKSPACE/tmp/benchmark/benchmarks.txt" +ls ${GBENCH_BENCHMARKS_DIR} > "$WORKSPACE/tmp/benchmark/benchmarks.txt" #Disable error aborting while tests run, failed tests will not generate data logger "Running libcudf GBenchmarks..." @@ -161,13 +161,13 @@ do rm ./${BENCH}.json JOBEXITCODE=1 fi -done < ${WORKSPACE}/tmp/benchmark/benchmarks.txt +done < "$WORKSPACE/tmp/benchmark/benchmarks.txt" set -e -rm ${WORKSPACE}/tmp/benchmark/benchmarks.txt -cd ${WORKSPACE} -mv ${GBENCH_BENCHMARKS_DIR}/*.json ${WORKSPACE}/tmp/benchmark/ -python GBenchToASV.py -d ${WORKSPACE}/tmp/benchmark/ -t ${S3_ASV_DIR} -n libcudf -b branch-${MINOR_VERSION} -r "${REQS}" +rm "$WORKSPACE/tmp/benchmark/benchmarks.txt" +cd "$WORKSPACE" +mv ${GBENCH_BENCHMARKS_DIR}/*.json "$WORKSPACE/tmp/benchmark/" +python GBenchToASV.py -d "$WORKSPACE/tmp/benchmark/" -t ${S3_ASV_DIR} -n libcudf -b branch-${MINOR_VERSION} -r "${REQS}" ### # Run Python Benchmarks diff --git a/ci/checks/style.sh b/ci/checks/style.sh index 17599c6d74d..981e886d31c 100755 --- a/ci/checks/style.sh +++ b/ci/checks/style.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
##################### # cuDF Style Tester # ##################### @@ -33,6 +33,10 @@ FLAKE_CYTHON_RETVAL=$? MYPY_CUDF=`mypy --config=python/cudf/setup.cfg python/cudf/cudf` MYPY_CUDF_RETVAL=$? +# Run pydocstyle and get results/return code +PYDOCSTYLE=`pydocstyle --config=python/.flake8 python` +PYDOCSTYLE_RETVAL=$? + # Run clang-format and check for a consistent code format CLANG_FORMAT=`python cpp/scripts/run-clang-format.py 2>&1` CLANG_FORMAT_RETVAL=$? @@ -78,6 +82,14 @@ else echo -e "\n\n>>>> PASSED: mypy style check\n\n" fi +if [ "$PYDOCSTYLE_RETVAL" != "0" ]; then + echo -e "\n\n>>>> FAILED: pydocstyle style check; begin output\n\n" + echo -e "$PYDOCSTYLE" + echo -e "\n\n>>>> FAILED: pydocstyle style check; end output\n\n" +else + echo -e "\n\n>>>> PASSED: pydocstyle style check\n\n" +fi + if [ "$CLANG_FORMAT_RETVAL" != "0" ]; then echo -e "\n\n>>>> FAILED: clang format check; begin output\n\n" echo -e "$CLANG_FORMAT" @@ -91,7 +103,7 @@ HEADER_META=`ci/checks/headers_test.sh` HEADER_META_RETVAL=$? 
echo -e "$HEADER_META" -RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL $MYPY_CUDF_RETVAL) +RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $PYDOCSTYLE_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL $MYPY_CUDF_RETVAL) IFS=$'\n' RETVAL=`echo "${RETVALS[*]}" | sort -nr | head -n1` diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 588debc40db..e11a0488624 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -10,7 +10,7 @@ export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} # Set home to the job's workspace -export HOME=$WORKSPACE +export HOME="$WORKSPACE" # Determine CUDA release version export CUDA_REL=${CUDA_VERSION%.*} @@ -21,10 +21,10 @@ export GPUCI_CONDA_RETRY_SLEEP=30 # Use Ninja to build, setup Conda Build Dir export CMAKE_GENERATOR="Ninja" -export CONDA_BLD_DIR="${WORKSPACE}/.conda-bld" +export CONDA_BLD_DIR="$WORKSPACE/.conda-bld" # Switch to project root; also root of repo checkout -cd $WORKSPACE +cd "$WORKSPACE" # If nightly build, append current YYMMDD to version if [[ "$BUILD_MODE" = "branch" && "$SOURCE_BRANCH" = branch-* ]] ; then @@ -42,6 +42,11 @@ gpuci_logger "Activate conda env" . 
/opt/conda/etc/profile.d/conda.sh conda activate rapids +# Remove rapidsai-nightly channel if we are building main branch +if [ "$SOURCE_BRANCH" = "main" ]; then + conda config --system --remove channels rapidsai-nightly +fi + gpuci_logger "Check compiler versions" python --version $CC --version diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh index 76059867321..ed2484814fb 100755 --- a/ci/cpu/prebuild.sh +++ b/ci/cpu/prebuild.sh @@ -14,14 +14,14 @@ else fi # upload cudf_kafka for all versions of Python -if [[ "$CUDA" == "10.1" ]]; then +if [[ "$CUDA" == "11.0" ]]; then export UPLOAD_CUDF_KAFKA=1 else export UPLOAD_CUDF_KAFKA=0 fi #We only want to upload libcudf_kafka once per python/CUDA combo -if [[ "$PYTHON" == "3.7" ]] && [[ "$CUDA" == "10.1" ]]; then +if [[ "$PYTHON" == "3.7" ]] && [[ "$CUDA" == "11.0" ]]; then export UPLOAD_LIBCUDF_KAFKA=1 else export UPLOAD_LIBCUDF_KAFKA=0 diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index 4f72f6dd772..40e80def8ae 100755 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -29,8 +29,8 @@ fi gpuci_logger "Get conda file output locations" -export LIBCUDF_FILE=`conda build --no-build-id --croot ${WORKSPACE}/.conda-bld conda/recipes/libcudf --output` -export LIBCUDF_KAFKA_FILE=`conda build --no-build-id --croot ${WORKSPACE}/.conda-bld conda/recipes/libcudf_kafka --output` +export LIBCUDF_FILE=`conda build --no-build-id --croot "$WORKSPACE/.conda-bld" conda/recipes/libcudf --output` +export LIBCUDF_KAFKA_FILE=`conda build --no-build-id --croot "$WORKSPACE/.conda-bld" conda/recipes/libcudf_kafka --output` export CUDF_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON --output` export DASK_CUDF_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON --output` export CUDF_KAFKA_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON --output` diff --git a/ci/docs/build.sh b/ci/docs/build.sh index 79aa513c58b..a7771124713 100755 --- 
a/ci/docs/build.sh +++ b/ci/docs/build.sh @@ -10,12 +10,11 @@ if [ -z "$PROJECT_WORKSPACE" ]; then exit 1 fi -export DOCS_WORKSPACE=$WORKSPACE/docs +export DOCS_WORKSPACE="$WORKSPACE/docs" export PATH=/conda/bin:/usr/local/cuda/bin:$PATH -export HOME=$WORKSPACE +export HOME="$WORKSPACE" export PROJECT_WORKSPACE=/rapids/cudf export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" -export NIGHTLY_VERSION=$(echo $BRANCH_VERSION | awk -F. '{print $2}') export PROJECTS=(cudf libcudf) gpuci_logger "Check environment..." diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 7614e19cc89..5f163f93410 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -17,14 +17,14 @@ export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} # Set home to the job's workspace -export HOME=$WORKSPACE +export HOME="$WORKSPACE" # Switch to project root; also root of repo checkout -cd $WORKSPACE +cd "$WORKSPACE" # Determine CUDA release version export CUDA_REL=${CUDA_VERSION%.*} -export CONDA_ARTIFACT_PATH=${WORKSPACE}/ci/artifacts/cudf/cpu/.conda-bld/ +export CONDA_ARTIFACT_PATH="$WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/" # Parse git describe export GIT_DESCRIBE_TAG=`git describe --tags` @@ -80,7 +80,7 @@ gpuci_conda_retry install -y \ "rapids-notebook-env=$MINOR_VERSION.*" \ "dask-cuda=${MINOR_VERSION}" \ "rmm=$MINOR_VERSION.*" \ - "ucx-py=${MINOR_VERSION}" + "ucx-py=0.20.*" # https://docs.rapids.ai/maintainers/depmgmt/ # gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env @@ -101,8 +101,8 @@ function install_dask { # Install the main version of dask, distributed, and streamz gpuci_logger "Install the main version of dask, distributed, and streamz" set -x - pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps - pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps + pip install "git+https://github.com/dask/distributed.git@2021.05.1" --upgrade --no-deps + pip 
install "git+https://github.com/dask/dask.git@2021.05.1" --upgrade --no-deps pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps set +x } @@ -117,9 +117,9 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then gpuci_logger "Build from source" if [[ ${BUILD_MODE} == "pull-request" ]]; then - $WORKSPACE/build.sh clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds + "$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds else - $WORKSPACE/build.sh clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests -l --ptds + "$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests -l --ptds fi ################################################################################ @@ -140,12 +140,12 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then gpuci_logger "GoogleTests" set -x - cd $WORKSPACE/cpp/build + cd "$WORKSPACE/cpp/build" - for gt in ${WORKSPACE}/cpp/build/gtests/* ; do + for gt in "$WORKSPACE/cpp/build/gtests/"* ; do test_name=$(basename ${gt}) echo "Running GoogleTest $test_name" - ${gt} --gtest_output=xml:${WORKSPACE}/test-results/ + ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" done fi else @@ -168,7 +168,7 @@ else for gt in gtests/* ; do test_name=$(basename ${gt}) echo "Running GoogleTest $test_name" - ${gt} --gtest_output=xml:${WORKSPACE}/test-results/ + ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" done CUDF_CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcudf-*.tar.bz2"` @@ -185,9 +185,9 @@ else gpuci_logger "Build python libs from source" if [[ ${BUILD_MODE} == "pull-request" ]]; then - $WORKSPACE/build.sh cudf dask_cudf cudf_kafka --ptds + "$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka --ptds else - $WORKSPACE/build.sh cudf dask_cudf cudf_kafka -l --ptds + "$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka -l --ptds fi fi @@ -205,21 +205,21 @@ fi # TEST - Run 
py.test, notebooks ################################################################################ -cd $WORKSPACE/python/cudf +cd "$WORKSPACE/python/cudf" gpuci_logger "Python py.test for cuDF" -py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-cudf.xml -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:${WORKSPACE}/python/cudf/cudf-coverage.xml --cov-report term +py.test -n 6 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term -cd $WORKSPACE/python/dask_cudf +cd "$WORKSPACE/python/dask_cudf" gpuci_logger "Python py.test for dask-cudf" -py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/dask-cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-dask-cudf.xml -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:${WORKSPACE}/python/dask_cudf/dask-cudf-coverage.xml --cov-report term +py.test -n 6 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term -cd $WORKSPACE/python/custreamz +cd "$WORKSPACE/python/custreamz" gpuci_logger "Python py.test for cuStreamz" -py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/custreamz-cuda-tmp --junitxml=${WORKSPACE}/junit-custreamz.xml -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:${WORKSPACE}/python/custreamz/custreamz-coverage.xml --cov-report term +py.test -n 6 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term gpuci_logger "Test notebooks" -${WORKSPACE}/ci/gpu/test-notebooks.sh 2>&1 | tee nbtest.log -python ${WORKSPACE}/ci/utils/nbtestlog2junitxml.py 
nbtest.log +"$WORKSPACE/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log +python "$WORKSPACE/ci/utils/nbtestlog2junitxml.py" nbtest.log if [ -n "${CODECOV_TOKEN}" ]; then codecov -t $CODECOV_TOKEN diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh index ffa2e2a7214..1a5c2614000 100755 --- a/ci/gpu/test-notebooks.sh +++ b/ci/gpu/test-notebooks.sh @@ -1,8 +1,8 @@ #!/bin/bash -NOTEBOOKS_DIR=${WORKSPACE}/notebooks -NBTEST=${WORKSPACE}/ci/utils/nbtest.sh -LIBCUDF_KERNEL_CACHE_PATH=${WORKSPACE}/.jitcache +NOTEBOOKS_DIR="$WORKSPACE/notebooks" +NBTEST="$WORKSPACE/ci/utils/nbtest.sh" +LIBCUDF_KERNEL_CACHE_PATH="$WORKSPACE/.jitcache" cd ${NOTEBOOKS_DIR} TOPLEVEL_NB_FOLDERS=$(find . -name *.ipynb |cut -d'/' -f2|sort -u) diff --git a/ci/local/build.sh b/ci/local/build.sh index 6ee415605b6..1bfb8b63fef 100755 --- a/ci/local/build.sh +++ b/ci/local/build.sh @@ -3,7 +3,7 @@ GIT_DESCRIBE_TAG=`git describe --tags` MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` -DOCKER_IMAGE="gpuci/rapidsai:${MINOR_VERSION}-cuda10.1-devel-ubuntu16.04-py3.7" +DOCKER_IMAGE="gpuci/rapidsai:${MINOR_VERSION}-cuda11.0-devel-ubuntu18.04-py3.7" REPO_PATH=${PWD} RAPIDS_DIR_IN_CONTAINER="/rapids" CPP_BUILD_DIR="cpp/build" diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 819a0dcf6bf..a6154e3db85 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -4,42 +4,25 @@ ######################## ## Usage -# bash update-version.sh -# where is either `major`, `minor`, `patch` +# bash update-version.sh -set -e -# Grab argument for release type -RELEASE_TYPE=$1 +# Format is YY.MM.PP - no leading 'v' or trailing 'a' +NEXT_FULL_TAG=$1 -# Get current version and calculate next versions -CURRENT_TAG=`git tag | grep -xE 'v[0-9\.]+' | sort --version-sort | tail -n 1 | tr -d 'v'` -CURRENT_MAJOR=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}'` -CURRENT_MINOR=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}'` 
-CURRENT_PATCH=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}'` -NEXT_MAJOR=$((CURRENT_MAJOR + 1)) -NEXT_MINOR=$((CURRENT_MINOR + 1)) -NEXT_PATCH=$((CURRENT_PATCH + 1)) +# Get current version +CURRENT_TAG=$(git tag --merged HEAD | grep -xE '^v.*' | sort --version-sort | tail -n 1 | tr -d 'v') +CURRENT_MAJOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}') +CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}') +CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}') CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR} -NEXT_FULL_TAG="" -NEXT_SHORT_TAG="" -# Determine release type -if [ "$RELEASE_TYPE" == "major" ]; then - NEXT_FULL_TAG="${NEXT_MAJOR}.0.0" - NEXT_SHORT_TAG="${NEXT_MAJOR}.0" -elif [ "$RELEASE_TYPE" == "minor" ]; then - NEXT_FULL_TAG="${CURRENT_MAJOR}.${NEXT_MINOR}.0" - NEXT_SHORT_TAG="${CURRENT_MAJOR}.${NEXT_MINOR}" -elif [ "$RELEASE_TYPE" == "patch" ]; then - NEXT_FULL_TAG="${CURRENT_MAJOR}.${CURRENT_MINOR}.${NEXT_PATCH}" - NEXT_SHORT_TAG="${CURRENT_MAJOR}.${CURRENT_MINOR}" -else - echo "Incorrect release type; use 'major', 'minor', or 'patch' as an argument" - exit 1 -fi +#Get . 
for next version +NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}') +NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') +NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} -echo "Preparing '$RELEASE_TYPE' release [$CURRENT_TAG -> $NEXT_FULL_TAG]" +echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" # Inplace sed replace; workaround for Linux and Mac function sed_runner() { @@ -47,11 +30,14 @@ function sed_runner() { } # cpp update -sed_runner 's/'"CUDA_DATAFRAME VERSION .* LANGUAGES"'/'"CUDA_DATAFRAME VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/CMakeLists.txt +sed_runner 's/'"CUDF VERSION .* LANGUAGES"'/'"CUDF VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/CMakeLists.txt # cpp libcudf_kafka update sed_runner 's/'"CUDA_KAFKA VERSION .* LANGUAGES"'/'"CUDA_KAFKA VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/libcudf_kafka/CMakeLists.txt +# cpp cudf_jni update +sed_runner 's/'"CUDF_JNI VERSION .* LANGUAGES"'/'"CUDF_JNI VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' java/src/main/native/CMakeLists.txt + # doxyfile update sed_runner 's/PROJECT_NUMBER = .*/PROJECT_NUMBER = '${NEXT_FULL_TAG}'/g' cpp/doxygen/Doxyfile @@ -69,4 +55,4 @@ sed_runner "s|\(TAGFILES.*librmm/\).*|\1${NEXT_SHORT_TAG}|" cpp/doxygen/Doxyfile # README.md update sed_runner "s/version == ${CURRENT_SHORT_TAG}/version == ${NEXT_SHORT_TAG}/g" README.md -sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md +sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md \ No newline at end of file diff --git a/ci/utils/nbtest.sh b/ci/utils/nbtest.sh index f7b9774c6fd..1b39f267c65 100755 --- a/ci/utils/nbtest.sh +++ b/ci/utils/nbtest.sh @@ -22,7 +22,7 @@ get_ipython().run_cell_magic=my_run_cell_magic NO_COLORS=--colors=NoColor EXITCODE=0 -NBTMPDIR=${WORKSPACE}/tmp +NBTMPDIR="$WORKSPACE/tmp" mkdir -p ${NBTMPDIR} for nb in $*; do diff --git a/conda/environments/cudf_dev_cuda10.1.yml b/conda/environments/cudf_dev_cuda10.1.yml deleted file mode 
100644 index 26d6067b768..00000000000 --- a/conda/environments/cudf_dev_cuda10.1.yml +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. - -name: cudf_dev -channels: - - rapidsai - - nvidia - - rapidsai-nightly - - conda-forge - - defaults -dependencies: - - clang=8.0.1 - - clang-tools=8.0.1 - - cupy>7.1.0,<9.0.0a0 - - rmm=0.19.* - - cmake>=3.14 - - cmake_setuptools>=0.1.3 - - python>=3.7,<3.9 - - numba>=0.49.0,!=0.51.0 - - numpy - - pandas>=1.0,<=1.2.4 - - pyarrow=1.0.1 - - fastavro>=0.22.9 - - notebook>=0.5.0 - - cython>=0.29,<0.30 - - fsspec>=0.6.0 - - pytest - - pytest-benchmark - - pytest-xdist - - sphinx - - sphinx_rtd_theme - - sphinxcontrib-websupport - - nbsphinx - - numpydoc - - ipython - - recommonmark - - pandoc=<2.0.0 - - cudatoolkit=10.1 - - pip - - flake8=3.8.3 - - black=19.10 - - isort=5.0.7 - - mypy=0.782 - - typing_extensions - - pre_commit - - dask==2021.4.0 - - distributed>=2.22.0,<=2021.4.0 - - streamz - - dlpack - - arrow-cpp=1.0.1 - - arrow-cpp-proc * cuda - - boost-cpp>=1.72.0 - - double-conversion - - rapidjson - - flatbuffers - - hypothesis - - sphinx-markdown-tables - - sphinx-copybutton - - mimesis - - packaging - - protobuf - - nvtx>=0.2.1 - - cachetools - - pip: - - git+https://github.com/dask/dask.git@main - - git+https://github.com/dask/distributed.git@main - - git+https://github.com/python-streamz/streamz.git - - pyorc diff --git a/conda/environments/cudf_dev_cuda10.2.yml b/conda/environments/cudf_dev_cuda10.2.yml deleted file mode 100644 index da7e4a91106..00000000000 --- a/conda/environments/cudf_dev_cuda10.2.yml +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. 
- -name: cudf_dev -channels: - - rapidsai - - nvidia - - rapidsai-nightly - - conda-forge - - defaults -dependencies: - - clang=8.0.1 - - clang-tools=8.0.1 - - cupy>7.1.0,<9.0.0a0 - - rmm=0.19.* - - cmake>=3.14 - - cmake_setuptools>=0.1.3 - - python>=3.7,<3.9 - - numba>=0.49,!=0.51.0 - - numpy - - pandas>=1.0,<=1.2.4 - - pyarrow=1.0.1 - - fastavro>=0.22.9 - - notebook>=0.5.0 - - cython>=0.29,<0.30 - - fsspec>=0.6.0 - - pytest - - pytest-benchmark - - pytest-xdist - - sphinx - - sphinx_rtd_theme - - sphinxcontrib-websupport - - nbsphinx - - numpydoc - - ipython - - recommonmark - - pandoc=<2.0.0 - - cudatoolkit=10.2 - - pip - - flake8=3.8.3 - - black=19.10 - - isort=5.0.7 - - mypy=0.782 - - typing_extensions - - pre_commit - - dask==2021.4.0 - - distributed>=2.22.0,<=2021.4.0 - - streamz - - dlpack - - arrow-cpp=1.0.1 - - arrow-cpp-proc * cuda - - boost-cpp>=1.72.0 - - double-conversion - - rapidjson - - flatbuffers - - hypothesis - - sphinx-markdown-tables - - sphinx-copybutton - - mimesis - - packaging - - protobuf - - nvtx>=0.2.1 - - cachetools - - pip: - - git+https://github.com/dask/dask.git@main - - git+https://github.com/dask/distributed.git@main - - git+https://github.com/python-streamz/streamz.git - - pyorc diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index b3aab1da1e5..6c742adbed1 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -6,18 +6,17 @@ channels: - nvidia - rapidsai-nightly - conda-forge - - defaults dependencies: - clang=8.0.1 - clang-tools=8.0.1 - - cupy>7.1.0,<9.0.0a0 - - rmm=0.19.* - - cmake>=3.14 + - cupy>7.1.0,<10.0.0a0 + - rmm=21.06.* + - cmake>=3.18 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - - numba>=0.49,!=0.51.0 + - numba>=0.53.1 - numpy - - pandas>=1.0,<=1.2.4 + - pandas>=1.0,<1.3.0dev0 - pyarrow=1.0.1 - fastavro>=0.22.9 - notebook>=0.5.0 @@ -42,13 +41,12 @@ dependencies: - mypy=0.782 - typing_extensions - 
pre_commit - - dask==2021.4.0 - - distributed>=2.22.0,<=2021.4.0 + - dask>=2021.4.0,<=2021.5.1 + - distributed>=2.22.0,<=2021.5.1 - streamz - - dlpack + - dlpack>=0.5,<0.6.0a0 - arrow-cpp=1.0.1 - arrow-cpp-proc * cuda - - boost-cpp>=1.72.0 - double-conversion - rapidjson - flatbuffers @@ -60,8 +58,9 @@ dependencies: - protobuf - nvtx>=0.2.1 - cachetools + - transformers - pip: - - git+https://github.com/dask/dask.git@main - - git+https://github.com/dask/distributed.git@main + - git+https://github.com/dask/dask.git@2021.05.1 + - git+https://github.com/dask/distributed.git@2021.05.1 - git+https://github.com/python-streamz/streamz.git - pyorc diff --git a/conda/environments/cudf_dev_cuda11.1.yml b/conda/environments/cudf_dev_cuda11.1.yml deleted file mode 100644 index 7feadb5de82..00000000000 --- a/conda/environments/cudf_dev_cuda11.1.yml +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. - -name: cudf_dev -channels: - - rapidsai - - nvidia - - rapidsai-nightly - - conda-forge - - defaults -dependencies: - - clang=8.0.1 - - clang-tools=8.0.1 - - cupy>7.1.0,<9.0.0a0 - - rmm=0.19.* - - cmake>=3.14 - - cmake_setuptools>=0.1.3 - - python>=3.7,<3.9 - - numba>=0.49,!=0.51.0 - - numpy - - pandas>=1.0,<=1.2.4 - - pyarrow=1.0.1 - - fastavro>=0.22.9 - - notebook>=0.5.0 - - cython>=0.29,<0.30 - - fsspec>=0.6.0 - - pytest - - pytest-benchmark - - pytest-xdist - - sphinx - - sphinx_rtd_theme - - sphinxcontrib-websupport - - nbsphinx - - numpydoc - - ipython - - recommonmark - - pandoc=<2.0.0 - - cudatoolkit=11.1 - - pip - - flake8=3.8.3 - - black=19.10 - - isort=5.0.7 - - mypy=0.782 - - typing_extensions - - pre_commit - - dask==2021.4.0 - - distributed>=2.22.0,<=2021.4.0 - - streamz - - dlpack - - arrow-cpp=1.0.1 - - arrow-cpp-proc * cuda - - boost-cpp>=1.72.0 - - double-conversion - - rapidjson - - flatbuffers - - hypothesis - - sphinx-markdown-tables - - sphinx-copybutton - - mimesis - - packaging - - protobuf - - 
nvtx>=0.2.1 - - cachetools - - pip: - - git+https://github.com/dask/dask.git@main - - git+https://github.com/dask/distributed.git@main - - git+https://github.com/python-streamz/streamz.git - - pyorc diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index 10ae1931d3c..41bc72d5c5b 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -6,18 +6,17 @@ channels: - nvidia - rapidsai-nightly - conda-forge - - defaults dependencies: - clang=8.0.1 - clang-tools=8.0.1 - - cupy>7.1.0,<9.0.0a0 - - rmm=0.19.* - - cmake>=3.14 + - cupy>7.1.0,<10.0.0a0 + - rmm=21.06.* + - cmake>=3.18 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - - numba>=0.49,!=0.51.0 + - numba>=0.53.1 - numpy - - pandas>=1.0,<=1.2.4 + - pandas>=1.0,<1.3.0dev0 - pyarrow=1.0.1 - fastavro>=0.22.9 - notebook>=0.5.0 @@ -42,13 +41,12 @@ dependencies: - mypy=0.782 - typing_extensions - pre_commit - - dask==2021.4.0 - - distributed>=2.22.0,<=2021.4.0 + - dask>=2021.4.0,<=2021.5.1 + - distributed>=2.22.0,<=2021.5.1 - streamz - - dlpack + - dlpack>=0.5,<0.6.0a0 - arrow-cpp=1.0.1 - arrow-cpp-proc * cuda - - boost-cpp>=1.72.0 - double-conversion - rapidjson - flatbuffers @@ -60,8 +58,9 @@ dependencies: - protobuf - nvtx>=0.2.1 - cachetools + - transformers - pip: - - git+https://github.com/dask/dask.git@main - - git+https://github.com/dask/distributed.git@main + - git+https://github.com/dask/dask.git@2021.05.1 + - git+https://github.com/dask/distributed.git@2021.05.1 - git+https://github.com/python-streamz/streamz.git - pyorc diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index c9d2ee06d58..d1aaf924555 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -3,7 +3,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = 
version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} -{% set cuda_version='.'.join(environ.get('CUDA_VERSION', '10.1').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: cudf @@ -18,6 +18,9 @@ build: script_env: - VERSION_SUFFIX - PARALLEL_LEVEL + - CC + - CXX + - CUDAHOSTCXX requirements: build: @@ -25,8 +28,8 @@ requirements: - python - cython >=0.29,<0.30 - setuptools - - numba >=0.49.0 - - dlpack + - numba >=0.53.1 + - dlpack>=0.5,<0.6.0a0 - pyarrow 1.0.1 - libcudf {{ version }} - rmm {{ minor_version }} @@ -35,9 +38,9 @@ requirements: - protobuf - python - typing_extensions - - pandas >=1.0,<=1.2.4 - - cupy >7.1.0,<9.0.0a0 - - numba >=0.49.0 + - pandas >=1.0,<1.3.0dev0 + - cupy >7.1.0,<10.0.0a0 + - numba >=0.53.1 - numpy - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} - fastavro >=0.22.0 diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 0acd9ec4bb2..b59a49b0db7 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -1,9 +1,9 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} -{% set cuda_version='.'.join(environ.get('CUDA_VERSION', '10.1').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: cudf_kafka @@ -24,7 +24,7 @@ build: requirements: build: - - cmake >=3.17.0 + - cmake >=3.18 host: - python - cython >=0.29,<0.30 diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index f65b3cafbd7..34b83bb1492 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -3,7 +3,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} -{% set cuda_version='.'.join(environ.get('CUDA_VERSION', '10.1').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: custreamz @@ -18,6 +18,9 @@ build: script_env: - VERSION_SUFFIX - PARALLEL_LEVEL + - CC + - CXX + - CUDAHOSTCXX requirements: host: @@ -28,8 +31,8 @@ requirements: - python - streamz - cudf {{ version }} - - dask >=2.22.0,<=2021.4.0 - - distributed >=2.22.0,<=2021.4.0 + - dask>=2021.4.0,<=2021.5.1 + - distributed>=2.22.0,<=2021.5.1 - python-confluent-kafka - cudf_kafka {{ version }} diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index 8b503840b34..11c3634d2aa 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -3,7 +3,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} -{% set cuda_version='.'.join(environ.get('CUDA_VERSION', '10.1').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: dask-cudf @@ -18,18 +18,21 @@ build: script_env: - VERSION_SUFFIX - PARALLEL_LEVEL + - CC + - CXX + - CUDAHOSTCXX requirements: host: - python - cudf {{ version }} - - dask==2021.4.0 - - distributed >=2.22.0,<=2021.4.0 + - dask>=2021.4.0,<=2021.5.1 + - distributed>=2.22.0,<=2021.5.1 run: - python - cudf {{ version }} - - dask==2021.4.0 - - distributed >=2.22.0,<=2021.4.0 + - dask>=2021.4.0,<=2021.5.1 + - distributed>=2.22.0,<=2021.5.1 test: requires: diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh index 3fc1182b33b..472e59149b5 100644 --- a/conda/recipes/dask-cudf/run_test.sh +++ b/conda/recipes/dask-cudf/run_test.sh @@ -12,8 +12,8 @@ function logger() { logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps" pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps -logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps" -pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps +logger "pip install git+https://github.com/dask/dask.git@2021.05.1 --upgrade --no-deps" +pip install "git+https://github.com/dask/dask.git@2021.05.1" --upgrade --no-deps logger "python -c 'import dask_cudf'" python -c "import dask_cudf" diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 75955428eab..dc41c439d27 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -2,7 +2,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} -{% set cuda_version='.'.join(environ.get('CUDA_VERSION', '10.1').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: libcudf @@ -39,12 +39,10 @@ requirements: - cudatoolkit {{ cuda_version }}.* - arrow-cpp 1.0.1 - arrow-cpp-proc * cuda - - boost-cpp 1.72.0 - - dlpack + - dlpack>=0.5,<0.6.0a0 run: - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - arrow-cpp-proc * cuda - - {{ pin_compatible('boost-cpp', max_pin='x.x.x') }} - {{ pin_compatible('dlpack', max_pin='x.x') }} test: @@ -55,7 +53,7 @@ test: - test -f $PREFIX/include/cudf/ast/transform.hpp - test -f $PREFIX/include/cudf/ast/detail/linearizer.hpp - test -f $PREFIX/include/cudf/ast/detail/operators.hpp - - test -f $PREFIX/include/cudf/ast/linearizer.hpp + - test -f $PREFIX/include/cudf/ast/nodes.hpp - test -f $PREFIX/include/cudf/ast/operators.hpp - test -f $PREFIX/include/cudf/binaryop.hpp - test -f $PREFIX/include/cudf/labeling/label_bins.hpp @@ -76,8 +74,10 @@ test: - test -f $PREFIX/include/cudf/detail/gather.hpp - test -f $PREFIX/include/cudf/detail/groupby.hpp - test -f $PREFIX/include/cudf/detail/groupby/sort_helper.hpp + - test -f $PREFIX/include/cudf/detail/groupby/group_replace_nulls.hpp - test -f $PREFIX/include/cudf/detail/hashing.hpp - test -f $PREFIX/include/cudf/detail/interop.hpp + - test -f $PREFIX/include/cudf/detail/is_element_valid.hpp - test -f $PREFIX/include/cudf/detail/null_mask.hpp - test -f $PREFIX/include/cudf/detail/nvtx/nvtx3.hpp - test -f $PREFIX/include/cudf/detail/nvtx/ranges.hpp @@ -86,7 +86,9 @@ test: - test -f $PREFIX/include/cudf/detail/repeat.hpp - test -f $PREFIX/include/cudf/detail/replace.hpp - test -f $PREFIX/include/cudf/detail/reshape.hpp + - test -f $PREFIX/include/cudf/detail/rolling.hpp - test -f $PREFIX/include/cudf/detail/round.hpp + - test -f $PREFIX/include/cudf/detail/scan.hpp - test -f $PREFIX/include/cudf/detail/scatter.hpp - test -f 
$PREFIX/include/cudf/detail/search.hpp - test -f $PREFIX/include/cudf/detail/sequence.hpp @@ -132,10 +134,14 @@ test: - test -f $PREFIX/include/cudf/io/types.hpp - test -f $PREFIX/include/cudf/ipc.hpp - test -f $PREFIX/include/cudf/join.hpp + - test -f $PREFIX/include/cudf/lists/detail/combine.hpp - test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp - test -f $PREFIX/include/cudf/lists/detail/copying.hpp + - test -f $PREFIX/include/cudf/lists/lists_column_factories.hpp - test -f $PREFIX/include/cudf/lists/detail/drop_list_duplicates.hpp + - test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp - test -f $PREFIX/include/cudf/lists/detail/sorting.hpp + - test -f $PREFIX/include/cudf/lists/combine.hpp - test -f $PREFIX/include/cudf/lists/count_elements.hpp - test -f $PREFIX/include/cudf/lists/explode.hpp - test -f $PREFIX/include/cudf/lists/drop_list_duplicates.hpp @@ -152,6 +158,7 @@ test: - test -f $PREFIX/include/cudf/replace.hpp - test -f $PREFIX/include/cudf/reshape.hpp - test -f $PREFIX/include/cudf/rolling.hpp + - test -f $PREFIX/include/cudf/rolling/range_window_bounds.hpp - test -f $PREFIX/include/cudf/round.hpp - test -f $PREFIX/include/cudf/scalar/scalar_factories.hpp - test -f $PREFIX/include/cudf/scalar/scalar.hpp diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index 5348ec471e9..5e06c074433 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} @@ -23,7 +23,7 @@ build: requirements: build: - - cmake >=3.17.0 + - cmake >=3.18 host: - libcudf {{ version }} - librdkafka >=1.5.0,<1.5.3 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 525e5f9225d..b961080d162 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -28,7 +28,7 @@ elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "") set(CUDF_BUILD_FOR_DETECTED_ARCHS TRUE) endif() -project(CUDF VERSION 0.19.0 LANGUAGES C CXX) +project(CUDF VERSION 21.06.00 LANGUAGES C CXX) # Needed because GoogleBenchmark changes the state of FindThreads.cmake, # causing subsequent runs to have different values for the `Threads::Threads` target. @@ -121,8 +121,6 @@ find_package(ZLIB REQUIRED) find_package(Threads REQUIRED) # add third party dependencies using CPM include(cmake/thirdparty/CUDF_GetCPM.cmake) -# find boost -include(cmake/thirdparty/CUDF_FindBoost.cmake) # find jitify include(cmake/thirdparty/CUDF_GetJitify.cmake) # find thrust/cub @@ -155,9 +153,11 @@ add_library(cudf src/binaryop/compiled/binary_ops.cu src/labeling/label_bins.cu src/bitmask/null_mask.cu + src/bitmask/is_element_valid.cpp src/column/column.cu src/column/column_device_view.cu src/column/column_factories.cpp + src/column/column_factories.cu src/column/column_view.cpp src/comms/ipc/ipc.cpp src/copying/concatenate.cu @@ -173,6 +173,7 @@ add_library(cudf src/copying/shift.cu src/copying/slice.cpp src/copying/split.cpp + src/copying/segmented_shift.cu src/datetime/datetime_ops.cu src/dictionary/add_keys.cu src/dictionary/decode.cu @@ -199,6 +200,7 @@ add_library(cudf src/groupby/sort/group_min.cu src/groupby/sort/group_nth_element.cu src/groupby/sort/group_nunique.cu + src/groupby/sort/group_product.cu src/groupby/sort/group_quantiles.cu src/groupby/sort/group_std.cu src/groupby/sort/group_sum.cu @@ -207,11 +209,14 @@ add_library(cudf src/groupby/sort/group_max_scan.cu src/groupby/sort/group_min_scan.cu src/groupby/sort/group_sum_scan.cu + 
src/groupby/sort/group_replace_nulls.cu src/groupby/sort/sort_helper.cu src/hash/hashing.cu + src/hash/md5_hash.cu + src/hash/murmur_hash.cu src/interop/dlpack.cpp - src/interop/from_arrow.cpp - src/interop/to_arrow.cpp + src/interop/from_arrow.cu + src/interop/to_arrow.cu src/io/avro/avro.cpp src/io/avro/avro_gpu.cu src/io/avro/reader_impl.cu @@ -246,11 +251,14 @@ add_library(cudf src/io/parquet/parquet.cpp src/io/parquet/reader_impl.cu src/io/parquet/writer_impl.cu - src/io/statistics/column_stats.cu + src/io/statistics/orc_column_statistics.cu + src/io/statistics/parquet_column_statistics.cu + src/io/utilities/column_buffer.cpp src/io/utilities/data_sink.cpp src/io/utilities/datasource.cpp src/io/utilities/file_io_utilities.cpp src/io/utilities/parsing_utils.cu + src/io/utilities/trie.cu src/io/utilities/type_conversion.cpp src/jit/cache.cpp src/jit/parser.cpp @@ -260,14 +268,17 @@ add_library(cudf src/join/join.cu src/join/semi_join.cu src/lists/contains.cu + src/lists/combine/concatenate_list_elements.cu + src/lists/combine/concatenate_rows.cu src/lists/copying/concatenate.cu src/lists/copying/copying.cu src/lists/copying/gather.cu src/lists/copying/segmented_gather.cu src/lists/count_elements.cu + src/lists/drop_list_duplicates.cu src/lists/explode.cu src/lists/extract.cu - src/lists/drop_list_duplicates.cu + src/lists/interleave_columns.cu src/lists/lists_column_factories.cu src/lists/lists_column_view.cu src/lists/segmented_sort.cu @@ -285,7 +296,9 @@ add_library(cudf src/reductions/nth_element.cu src/reductions/product.cu src/reductions/reductions.cpp - src/reductions/scan.cu + src/reductions/scan/scan.cpp + src/reductions/scan/scan_exclusive.cu + src/reductions/scan/scan_inclusive.cu src/reductions/std.cu src/reductions/sum.cu src/reductions/sum_of_squares.cu @@ -299,6 +312,7 @@ add_library(cudf src/reshape/tile.cu src/rolling/grouped_rolling.cu src/rolling/rolling.cu + src/rolling/range_window_bounds.cpp src/round/round.cu src/scalar/scalar.cpp 
src/scalar/scalar_factories.cpp @@ -320,7 +334,9 @@ add_library(cudf src/strings/case.cu src/strings/char_types/char_cases.cu src/strings/char_types/char_types.cu - src/strings/combine.cu + src/strings/combine/concatenate.cu + src/strings/combine/join.cu + src/strings/combine/join_list_elements.cu src/strings/contains.cu src/strings/convert/convert_booleans.cu src/strings/convert/convert_datetime.cu @@ -363,7 +379,7 @@ add_library(cudf src/structs/copying/concatenate.cu src/structs/structs_column_factories.cu src/structs/structs_column_view.cpp - src/structs/utilities.cu + src/structs/utilities.cpp src/table/table.cpp src/table/table_device_view.cu src/table/table_view.cpp @@ -397,9 +413,9 @@ set_target_properties(cudf PROPERTIES BUILD_RPATH "\$ORIGIN" INSTALL_RPATH "\$ORIGIN" # set target compile options - CXX_STANDARD 14 + CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 14 + CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON INTERFACE_POSITION_INDEPENDENT_CODE ON @@ -464,7 +480,6 @@ add_dependencies(cudf jitify_preprocess_run) # Specify the target module library dependencies target_link_libraries(cudf PUBLIC ZLIB::ZLIB - Boost::filesystem ${ARROW_LIBRARIES} cudf::Thrust rmm::rmm) @@ -517,7 +532,7 @@ target_compile_options(cudftestutil ) target_compile_features(cudftestutil - PUBLIC cxx_std_14 $) + PUBLIC cxx_std_17 $) target_link_libraries(cudftestutil PUBLIC GTest::gmock diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 11af408f1c5..25d012b1b33 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -17,7 +17,7 @@ find_package(Threads REQUIRED) add_library(cudf_datagen STATIC common/generate_benchmark_input.cpp) -target_compile_features(cudf_datagen PUBLIC cxx_std_14 cuda_std_14) +target_compile_features(cudf_datagen PUBLIC cxx_std_17 cuda_std_17) target_compile_options(cudf_datagen PUBLIC "$<$:${CUDF_CXX_FLAGS}>" @@ -97,14 +97,20 @@ ConfigureBench(ITERATOR_BENCH 
iterator/iterator_benchmark.cu) ################################################################################################### # - search benchmark ------------------------------------------------------------------------------ -ConfigureBench(SEARCH_BENCH search/search_benchmark.cu) +ConfigureBench(SEARCH_BENCH search/search_benchmark.cpp) ################################################################################################### # - sort benchmark -------------------------------------------------------------------------------- ConfigureBench(SORT_BENCH + sort/rank_benchmark.cpp sort/sort_benchmark.cpp sort/sort_strings_benchmark.cpp) +################################################################################################### +# - quantiles benchmark -------------------------------------------------------------------------------- +ConfigureBench(QUANTILES_BENCH + quantiles/quantiles_benchmark.cpp) + ################################################################################################### # - type_dispatcher benchmark --------------------------------------------------------------------- ConfigureBench(TYPE_DISPATCHER_BENCH type_dispatcher/type_dispatcher_benchmark.cu) @@ -118,15 +124,28 @@ ConfigureBench(REDUCTION_BENCH reduction/scan_benchmark.cpp reduction/minmax_benchmark.cpp) +################################################################################################### +# - reduction benchmark --------------------------------------------------------------------------- +ConfigureBench(REPLACE_BENCH + replace/clamp_benchmark.cpp) + +################################################################################################### +# - filling benchmark ----------------------------------------------------------------------------- +ConfigureBench(FILL_BENCH + filling/repeat_benchmark.cpp) + ################################################################################################### # - groupby benchmark 
----------------------------------------------------------------------------- ConfigureBench(GROUPBY_BENCH groupby/group_sum_benchmark.cu - groupby/group_nth_benchmark.cu) + groupby/group_nth_benchmark.cu + groupby/group_shift_benchmark.cu) ################################################################################################### # - hashing benchmark ----------------------------------------------------------------------------- -ConfigureBench(HASHING_BENCH hashing/hashing_benchmark.cpp) +ConfigureBench(HASHING_BENCH + hashing/hash_benchmark.cpp + hashing/partition_benchmark.cpp) ################################################################################################### # - merge benchmark ------------------------------------------------------------------------------- @@ -170,7 +189,9 @@ ConfigureBench(AST_BENCH ast/transform_benchmark.cpp) ################################################################################################### # - binaryop benchmark ---------------------------------------------------------------------------- -ConfigureBench(BINARYOP_BENCH binaryop/binaryop_benchmark.cu) +ConfigureBench(BINARYOP_BENCH + binaryop/binaryop_benchmark.cpp + binaryop/jit_binaryop_benchmark.cpp) ################################################################################################### # - nvtext benchmark ------------------------------------------------------------------- diff --git a/cpp/benchmarks/binaryop/binaryop_benchmark.cu b/cpp/benchmarks/binaryop/binaryop_benchmark.cpp similarity index 100% rename from cpp/benchmarks/binaryop/binaryop_benchmark.cu rename to cpp/benchmarks/binaryop/binaryop_benchmark.cpp diff --git a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp new file mode 100644 index 00000000000..29ca02a843d --- /dev/null +++ b/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include + +#include + +template +class JIT_BINARYOP : public cudf::benchmark { +}; + +template +void BM_binaryop(benchmark::State& state, cudf::binary_operator binop) +{ + const cudf::size_type column_size{(cudf::size_type)state.range(0)}; + + auto data_it = thrust::make_counting_iterator(0); + cudf::test::fixed_width_column_wrapper input1(data_it, data_it + column_size); + cudf::test::fixed_width_column_wrapper input2(data_it, data_it + column_size); + + auto lhs = cudf::column_view(input1); + auto rhs = cudf::column_view(input2); + auto output_dtype = cudf::data_type(cudf::type_to_id()); + + // Call once for hot cache. + cudf::binary_operation(lhs, rhs, binop, output_dtype); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + cudf::binary_operation(lhs, rhs, binop, output_dtype); + } +} + +// TODO tparam boolean for null. 
+#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut) \ + BENCHMARK_TEMPLATE_DEFINE_F(JIT_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut) \ + (::benchmark::State & st) \ + { \ + BM_binaryop(st, cudf::binary_operator::binop); \ + } \ + BENCHMARK_REGISTER_F(JIT_BINARYOP, binop) \ + ->Unit(benchmark::kMillisecond) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +using namespace cudf; + +// clang-format off +BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t); +BINARYOP_BENCHMARK_DEFINE(duration_s, duration_D, SUB, duration_ms); +BINARYOP_BENCHMARK_DEFINE(float, float, MUL, int64_t); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, TRUE_DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, FLOOR_DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(double, double, MOD, double); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, POW, double); +BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, BITWISE_AND, int16_t); +BINARYOP_BENCHMARK_DEFINE(int16_t, int32_t, BITWISE_OR, int64_t); +BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, BITWISE_XOR, int32_t); +BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, int16_t); +BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, LOGICAL_OR, bool); +BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool); +BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool); +BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int); +BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int); +BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t); +BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double); +BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double); diff --git a/cpp/benchmarks/column/concatenate_benchmark.cpp b/cpp/benchmarks/column/concatenate_benchmark.cpp index b04cfba7d07..3634b2f08a2 100644 --- 
a/cpp/benchmarks/column/concatenate_benchmark.cpp +++ b/cpp/benchmarks/column/concatenate_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -62,7 +62,7 @@ static void BM_concatenate(benchmark::State& state) CHECK_CUDA(0); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); auto result = cudf::concatenate(column_views); } @@ -124,7 +124,7 @@ static void BM_concatenate_tables(benchmark::State& state) CHECK_CUDA(0); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); auto result = cudf::concatenate(table_views); } @@ -184,7 +184,7 @@ static void BM_concatenate_strings(benchmark::State& state) CHECK_CUDA(0); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); auto result = cudf::concatenate(column_views); } diff --git a/cpp/benchmarks/common/generate_benchmark_input.cpp b/cpp/benchmarks/common/generate_benchmark_input.cpp index a66416ad40b..591e42ceddf 100644 --- a/cpp/benchmarks/common/generate_benchmark_input.cpp +++ b/cpp/benchmarks/common/generate_benchmark_input.cpp @@ -18,6 +18,7 @@ #include "random_distribution_factory.hpp" #include +#include #include #include @@ -26,7 +27,7 @@ #include #include -#include +#include #include #include @@ -413,9 +414,9 @@ std::unique_ptr create_random_column(data_profi } } - rmm::device_vector d_chars(out_col.chars); - rmm::device_vector d_offsets(out_col.offsets); - rmm::device_vector d_null_mask(out_col.null_mask); + auto d_chars = cudf::detail::make_device_uvector_sync(out_col.chars); + auto d_offsets = cudf::detail::make_device_uvector_sync(out_col.offsets); + auto d_null_mask = 
cudf::detail::make_device_uvector_sync(out_col.null_mask); return cudf::make_strings_column(d_chars, d_offsets, d_null_mask); } diff --git a/cpp/benchmarks/filling/repeat_benchmark.cpp b/cpp/benchmarks/filling/repeat_benchmark.cpp new file mode 100644 index 00000000000..3cedd55767d --- /dev/null +++ b/cpp/benchmarks/filling/repeat_benchmark.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include +#include + +#include + +#include + +#include "../fixture/benchmark_fixture.hpp" +#include "../synchronization/synchronization.hpp" + +class Repeat : public cudf::benchmark { +}; + +template +void BM_repeat(benchmark::State& state) +{ + using column_wrapper = cudf::test::fixed_width_column_wrapper; + auto const n_rows = static_cast(state.range(0)); + auto const n_cols = static_cast(state.range(1)); + + auto idx_begin = thrust::make_counting_iterator(0); + auto idx_end = thrust::make_counting_iterator(n_rows); + + std::vector columns; + columns.reserve(n_rows); + std::generate_n(std::back_inserter(columns), n_cols, [&]() { + return nulls ? 
column_wrapper( + idx_begin, + idx_end, + thrust::make_transform_iterator(idx_begin, [](auto idx) { return true; })) + : column_wrapper(idx_begin, idx_end); + }); + + // repeat counts + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 3); + + std::vector host_repeat_count(n_rows); + std::generate( + host_repeat_count.begin(), host_repeat_count.end(), [&] { return distribution(generator); }); + + cudf::test::fixed_width_column_wrapper repeat_count(host_repeat_count.begin(), + host_repeat_count.end()); + + // Create column views + auto const column_views = std::vector(columns.begin(), columns.end()); + + // Create table view + auto input = cudf::table_view(column_views); + + // warm up + auto output = cudf::repeat(input, repeat_count); + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + cudf::repeat(input, repeat_count); + } + + auto data_bytes = + (input.num_columns() * input.num_rows() + output->num_columns() * output->num_rows()) * + sizeof(TypeParam); + auto null_bytes = + nulls ? 
input.num_columns() * cudf::bitmask_allocation_size_bytes(input.num_rows()) + + output->num_columns() * cudf::bitmask_allocation_size_bytes(output->num_rows()) + : 0; + state.SetBytesProcessed(state.iterations() * (data_bytes + null_bytes)); +} + +#define REPEAT_BENCHMARK_DEFINE(name, type, nulls) \ + BENCHMARK_DEFINE_F(Repeat, name)(::benchmark::State & state) { BM_repeat(state); } \ + BENCHMARK_REGISTER_F(Repeat, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +REPEAT_BENCHMARK_DEFINE(double_nulls, double, true); +REPEAT_BENCHMARK_DEFINE(double_no_nulls, double, false); diff --git a/cpp/benchmarks/groupby/group_shift_benchmark.cu b/cpp/benchmarks/groupby/group_shift_benchmark.cu new file mode 100644 index 00000000000..81afcdd80e1 --- /dev/null +++ b/cpp/benchmarks/groupby/group_shift_benchmark.cu @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#include + +class Groupby : public cudf::benchmark { +}; + +// TODO: put it in a struct so `uniform` can be remade with different min, max +template +T random_int(T min, T max) +{ + static unsigned seed = 13377331; + static std::mt19937 engine{seed}; + static std::uniform_int_distribution uniform{min, max}; + + return uniform(engine); +} + +void BM_group_shift(benchmark::State& state) +{ + using wrapper = cudf::test::fixed_width_column_wrapper; + + const cudf::size_type column_size{(cudf::size_type)state.range(0)}; + const int num_groups = 100; + + auto data_it = cudf::detail::make_counting_transform_iterator( + 0, [](cudf::size_type row) { return random_int(0, num_groups); }); + + wrapper keys(data_it, data_it + column_size); + wrapper vals(data_it, data_it + column_size); + + cudf::groupby::groupby gb_obj(cudf::table_view({keys})); + + std::vector offsets{ + static_cast(column_size / float(num_groups) * 0.5)}; // forward shift half way + // null fill value + auto fill_value = cudf::make_default_constructed_scalar(cudf::data_type(cudf::type_id::INT64)); + // non null fill value + // auto fill_value = cudf::make_fixed_width_scalar(static_cast(42)); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = gb_obj.shift(cudf::table_view{{vals}}, offsets, {*fill_value}); + } +} + +BENCHMARK_DEFINE_F(Groupby, Shift)(::benchmark::State& state) { BM_group_shift(state); } + +BENCHMARK_REGISTER_F(Groupby, Shift) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000) + ->UseManualTime() + ->Unit(benchmark::kMillisecond); diff --git a/cpp/benchmarks/hashing/hash_benchmark.cpp b/cpp/benchmarks/hashing/hash_benchmark.cpp new file mode 100644 index 00000000000..77b10399693 --- /dev/null +++ b/cpp/benchmarks/hashing/hash_benchmark.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include + +class HashBenchmark : public cudf::benchmark { +}; + +static void BM_hash(benchmark::State& state, cudf::hash_id hid) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + auto const data = create_random_table({cudf::type_id::INT64}, 1, row_count{n_rows}); + + for (auto _ : state) { + cuda_event_timer raii(state, true, rmm::cuda_stream_default); + cudf::hash(data->view(), hid); + } +} + +#define HASH_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(HashBenchmark, name) \ + (::benchmark::State & st) { BM_hash(st, cudf::hash_id::name); } \ + BENCHMARK_REGISTER_F(HashBenchmark, name) \ + ->RangeMultiplier(4) \ + ->Ranges({{1 << 14, 1 << 24}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +HASH_BENCHMARK_DEFINE(HASH_MURMUR3) +HASH_BENCHMARK_DEFINE(HASH_MD5) +HASH_BENCHMARK_DEFINE(HASH_SERIAL_MURMUR3) +HASH_BENCHMARK_DEFINE(HASH_SPARK_MURMUR3) diff --git a/cpp/benchmarks/hashing/hashing_benchmark.cpp b/cpp/benchmarks/hashing/partition_benchmark.cpp similarity index 100% rename from cpp/benchmarks/hashing/hashing_benchmark.cpp rename to cpp/benchmarks/hashing/partition_benchmark.cpp diff --git a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp index d38747b934f..2f3f454fda6 100644 --- a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp +++ 
b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp @@ -84,7 +84,6 @@ void BM_orc_read_varying_options(benchmark::State& state) auto const flags = state.range(state_idx++); auto const use_index = (flags & 1) != 0; auto const use_np_dtypes = (flags & 2) != 0; - auto const dec_as_float = (flags & 4) != 0; auto const ts_type = cudf::data_type{static_cast(state.range(state_idx++))}; auto const data_types = @@ -107,8 +106,7 @@ void BM_orc_read_varying_options(benchmark::State& state) .columns(cols_to_read) .use_index(use_index) .use_np_dtypes(use_np_dtypes) - .timestamp_type(ts_type) - .decimals_as_float64(dec_as_float); + .timestamp_type(ts_type); auto const num_stripes = data_size / (64 << 20); cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; @@ -167,7 +165,7 @@ BENCHMARK_REGISTER_F(OrcRead, column_selection) int32_t(column_selection::SECOND_HALF)}, {int32_t(row_selection::ALL)}, {1}, - {0b111}, // defaults + {0b11}, // defaults {int32_t(cudf::type_id::EMPTY)}}) ->Unit(benchmark::kMillisecond) ->UseManualTime(); @@ -178,7 +176,7 @@ BENCHMARK_REGISTER_F(OrcRead, row_selection) ->ArgsProduct({{int32_t(column_selection::ALL)}, {int32_t(row_selection::STRIPES), int32_t(row_selection::NROWS)}, {1, 8}, - {0b111}, // defaults + {0b11}, // defaults {int32_t(cudf::type_id::EMPTY)}}) ->Unit(benchmark::kMillisecond) ->UseManualTime(); @@ -189,7 +187,7 @@ BENCHMARK_REGISTER_F(OrcRead, misc_options) ->ArgsProduct({{int32_t(column_selection::ALL)}, {int32_t(row_selection::NROWS)}, {1}, - {0b111, 0b110, 0b101, 0b011}, // `true` is default for each boolean parameter here + {0b11, 0b10, 0b01}, // `true` is default for each boolean parameter here {int32_t(cudf::type_id::EMPTY), int32_t(cudf::type_id::TIMESTAMP_NANOSECONDS)}}) ->Unit(benchmark::kMillisecond) ->UseManualTime(); diff --git a/cpp/benchmarks/iterator/iterator_benchmark.cu b/cpp/benchmarks/iterator/iterator_benchmark.cu index 6c3255328cb..04307f5db25 100644 --- a/cpp/benchmarks/iterator/iterator_benchmark.cu 
+++ b/cpp/benchmarks/iterator/iterator_benchmark.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,19 +14,21 @@ * limitations under the License. */ -#include +#include "../fixture/benchmark_fixture.hpp" +#include "../synchronization/synchronization.hpp" +#include +#include +#include #include -#include -#include "../fixture/benchmark_fixture.hpp" -#include "../synchronization/synchronization.hpp" +#include -#include // include iterator header -// for reduction tests -#include #include -#include + +#include + +#include template T random_int(T min, T max) @@ -48,7 +50,7 @@ inline auto reduce_by_cub(OutputIterator result, InputIterator d_in, int num_ite nullptr, temp_storage_bytes, d_in, result, num_items, cudf::DeviceSum{}, init); // Allocate temporary storage - rmm::device_buffer d_temp_storage(temp_storage_bytes); + rmm::device_buffer d_temp_storage(temp_storage_bytes, rmm::cuda_stream_default); // Run reduction cub::DeviceReduce::Reduce( @@ -59,7 +61,7 @@ inline auto reduce_by_cub(OutputIterator result, InputIterator d_in, int num_ite // ----------------------------------------------------------------------------- template -void raw_stream_bench_cub(cudf::column_view &col, rmm::device_vector &result) +void raw_stream_bench_cub(cudf::column_view &col, rmm::device_uvector &result) { // std::cout << "raw stream cub: " << "\t"; @@ -71,7 +73,7 @@ void raw_stream_bench_cub(cudf::column_view &col, rmm::device_vector &result) }; template -void iterator_bench_cub(cudf::column_view &col, rmm::device_vector &result) +void iterator_bench_cub(cudf::column_view &col, rmm::device_uvector &result) { // std::cout << "iterator cub " << ( (has_null) ? 
": " : ": " ) << "\t"; @@ -89,7 +91,7 @@ void iterator_bench_cub(cudf::column_view &col, rmm::device_vector &result) // ----------------------------------------------------------------------------- template -void raw_stream_bench_thrust(cudf::column_view &col, rmm::device_vector &result) +void raw_stream_bench_thrust(cudf::column_view &col, rmm::device_uvector &result) { // std::cout << "raw stream thust: " << "\t\t"; @@ -100,7 +102,7 @@ void raw_stream_bench_thrust(cudf::column_view &col, rmm::device_vector &resu } template -void iterator_bench_thrust(cudf::column_view &col, rmm::device_vector &result) +void iterator_bench_thrust(cudf::column_view &col, rmm::device_uvector &result) { // std::cout << "iterator thust " << ( (has_null) ? ": " : ": " ) << "\t"; @@ -131,7 +133,8 @@ void BM_iterator(benchmark::State &state) cudf::test::fixed_width_column_wrapper wrap_hasnull_F(num_gen, num_gen + column_size); cudf::column_view hasnull_F = wrap_hasnull_F; - rmm::device_vector dev_result(1, T{0}); + // Initialize dev_result to false + auto dev_result = cudf::detail::make_zeroed_device_uvector_sync(1); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 if (cub_or_thrust) { @@ -163,7 +166,7 @@ __device__ thrust::pair operator+(thrust::pair lhs, thrust::pa // ----------------------------------------------------------------------------- template void pair_iterator_bench_cub(cudf::column_view &col, - rmm::device_vector> &result) + rmm::device_uvector> &result) { thrust::pair init{0, false}; auto d_col = cudf::column_device_view::create(col); @@ -174,7 +177,7 @@ void pair_iterator_bench_cub(cudf::column_view &col, template void pair_iterator_bench_thrust(cudf::column_view &col, - rmm::device_vector> &result) + rmm::device_uvector> &result) { thrust::pair init{0, false}; auto d_col = cudf::column_device_view::create(col); @@ -198,7 +201,8 @@ void BM_pair_iterator(benchmark::State &state) cudf::column_view hasnull_F = wrap_hasnull_F; 
cudf::column_view hasnull_T = wrap_hasnull_T; - rmm::device_vector> dev_result(1, {T{0}, false}); + // Initialize dev_result to false + auto dev_result = cudf::detail::make_zeroed_device_uvector_sync>(1); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 if (cub_or_thrust) { diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh index 79cb2d3e44d..285a9241a26 100644 --- a/cpp/benchmarks/join/generate_input_tables.cuh +++ b/cpp/benchmarks/join/generate_input_tables.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,18 +14,22 @@ * limitations under the License. */ -#ifndef __GENERATE_INPUT_TABLES_CUH -#define __GENERATE_INPUT_TABLES_CUH +#pragma once + +#include +#include + +#include +#include -#include -#include #include #include #include -#include -#include -#include +#include +#include + +#include __global__ static void init_curand(curandState* state, const int nstates) { @@ -188,64 +192,63 @@ void generate_input_tables(key_type* const build_tbl, const int num_states = num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size; - rmm::device_vector devStates(num_states); + rmm::device_uvector devStates(num_states, rmm::cuda_stream_default); - init_curand<<<(num_states - 1) / block_size + 1, block_size>>>(devStates.data().get(), - num_states); + init_curand<<<(num_states - 1) / block_size + 1, block_size>>>(devStates.data(), num_states); CHECK_CUDA(0); - rmm::device_vector build_tbl_sorted(build_tbl_size); - size_type lottery_size = rand_max < std::numeric_limits::max() - 1 ? 
rand_max + 1 : rand_max; - rmm::device_vector lottery(lottery_size); + rmm::device_uvector lottery(lottery_size, rmm::cuda_stream_default); - if (uniq_build_tbl_keys) { thrust::sequence(thrust::device, lottery.begin(), lottery.end(), 0); } + if (uniq_build_tbl_keys) { + thrust::sequence(rmm::exec_policy(), lottery.begin(), lottery.end(), 0); + } init_build_tbl <<>>(build_tbl, build_tbl_size, rand_max, uniq_build_tbl_keys, - lottery.data().get(), + lottery.data(), lottery_size, - devStates.data().get(), + devStates.data(), num_states); CHECK_CUDA(0); - CUDA_TRY(cudaMemcpy(build_tbl_sorted.data().get(), + rmm::device_uvector build_tbl_sorted(build_tbl_size, rmm::cuda_stream_default); + + CUDA_TRY(cudaMemcpy(build_tbl_sorted.data(), build_tbl, build_tbl_size * sizeof(key_type), cudaMemcpyDeviceToDevice)); - thrust::sort(thrust::device, build_tbl_sorted.begin(), build_tbl_sorted.end()); + thrust::sort(rmm::exec_policy(), build_tbl_sorted.begin(), build_tbl_sorted.end()); // Exclude keys used in build table from lottery thrust::counting_iterator first_lottery_elem(0); thrust::counting_iterator last_lottery_elem = first_lottery_elem + lottery_size; - key_type* lottery_end = thrust::set_difference(thrust::device, + key_type* lottery_end = thrust::set_difference(rmm::exec_policy(), first_lottery_elem, last_lottery_elem, build_tbl_sorted.begin(), build_tbl_sorted.end(), - lottery.data().get()); + lottery.data()); - lottery_size = thrust::distance(lottery.data().get(), lottery_end); + lottery_size = thrust::distance(lottery.data(), lottery_end); init_probe_tbl <<>>(probe_tbl, probe_tbl_size, build_tbl, build_tbl_size, - lottery.data().get(), + lottery.data(), lottery_size, selectivity, - devStates.data().get(), + devStates.data(), num_states); CHECK_CUDA(0); } - -#endif // __GENERATE_INPUT_TABLES_CUH diff --git a/cpp/benchmarks/join/join_benchmark.cu b/cpp/benchmarks/join/join_benchmark.cu index fa6afdd908c..a7c109db9b4 100644 --- a/cpp/benchmarks/join/join_benchmark.cu +++ 
b/cpp/benchmarks/join/join_benchmark.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,8 +37,8 @@ template class Join : public cudf::benchmark { }; -template -static void BM_join(benchmark::State &state) +template +static void BM_join(benchmark::State& state, Join JoinFunc) { const cudf::size_type build_table_size{(cudf::size_type)state.range(0)}; const cudf::size_type probe_table_size{(cudf::size_type)state.range(1)}; @@ -103,22 +103,71 @@ static void BM_join(benchmark::State &state) // Benchmark the inner join operation for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); - auto result = cudf::inner_join( + auto result = JoinFunc( probe_table, build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL); } } -#define JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ - BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ - (::benchmark::State & st) { BM_join(st); } +#define JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + std::vector const& left_on, \ + std::vector const& right_on, \ + cudf::null_equality compare_nulls) { \ + return cudf::inner_join(left, right, left_on, right_on, compare_nulls); \ + }; \ + BM_join(st, join); \ + } JOIN_BENCHMARK_DEFINE(join_32bit, int32_t, int32_t, false); JOIN_BENCHMARK_DEFINE(join_64bit, int64_t, int64_t, false); JOIN_BENCHMARK_DEFINE(join_32bit_nulls, int32_t, int32_t, true); JOIN_BENCHMARK_DEFINE(join_64bit_nulls, int64_t, int64_t, true); +#define LEFT_ANTI_JOIN_BENCHMARK_DEFINE(name, key_type, 
payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + std::vector const& left_on, \ + std::vector const& right_on, \ + cudf::null_equality compare_nulls) { \ + return cudf::left_anti_join(left, right, left_on, right_on, compare_nulls); \ + }; \ + BM_join(st, join); \ + } + +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit, int32_t, int32_t, false); +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit, int64_t, int64_t, false); +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit_nulls, int32_t, int32_t, true); +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit_nulls, int64_t, int64_t, true); + +#define LEFT_SEMI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + std::vector const& left_on, \ + std::vector const& right_on, \ + cudf::null_equality compare_nulls) { \ + return cudf::left_semi_join(left, right, left_on, right_on, compare_nulls); \ + }; \ + BM_join(st, join); \ + } + +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_32bit, int32_t, int32_t, false); +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_64bit, int64_t, int64_t, false); +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_32bit_nulls, int32_t, int32_t, true); +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_64bit_nulls, int64_t, int64_t, true); + +// join ----------------------------------------------------------------------- BENCHMARK_REGISTER_F(Join, join_32bit) ->Unit(benchmark::kMillisecond) ->Args({100'000, 100'000}) @@ -154,3 +203,77 @@ BENCHMARK_REGISTER_F(Join, join_64bit_nulls) ->Args({50'000'000, 50'000'000}) ->Args({40'000'000, 120'000'000}) ->UseManualTime(); + +// left anti-join 
------------------------------------------------------------- +BENCHMARK_REGISTER_F(Join, left_anti_join_32bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->Args({10'000'000, 10'000'000}) + ->Args({10'000'000, 40'000'000}) + ->Args({10'000'000, 100'000'000}) + ->Args({100'000'000, 100'000'000}) + ->Args({80'000'000, 240'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_anti_join_64bit) + ->Unit(benchmark::kMillisecond) + ->Args({50'000'000, 50'000'000}) + ->Args({40'000'000, 120'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_anti_join_32bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->Args({10'000'000, 10'000'000}) + ->Args({10'000'000, 40'000'000}) + ->Args({10'000'000, 100'000'000}) + ->Args({100'000'000, 100'000'000}) + ->Args({80'000'000, 240'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_anti_join_64bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({50'000'000, 50'000'000}) + ->Args({40'000'000, 120'000'000}) + ->UseManualTime(); + +// left semi-join ------------------------------------------------------------- +BENCHMARK_REGISTER_F(Join, left_semi_join_32bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->Args({10'000'000, 10'000'000}) + ->Args({10'000'000, 40'000'000}) + ->Args({10'000'000, 100'000'000}) + ->Args({100'000'000, 100'000'000}) + ->Args({80'000'000, 240'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_semi_join_64bit) + ->Unit(benchmark::kMillisecond) + ->Args({50'000'000, 50'000'000}) + ->Args({40'000'000, 120'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_semi_join_32bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + 
->Args({10'000'000, 10'000'000}) + ->Args({10'000'000, 40'000'000}) + ->Args({10'000'000, 100'000'000}) + ->Args({100'000'000, 100'000'000}) + ->Args({80'000'000, 240'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_semi_join_64bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({50'000'000, 50'000'000}) + ->Args({40'000'000, 120'000'000}) + ->UseManualTime(); diff --git a/cpp/benchmarks/merge/merge_benchmark.cpp b/cpp/benchmarks/merge/merge_benchmark.cpp index 13eb284a903..1af0fcbb237 100644 --- a/cpp/benchmarks/merge/merge_benchmark.cpp +++ b/cpp/benchmarks/merge/merge_benchmark.cpp @@ -88,7 +88,7 @@ void BM_merge(benchmark::State& state) #define MBM_BENCHMARK_DEFINE(name) \ BENCHMARK_DEFINE_F(Merge, name)(::benchmark::State & state) { BM_merge(state); } \ BENCHMARK_REGISTER_F(Merge, name) \ - ->Unit(benchmark::kNanosecond) \ + ->Unit(benchmark::kMillisecond) \ ->UseManualTime() \ ->RangeMultiplier(2) \ ->Ranges({{2, 128}}); diff --git a/cpp/benchmarks/quantiles/quantiles_benchmark.cpp b/cpp/benchmarks/quantiles/quantiles_benchmark.cpp new file mode 100644 index 00000000000..fa602304dec --- /dev/null +++ b/cpp/benchmarks/quantiles/quantiles_benchmark.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +class Quantiles : public cudf::benchmark { +}; + +static void BM_quantiles(benchmark::State& state, bool nulls) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + const cudf::size_type n_rows{(cudf::size_type)state.range(0)}; + const cudf::size_type n_cols{(cudf::size_type)state.range(1)}; + const cudf::size_type n_quantiles{(cudf::size_type)state.range(2)}; + + // Create columns with values in the range [0,100) + std::vector columns; + columns.reserve(n_cols); + std::generate_n(std::back_inserter(columns), n_cols, [&, n_rows]() { + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + if (!nulls) return column_wrapper(elements, elements + n_rows); + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 100 == 0 ? false : true; }); + return column_wrapper(elements, elements + n_rows, valids); + }); + + // Create column views + auto column_views = std::vector(columns.begin(), columns.end()); + + // Create table view + auto input = cudf::table_view(column_views); + + std::vector q(n_quantiles); + thrust::tabulate( + thrust::seq, q.begin(), q.end(), [n_quantiles](auto i) { return i * (1.0f / n_quantiles); }); + + for (auto _ : state) { + cuda_event_timer raii(state, true, rmm::cuda_stream_default); + + auto result = cudf::quantiles(input, q); + // auto result = (stable) ? 
cudf::stable_sorted_order(input) : cudf::sorted_order(input); + } +} + +#define QUANTILES_BENCHMARK_DEFINE(name, nulls) \ + BENCHMARK_DEFINE_F(Quantiles, name) \ + (::benchmark::State & st) { BM_quantiles(st, nulls); } \ + BENCHMARK_REGISTER_F(Quantiles, name) \ + ->RangeMultiplier(4) \ + ->Ranges({{1 << 16, 1 << 26}, {1, 8}, {1, 12}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +QUANTILES_BENCHMARK_DEFINE(no_nulls, false) +QUANTILES_BENCHMARK_DEFINE(nulls, true) diff --git a/cpp/benchmarks/replace/clamp_benchmark.cpp b/cpp/benchmarks/replace/clamp_benchmark.cpp new file mode 100644 index 00000000000..f897b9d82cc --- /dev/null +++ b/cpp/benchmarks/replace/clamp_benchmark.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +class ReplaceClamp : public cudf::benchmark { +}; + +template +static void BM_reduction_scan(benchmark::State& state, bool include_nulls) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + auto const dtype = cudf::type_to_id(); + auto const table = create_random_table({dtype}, 1, row_count{n_rows}); + if (!include_nulls) { table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); } + cudf::column_view input(table->view().column(0)); + + auto [low_scalar, high_scalar] = cudf::minmax(input); + + // set the clamps 2 in from the min and max + { + using ScalarType = cudf::scalar_type_t; + auto lvalue = static_cast(low_scalar.get()); + auto hvalue = static_cast(high_scalar.get()); + + // super heavy clamp + auto mid = lvalue->value() + (hvalue->value() - lvalue->value()) / 2; + lvalue->set_value(mid - 10); + hvalue->set_value(mid + 10); + } + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = cudf::clamp(input, *low_scalar, *high_scalar); + } +} + +#define CLAMP_BENCHMARK_DEFINE(name, type, nulls) \ + BENCHMARK_DEFINE_F(ReplaceClamp, name) \ + (::benchmark::State & state) { BM_reduction_scan(state, nulls); } \ + BENCHMARK_REGISTER_F(ReplaceClamp, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +CLAMP_BENCHMARK_DEFINE(int8_no_nulls, int8_t, false); +CLAMP_BENCHMARK_DEFINE(int32_no_nulls, int32_t, false); +CLAMP_BENCHMARK_DEFINE(uint64_no_nulls, uint64_t, false); +CLAMP_BENCHMARK_DEFINE(float_no_nulls, float, false); +CLAMP_BENCHMARK_DEFINE(int16_nulls, int16_t, true); +CLAMP_BENCHMARK_DEFINE(uint32_nulls, uint32_t, true); +CLAMP_BENCHMARK_DEFINE(double_nulls, double, true); diff --git a/cpp/benchmarks/search/search_benchmark.cu b/cpp/benchmarks/search/search_benchmark.cpp similarity 
index 63% rename from cpp/benchmarks/search/search_benchmark.cu rename to cpp/benchmarks/search/search_benchmark.cpp index 7b4b8060514..7fb196fb500 100644 --- a/cpp/benchmarks/search/search_benchmark.cu +++ b/cpp/benchmarks/search/search_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,35 +31,6 @@ class Search : public cudf::benchmark { }; -void BM_non_null_column(benchmark::State& state) -{ - const cudf::size_type column_size{(cudf::size_type)state.range(0)}; - const cudf::size_type values_size = column_size; - - auto col_data_it = cudf::detail::make_counting_transform_iterator( - 0, [=](cudf::size_type row) { return static_cast(row); }); - auto val_data_it = cudf::detail::make_counting_transform_iterator( - 0, [=](cudf::size_type row) { return static_cast(values_size - row); }); - - cudf::test::fixed_width_column_wrapper column(col_data_it, col_data_it + column_size); - cudf::test::fixed_width_column_wrapper values(val_data_it, val_data_it + values_size); - - for (auto _ : state) { - cuda_event_timer timer(state, true); - auto col = cudf::upper_bound(cudf::table_view({column}), - cudf::table_view({values}), - {cudf::order::ASCENDING}, - {cudf::null_order::BEFORE}); - } -} - -BENCHMARK_DEFINE_F(Search, AllValidColumn)(::benchmark::State& state) { BM_non_null_column(state); } - -BENCHMARK_REGISTER_F(Search, AllValidColumn) - ->UseManualTime() - ->Unit(benchmark::kMillisecond) - ->Arg(100000000); - auto make_validity_iter() { static constexpr int r_min = 1; @@ -71,7 +42,7 @@ auto make_validity_iter() 0, [mod_base](auto row) { return (row % mod_base) > 0; }); } -void BM_nullable_column(benchmark::State& state) +void BM_column(benchmark::State& state, bool nulls) { const cudf::size_type column_size{(cudf::size_type)state.range(0)}; const cudf::size_type 
values_size = column_size; @@ -81,25 +52,39 @@ void BM_nullable_column(benchmark::State& state) auto val_data_it = cudf::detail::make_counting_transform_iterator( 0, [=](cudf::size_type row) { return static_cast(values_size - row); }); - cudf::test::fixed_width_column_wrapper column( - col_data_it, col_data_it + column_size, make_validity_iter()); - cudf::test::fixed_width_column_wrapper values( - val_data_it, val_data_it + values_size, make_validity_iter()); - - auto sorted = cudf::sort(cudf::table_view({column})); + auto column = [&]() { + return nulls ? cudf::test::fixed_width_column_wrapper( + col_data_it, col_data_it + column_size, make_validity_iter()) + : cudf::test::fixed_width_column_wrapper(col_data_it, + col_data_it + column_size); + }(); + auto values = [&]() { + return nulls ? cudf::test::fixed_width_column_wrapper( + val_data_it, val_data_it + values_size, make_validity_iter()) + : cudf::test::fixed_width_column_wrapper(val_data_it, + val_data_it + values_size); + }(); + + auto data_table = cudf::sort(cudf::table_view({column})); for (auto _ : state) { cuda_event_timer timer(state, true); - auto col = cudf::upper_bound(sorted->view(), + auto col = cudf::upper_bound(data_table->view(), cudf::table_view({values}), {cudf::order::ASCENDING}, {cudf::null_order::BEFORE}); } } -BENCHMARK_DEFINE_F(Search, NullableColumn)(::benchmark::State& state) { BM_nullable_column(state); } +BENCHMARK_DEFINE_F(Search, Column_AllValid)(::benchmark::State& state) { BM_column(state, false); } +BENCHMARK_DEFINE_F(Search, Column_Nulls)(::benchmark::State& state) { BM_column(state, true); } + +BENCHMARK_REGISTER_F(Search, Column_AllValid) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(100000000); -BENCHMARK_REGISTER_F(Search, NullableColumn) +BENCHMARK_REGISTER_F(Search, Column_Nulls) ->UseManualTime() ->Unit(benchmark::kMillisecond) ->Arg(100000000); @@ -153,3 +138,53 @@ BENCHMARK_REGISTER_F(Search, Table) ->UseManualTime() ->Unit(benchmark::kMillisecond) 
->Apply(CustomArguments); + +void BM_contains(benchmark::State& state, bool nulls) +{ + const cudf::size_type column_size{(cudf::size_type)state.range(0)}; + const cudf::size_type values_size = column_size; + + auto col_data_it = cudf::detail::make_counting_transform_iterator( + 0, [=](cudf::size_type row) { return static_cast(row); }); + auto val_data_it = cudf::detail::make_counting_transform_iterator( + 0, [=](cudf::size_type row) { return static_cast(values_size - row); }); + + auto column = [&]() { + return nulls ? cudf::test::fixed_width_column_wrapper( + col_data_it, col_data_it + column_size, make_validity_iter()) + : cudf::test::fixed_width_column_wrapper(col_data_it, + col_data_it + column_size); + }(); + auto values = [&]() { + return nulls ? cudf::test::fixed_width_column_wrapper( + val_data_it, val_data_it + values_size, make_validity_iter()) + : cudf::test::fixed_width_column_wrapper(val_data_it, + val_data_it + values_size); + }(); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto col = cudf::contains(column, values); + } +} + +BENCHMARK_DEFINE_F(Search, ColumnContains_AllValid)(::benchmark::State& state) +{ + BM_contains(state, false); +} +BENCHMARK_DEFINE_F(Search, ColumnContains_Nulls)(::benchmark::State& state) +{ + BM_contains(state, true); +} + +BENCHMARK_REGISTER_F(Search, ColumnContains_AllValid) + ->RangeMultiplier(8) + ->Ranges({{1 << 10, 1 << 26}}) + ->UseManualTime() + ->Unit(benchmark::kMillisecond); + +BENCHMARK_REGISTER_F(Search, ColumnContains_Nulls) + ->RangeMultiplier(8) + ->Ranges({{1 << 10, 1 << 26}}) + ->UseManualTime() + ->Unit(benchmark::kMillisecond); diff --git a/cpp/benchmarks/sort/rank_benchmark.cpp b/cpp/benchmarks/sort/rank_benchmark.cpp new file mode 100644 index 00000000000..60be95b9112 --- /dev/null +++ b/cpp/benchmarks/sort/rank_benchmark.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +class Rank : public cudf::benchmark { +}; + +static void BM_rank(benchmark::State& state, bool nulls) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + const cudf::size_type n_rows{(cudf::size_type)state.range(0)}; + + // Create columns with values in the range [0,100) + column_wrapper input = [&, n_rows]() { + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + if (!nulls) return column_wrapper(elements, elements + n_rows); + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 100 == 0 ? false : true; }); + return column_wrapper(elements, elements + n_rows, valids); + }(); + + for (auto _ : state) { + cuda_event_timer raii(state, true, rmm::cuda_stream_default); + + auto result = cudf::rank(input, + cudf::rank_method::FIRST, + cudf::order::ASCENDING, + nulls ? 
cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + false); + } +} + +#define RANK_BENCHMARK_DEFINE(name, nulls) \ + BENCHMARK_DEFINE_F(Rank, name) \ + (::benchmark::State & st) { BM_rank(st, nulls); } \ + BENCHMARK_REGISTER_F(Rank, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 26}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +RANK_BENCHMARK_DEFINE(no_nulls, false) +RANK_BENCHMARK_DEFINE(nulls, true) diff --git a/cpp/benchmarks/sort/sort_benchmark.cpp b/cpp/benchmarks/sort/sort_benchmark.cpp index fb74469e7c0..fe68ddd0051 100644 --- a/cpp/benchmarks/sort/sort_benchmark.cpp +++ b/cpp/benchmarks/sort/sort_benchmark.cpp @@ -61,7 +61,7 @@ static void BM_sort(benchmark::State& state, bool nulls) auto input = cudf::table_view(column_views); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); auto result = (stable) ? cudf::stable_sorted_order(input) : cudf::sorted_order(input); } diff --git a/cpp/benchmarks/sort/sort_strings_benchmark.cpp b/cpp/benchmarks/sort/sort_strings_benchmark.cpp index 54e85b7ea8c..f5effcafcfb 100644 --- a/cpp/benchmarks/sort/sort_strings_benchmark.cpp +++ b/cpp/benchmarks/sort/sort_strings_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,7 +32,7 @@ static void BM_sort(benchmark::State& state) auto const table = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); cudf::sort(table->view()); } } diff --git a/cpp/benchmarks/string/case_benchmark.cpp b/cpp/benchmarks/string/case_benchmark.cpp index 9c1c81da22a..508ae49e093 100644 --- a/cpp/benchmarks/string/case_benchmark.cpp +++ b/cpp/benchmarks/string/case_benchmark.cpp @@ -32,7 +32,7 @@ static void BM_case(benchmark::State& state) cudf::strings_column_view input(table->view().column(0)); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); cudf::strings::to_lower(input); } diff --git a/cpp/benchmarks/string/combine_benchmark.cpp b/cpp/benchmarks/string/combine_benchmark.cpp index 2a5013a9ae7..7dabd32e874 100644 --- a/cpp/benchmarks/string/combine_benchmark.cpp +++ b/cpp/benchmarks/string/combine_benchmark.cpp @@ -43,7 +43,7 @@ static void BM_combine(benchmark::State& state) cudf::string_scalar separator("+"); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); cudf::strings::concatenate(table->view(), separator); } diff --git a/cpp/benchmarks/string/contains_benchmark.cpp b/cpp/benchmarks/string/contains_benchmark.cpp index 1a2ac8ad602..79bdda77634 100644 --- a/cpp/benchmarks/string/contains_benchmark.cpp +++ b/cpp/benchmarks/string/contains_benchmark.cpp @@ -35,7 +35,7 @@ static void BM_contains(benchmark::State& state, contains_type ct) cudf::strings_column_view input(table->view().column(0)); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); // contains_re(), matches_re(), and count_re() all have similar functions // with count_re() being the most regex intensive switch (ct) 
{ diff --git a/cpp/benchmarks/string/copy_benchmark.cpp b/cpp/benchmarks/string/copy_benchmark.cpp index af9f5b4fa4a..b49bc878ca7 100644 --- a/cpp/benchmarks/string/copy_benchmark.cpp +++ b/cpp/benchmarks/string/copy_benchmark.cpp @@ -54,7 +54,7 @@ static void BM_copy(benchmark::State& state, copy_type ct) host_map_data.end()); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); switch (ct) { case gather: cudf::gather(source->view(), index_map); break; case scatter: cudf::scatter(source->view(), index_map, target->view()); break; diff --git a/cpp/benchmarks/string/extract_benchmark.cpp b/cpp/benchmarks/string/extract_benchmark.cpp index dbae18dde3b..aa1e59a22bf 100644 --- a/cpp/benchmarks/string/extract_benchmark.cpp +++ b/cpp/benchmarks/string/extract_benchmark.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "string_bench_args.hpp" + #include #include #include @@ -23,43 +25,55 @@ #include #include -#include "string_bench_args.hpp" +#include class StringExtract : public cudf::benchmark { }; -static void BM_extract(benchmark::State& state, int re_instructions) +static void BM_extract(benchmark::State& state, int groups) { - cudf::size_type const n_rows{static_cast(state.range(0))}; - cudf::size_type const max_str_length{static_cast(state.range(1))}; - data_profile table_profile; - table_profile.set_distribution_params( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); - cudf::strings_column_view input(table->view().column(0)); - std::string const raw_pattern = - "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234" - "5678901234567890123456789012345678901234567890"; - std::string const pattern = "(" + raw_pattern.substr(0, re_instructions) + ")"; + auto const n_rows = static_cast(state.range(0)); + auto 
const n_length = static_cast(state.range(1)); + + std::default_random_engine generator; + std::uniform_int_distribution words_dist(0, 999); + + std::vector samples(100); // 100 unique rows of data to reuse + std::generate(samples.begin(), samples.end(), [&]() { + std::string row; // build a row of random tokens + while (static_cast(row.size()) < n_length) { + row += std::to_string(words_dist(generator)) + " "; + } + return row; + }); + + std::string pattern; + while (static_cast(pattern.size()) < groups) { pattern += "(\\d+) "; } + + std::uniform_int_distribution distribution(0, samples.size() - 1); + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto idx) { return samples.at(distribution(generator)); }); + cudf::test::strings_column_wrapper input(elements, elements + n_rows); + cudf::strings_column_view view(input); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); - auto results = cudf::strings::extract(input, pattern); + cuda_event_timer raii(state, true); + auto results = cudf::strings::extract(view, pattern); } - state.SetBytesProcessed(state.iterations() * input.chars_size()); + state.SetBytesProcessed(state.iterations() * view.chars_size()); } static void generate_bench_args(benchmark::internal::Benchmark* b) { - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 << 5; - int const max_rowlen = 1 << 13; - int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_multiplier = 8; + int const min_row_length = 1 << 5; + int const max_row_length = 1 << 13; + int const length_multiplier = 4; + generate_string_bench_args( + b, min_rows, max_rows, row_multiplier, min_row_length, max_row_length, length_multiplier); } #define STRINGS_BENCHMARK_DEFINE(name, instructions) \ @@ -70,6 +84,6 @@ static void 
generate_bench_args(benchmark::internal::Benchmark* b) ->UseManualTime() \ ->Unit(benchmark::kMillisecond); -STRINGS_BENCHMARK_DEFINE(small, 4) -STRINGS_BENCHMARK_DEFINE(medium, 48) -STRINGS_BENCHMARK_DEFINE(large, 128) +STRINGS_BENCHMARK_DEFINE(small, 2) +STRINGS_BENCHMARK_DEFINE(medium, 10) +STRINGS_BENCHMARK_DEFINE(large, 30) diff --git a/cpp/benchmarks/string/factory_benchmark.cu b/cpp/benchmarks/string/factory_benchmark.cu index 6c5dceffaa8..bae08431b51 100644 --- a/cpp/benchmarks/string/factory_benchmark.cu +++ b/cpp/benchmarks/string/factory_benchmark.cu @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -55,7 +56,7 @@ static void BM_factory(benchmark::State& state) auto const table = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); auto d_column = cudf::column_device_view::create(table->view().column(0)); - rmm::device_vector pairs(d_column->size()); + rmm::device_uvector pairs(d_column->size(), rmm::cuda_stream_default); thrust::transform(thrust::device, d_column->pair_begin(), d_column->pair_end(), @@ -63,7 +64,7 @@ static void BM_factory(benchmark::State& state) string_view_to_pair{}); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); cudf::make_strings_column(pairs); } diff --git a/cpp/benchmarks/string/filter_benchmark.cpp b/cpp/benchmarks/string/filter_benchmark.cpp index 123c5597df9..97228122c42 100644 --- a/cpp/benchmarks/string/filter_benchmark.cpp +++ b/cpp/benchmarks/string/filter_benchmark.cpp @@ -50,7 +50,7 @@ static void BM_filter_chars(benchmark::State& state, FilterAPI api) {cudf::char_utf8{'a'}, cudf::char_utf8{'c'}}}; for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); switch (api) { case filter: cudf::strings::filter_characters_of_type(input, types); break; case filter_chars: cudf::strings::filter_characters(input, filter_table); break; 
@@ -73,7 +73,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { // avoid generating combinations that exceed the cudf column limit size_t total_chars = static_cast(row_count) * rowlen; - if (total_chars < std::numeric_limits::max()) { + if (total_chars < static_cast(std::numeric_limits::max())) { b->Args({row_count, rowlen}); } } diff --git a/cpp/benchmarks/string/find_benchmark.cpp b/cpp/benchmarks/string/find_benchmark.cpp index 200527d606e..8e570a55440 100644 --- a/cpp/benchmarks/string/find_benchmark.cpp +++ b/cpp/benchmarks/string/find_benchmark.cpp @@ -46,7 +46,7 @@ static void BM_find_scalar(benchmark::State& state, FindAPI find_api) cudf::test::strings_column_wrapper targets({"+", "-"}); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); switch (find_api) { case find: cudf::strings::find(input, target); break; case find_multi: @@ -73,7 +73,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { // avoid generating combinations that exceed the cudf column limit size_t total_chars = static_cast(row_count) * rowlen; - if (total_chars < std::numeric_limits::max()) { + if (total_chars < static_cast(std::numeric_limits::max())) { b->Args({row_count, rowlen}); } } diff --git a/cpp/benchmarks/string/json_benchmark.cpp b/cpp/benchmarks/string/json_benchmark.cpp index 6fb6a07a8d0..c6a6b757951 100644 --- a/cpp/benchmarks/string/json_benchmark.cpp +++ b/cpp/benchmarks/string/json_benchmark.cpp @@ -113,7 +113,7 @@ static void BM_case(benchmark::State& state, QueryArg&&... 
query_arg) std::string json_path(query_arg...); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true); auto result = cudf::strings::get_json_object(scv, json_path); cudaStreamSynchronize(0); } diff --git a/cpp/benchmarks/string/replace_benchmark.cpp b/cpp/benchmarks/string/replace_benchmark.cpp index 968b8f5abb0..0d785fd25aa 100644 --- a/cpp/benchmarks/string/replace_benchmark.cpp +++ b/cpp/benchmarks/string/replace_benchmark.cpp @@ -49,7 +49,7 @@ static void BM_replace(benchmark::State& state, replace_type rt) cudf::test::strings_column_wrapper repls({"", ""}); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); switch (rt) { case scalar: cudf::strings::replace(input, target, repl); break; case slice: cudf::strings::replace_slice(input, repl, 1, 10); break; diff --git a/cpp/benchmarks/string/replace_re_benchmark.cpp b/cpp/benchmarks/string/replace_re_benchmark.cpp index 616e2c0f22c..18ec28371e3 100644 --- a/cpp/benchmarks/string/replace_re_benchmark.cpp +++ b/cpp/benchmarks/string/replace_re_benchmark.cpp @@ -43,7 +43,7 @@ static void BM_replace(benchmark::State& state, replace_type rt) cudf::test::strings_column_wrapper repls({"#", ""}); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); switch (rt) { case replace_type::replace_re: // contains_re and matches_re use the same main logic cudf::strings::replace_re(input, "\\d+"); diff --git a/cpp/benchmarks/string/split_benchmark.cpp b/cpp/benchmarks/string/split_benchmark.cpp index 35bedb1b767..cab477754a6 100644 --- a/cpp/benchmarks/string/split_benchmark.cpp +++ b/cpp/benchmarks/string/split_benchmark.cpp @@ -44,7 +44,7 @@ static void BM_split(benchmark::State& state, split_type rt) cudf::string_scalar target("+"); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, 
rmm::cuda_stream_default); switch (rt) { case split: cudf::strings::split(input, target); break; case split_ws: cudf::strings::split(input); break; @@ -68,7 +68,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { // avoid generating combinations that exceed the cudf column limit size_t total_chars = static_cast(row_count) * rowlen; - if (total_chars < std::numeric_limits::max()) { + if (total_chars < static_cast(std::numeric_limits::max())) { b->Args({row_count, rowlen}); } } diff --git a/cpp/benchmarks/string/string_bench_args.hpp b/cpp/benchmarks/string/string_bench_args.hpp index 05ed1bf5b33..92a46374438 100644 --- a/cpp/benchmarks/string/string_bench_args.hpp +++ b/cpp/benchmarks/string/string_bench_args.hpp @@ -48,7 +48,7 @@ inline void generate_string_bench_args(benchmark::internal::Benchmark* b, for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= rowlen_mult) { // avoid generating combinations that exceed the cudf column limit size_t total_chars = static_cast(row_count) * rowlen; - if (total_chars < std::numeric_limits::max()) { + if (total_chars < static_cast(std::numeric_limits::max())) { b->Args({row_count, rowlen}); } } diff --git a/cpp/benchmarks/string/substring_benchmark.cpp b/cpp/benchmarks/string/substring_benchmark.cpp index d47c42e45be..e8a66f7b323 100644 --- a/cpp/benchmarks/string/substring_benchmark.cpp +++ b/cpp/benchmarks/string/substring_benchmark.cpp @@ -54,7 +54,7 @@ static void BM_substring(benchmark::State& state, substring_type rt) cudf::test::strings_column_wrapper delimiters(delim_itr, delim_itr + n_rows); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); switch (rt) { case position: cudf::strings::slice_strings(input, 1, max_str_length / 2); break; case multi_position: cudf::strings::slice_strings(input, starts, stops); break; diff --git 
a/cpp/benchmarks/string/translate_benchmark.cpp b/cpp/benchmarks/string/translate_benchmark.cpp index c49a986d744..49396b0ce71 100644 --- a/cpp/benchmarks/string/translate_benchmark.cpp +++ b/cpp/benchmarks/string/translate_benchmark.cpp @@ -54,7 +54,7 @@ static void BM_translate(benchmark::State& state, int entry_count) }); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); cudf::strings::translate(input, entries); } diff --git a/cpp/benchmarks/string/url_decode_benchmark.cpp b/cpp/benchmarks/string/url_decode_benchmark.cpp index 26c23ea23b4..fbb99bf3e8f 100644 --- a/cpp/benchmarks/string/url_decode_benchmark.cpp +++ b/cpp/benchmarks/string/url_decode_benchmark.cpp @@ -80,7 +80,7 @@ void BM_url_decode(benchmark::State& state) auto strings_view = cudf::strings_column_view(column); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); auto result = cudf::strings::url_decode(strings_view); } diff --git a/cpp/benchmarks/text/ngrams_benchmark.cpp b/cpp/benchmarks/text/ngrams_benchmark.cpp index 1fe8e3b7f2e..52f55249631 100644 --- a/cpp/benchmarks/text/ngrams_benchmark.cpp +++ b/cpp/benchmarks/text/ngrams_benchmark.cpp @@ -43,7 +43,7 @@ static void BM_ngrams(benchmark::State& state, ngrams_type nt) cudf::strings_column_view input(table->view().column(0)); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true); switch (nt) { case ngrams_type::tokens: nvtext::generate_ngrams(input); break; case ngrams_type::characters: nvtext::generate_character_ngrams(input); break; diff --git a/cpp/benchmarks/text/normalize_benchmark.cpp b/cpp/benchmarks/text/normalize_benchmark.cpp index 32c4fb7dcde..f041547d021 100644 --- a/cpp/benchmarks/text/normalize_benchmark.cpp +++ b/cpp/benchmarks/text/normalize_benchmark.cpp @@ -41,7 +41,7 @@ static void BM_normalize(benchmark::State& state, bool 
to_lower) cudf::strings_column_view input(table->view().column(0)); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); nvtext::normalize_characters(input, to_lower); } @@ -60,7 +60,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { // avoid generating combinations that exceed the cudf column limit size_t total_chars = static_cast(row_count) * rowlen * 4; - if (total_chars < std::numeric_limits::max()) { + if (total_chars < static_cast(std::numeric_limits::max())) { b->Args({row_count, rowlen}); } } diff --git a/cpp/benchmarks/text/normalize_spaces_benchmark.cpp b/cpp/benchmarks/text/normalize_spaces_benchmark.cpp index dcabb0c225c..6260bb02c55 100644 --- a/cpp/benchmarks/text/normalize_spaces_benchmark.cpp +++ b/cpp/benchmarks/text/normalize_spaces_benchmark.cpp @@ -42,7 +42,7 @@ static void BM_normalize(benchmark::State& state) cudf::strings_column_view input(table->view().column(0)); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); nvtext::normalize_spaces(input); } diff --git a/cpp/benchmarks/text/replace_benchmark.cpp b/cpp/benchmarks/text/replace_benchmark.cpp index f5428aee225..8f6704ab1af 100644 --- a/cpp/benchmarks/text/replace_benchmark.cpp +++ b/cpp/benchmarks/text/replace_benchmark.cpp @@ -54,7 +54,7 @@ static void BM_replace(benchmark::State& state) cudf::test::strings_column_wrapper replacements({"1", "2", "7", "0"}); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true); nvtext::replace_tokens( view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements)); } diff --git a/cpp/benchmarks/text/tokenize_benchmark.cpp b/cpp/benchmarks/text/tokenize_benchmark.cpp index f9e742f0f31..cd6428a9406 100644 --- 
a/cpp/benchmarks/text/tokenize_benchmark.cpp +++ b/cpp/benchmarks/text/tokenize_benchmark.cpp @@ -31,7 +31,7 @@ class TextTokenize : public cudf::benchmark { }; -enum class tokenize_type { single, multi, count, count_multi, ngrams }; +enum class tokenize_type { single, multi, count, count_multi, ngrams, characters }; static void BM_tokenize(benchmark::State& state, tokenize_type tt) { @@ -46,13 +46,19 @@ static void BM_tokenize(benchmark::State& state, tokenize_type tt) cudf::test::strings_column_wrapper delimiters({" ", "+", "-"}); for (auto _ : state) { - cuda_event_timer raii(state, true, 0); + cuda_event_timer raii(state, true, rmm::cuda_stream_default); switch (tt) { - case tokenize_type::single: nvtext::tokenize(input); break; + case tokenize_type::single: + // single whitespace delimiter + nvtext::tokenize(input); + break; case tokenize_type::multi: nvtext::tokenize(input, cudf::strings_column_view(delimiters)); break; - case tokenize_type::count: nvtext::count_tokens(input); break; + case tokenize_type::count: + // single whitespace delimiter + nvtext::count_tokens(input); + break; case tokenize_type::count_multi: nvtext::count_tokens(input, cudf::strings_column_view(delimiters)); break; @@ -60,6 +66,10 @@ static void BM_tokenize(benchmark::State& state, tokenize_type tt) // default is bigrams nvtext::ngrams_tokenize(input); break; + case tokenize_type::characters: + // every character becomes a string + nvtext::character_tokenize(input); + break; } } @@ -90,3 +100,4 @@ NVTEXT_BENCHMARK_DEFINE(multi) NVTEXT_BENCHMARK_DEFINE(count) NVTEXT_BENCHMARK_DEFINE(count_multi) NVTEXT_BENCHMARK_DEFINE(ngrams) +NVTEXT_BENCHMARK_DEFINE(characters) diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu index 18ef5a1168e..b09a7911595 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu @@ -1,5 +1,5 @@ /* - * 
Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,26 +14,22 @@ * limitations under the License. */ +#include "../fixture/benchmark_fixture.hpp" +#include "../synchronization/synchronization.hpp" + +#include + #include #include +#include +#include #include #include -#include -#include -#include -#include -#include - -#include +#include +#include -#include -#include #include -#include "../fixture/benchmark_fixture.hpp" -#include "../synchronization/synchronization.hpp" - -using namespace cudf; enum DispatchingType { HOST_DISPATCHING, DEVICE_DISPATCHING, NO_DISPATCHING }; @@ -75,7 +71,7 @@ __global__ void no_dispatching_kernel(T** A, cudf::size_type n_rows, cudf::size_ // This is for HOST_DISPATCHING template -__global__ void host_dispatching_kernel(mutable_column_device_view source_column) +__global__ void host_dispatching_kernel(cudf::mutable_column_device_view source_column) { using F = Functor; T* A = source_column.data(); @@ -89,7 +85,7 @@ __global__ void host_dispatching_kernel(mutable_column_device_view source_column template struct ColumnHandle { template ())> - void operator()(mutable_column_device_view source_column, int work_per_thread) + void operator()(cudf::mutable_column_device_view source_column, int work_per_thread) { cudf::detail::grid_1d grid_config{source_column.size(), block_size}; int grid_size = grid_config.num_blocks; @@ -98,7 +94,7 @@ struct ColumnHandle { } template ())> - void operator()(mutable_column_device_view source_column, int work_per_thread) + void operator()(cudf::mutable_column_device_view source_column, int work_per_thread) { CUDF_FAIL("Invalid type to benchmark."); } @@ -112,14 +108,14 @@ struct ColumnHandle { template struct RowHandle { template ())> - __device__ void operator()(mutable_column_device_view source, cudf::size_type index) + __device__ 
void operator()(cudf::mutable_column_device_view source, cudf::size_type index) { using F = Functor; source.data()[index] = F::f(source.data()[index]); } template ())> - __device__ void operator()(mutable_column_device_view source, cudf::size_type index) + __device__ void operator()(cudf::mutable_column_device_view source, cudf::size_type index) { cudf_assert(false && "Unsupported type."); } @@ -127,7 +123,7 @@ struct RowHandle { // This is for DEVICE_DISPATCHING template -__global__ void device_dispatching_kernel(mutable_table_device_view source) +__global__ void device_dispatching_kernel(cudf::mutable_table_device_view source) { const cudf::size_type n_rows = source.num_rows(); cudf::size_type index = threadIdx.x + blockIdx.x * blockDim.x; @@ -142,7 +138,7 @@ __global__ void device_dispatching_kernel(mutable_table_device_view source) } template -void launch_kernel(mutable_table_view input, T** d_ptr, int work_per_thread) +void launch_kernel(cudf::mutable_table_view input, T** d_ptr, int work_per_thread) { const cudf::size_type n_rows = input.num_rows(); const cudf::size_type n_cols = input.num_columns(); @@ -153,12 +149,12 @@ void launch_kernel(mutable_table_view input, T** d_ptr, int work_per_thread) if (dispatching_type == HOST_DISPATCHING) { // std::vector v_stream(n_cols); for (int c = 0; c < n_cols; c++) { - auto d_column = mutable_column_device_view::create(input.column(c)); + auto d_column = cudf::mutable_column_device_view::create(input.column(c)); cudf::type_dispatcher( d_column->type(), ColumnHandle{}, *d_column, work_per_thread); } } else if (dispatching_type == DEVICE_DISPATCHING) { - auto d_table_view = mutable_table_device_view::create(input); + auto d_table_view = cudf::mutable_table_device_view::create(input); auto f = device_dispatching_kernel; // Launch the kernel f<<>>(*d_table_view); @@ -191,25 +187,26 @@ void type_dispatcher_benchmark(::benchmark::State& state) cudf::mutable_table_view source_table{source_columns}; // For no dispatching - 
std::vector> h_vec(n_cols, - rmm::device_vector(source_size, 0)); + std::vector h_vec(n_cols); std::vector h_vec_p(n_cols); - for (int c = 0; c < n_cols; c++) { h_vec_p[c] = h_vec[c].data().get(); } - rmm::device_vector d_vec(n_cols); + std::transform(h_vec.begin(), h_vec.end(), h_vec_p.begin(), [source_size](auto& col) { + col.resize(source_size * sizeof(TypeParam), rmm::cuda_stream_default); + return static_cast(col.data()); + }); + rmm::device_uvector d_vec(n_cols, rmm::cuda_stream_default); if (dispatching_type == NO_DISPATCHING) { CUDA_TRY(cudaMemcpy( - d_vec.data().get(), h_vec_p.data(), sizeof(TypeParam*) * n_cols, cudaMemcpyHostToDevice)); + d_vec.data(), h_vec_p.data(), sizeof(TypeParam*) * n_cols, cudaMemcpyHostToDevice)); } // Warm up - launch_kernel(source_table, d_vec.data().get(), work_per_thread); + launch_kernel(source_table, d_vec.data(), work_per_thread); CUDA_TRY(cudaDeviceSynchronize()); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - launch_kernel( - source_table, d_vec.data().get(), work_per_thread); + launch_kernel(source_table, d_vec.data(), work_per_thread); } state.SetBytesProcessed(static_cast(state.iterations()) * source_size * n_cols * 2 * diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index d05b4b347f1..eb1ade61440 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -40,7 +40,7 @@ function(jit_preprocess_files) -o ${CUDF_GENERATED_INCLUDE_DIR}/include/jit_preprocessed_files -i -m - -std=c++14 + -std=c++17 -remove-unused-globals -D__CUDACC_RTC__ -I${CUDF_SOURCE_DIR}/include diff --git a/cpp/cmake/Modules/SetGPUArchs.cmake b/cpp/cmake/Modules/SetGPUArchs.cmake index f09d5ead8e2..8ab3c14d671 100644 --- a/cpp/cmake/Modules/SetGPUArchs.cmake +++ b/cpp/cmake/Modules/SetGPUArchs.cmake @@ -38,16 +38,6 @@ if(NOT DEFINED CUDAToolkit_VERSION AND CMAKE_CUDA_COMPILER) 
unset(NVCC_OUT) endif() -if(CUDAToolkit_VERSION_MAJOR LESS 11) - list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "80") -endif() -if(CUDAToolkit_VERSION_MAJOR LESS 10) - list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "75") -endif() -if(CUDAToolkit_VERSION_MAJOR LESS 9) - list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "70") -endif() - if(${PROJECT_NAME}_BUILD_FOR_ALL_ARCHS) set(CMAKE_CUDA_ARCHITECTURES ${SUPPORTED_CUDA_ARCHITECTURES}) diff --git a/cpp/cmake/cudf-build-config.cmake.in b/cpp/cmake/cudf-build-config.cmake.in index ed1926f20f0..358c4377078 100644 --- a/cpp/cmake/cudf-build-config.cmake.in +++ b/cpp/cmake/cudf-build-config.cmake.in @@ -43,8 +43,6 @@ find_dependency(ZLIB) # add third party dependencies using CPM include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetCPM.cmake) -# find boost -include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_FindBoost.cmake) # find jitify include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetJitify.cmake) # find thrust/cub diff --git a/cpp/cmake/cudf-config.cmake.in b/cpp/cmake/cudf-config.cmake.in index 66c669851fa..86755696607 100644 --- a/cpp/cmake/cudf-config.cmake.in +++ b/cpp/cmake/cudf-config.cmake.in @@ -71,11 +71,6 @@ find_dependency(CUDAToolkit) find_dependency(Threads) find_dependency(ZLIB) -# Don't look for a Boost CMake configuration file because it adds the -# `-DBOOST_ALL_NO_LIB` and `-DBOOST_FILESYSTEM_DYN_LINK` compile defs -set(Boost_NO_BOOST_CMAKE ON) -find_dependency(Boost @CUDF_MIN_VERSION_Boost@ COMPONENTS filesystem) - find_dependency(Arrow @CUDF_VERSION_Arrow@) set(ArrowCUDA_DIR "${Arrow_DIR}") diff --git a/cpp/cmake/thirdparty/CUDF_FindBoost.cmake b/cpp/cmake/thirdparty/CUDF_FindBoost.cmake deleted file mode 100644 index fef393d7f20..00000000000 --- a/cpp/cmake/thirdparty/CUDF_FindBoost.cmake +++ /dev/null @@ -1,38 +0,0 @@ -#============================================================================= -# Copyright (c) 2020-2021, NVIDIA CORPORATION. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -#============================================================================= - -# Min version set to newest boost in Ubuntu bionic apt repositories -set(CUDF_MIN_VERSION_Boost 1.65.0) - -# Don't look for a Boost CMake configuration file because it adds the -# `-DBOOST_ALL_NO_LIB` and `-DBOOST_FILESYSTEM_DYN_LINK` compile defs -set(Boost_NO_BOOST_CMAKE ON) - -# TODO: Use CPMFindPackage to add or build Boost - -find_package(Boost ${CUDF_MIN_VERSION_Boost} QUIET MODULE COMPONENTS filesystem) - -message(VERBOSE "CUDF: Boost_FOUND: ${Boost_FOUND}") - -if(NOT Boost_FOUND) - message(FATAL_ERROR "CUDF: Boost not found, please check your settings.") -endif() - -message(VERBOSE "CUDF: Boost_LIBRARIES: ${Boost_LIBRARIES}") -message(VERBOSE "CUDF: Boost_INCLUDE_DIRS: ${Boost_INCLUDE_DIRS}") - -list(APPEND CUDF_CXX_DEFINITIONS BOOST_NO_CXX14_CONSTEXPR) -list(APPEND CUDF_CUDA_DEFINITIONS BOOST_NO_CXX14_CONSTEXPR) diff --git a/cpp/cmake/thirdparty/CUDF_GetCPM.cmake b/cpp/cmake/thirdparty/CUDF_GetCPM.cmake index 19c07933d42..ce2921f5954 100644 --- a/cpp/cmake/thirdparty/CUDF_GetCPM.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetCPM.cmake @@ -1,6 +1,8 @@ -set(CPM_DOWNLOAD_VERSION 3b404296b539e596f39421c4e92bc803b299d964) # v0.27.5 +set(CPM_DOWNLOAD_VERSION 7644c3a40fc7889f8dee53ce21e85dc390b883dc) # v0.32.1 if(CPM_SOURCE_CACHE) + # Expand relative path. 
This is important if the provided path contains a tilde (~) + get_filename_component(CPM_SOURCE_CACHE ${CPM_SOURCE_CACHE} ABSOLUTE) set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") elseif(DEFINED ENV{CPM_SOURCE_CACHE}) set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") @@ -12,7 +14,7 @@ if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION})) message(VERBOSE "CUDF: Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}") file( DOWNLOAD - https://raw.githubusercontent.com/TheLartians/CPM.cmake/${CPM_DOWNLOAD_VERSION}/cmake/CPM.cmake + https://raw.githubusercontent.com/cpm-cmake/CPM.cmake/${CPM_DOWNLOAD_VERSION}/cmake/CPM.cmake ${CPM_DOWNLOAD_LOCATION}) endif() diff --git a/cpp/cmake/thirdparty/CUDF_GetDLPack.cmake b/cpp/cmake/thirdparty/CUDF_GetDLPack.cmake index b41c6d3b8d2..349f75d604f 100644 --- a/cpp/cmake/thirdparty/CUDF_GetDLPack.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetDLPack.cmake @@ -36,6 +36,6 @@ function(find_and_configure_dlpack VERSION) set(DLPACK_INCLUDE_DIR "${dlpack_SOURCE_DIR}/include" PARENT_SCOPE) endfunction() -set(CUDF_MIN_VERSION_dlpack 0.3) +set(CUDF_MIN_VERSION_dlpack 0.5) find_and_configure_dlpack(${CUDF_MIN_VERSION_dlpack}) diff --git a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake index 136947674f9..b2861ae48c4 100644 --- a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake @@ -14,49 +14,37 @@ # limitations under the License. 
#============================================================================= -function(cudf_save_if_enabled var) - if(CUDF_${var}) - unset(${var} PARENT_SCOPE) - unset(${var} CACHE) - endif() -endfunction() - -function(cudf_restore_if_enabled var) - if(CUDF_${var}) - set(${var} ON CACHE INTERNAL "" FORCE) - endif() -endfunction() - function(find_and_configure_rmm VERSION) if(TARGET rmm::rmm) return() endif() + if(${VERSION} MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(MAJOR_AND_MINOR "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}") + else() + set(MAJOR_AND_MINOR "${VERSION}") + endif() + # Consumers have two options for local source builds: # 1. Pass `-D CPM_rmm_SOURCE=/path/to/rmm` to build a local RMM source tree # 2. Pass `-D CMAKE_PREFIX_PATH=/path/to/rmm/build` to use an existing local # RMM build directory as the install location for find_package(rmm) - cudf_save_if_enabled(BUILD_TESTS) - cudf_save_if_enabled(BUILD_BENCHMARKS) - CPMFindPackage(NAME rmm VERSION ${VERSION} GIT_REPOSITORY https://github.com/rapidsai/rmm.git - GIT_TAG branch-${VERSION} + GIT_TAG branch-${MAJOR_AND_MINOR} GIT_SHALLOW TRUE OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "CUDA_STATIC_RUNTIME ${CUDA_STATIC_RUNTIME}" "DISABLE_DEPRECATION_WARNING ${DISABLE_DEPRECATION_WARNING}" ) - cudf_restore_if_enabled(BUILD_TESTS) - cudf_restore_if_enabled(BUILD_BENCHMARKS) # Make sure consumers of cudf can also see rmm::rmm fix_cmake_global_defaults(rmm::rmm) endfunction() -set(CUDF_MIN_VERSION_rmm "${CUDF_VERSION_MAJOR}.${CUDF_VERSION_MINOR}") +set(CUDF_MIN_VERSION_rmm "${CUDF_VERSION_MAJOR}.${CUDF_VERSION_MINOR}.00") find_and_configure_rmm(${CUDF_MIN_VERSION_rmm}) diff --git a/cpp/cmake/thirdparty/CUDF_GetThrust.cmake b/cpp/cmake/thirdparty/CUDF_GetThrust.cmake index 5a304f234d2..343ade8664d 100644 --- a/cpp/cmake/thirdparty/CUDF_GetThrust.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetThrust.cmake @@ -15,18 +15,29 @@ #============================================================================= 
function(find_and_configure_thrust VERSION) + # We only want to set `UPDATE_DISCONNECTED` while + # the GIT tag hasn't moved from the last time we cloned + set(cpm_thrust_disconnect_update "UPDATE_DISCONNECTED TRUE") + set(CPM_THRUST_CURRENT_VERSION ${VERSION} CACHE STRING "version of thrust we checked out") + if(NOT VERSION VERSION_EQUAL CPM_THRUST_CURRENT_VERSION) + set(CPM_THRUST_CURRENT_VERSION ${VERSION} CACHE STRING "version of thrust we checked out" FORCE) + set(cpm_thrust_disconnect_update "") + endif() + CPMAddPackage(NAME Thrust VERSION ${VERSION} GIT_REPOSITORY https://github.com/NVIDIA/thrust.git GIT_TAG ${VERSION} GIT_SHALLOW TRUE - PATCH_COMMAND patch -p1 -N < ${CUDF_SOURCE_DIR}/cmake/thrust.patch || true) + ${cpm_thrust_disconnect_update} + PATCH_COMMAND patch --reject-file=- -p1 -N < ${CUDF_SOURCE_DIR}/cmake/thrust.patch || true + ) thrust_create_target(cudf::Thrust FROM_OPTIONS) set(THRUST_LIBRARY "cudf::Thrust" PARENT_SCOPE) set(Thrust_SOURCE_DIR "${Thrust_SOURCE_DIR}" PARENT_SCOPE) endfunction() -set(CUDF_MIN_VERSION_Thrust 1.10.0) +set(CUDF_MIN_VERSION_Thrust 1.12.0) find_and_configure_thrust(${CUDF_MIN_VERSION_Thrust}) diff --git a/cpp/cmake/thrust.patch b/cpp/cmake/thrust.patch index 3f876f7ffb7..2f9201d8ab4 100644 --- a/cpp/cmake/thrust.patch +++ b/cpp/cmake/thrust.patch @@ -42,3 +42,42 @@ index 1ffeef0..5e80800 100644 for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (ITEMS_PER_THREAD * tid + ITEM < num_remaining) +diff a/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/device/dispatch/dispatch_radix_sort.cuh +index 41eb1d2..f2893b4 100644 +--- a/cub/device/dispatch/dispatch_radix_sort.cuh ++++ b/cub/device/dispatch/dispatch_radix_sort.cuh +@@ -723,7 +723,7 @@ struct DeviceRadixSortPolicy + + + /// SM60 (GP100) +- struct Policy600 : ChainedPolicy<600, Policy600, Policy500> ++ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> + { + enum { + PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 
7 : 5, // 6.9B 32b keys/s (Quadro P100) +diff a/cub/device/dispatch/dispatch_reduce.cuh b/cub/device/dispatch/dispatch_reduce.cuh +index f6aee45..dd64301 100644 +--- a/cub/device/dispatch/dispatch_reduce.cuh ++++ b/cub/device/dispatch/dispatch_reduce.cuh +@@ -284,7 +284,7 @@ struct DeviceReducePolicy + }; + + /// SM60 +- struct Policy600 : ChainedPolicy<600, Policy600, Policy350> ++ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> + { + // ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items) + typedef AgentReducePolicy< +diff a/cub/device/dispatch/dispatch_scan.cuh b/cub/device/dispatch/dispatch_scan.cuh +index c0c6d59..937ee31 100644 +--- a/cub/device/dispatch/dispatch_scan.cuh ++++ b/cub/device/dispatch/dispatch_scan.cuh +@@ -178,7 +178,7 @@ struct DeviceScanPolicy + }; + + /// SM600 +- struct Policy600 : ChainedPolicy<600, Policy600, Policy520> ++ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> + { + typedef AgentScanPolicy< + 128, 15, ///< Threads per block, items per thread diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index fa59162c345..0f6e110ffd0 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -255,6 +255,11 @@ currently supported by cudf. Each type of value is represented by a separate typ which are all derived from `cudf::scalar`. e.g. A `numeric_scalar` holds a single numerical value, a `string_scalar` holds a single string. The data for the stored value resides in device memory. +A `list_scalar` holds the underlying data of a single list. This means the underlying data can be any type +that cudf supports. For example, a `list_scalar` representing a list of integers stores a `cudf::column` +of type `INT32`. A `list_scalar` representing a list of lists of integers stores a `cudf::column` of +type `LIST`, which in turn stores a column of type `INT32`. 
+ |Value type|Scalar class|Notes| |-|-|-| |fixed-width|`fixed_width_scalar`| `T` can be any fixed-width type| @@ -263,6 +268,7 @@ a `string_scalar` holds a single string. The data for the stored value resides i |timestamp|`timestamp_scalar` | `T` can be `timestamp_D`, `timestamp_s`, etc.| |duration|`duration_scalar` | `T` can be `duration_D`, `duration_s`, etc.| |string|`string_scalar`| This class object is immutable| +|list|`list_scalar`| Underlying data can be any type supported by cudf | ### Construction `scalar`s can be created using either their respective constructors or using factory functions like @@ -285,11 +291,16 @@ auto s1 = static_cast(s.get()); ``` ### Passing to device -Each scalar type has a corresponding non-owning device view class which allows access to the value -and its validity from the device. This can be obtained using the function +Each scalar type, except `list_scalar`, has a corresponding non-owning device view class which allows +access to the value and its validity from the device. This can be obtained using the function `get_scalar_device_view(ScalarType s)`. Note that a device view is not provided for a base scalar object, only for the derived typed scalar class objects. +The underlying data for `list_scalar` can be accessed via `view()` method. For non-nested data, +the device view can be obtained via function `column_device_view::create(column_view)`. For nested +data, a specialized device view for list columns can be constructed via +`lists_column_device_view(column_device_view)`. + # libcudf++ API and Implementation ## Streams @@ -403,9 +414,9 @@ Allocates a specified number of bytes of untyped, uninitialized device memory us `device_memory_resource`. If no resource is explicitly provided, uses `rmm::mr::get_current_device_resource()`. -`rmm::device_buffer` is copyable and movable. 
A copy performs a deep copy of the `device_buffer`'s -device memory, whereas a move moves ownership of the device memory from one `device_buffer` to -another. +`rmm::device_buffer` is movable and copyable on a stream. A copy performs a deep copy of the +`device_buffer`'s device memory on the specified stream, whereas a move moves ownership of the +device memory from one `device_buffer` to another. ```c++ // Allocates at least 100 bytes of uninitialized device memory @@ -413,11 +424,15 @@ another. rmm::device_buffer buff(100, stream, mr); void * raw_data = buff.data(); // Raw pointer to underlying device memory -rmm::device_buffer copy(buff); // Deep copies `buff` into `copy` -rmm::device_buffer moved_to(std::move(buff)); // Moves contents of `buff` into `moved_to` +// Deep copies `buff` into `copy` on `stream` +rmm::device_buffer copy(buff, stream); + +// Moves contents of `buff` into `moved_to` +rmm::device_buffer moved_to(std::move(buff)); custom_memory_resource *mr...; -rmm::device_buffer custom_buff(100, mr); // Allocates 100 bytes from the custom_memory_resource +// Allocates 100 bytes from the custom_memory_resource +rmm::device_buffer custom_buff(100, mr, stream); ``` #### `rmm::device_scalar` @@ -530,6 +545,30 @@ Note: `std::tuple` _could_ be used if not for the fact that Cython does not s only two objects of different types. Multiple objects of the same type may be returned via a `std::vector`. +Alternatively, with C++17 (supported from cudf v0.20), [structured binding](https://en.cppreference.com/w/cpp/language/structured_binding) +may be used to disaggregate multiple return values: + +```c++ +auto [out0, out1] = cudf::return_two_outputs(); +``` + +Note that the compiler might not support capturing aliases defined in a structured binding +in a lambda. 
One may work around this by using a capture with an initializer instead: + +```c++ +auto [out0, out1] = cudf::return_two_outputs(); + +// Direct capture of alias from structured binding might fail with: +// "error: structured binding cannot be captured" +// auto foo = [out0]() {...}; + +// Use an initializing capture: +auto foo = [&out0 = out0] { + // Use out0 to compute something. + // ... +}; +``` + ## Iterator-based interfaces Increasingly, libcudf is moving toward internal (`detail`) APIs with iterator parameters rather @@ -929,21 +968,18 @@ this compound column representation of strings. ## Structs columns -Structs are represented similarly to lists, except that they have multiple child data columns. -The parent column's type is `STRUCT` and contains no data, but its size represents the number of -structs in the column, and its null mask represents the validity of each struct element. The parent -has `N + 1` children, where `N` is the number of fields in the struct. +A struct is a nested data type with a set of child columns each representing an individual field +of a logical struct. Field names are not represented. -1. A non-nullable column of `INT32` elements that indicates the offset to the beginning of each - struct in each dense column of elements. -2. For each field, a column containing the actual field data and optional null mask for all elements - of all the structs packed together. - -With this representation, `child[0][offsets[i]]` is the first field of struct `i`, -`child[1][offsets[i]]` is the second field of struct `i`, etc. +A structs column with `N` fields has `N` children. Each child is a column storing all the data +of a single field packed column-wise, with an optional null mask. The parent column's type is +`STRUCT` and contains no data, its size represents the number of struct rows in the column, and its +null mask represents the validity of each struct element. 
+ +With this representation, `child[0][10]` is row 10 of the first field of the struct, `child[1][42]` +is row 42 of the second field of the struct. -As defined in the [Apache Arrow specification](https://arrow.apache.org/docs/format/Columnar.html#struct-layout), -in addition to the struct column's null mask, each struct field column has its own optional null +Notice that in addition to the struct column's null mask, each struct field column has its own optional null mask. A struct field's validity can vary independently from the corresponding struct row. For instance, a non-null struct row might have a null field. However, the fields of a null struct row are deemed to be null as well. For example, consider a struct column of type diff --git a/cpp/docs/TESTING.md b/cpp/docs/TESTING.md index 638f7224ab8..2c7b62b8b6d 100644 --- a/cpp/docs/TESTING.md +++ b/cpp/docs/TESTING.md @@ -1,7 +1,7 @@ # Unit Testing in libcudf Unit tests in libcudf are written using -[Google Test](https://github.com/google/googletest/blob/master/googletest/docs/primer.md). +[Google Test](https://github.com/google/googletest/blob/master/docs/primer.md). **Important:** Instead of including `gtest/gtest.h` directly, use `#include `. @@ -59,7 +59,7 @@ files, and are therefore preferred in test code over `thrust::device_vector`. ## Base Fixture -All libcudf unit tests should make use of a GTest ["Test Fixture"](https://github.com/google/googletest/blob/master/googletest/docs/primer.md#test-fixtures-using-the-same-data-configuration-for-multiple-tests-same-data-multiple-tests). +All libcudf unit tests should make use of a GTest ["Test Fixture"](https://github.com/google/googletest/blob/master/docs/primer.md#test-fixtures-using-the-same-data-configuration-for-multiple-tests-same-data-multiple-tests). Even if the fixture is empty, it should inherit from the base fixture `cudf::test::BaseFixture` found in `include/cudf_test/base_fixture.hpp`. 
This ensures that RMM is properly initialized and finalized. `cudf::test::BaseFixture` already inherits from `::testing::Test` and therefore it is @@ -75,7 +75,7 @@ class MyTestFiture : public cudf::test::BaseFixture {...}; In general, libcudf features must work across all of the supported types (there are exceptions e.g. not all binary operations are supported for all types). In order to automate the process of running the same tests across multiple types, we use GTest's -[Typed Tests](https://github.com/google/googletest/blob/master/googletest/docs/advanced.md#typed-tests). +[Typed Tests](https://github.com/google/googletest/blob/master/docs/advanced.md#typed-tests). Typed tests allow you to write a test once and run it across a list of types. For example: diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 8fde8098bd3..d359fe59c1a 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "libcudf" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 0.19.0 +PROJECT_NUMBER = 21.06.00 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -2167,7 +2167,7 @@ SKIP_FUNCTION_MACROS = YES # the path). If a tag file is not located in the directory in which doxygen is # run, you must also specify the path to the tagfile here. -TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/0.19 +TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/21.06 # When a file name is specified after GENERATE_TAGFILE, doxygen will create a # tag file that is based on the input files it reads. 
See section "Linking to diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 74ce6e42d7e..2600926d363 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -40,14 +40,16 @@ namespace cudf { // forward declaration namespace detail { +class simple_aggregations_collector; class aggregation_finalizer; } // namespace detail /** - * @brief Base class for specifying the desired aggregation in an + * @brief Abstract base class for specifying the desired aggregation in an * `aggregation_request`. * - * Other kinds of aggregations may derive from this class to encapsulate - * additional information needed to compute the aggregation. + * All aggregations must derive from this class to implement the pure virtual + * functions and potentially encapsulate additional information needed to + * compute the aggregation. */ class aggregation { public: @@ -82,58 +84,78 @@ class aggregation { CUDA ///< CUDA UDF based reduction }; + aggregation() = delete; aggregation(aggregation::Kind a) : kind{a} {} Kind kind; ///< The aggregation to perform + virtual ~aggregation() = default; virtual bool is_equal(aggregation const& other) const { return kind == other.kind; } - virtual size_t do_hash() const { return std::hash{}(kind); } + virtual std::unique_ptr clone() const = 0; - virtual std::unique_ptr clone() const - { - return std::make_unique(*this); - } + // override functions for compound aggregations + virtual std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const = 0; + virtual void finalize(cudf::detail::aggregation_finalizer& finalizer) const = 0; +}; - virtual ~aggregation() = default; +/** + * @brief Derived class intended for enforcing operation-specific restrictions + * when calling various cudf functions. 
+ * + * As an example, rolling_window will only accept rolling_aggregation inputs, + * and the appropriate derived classes (sum_aggregation, mean_aggregation, etc) + * derive from this interface to represent these valid options. + */ +class rolling_aggregation : public virtual aggregation { + public: + ~rolling_aggregation() = default; - // override functions for compound aggregations - virtual std::vector get_simple_aggregations(data_type col_type) const; - virtual void finalize(cudf::detail::aggregation_finalizer& finalizer); + protected: + rolling_aggregation() {} }; enum class udf_type : bool { CUDA, PTX }; /// Factory to create a SUM aggregation -std::unique_ptr make_sum_aggregation(); +template +std::unique_ptr make_sum_aggregation(); /// Factory to create a PRODUCT aggregation -std::unique_ptr make_product_aggregation(); +template +std::unique_ptr make_product_aggregation(); /// Factory to create a MIN aggregation -std::unique_ptr make_min_aggregation(); +template +std::unique_ptr make_min_aggregation(); /// Factory to create a MAX aggregation -std::unique_ptr make_max_aggregation(); +template +std::unique_ptr make_max_aggregation(); /** * @brief Factory to create a COUNT aggregation * * @param null_handling Indicates if null values will be counted. 
*/ -std::unique_ptr make_count_aggregation( - null_policy null_handling = null_policy::EXCLUDE); +template +std::unique_ptr make_count_aggregation(null_policy null_handling = null_policy::EXCLUDE); -/// Factory to create a ANY aggregation -std::unique_ptr make_any_aggregation(); +/// Factory to create an ANY aggregation +template +std::unique_ptr make_any_aggregation(); /// Factory to create a ALL aggregation -std::unique_ptr make_all_aggregation(); +template +std::unique_ptr make_all_aggregation(); /// Factory to create a SUM_OF_SQUARES aggregation -std::unique_ptr make_sum_of_squares_aggregation(); +template +std::unique_ptr make_sum_of_squares_aggregation(); /// Factory to create a MEAN aggregation -std::unique_ptr make_mean_aggregation(); +template +std::unique_ptr make_mean_aggregation(); /** * @brief Factory to create a VARIANCE aggregation @@ -141,7 +163,8 @@ std::unique_ptr make_mean_aggregation(); * @param ddof Delta degrees of freedom. The divisor used in calculation of * `variance` is `N - ddof`, where `N` is the population size. */ -std::unique_ptr make_variance_aggregation(size_type ddof = 1); +template +std::unique_ptr make_variance_aggregation(size_type ddof = 1); /** * @brief Factory to create a STD aggregation @@ -149,10 +172,12 @@ std::unique_ptr make_variance_aggregation(size_type ddof = 1); * @param ddof Delta degrees of freedom. The divisor used in calculation of * `std` is `N - ddof`, where `N` is the population size. 
*/ -std::unique_ptr make_std_aggregation(size_type ddof = 1); +template +std::unique_ptr make_std_aggregation(size_type ddof = 1); /// Factory to create a MEDIAN aggregation -std::unique_ptr make_median_aggregation(); +template +std::unique_ptr make_median_aggregation(); /** * @brief Factory to create a QUANTILE aggregation @@ -160,22 +185,25 @@ std::unique_ptr make_median_aggregation(); * @param quantiles The desired quantiles * @param interpolation The desired interpolation */ -std::unique_ptr make_quantile_aggregation(std::vector const& q, - interpolation i = interpolation::LINEAR); +template +std::unique_ptr make_quantile_aggregation(std::vector const& q, + interpolation i = interpolation::LINEAR); /** * @brief Factory to create an `argmax` aggregation * * `argmax` returns the index of the maximum element. */ -std::unique_ptr make_argmax_aggregation(); +template +std::unique_ptr make_argmax_aggregation(); /** * @brief Factory to create an `argmin` aggregation * * `argmin` returns the index of the minimum element. */ -std::unique_ptr make_argmin_aggregation(); +template +std::unique_ptr make_argmin_aggregation(); /** * @brief Factory to create a `nunique` aggregation @@ -183,8 +211,8 @@ std::unique_ptr make_argmin_aggregation(); * `nunique` returns the number of unique elements. * @param null_handling Indicates if null values will be counted. */ -std::unique_ptr make_nunique_aggregation( - null_policy null_handling = null_policy::EXCLUDE); +template +std::unique_ptr make_nunique_aggregation(null_policy null_handling = null_policy::EXCLUDE); /** * @brief Factory to create a `nth_element` aggregation @@ -199,11 +227,13 @@ std::unique_ptr make_nunique_aggregation( * @param n index of nth element in each group. * @param null_handling Indicates to include/exclude nulls during indexing. 
*/ -std::unique_ptr make_nth_element_aggregation( +template +std::unique_ptr make_nth_element_aggregation( size_type n, null_policy null_handling = null_policy::INCLUDE); /// Factory to create a ROW_NUMBER aggregation -std::unique_ptr make_row_number_aggregation(); +template +std::unique_ptr make_row_number_aggregation(); /** * @brief Factory to create a COLLECT_LIST aggregation @@ -215,7 +245,8 @@ std::unique_ptr make_row_number_aggregation(); * * @param null_handling Indicates whether to include/exclude nulls in list elements. */ -std::unique_ptr make_collect_list_aggregation( +template +std::unique_ptr make_collect_list_aggregation( null_policy null_handling = null_policy::INCLUDE); /** @@ -233,16 +264,18 @@ std::unique_ptr make_collect_list_aggregation( * @param nans_equal Flag to specify whether NaN values in floating point column should be * considered equal */ -std::unique_ptr make_collect_set_aggregation( - null_policy null_handling = null_policy::INCLUDE, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::UNEQUAL); +template +std::unique_ptr make_collect_set_aggregation(null_policy null_handling = null_policy::INCLUDE, + null_equality nulls_equal = null_equality::EQUAL, + nan_equality nans_equal = nan_equality::UNEQUAL); /// Factory to create a LAG aggregation -std::unique_ptr make_lag_aggregation(size_type offset); +template +std::unique_ptr make_lag_aggregation(size_type offset); /// Factory to create a LEAD aggregation -std::unique_ptr make_lead_aggregation(size_type offset); +template +std::unique_ptr make_lead_aggregation(size_type offset); /** * @brief Factory to create an aggregation base on UDF for PTX or CUDA @@ -253,9 +286,10 @@ std::unique_ptr make_lead_aggregation(size_type offset); * * @return aggregation unique pointer housing user_defined_aggregator string. 
*/ -std::unique_ptr make_udf_aggregation(udf_type type, - std::string const& user_defined_aggregator, - data_type output_type); +template +std::unique_ptr make_udf_aggregation(udf_type type, + std::string const& user_defined_aggregator, + data_type output_type); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/ast/detail/linearizer.hpp b/cpp/include/cudf/ast/detail/linearizer.hpp index 68319a24e5d..166a0408703 100644 --- a/cpp/include/cudf/ast/detail/linearizer.hpp +++ b/cpp/include/cudf/ast/detail/linearizer.hpp @@ -83,10 +83,7 @@ class linearizer; * This class is a part of a "visitor" pattern with the `linearizer` class. * Nodes inheriting from this class can accept visitors. */ -class node { - friend class detail::linearizer; - - private: +struct node { virtual cudf::size_type accept(detail::linearizer& visitor) const = 0; }; @@ -102,10 +99,6 @@ class node { * resolved into intermediate data storage in shared memory. */ class linearizer { - friend class literal; - friend class column_reference; - friend class expression; - public: /** * @brief Construct a new linearizer object diff --git a/cpp/include/cudf/ast/detail/operators.hpp b/cpp/include/cudf/ast/detail/operators.hpp index 27bcb0d320b..8ae60f96997 100644 --- a/cpp/include/cudf/ast/detail/operators.hpp +++ b/cpp/include/cudf/ast/detail/operators.hpp @@ -753,43 +753,6 @@ struct operator_functor { } }; -#if 0 -/** - * @brief Functor used to double-type-dispatch binary operators. - * - * This functor's `operator()` is templated to validate calls to its operators based on the input - * type, as determined by the `is_valid_binary_op` trait. - * - * @tparam OperatorFunctor Binary operator functor. - */ -template -struct double_dispatch_binary_operator_types { - template >* = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... 
args) - { - f.template operator()(std::forward(args)...); - } - - template >* = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... args) - { -#ifndef __CUDA_ARCH__ - CUDF_FAIL("Invalid binary operation."); -#else - cudf_assert(false && "Invalid binary operation."); -#endif - } -}; -#endif - /** * @brief Functor used to single-type-dispatch binary operators. * @@ -856,16 +819,6 @@ struct type_dispatch_binary_op { F&& f, Ts&&... args) { -#if 0 - // Double dispatch - /* - double_type_dispatcher(lhs_type, - rhs_type, - detail::double_dispatch_binary_operator_types>{}, - std::forward(f), - std::forward(args)...); - */ -#endif // Single dispatch (assume lhs_type == rhs_type) type_dispatcher(lhs_type, detail::single_dispatch_binary_operator_types>{}, diff --git a/cpp/include/cudf/ast/detail/transform.cuh b/cpp/include/cudf/ast/detail/transform.cuh index da15ac07c63..f69927a3601 100644 --- a/cpp/include/cudf/ast/detail/transform.cuh +++ b/cpp/include/cudf/ast/detail/transform.cuh @@ -15,8 +15,9 @@ */ #pragma once +#include #include -#include +#include #include #include #include @@ -25,6 +26,7 @@ #include #include #include +#include #include #include @@ -155,10 +157,11 @@ struct row_evaluator { * storing intermediates. * @param output_column The output column where results are stored. 
*/ - __device__ row_evaluator(table_device_view const& table, - const cudf::detail::fixed_width_scalar_device_view_base* literals, - std::int64_t* thread_intermediate_storage, - mutable_column_device_view* output_column) + __device__ row_evaluator( + table_device_view const& table, + device_span literals, + std::int64_t* thread_intermediate_storage, + mutable_column_device_view* output_column) : table(table), literals(literals), thread_intermediate_storage(thread_intermediate_storage), @@ -264,7 +267,7 @@ struct row_evaluator { private: table_device_view const& table; - const cudf::detail::fixed_width_scalar_device_view_base* literals; + device_span literals; std::int64_t* thread_intermediate_storage; mutable_column_device_view* output_column; }; @@ -298,15 +301,15 @@ __device__ void row_output::resolve_output(detail::device_data_reference device_ * @param num_operators Number of operators. * @param row_index Row index of data column(s). */ -__device__ void evaluate_row_expression(detail::row_evaluator const& evaluator, - const detail::device_data_reference* data_references, - const ast_operator* operators, - const cudf::size_type* operator_source_indices, - cudf::size_type num_operators, - cudf::size_type row_index) +__device__ void evaluate_row_expression( + detail::row_evaluator const& evaluator, + device_span data_references, + device_span operators, + device_span operator_source_indices, + cudf::size_type row_index) { - auto operator_source_index = cudf::size_type(0); - for (cudf::size_type operator_index(0); operator_index < num_operators; operator_index++) { + auto operator_source_index = static_cast(0); + for (cudf::size_type operator_index = 0; operator_index < operators.size(); operator_index++) { // Execute operator auto const op = operators[operator_index]; auto const arity = ast_operator_arity(op); @@ -336,43 +339,79 @@ __device__ void evaluate_row_expression(detail::row_evaluator const& evaluator, } } +/** + * @brief The AST plan creates a device 
buffer of data needed to execute an AST. + * + * On construction, an AST plan creates a single "packed" host buffer of all necessary data arrays, + * and copies that to the device with a single host-device memory copy. Because the plan tends to be + * small, this is the most efficient approach for low latency. + * + */ struct ast_plan { - public: - ast_plan() : sizes(), data_pointers() {} + ast_plan(linearizer const& expr_linearizer, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : _sizes{}, _data_pointers{} + { + add_to_plan(expr_linearizer.data_references()); + add_to_plan(expr_linearizer.literals()); + add_to_plan(expr_linearizer.operators()); + add_to_plan(expr_linearizer.operator_source_indices()); + + // Create device buffer + auto const buffer_size = std::accumulate(_sizes.cbegin(), _sizes.cend(), 0); + auto buffer_offsets = std::vector(_sizes.size()); + thrust::exclusive_scan(_sizes.cbegin(), _sizes.cend(), buffer_offsets.begin(), 0); + + auto h_data_buffer = std::make_unique(buffer_size); + for (unsigned int i = 0; i < _data_pointers.size(); ++i) { + std::memcpy(h_data_buffer.get() + buffer_offsets[i], _data_pointers[i], _sizes[i]); + } - using buffer_type = std::pair, int>; + _device_data_buffer = rmm::device_buffer(h_data_buffer.get(), buffer_size, stream, mr); + + stream.synchronize(); + + // Create device pointers to components of plan + auto device_data_buffer_ptr = static_cast(_device_data_buffer.data()); + _device_data_references = device_span( + reinterpret_cast(device_data_buffer_ptr + + buffer_offsets[0]), + expr_linearizer.data_references().size()); + _device_literals = device_span( + reinterpret_cast( + device_data_buffer_ptr + buffer_offsets[1]), + expr_linearizer.literals().size()); + _device_operators = device_span( + reinterpret_cast(device_data_buffer_ptr + buffer_offsets[2]), + expr_linearizer.operators().size()); + _device_operator_source_indices = device_span( + reinterpret_cast(device_data_buffer_ptr + 
buffer_offsets[3]), + expr_linearizer.operator_source_indices().size()); + } + /** + * @brief Helper function for adding components (operators, literals, etc) to AST plan + * + * @tparam T The underlying type of the input `std::vector` + * @param v The `std::vector` containing components (operators, literals, etc) + */ template void add_to_plan(std::vector const& v) { auto const data_size = sizeof(T) * v.size(); - sizes.push_back(data_size); - data_pointers.push_back(v.data()); + _sizes.push_back(data_size); + _data_pointers.push_back(v.data()); } - buffer_type get_host_data_buffer() const - { - auto const total_size = std::accumulate(sizes.cbegin(), sizes.cend(), 0); - auto host_data_buffer = std::make_unique(total_size); - auto const offsets = get_offsets(); - for (unsigned int i = 0; i < data_pointers.size(); ++i) { - std::memcpy(host_data_buffer.get() + offsets[i], data_pointers[i], sizes[i]); - } - return std::make_pair(std::move(host_data_buffer), total_size); - } + std::vector _sizes; + std::vector _data_pointers; - std::vector get_offsets() const - { - auto offsets = std::vector(sizes.size()); - // When C++17, use std::exclusive_scan - offsets[0] = 0; - std::partial_sum(sizes.cbegin(), sizes.cend() - 1, offsets.begin() + 1); - return offsets; - } - - private: - std::vector sizes; - std::vector data_pointers; + rmm::device_buffer _device_data_buffer; + device_span _device_data_references; + device_span _device_literals; + device_span _device_operators; + device_span _device_operator_source_indices; }; /** diff --git a/cpp/include/cudf/ast/linearizer.hpp b/cpp/include/cudf/ast/nodes.hpp similarity index 90% rename from cpp/include/cudf/ast/linearizer.hpp rename to cpp/include/cudf/ast/nodes.hpp index e5ccb2e8069..70dda58816e 100644 --- a/cpp/include/cudf/ast/linearizer.hpp +++ b/cpp/include/cudf/ast/nodes.hpp @@ -38,17 +38,10 @@ enum class table_reference { OUTPUT // Column index in the output table }; -// Forward declaration -class literal; -class 
column_reference; -class expression; - /** * @brief A literal value used in an abstract syntax tree. */ class literal : public detail::node { - friend class detail::linearizer; - public: /** * @brief Construct a new literal object. @@ -90,7 +83,6 @@ class literal : public detail::node { */ cudf::data_type get_data_type() const { return get_value().type(); } - private: /** * @brief Get the value object. * @@ -106,6 +98,7 @@ class literal : public detail::node { */ cudf::size_type accept(detail::linearizer& visitor) const override; + private: const cudf::detail::fixed_width_scalar_device_view_base value; }; @@ -113,8 +106,6 @@ class literal : public detail::node { * @brief A node referring to data from a column in a table. */ class column_reference : public detail::node { - friend class detail::linearizer; - public: /** * @brief Construct a new column reference object @@ -175,7 +166,6 @@ class column_reference : public detail::node { return table.column(get_column_index()).type(); } - private: /** * @brief Accepts a visitor class. * @@ -184,6 +174,7 @@ class column_reference : public detail::node { */ cudf::size_type accept(detail::linearizer& visitor) const override; + private: cudf::size_type column_index; table_reference table_source; }; @@ -192,8 +183,6 @@ class column_reference : public detail::node { * @brief An expression node holds an operator and zero or more operands. */ class expression : public detail::node { - friend class detail::linearizer; - public: /** * @brief Construct a new unary expression object. @@ -208,11 +197,6 @@ class expression : public detail::node { } } - /** - * @brief `expression` doesn't accept r-value references for expression nodes - */ - expression(ast_operator op, node&& input) = delete; - /** * @brief Construct a new binary expression object. 
* @@ -227,19 +211,11 @@ class expression : public detail::node { } } - /** - * @brief `expression` doesn't accept r-value references for expression nodes - */ - expression(ast_operator op, node&& left, node&& right) = delete; - - /** - * @brief `expression` doesn't accept r-value references for expression nodes - */ + // expression only stores references to nodes, so it does not accept r-value + // references: the calling code must own the nodes. + expression(ast_operator op, node&& input) = delete; + expression(ast_operator op, node&& left, node&& right) = delete; expression(ast_operator op, node&& left, node const& right) = delete; - - /** - * @brief `expression` doesn't accept r-value references for expression nodes - */ expression(ast_operator op, node const& left, node&& right) = delete; /** @@ -256,7 +232,6 @@ class expression : public detail::node { */ std::vector> get_operands() const { return operands; } - private: /** * @brief Accepts a visitor class. * @@ -265,6 +240,7 @@ class expression : public detail::node { */ cudf::size_type accept(detail::linearizer& visitor) const override; + private: const ast_operator op; const std::vector> operands; }; diff --git a/cpp/include/cudf/ast/transform.hpp b/cpp/include/cudf/ast/transform.hpp index 513f92ea251..59697e5f75c 100644 --- a/cpp/include/cudf/ast/transform.hpp +++ b/cpp/include/cudf/ast/transform.hpp @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include namespace cudf { diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index a08b10df6f4..ee367840644 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -49,13 +49,6 @@ class column { column& operator=(column const& other) = delete; column& operator=(column&& other) = delete; - /** - * @brief Construct a new column by deep copying the contents of `other`. - * - * @param other The column to copy - */ - column(column const& other); - /** * @brief Construct a new column object by deep copying the contents of *`other`. @@ -68,7 +61,7 @@ class column { * @param mr Device memory resource to use for all device memory allocations */ column(column const& other, - rmm::cuda_stream_view stream, + rmm::cuda_stream_view stream = rmm::cuda_stream_view{}, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -165,18 +158,21 @@ class column { /** * @brief Sets the column's null value indicator bitmask to `new_null_mask`. * - * @throws cudf::logic_error if new_null_count is larger than 0 and the size - * of `new_null_mask` does not match the size of this column. - * - * @param new_null_mask New null value indicator bitmask (lvalue overload & - * copied) to set the column's null value indicator mask. May be empty if - * `new_null_count` is 0 or `UNKOWN_NULL_COUNT`. - * @param new_null_count Optional, the count of null elements. If unknown, - * specify `UNKNOWN_NULL_COUNT` to indicate that the null count should be - * computed on the first invocation of `null_count()`. + * @throws cudf::logic_error if new_null_count is larger than 0 and the size of `new_null_mask` + * does not match the size of this column. + * + * @param new_null_mask New null value indicator bitmask (lvalue overload & copied) to set the + * column's null value indicator mask. May be empty if `new_null_count` is 0 or + * `UNKOWN_NULL_COUNT`. + * @param new_null_count Optional, the count of null elements. If unknown, specify + * `UNKNOWN_NULL_COUNT` to indicate that the null count should be computed on the first invocation + * of `null_count()`. + * @param stream The stream on which to perform the allocation and copy. 
Uses the default CUDA + * stream if none is specified. */ void set_null_mask(rmm::device_buffer const& new_null_mask, - size_type new_null_count = UNKNOWN_NULL_COUNT); + size_type new_null_count = UNKNOWN_NULL_COUNT, + rmm::cuda_stream_view stream = rmm::cuda_stream_view{}); /** * @brief Updates the count of null elements. diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index a842e51c94a..5f42823afe4 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -31,6 +31,7 @@ #include #include +#include #include @@ -40,6 +41,28 @@ */ namespace cudf { + +/** + * @brief Policy for what assumptions the optional iterator has about null values + * + * - `YES` means that the column supports nulls and has null values, therefore + * the optional might not contain a value + * + * - `NO` means that the column has no null values, therefore the optional will + * always have a value + * + * - `DYNAMIC` defers the assumption of nullability to runtime with the users stating + * on construction of the iterator if column has nulls. 
+ */ +namespace contains_nulls { +struct YES { +}; +struct NO { +}; +struct DYNAMIC { +}; +} // namespace contains_nulls + namespace detail { /** * @brief An immutable, non-owning view of device data as a column of elements @@ -255,10 +278,11 @@ class alignas(16) column_device_view_base { : std::true_type { }; }; - // Forward declaration template struct value_accessor; +template +struct optional_accessor; template struct pair_accessor; template @@ -484,6 +508,13 @@ class alignas(16) column_device_view : public detail::column_device_view_base { return const_iterator{count_it{size()}, detail::value_accessor{*this}}; } + /** + * @brief optional iterator for navigating this column + */ + template + using const_optional_iterator = + thrust::transform_iterator, count_it>; + /** * @brief Pair iterator for navigating this column */ @@ -500,6 +531,124 @@ class alignas(16) column_device_view : public detail::column_device_view_base { using const_pair_rep_iterator = thrust::transform_iterator, count_it>; + /** + * @brief Return an optional iterator to the first element of the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * optional_begin with mode `DYNAMIC` defers the assumption of nullability to + * runtime, with the user stating on construction of the iterator if column has nulls. 
+ * `DYNAMIC` mode is nice when an algorithm is going to execute on mutliple + * iterators and you don't want to compile all the combinations of iterator types + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * // Create a `DYNAMIC` optional iterator + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, + * col_view.has_nulls()); + * } + * \endcode + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const + { + return const_optional_iterator{ + count_it{0}, detail::optional_accessor{*this, has_nulls}}; + } + + /** + * @brief Return an optional iterator to the first element of the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. 
+ * + * optional_begin with mode `YES` means that the column supports nulls and + * potentially has null values, therefore the optional might not contain a value + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `YES` mode used + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_begin(contains_nulls::YES) const + { + return const_optional_iterator{ + count_it{0}, detail::optional_accessor{*this}}; + } + + /** + * @brief Return an optional iterator to the first element of the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * optional_begin with mode `NO` means that the column has no null values, + * therefore the optional will always contain a value. 
+ * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_begin(contains_nulls::NO) const + { + return const_optional_iterator{ + count_it{0}, detail::optional_accessor{*this}}; + } + /** * @brief Return a pair iterator to the first element of the column. * @@ -558,6 +707,63 @@ class alignas(16) column_device_view : public detail::column_device_view_base { detail::pair_rep_accessor{*this}}; } + /** + * @brief Return an optional iterator to the element following the last element of + * the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_end(contains_nulls::DYNAMIC, bool has_nulls) const + { + return const_optional_iterator{ + count_it{size()}, detail::optional_accessor{*this, has_nulls}}; + } + + /** + * @brief Return an optional iterator to the element following the last element of + * the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. 
+ * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `YES` mode used + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_end(contains_nulls::YES) const + { + return const_optional_iterator{ + count_it{size()}, detail::optional_accessor{*this}}; + } + + /** + * @brief Return an optional iterator to the element following the last element of + * the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_end(contains_nulls::NO) const + { + return const_optional_iterator{ + count_it{size()}, detail::optional_accessor{*this}}; + } + /** * @brief Return a pair iterator to the element following the last element of * the column. @@ -999,6 +1205,82 @@ struct value_accessor { __device__ T operator()(cudf::size_type i) const { return col.element(i); } }; +/** + * @brief optional accessor of a column + * + * + * The optional_accessor always returns a thrust::optional of column[i]. The validity + * of the optional is determined by the contains_nulls_mode template parameter + * which has the following modes: + * + * - `YES` means that the column supports nulls and has null values, therefore + * the optional might be valid or invalid + * + * - `NO` the user has attested that the column has no null values, + * no checks will occur and `thrust::optional{column[i]}` will be + * return for each `i`. + * + * - `DYNAMIC` defers the assumption of nullability to runtime with the users stating + * on construction of the iterator if column has nulls. 
+ * When `with_nulls=true` the return value validity will be determined if column[i] + * is not null. + * When `with_nulls=false` the return value will always be valid + * + * @throws cudf::logic_error if column datatype and template T type mismatch. + * @throws cudf::logic_error if the column is not nullable, and `with_nulls=true` + * + * + * @tparam T The type of elements in the column + * @tparam contains_nulls_mode Specifies if nulls are checked at runtime or compile time. + */ +template +struct optional_accessor { + column_device_view const col; ///< column view of column in device + + /** + * @brief constructor + * @param[in] _col column device view of cudf column + */ + optional_accessor(column_device_view const& _col) : col{_col} + { + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); + } + + CUDA_DEVICE_CALLABLE + thrust::optional operator()(cudf::size_type i) const + { + if constexpr (std::is_same_v) { + return (col.is_valid_nocheck(i)) ? thrust::optional{col.element(i)} + : thrust::optional{thrust::nullopt}; + } + return thrust::optional{col.element(i)}; + } +}; + +template +struct optional_accessor { + column_device_view const col; ///< column view of column in device + bool has_nulls; + + /** + * @brief constructor + * @param[in] _col column device view of cudf column + */ + optional_accessor(column_device_view const& _col, bool with_nulls) + : col{_col}, has_nulls{with_nulls} + { + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); + if (with_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); } + } + + CUDA_DEVICE_CALLABLE + thrust::optional operator()(cudf::size_type i) const + { + return (has_nulls and col.is_null_nocheck(i)) ? 
thrust::optional{thrust::nullopt} + : thrust::optional{col.element(i)}; + } +}; + /** * @brief pair accessor of column with/without null bitmask * A unary functor returns pair with scalar value at `id` and boolean validity diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 43c2407d629..e5424f0fc44 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -541,7 +541,8 @@ std::unique_ptr make_structs_column( * * The output column will have the same type as `s.type()` * The output column will contain all null rows if `s.invalid()==false` - * The output column will be empty if `size==0`. + * The output column will be empty if `size==0`. For LIST scalars, the column hierarchy + * from @p s is preserved. * * @param[in] s The scalar to use for values in the column. * @param[in] size The number of rows for the output column. diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index bb44e33f786..c9a4eab2154 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -182,6 +182,14 @@ enum class mask_allocation_policy { */ std::unique_ptr empty_like(column_view const& input); +/** + * @brief Initializes and returns an empty column of the same type as the `input`. + * + * @param[in] input Scalar to emulate + * @return std::unique_ptr An empty column of same type as `input` + */ +std::unique_ptr empty_like(scalar const& input); + /** * @brief Creates an uninitialized new column of the same size and type as the `input`. * Supports only fixed-width types. 
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index 3f5f5a91632..09763d66403 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -53,6 +53,14 @@ struct corresponding_operator { using type = DeviceMax; }; template <> +struct corresponding_operator { + using type = DeviceMin; +}; +template <> +struct corresponding_operator { + using type = DeviceMax; +}; +template <> struct corresponding_operator { using type = DeviceMax; }; @@ -81,6 +89,10 @@ struct corresponding_operator { using type = DeviceSum; }; template <> +struct corresponding_operator { + using type = DeviceSum; +}; +template <> struct corresponding_operator { using type = DeviceCount; }; @@ -92,6 +104,12 @@ struct corresponding_operator { template using corresponding_operator_t = typename corresponding_operator::type; +template +constexpr bool has_corresponding_operator() +{ + return !std::is_same::type, void>::value; +} + template ; @@ -152,8 +168,6 @@ struct update_target_element(source.element(source_index))); if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } - -#endif } }; @@ -190,8 +204,6 @@ struct update_target_element; @@ -202,8 +214,6 @@ struct update_target_element(source.element(source_index))); if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } - -#endif } }; @@ -240,8 +250,6 @@ struct update_target_element; @@ -252,7 +260,6 @@ struct update_target_element(source.element(source_index))); if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } -#endif } }; @@ -260,42 +267,55 @@ struct update_target_element struct update_target_from_dictionary { - template () && !is_fixed_point()>* = nullptr> - __device__ void operator()(mutable_column_device_view& target, + template ()>* = nullptr> + __device__ void operator()(mutable_column_device_view 
target, size_type target_index, - column_device_view& d_dictionary, + column_device_view source, size_type source_index) const noexcept { -// This code will segfault in nvcc/ptxas 10.2 only -// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317 -#if (__CUDACC_VER_MAJOR__ != 10) or (__CUDACC_VER_MINOR__ != 2) - auto const keys = d_dictionary.child(cudf::dictionary_column_view::keys_column_index); - auto const value = keys.element( - static_cast(d_dictionary.element(source_index))); - using Target = target_type_t; - atomicAdd(&target.element(target_index), static_cast(value)); -#endif + update_target_element{}( + target, target_index, source, source_index); } - template () || is_fixed_point()>* = nullptr> - __device__ void operator()(mutable_column_device_view& target, + template ()>* = nullptr> + __device__ void operator()(mutable_column_device_view target, size_type target_index, - column_device_view& d_dictionary, - size_type source_index) const noexcept {}; + column_device_view source, + size_type source_index) const noexcept + { + } }; /** - * @brief Specialization function for dictionary type and aggregation SUM. + * @brief Specialization function for dictionary type and aggregations. + * + * The `source` column is a dictionary type. This functor de-references the + * dictionary's keys child column and maps the input source index through + * the dictionary's indices child column to pass to the `update_target_element` + * in the above `update_target_from_dictionary` using the type-dispatcher to + * resolve the keys column type. + * + * `update_target_element( target, target_index, source.keys(), source.indices()[source_index] )` * * @tparam target_has_nulls Indicates presence of null elements in `target` * @tparam source_has_nulls Indicates presence of null elements in `source`. 
*/ -template -struct update_target_element { +template +struct update_target_element< + dictionary32, + k, + target_has_nulls, + source_has_nulls, + std::enable_if_t> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -303,40 +323,29 @@ struct update_target_element{}, + target, + target_index, + source.child(cudf::dictionary_column_view::keys_column_index), + static_cast(source.element(source_index))); } }; -// This code will segfault in nvcc/ptxas 10.2 only -// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317 -// Enabling only for 2 types does not segfault. Using for unit tests. -#if (__CUDACC_VER_MAJOR__ == 10) and (__CUDACC_VER_MINOR__ == 2) -template -constexpr bool is_SOS_supported() -{ - return std::is_floating_point::value; -} -#else template -constexpr bool is_SOS_supported() +constexpr bool is_product_supported() { return is_numeric(); } -#endif template struct update_target_element()>> { + std::enable_if_t()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -351,6 +360,26 @@ struct update_target_element +struct update_target_element()>> { + __device__ void operator()(mutable_column_device_view target, + size_type target_index, + column_device_view source, + size_type source_index) const noexcept + { + if (source_has_nulls and source.is_null(source_index)) { return; } + + using Target = target_type_t; + atomicMul(&target.element(target_index), + static_cast(source.element(source_index))); + if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + template struct update_target_element< Source, @@ -559,7 +588,8 @@ struct identity_initializer { k == aggregation::COUNT_VALID or k == aggregation::COUNT_ALL or k == aggregation::ARGMAX or k == aggregation::ARGMIN or k == aggregation::SUM_OF_SQUARES or k == aggregation::STD or - k == aggregation::VARIANCE); + k == 
aggregation::VARIANCE or + (k == aggregation::PRODUCT and is_product_supported())); } template @@ -577,27 +607,17 @@ struct identity_initializer { } template - typename std::enable_if::value, T>::type get_identity() + T get_identity() { - if (k == aggregation::ARGMAX) - return T{typename T::duration(ARGMAX_SENTINEL)}; - else if (k == aggregation::ARGMIN) - return T{typename T::duration(ARGMIN_SENTINEL)}; - else - // In C++17, we can use compile time if and not make this function SFINAE - return identity_from_operator(); - } - - template - typename std::enable_if::value, T>::type get_identity() - { - if (k == aggregation::ARGMAX) - return static_cast(ARGMAX_SENTINEL); - else if (k == aggregation::ARGMIN) - return static_cast(ARGMIN_SENTINEL); - else - // In C++17, we can use compile time if and not make this function SFINAE - return identity_from_operator(); + if (k == aggregation::ARGMAX || k == aggregation::ARGMIN) { + if constexpr (cudf::is_timestamp()) + return k == aggregation::ARGMAX ? T{typename T::duration(ARGMAX_SENTINEL)} + : T{typename T::duration(ARGMIN_SENTINEL)}; + else + return k == aggregation::ARGMAX ? static_cast(ARGMAX_SENTINEL) + : static_cast(ARGMIN_SENTINEL); + } + return identity_from_operator(); } public: diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 0bfe6b84ae2..e230ce0b757 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -28,252 +28,495 @@ namespace cudf { namespace detail { -// Forward declare compound aggregations. 
-class mean_aggregation; -class var_aggregation; -class std_aggregation; -class min_aggregation; -class max_aggregation; - // Visitor pattern +class simple_aggregations_collector { // Declares the interface for the simple aggregations + // collector + public: + // Declare overloads for each kind of a agg to dispatch + virtual std::vector> visit(data_type col_type, + aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class sum_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class product_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class min_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class max_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class count_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class any_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class all_aggregation const& agg); + virtual std::vector> visit( + data_type col_type, class sum_of_squares_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class mean_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class var_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class std_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class median_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class quantile_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class argmax_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class argmin_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class nunique_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class nth_element_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class row_number_aggregation const& agg); + 
virtual std::vector> visit( + data_type col_type, class collect_list_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class collect_set_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class lead_lag_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class udf_aggregation const& agg); +}; + class aggregation_finalizer { // Declares the interface for the finalizer public: // Declare overloads for each kind of a agg to dispatch - virtual void visit(aggregation const& agg) = 0; - virtual void visit(min_aggregation const& agg) = 0; - virtual void visit(max_aggregation const& agg) = 0; - virtual void visit(mean_aggregation const& agg) = 0; - virtual void visit(var_aggregation const& agg) = 0; - virtual void visit(std_aggregation const& agg) = 0; + virtual void visit(aggregation const& agg); + virtual void visit(class sum_aggregation const& agg); + virtual void visit(class product_aggregation const& agg); + virtual void visit(class min_aggregation const& agg); + virtual void visit(class max_aggregation const& agg); + virtual void visit(class count_aggregation const& agg); + virtual void visit(class any_aggregation const& agg); + virtual void visit(class all_aggregation const& agg); + virtual void visit(class sum_of_squares_aggregation const& agg); + virtual void visit(class mean_aggregation const& agg); + virtual void visit(class var_aggregation const& agg); + virtual void visit(class std_aggregation const& agg); + virtual void visit(class median_aggregation const& agg); + virtual void visit(class quantile_aggregation const& agg); + virtual void visit(class argmax_aggregation const& agg); + virtual void visit(class argmin_aggregation const& agg); + virtual void visit(class nunique_aggregation const& agg); + virtual void visit(class nth_element_aggregation const& agg); + virtual void visit(class row_number_aggregation const& agg); + virtual void visit(class collect_list_aggregation const& agg); 
+ virtual void visit(class collect_set_aggregation const& agg); + virtual void visit(class lead_lag_aggregation const& agg); + virtual void visit(class udf_aggregation const& agg); }; /** - * @brief Derived class for specifying a min aggregation + * @brief Derived class for specifying a sum aggregation */ -struct min_aggregation final : aggregation { - min_aggregation() : aggregation{MIN} {} +class sum_aggregation final : public rolling_aggregation { + public: + sum_aggregation() : aggregation(SUM) {} - std::vector get_simple_aggregations(data_type col_type) const override + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override { - if (col_type.id() == type_id::STRING) - return {aggregation::ARGMIN}; - else - return {this->kind}; + return collector.visit(col_type, *this); } - void finalize(aggregation_finalizer& finalizer) override { finalizer.visit(*this); } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + +/** + * @brief Derived class for specifying a product aggregation + */ +class product_aggregation final : public aggregation { + public: + product_aggregation() : aggregation(PRODUCT) {} std::unique_ptr clone() const override { - return std::make_unique(*this); + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; /** - * @brief Derived class for specifying a max aggregation + * @brief Derived class for specifying a min aggregation */ -struct max_aggregation final : aggregation { - max_aggregation() : aggregation{MAX} {} +class min_aggregation final : public rolling_aggregation { + public: + 
min_aggregation() : aggregation(MIN) {} - std::vector get_simple_aggregations(data_type col_type) const override + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override { - if (col_type.id() == type_id::STRING) - return {aggregation::ARGMAX}; - else - return {this->kind}; + return collector.visit(col_type, *this); } - void finalize(aggregation_finalizer& finalizer) override { finalizer.visit(*this); } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + +/** + * @brief Derived class for specifying a max aggregation + */ +class max_aggregation final : public rolling_aggregation { + public: + max_aggregation() : aggregation(MAX) {} std::unique_ptr clone() const override { return std::make_unique(*this); } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; /** - * @brief A wrapper to simplify inheritance of virtual methods from aggregation - * - * Derived aggregations are required to implement operator==() and hash_impl(). 
- * - * https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern + * @brief Derived class for specifying a count aggregation */ -template -class derived_aggregation : public aggregation { +class count_aggregation final : public rolling_aggregation { public: - derived_aggregation(aggregation::Kind a) : aggregation(a) {} + count_aggregation(aggregation::Kind kind) : aggregation(kind) {} - bool is_equal(aggregation const& other) const override + std::unique_ptr clone() const override { - if (this->aggregation::is_equal(other)) { - // Dispatch to operator== using static polymorphism - return static_cast(*this) == static_cast(other); - } else { - return false; - } + return std::make_unique(*this); } - - size_t do_hash() const override + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override { - // Dispatch to hash_impl() using static polymorphism - return this->aggregation::do_hash() ^ static_cast(*this).hash_impl(); + return collector.visit(col_type, *this); } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + +/** + * @brief Derived class for specifying an any aggregation + */ +class any_aggregation final : public aggregation { + public: + any_aggregation() : aggregation(ANY) {} std::unique_ptr clone() const override { - // Dispatch to copy constructor using static polymorphism - return std::make_unique(static_cast(*this)); + return std::make_unique(*this); } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; /** - * @brief Derived class for specifying a quantile aggregation + * @brief Derived class for specifying an all aggregation */ -struct quantile_aggregation final : derived_aggregation { - 
quantile_aggregation(std::vector const& q, interpolation i) - : derived_aggregation{QUANTILE}, _quantiles{q}, _interpolation{i} +class all_aggregation final : public aggregation { + public: + all_aggregation() : aggregation(ALL) {} + + std::unique_ptr clone() const override { + return std::make_unique(*this); } - std::vector _quantiles; ///< Desired quantile(s) - interpolation _interpolation; ///< Desired interpolation + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; - protected: - friend class derived_aggregation; +/** + * @brief Derived class for specifying a sum_of_squares aggregation + */ +class sum_of_squares_aggregation final : public aggregation { + public: + sum_of_squares_aggregation() : aggregation(SUM_OF_SQUARES) {} - bool operator==(quantile_aggregation const& other) const + std::unique_ptr clone() const override { - return _interpolation == other._interpolation and - std::equal(_quantiles.begin(), _quantiles.end(), other._quantiles.begin()); + return std::make_unique(*this); } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; - size_t hash_impl() const +/** + * @brief Derived class for specifying a mean aggregation + */ +class mean_aggregation final : public rolling_aggregation { + public: + mean_aggregation() : aggregation(MEAN) {} + + std::unique_ptr clone() const override { - return std::hash{}(static_cast(_interpolation)) ^ - std::accumulate( - _quantiles.cbegin(), _quantiles.cend(), size_t{0}, [](size_t a, double b) { - return a ^ std::hash{}(b); - }); + return std::make_unique(*this); } + 
std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; /** - * @brief Derived aggregation class for specifying LEAD/LAG window aggregations + * @brief Derived class for specifying a standard deviation/variance aggregation */ -struct lead_lag_aggregation final : derived_aggregation { - lead_lag_aggregation(Kind kind, size_type offset) - : derived_aggregation{offset < 0 ? (kind == LAG ? LEAD : LAG) : kind}, - row_offset{std::abs(offset)} +class std_var_aggregation : public aggregation { + public: + size_type _ddof; ///< Delta degrees of freedom + + bool is_equal(aggregation const& _other) const override { + if (!this->aggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); + return _ddof == other._ddof; } - size_type row_offset; + size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } protected: - friend class derived_aggregation; - - bool operator==(lead_lag_aggregation const& rhs) const { return row_offset == rhs.row_offset; } + std_var_aggregation(aggregation::Kind k, size_type ddof) : aggregation(k), _ddof{ddof} + { + CUDF_EXPECTS(k == aggregation::STD or k == aggregation::VARIANCE, + "std_var_aggregation can accept only STD, VARIANCE"); + } - size_t hash_impl() const { return std::hash()(row_offset); } + size_type hash_impl() const { return std::hash{}(_ddof); } }; /** - * @brief Derived class for specifying a mean aggregation + * @brief Derived class for specifying a variance aggregation */ -struct mean_aggregation final : aggregation { - mean_aggregation() : aggregation{MEAN} {} +class var_aggregation final : public std_var_aggregation { + public: + var_aggregation(size_type ddof) : std_var_aggregation{aggregation::VARIANCE, ddof} {} - std::vector 
get_simple_aggregations(data_type col_type) const override + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override { - CUDF_EXPECTS(is_fixed_width(col_type), "MEAN aggregation expects fixed width type"); - return {aggregation::SUM, aggregation::COUNT_VALID}; + return collector.visit(col_type, *this); } - void finalize(aggregation_finalizer& finalizer) override { finalizer.visit(*this); } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + +/** + * @brief Derived class for specifying a standard deviation aggregation + */ +class std_aggregation final : public std_var_aggregation { + public: + std_aggregation(size_type ddof) : std_var_aggregation{aggregation::STD, ddof} {} std::unique_ptr clone() const override { - return std::make_unique(*this); + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; /** - * @brief Derived class for specifying a standard deviation/variance aggregation + * @brief Derived class for specifying a median aggregation */ -struct std_var_aggregation : derived_aggregation { - size_type _ddof; ///< Delta degrees of freedom +class median_aggregation final : public aggregation { + public: + median_aggregation() : aggregation(MEDIAN) {} - virtual std::vector get_simple_aggregations(data_type col_type) const override + std::unique_ptr clone() const override { - return {aggregation::SUM, aggregation::COUNT_VALID}; + return std::make_unique(*this); } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + 
return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; - protected: - friend class derived_aggregation; +/** + * @brief Derived class for specifying a quantile aggregation + */ +class quantile_aggregation final : public aggregation { + public: + quantile_aggregation(std::vector const& q, interpolation i) + : aggregation{QUANTILE}, _quantiles{q}, _interpolation{i} + { + } + std::vector _quantiles; ///< Desired quantile(s) + interpolation _interpolation; ///< Desired interpolation - bool operator==(std_var_aggregation const& other) const { return _ddof == other._ddof; } + bool is_equal(aggregation const& _other) const override + { + if (!this->aggregation::is_equal(_other)) { return false; } - size_t hash_impl() const { return std::hash{}(_ddof); } + auto const& other = dynamic_cast(_other); - std_var_aggregation(aggregation::Kind k, size_type ddof) : derived_aggregation{k}, _ddof{ddof} + return _interpolation == other._interpolation && + std::equal(_quantiles.begin(), _quantiles.end(), other._quantiles.begin()); + } + + size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + + std::unique_ptr clone() const override { - CUDF_EXPECTS(k == aggregation::STD or k == aggregation::VARIANCE, - "std_var_aggregation can accept only STD, VARIANCE"); + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } + + private: + size_t hash_impl() const + { + return std::hash{}(static_cast(_interpolation)) ^ + std::accumulate( + _quantiles.cbegin(), _quantiles.cend(), size_t{0}, [](size_t a, double b) { + return a ^ std::hash{}(b); + }); } }; /** - * @brief Derived class for specifying a standard deviation 
aggregation + * @brief Derived class for specifying an argmax aggregation */ -struct std_aggregation final : std_var_aggregation { - std_aggregation(size_type ddof) : std_var_aggregation{aggregation::STD, ddof} {} - void finalize(aggregation_finalizer& finalizer) override { finalizer.visit(*this); } +class argmax_aggregation final : public rolling_aggregation { + public: + argmax_aggregation() : aggregation(ARGMAX) {} + + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; /** - * @brief Derived class for specifying a variance aggregation + * @brief Derived class for specifying an argmin aggregation */ -struct var_aggregation final : std_var_aggregation { - var_aggregation(size_type ddof) : std_var_aggregation{aggregation::VARIANCE, ddof} {} - void finalize(aggregation_finalizer& finalizer) override { finalizer.visit(*this); } +class argmin_aggregation final : public rolling_aggregation { + public: + argmin_aggregation() : aggregation(ARGMIN) {} + + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; /** * @brief Derived class for specifying a nunique aggregation */ -struct nunique_aggregation final : derived_aggregation { +class nunique_aggregation final : public aggregation { + public: nunique_aggregation(null_policy null_handling) - : derived_aggregation{NUNIQUE}, _null_handling{null_handling} + : aggregation{NUNIQUE}, _null_handling{null_handling} { } - 
null_policy _null_handling; ///< include or exclude nulls - protected: - friend class derived_aggregation; + null_policy _null_handling; ///< include or exclude nulls - bool operator==(nunique_aggregation const& other) const + bool is_equal(aggregation const& _other) const override { + if (!this->aggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); return _null_handling == other._null_handling; } + size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } + + private: size_t hash_impl() const { return std::hash{}(static_cast(_null_handling)); } }; /** * @brief Derived class for specifying a nth element aggregation */ -struct nth_element_aggregation final : derived_aggregation { +class nth_element_aggregation final : public aggregation { + public: nth_element_aggregation(size_type n, null_policy null_handling) - : derived_aggregation{NTH_ELEMENT}, _n{n}, _null_handling{null_handling} + : aggregation{NTH_ELEMENT}, _n{n}, _null_handling{null_handling} { } + size_type _n; ///< nth index to return null_policy _null_handling; ///< include or exclude nulls - protected: - friend class derived_aggregation; - - bool operator==(nth_element_aggregation const& other) const + bool is_equal(aggregation const& _other) const override { + if (!this->aggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); return _n == other._n and _null_handling == other._null_handling; } + size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + + std::unique_ptr clone() const override + { + return 
std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } + + private: size_t hash_impl() const { return std::hash{}(_n) ^ std::hash{}(static_cast(_null_handling)); @@ -281,92 +524,102 @@ struct nth_element_aggregation final : derived_aggregation { - udf_aggregation(aggregation::Kind type, - std::string const& user_defined_aggregator, - data_type output_type) - : derived_aggregation{type}, - _source{user_defined_aggregator}, - _operator_name{(type == aggregation::PTX) ? "rolling_udf_ptx" : "rolling_udf_cuda"}, - _function_name{"rolling_udf"}, - _output_type{output_type} - { - CUDF_EXPECTS(type == aggregation::PTX or type == aggregation::CUDA, - "udf_aggregation can accept only PTX, CUDA"); - } - std::string const _source; - std::string const _operator_name; - std::string const _function_name; - data_type _output_type; - - protected: - friend class derived_aggregation; +class row_number_aggregation final : public rolling_aggregation { + public: + row_number_aggregation() : aggregation(ROW_NUMBER) {} - bool operator==(udf_aggregation const& other) const + std::unique_ptr clone() const override { - return _source == other._source and _operator_name == other._operator_name and - _function_name == other._function_name and _output_type == other._output_type; + return std::make_unique(*this); } - - size_t hash_impl() const + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override { - return std::hash{}(_source) ^ std::hash{}(_operator_name) ^ - std::hash{}(_function_name) ^ - std::hash{}(static_cast(_output_type.id())); + return collector.visit(col_type, *this); } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } 
}; /** * @brief Derived aggregation class for specifying COLLECT_LIST aggregation */ -struct collect_list_aggregation final : derived_aggregation { +class collect_list_aggregation final : public rolling_aggregation { + public: explicit collect_list_aggregation(null_policy null_handling = null_policy::INCLUDE) - : derived_aggregation{COLLECT_LIST}, _null_handling{null_handling} + : aggregation{COLLECT_LIST}, _null_handling{null_handling} { } + null_policy _null_handling; ///< include or exclude nulls - protected: - friend class derived_aggregation; + bool is_equal(aggregation const& _other) const override + { + if (!this->aggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); + return (_null_handling == other._null_handling); + } - bool operator==(nunique_aggregation const& other) const + size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + + std::unique_ptr clone() const override { - return _null_handling == other._null_handling; + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } + private: size_t hash_impl() const { return std::hash{}(static_cast(_null_handling)); } }; /** * @brief Derived aggregation class for specifying COLLECT_SET aggregation */ -struct collect_set_aggregation final : derived_aggregation { +class collect_set_aggregation final : public rolling_aggregation { + public: explicit collect_set_aggregation(null_policy null_handling = null_policy::INCLUDE, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::UNEQUAL) - : derived_aggregation{COLLECT_SET}, + : aggregation{COLLECT_SET}, _null_handling{null_handling}, _nulls_equal(nulls_equal), _nans_equal(nans_equal) { } + null_policy 
_null_handling; ///< include or exclude nulls null_equality _nulls_equal; ///< whether to consider nulls as equal values nan_equality _nans_equal; ///< whether to consider NaNs as equal value (applicable only to ///< floating point types) - protected: - friend class derived_aggregation; + bool is_equal(aggregation const& _other) const override + { + if (!this->aggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); + return (_null_handling == other._null_handling && _nulls_equal == other._nulls_equal && + _nans_equal == other._nans_equal); + } + + size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } - bool operator==(collect_set_aggregation const& other) const + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override { - return _null_handling == other._null_handling && _nulls_equal == other._nulls_equal && - _nans_equal == other._nans_equal; + return collector.visit(col_type, *this); } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } + protected: size_t hash_impl() const { return std::hash{}(static_cast(_null_handling) ^ static_cast(_nulls_equal) ^ @@ -374,6 +627,96 @@ struct collect_set_aggregation final : derived_aggregationaggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); + return (row_offset == other.row_offset); + } + + size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { 
finalizer.visit(*this); } + + size_type row_offset; + + private: + size_t hash_impl() const { return std::hash()(row_offset); } +}; + +/** + * @brief Derived class for specifying a custom aggregation + * specified in udf + */ +class udf_aggregation final : public rolling_aggregation { + public: + udf_aggregation(aggregation::Kind type, + std::string const& user_defined_aggregator, + data_type output_type) + : aggregation{type}, + _source{user_defined_aggregator}, + _operator_name{(type == aggregation::PTX) ? "rolling_udf_ptx" : "rolling_udf_cuda"}, + _function_name{"rolling_udf"}, + _output_type{output_type} + { + CUDF_EXPECTS(type == aggregation::PTX or type == aggregation::CUDA, + "udf_aggregation can accept only PTX, CUDA"); + } + + bool is_equal(aggregation const& _other) const override + { + if (!this->aggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); + return (_source == other._source and _operator_name == other._operator_name and + _function_name == other._function_name and _output_type == other._output_type); + } + + size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } + + std::string const _source; + std::string const _operator_name; + std::string const _function_name; + data_type _output_type; + + protected: + size_t hash_impl() const + { + return std::hash{}(_source) ^ std::hash{}(_operator_name) ^ + std::hash{}(_function_name) ^ + std::hash{}(static_cast(_output_type.id())); + } +}; + /** * @brief Sentinel value used for `ARGMAX` aggregation. 
* @@ -441,9 +784,10 @@ struct target_type_impl { // Except for chrono types where result is chrono. (Use FloorDiv) // TODO: MEAN should be only be enabled for duration types - not for timestamps template -struct target_type_impl() && (k == aggregation::MEAN)>> { +struct target_type_impl< + Source, + k, + std::enable_if_t() && !is_chrono() && (k == aggregation::MEAN)>> { using type = double; }; @@ -689,7 +1033,7 @@ template struct dispatch_aggregation { #pragma nv_exec_check_disable template - CUDA_HOST_DEVICE_CALLABLE decltype(auto) operator()(F&& f, Ts&&... args) const noexcept + CUDA_HOST_DEVICE_CALLABLE decltype(auto) operator()(F&& f, Ts&&... args) const { return f.template operator()(std::forward(args)...); } @@ -700,7 +1044,7 @@ struct dispatch_source { template CUDA_HOST_DEVICE_CALLABLE decltype(auto) operator()(aggregation::Kind k, F&& f, - Ts&&... args) const noexcept + Ts&&... args) const { return aggregation_dispatcher( k, dispatch_aggregation{}, std::forward(f), std::forward(args)...); @@ -763,4 +1107,4 @@ constexpr inline bool is_valid_aggregation() bool is_valid_aggregation(data_type source, aggregation::Kind k); } // namespace detail -} // namespace cudf +} // namespace cudf \ No newline at end of file diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp index 2783bd7729f..aebf0c23469 100644 --- a/cpp/include/cudf/detail/copy.hpp +++ b/cpp/include/cudf/detail/copy.hpp @@ -88,6 +88,47 @@ std::unique_ptr shift( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Performs segmented shifts for specified values. + * + * For each segment, `i`th element is determined by the `i - offset`th element + * of the segment. If `i - offset < 0 or >= segment_size`, the value is determined by + * @p fill_value. 
+ * + * Example: + * @code{.pseudo} + * segmented_values: { 3 1 2 | 3 5 3 | 2 6 } + * segment_offsets: {0 3 6 8} + * offset: 2 + * fill_value: @ + * result: { @ @ 3 | @ @ 3 | @ @ } + * ------------------------------------------------- + * segmented_values: { 3 1 2 | 3 5 3 | 2 6 } + * segment_offsets: {0 3 6 8} + * offset: -1 + * fill_value: -1 + * result: { 1 2 -1 | 5 3 -1 | 6 -1 } + * @endcode + * + * @param segmented_values Segmented column, specified by @p segment_offsets + * @param segment_offsets Each segment's offset of @p segmented_values. A list of offsets + * with size `num_segments + 1`. The size of each segment is `segment_offsets[i+1] - + * segment_offsets[i]`. + * @param offset The offset by which to shift the input + * @param fill_value Fill value for indeterminable outputs + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @note If `offset == 0`, a copy of @p segmented_values is returned. 
+ */ +std::unique_ptr segmented_shift( + column_view const& segmented_values, + device_span segment_offsets, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @copydoc cudf::contiguous_split * diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index fbf68a20364..2051daec00b 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -278,9 +278,9 @@ struct scatter_gather_functor { std::unique_ptr operator()( cudf::column_view const& input, cudf::size_type const& output_size, - cudf::size_type const* block_offsets, + cudf::size_type const*, Filter filter, - cudf::size_type per_thread, + cudf::size_type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 7a560e4c048..1dd0d472d0d 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,6 @@ #include #include -#include #include #include @@ -567,15 +567,14 @@ void gather_bitmask(table_view const& source, } // Make device array of target bitmask pointers - thrust::host_vector target_masks(target.size()); + std::vector target_masks(target.size()); std::transform(target.begin(), target.end(), target_masks.begin(), [](auto const& col) { return col->mutable_view().null_mask(); }); - rmm::device_vector d_target_masks(target_masks); + auto d_target_masks = make_device_uvector_async(target_masks, stream); - auto const masks = d_target_masks.data().get(); auto const device_source = table_device_view::create(source, stream); - auto d_valid_counts = rmm::device_vector(target.size()); + auto d_valid_counts = 
make_zeroed_device_uvector_async(target.size(), stream); // Dispatch operation enum to get implementation auto const impl = [op]() { @@ -591,14 +590,14 @@ void gather_bitmask(table_view const& source, }(); impl(*device_source, gather_map, - masks, + d_target_masks.data(), target.size(), target_rows, - d_valid_counts.data().get(), + d_valid_counts.data(), stream); // Copy the valid counts into each column - auto const valid_counts = thrust::host_vector(d_valid_counts); + auto const valid_counts = make_std_vector_sync(d_valid_counts, stream); for (size_t i = 0; i < target.size(); ++i) { if (target[i]->nullable()) { auto const null_count = target_rows - valid_counts[i]; diff --git a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp new file mode 100644 index 00000000000..5fb7379734f --- /dev/null +++ b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include +namespace cudf { +namespace groupby { +namespace detail { + +/** + * @brief Internal API to replace nulls with preceding/following non-null values in @p value + * + * @param[in] grouped_value A column whose null values will be replaced. + * @param[in] group_labels Group labels for @p grouped_value, corresponding to group keys. 
+ * @param[in] replace_policy Specify the position of replacement values relative to null values. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param[in] mr Device memory resource used to allocate device memory of the returned column. + */ +std::unique_ptr group_replace_nulls( + cudf::column_view const& grouped_value, + device_span group_labels, + cudf::replace_policy replace_policy, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace detail +} // namespace groupby +} // namespace cudf diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index 06f523c2320..83d6be14709 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,17 +29,17 @@ namespace detail { */ std::unique_ptr hash( table_view const& input, - hash_id hash_function = hash_id::HASH_MURMUR3, - std::vector const& initial_hash = {}, - uint32_t seed = 0, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + hash_id hash_function = hash_id::HASH_MURMUR3, + cudf::host_span initial_hash = {}, + uint32_t seed = 0, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr murmur_hash3_32( table_view const& input, - std::vector const& initial_hash = {}, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::host_span initial_hash = {}, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr md5_hash( table_view const& input, diff --git a/cpp/include/cudf/detail/is_element_valid.hpp b/cpp/include/cudf/detail/is_element_valid.hpp new file mode 100644 index 00000000000..fff67f107d9 --- /dev/null +++ b/cpp/include/cudf/detail/is_element_valid.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include + +namespace cudf { +namespace detail { + +/** + * @brief Return validity of a row + * + * Retrieves the validity (NULL or non-NULL) of the specified row from device memory. + * + * @note Synchronizes `stream`. + * + * @throw cudf::logic_error if `element_index < 0 or >= col_view.size()` + * + * @param col_view The column to retrieve the validity from. + * @param element_index The index of the row to retrieve. + * @param stream The stream to use for copying the validity to the host. + * @return Host boolean that indicates the validity of the row. + */ + +bool is_element_valid_sync(column_view const& col_view, + size_type element_index, + rmm::cuda_stream_view stream = rmm::cuda_stream_default); + +} // namespace detail +} // namespace cudf diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 881afa63ca5..4cb0c6e1877 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -167,6 +167,134 @@ auto make_null_replacement_iterator(column_device_view const& column, 0, null_replaced_value_accessor{column, null_replacement, has_nulls}); } +/** + * @brief Constructs an optional iterator over a column's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * make_optional_iterator with mode `DYNAMIC` defers the assumption of nullability to + * runtime, with the user stating on construction of the iterator if column has nulls. 
+ * `DYNAMIC` mode is nice when an algorithm is going to execute on mutliple + * iterators and you don't want to compile all the combinations of iterator types + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * // Create a `DYNAMIC` optional iterator + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::DYNAMIC{}, + * col_view.has_nulls()); + * } + * \endcode + * + * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if column datatype and Element type mismatch. + * + * @tparam Element The type of elements in the column + * @param column The column to iterate + * @return Iterator that returns valid column elements and the validity of the + * element in a thrust::optional + */ +template +auto make_optional_iterator(column_device_view const& column, + contains_nulls::DYNAMIC, + bool has_nulls) +{ + return column.optional_begin(contains_nulls::DYNAMIC{}, has_nulls); +} + +/** + * @brief Constructs an optional iterator over a column's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. 
+ * + * make_optional_iterator with mode `YES` means that the column supports nulls and + * potentially has null values, therefore the optional might not contain a value + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * + * @throws cudf::logic_error if the column is not nullable, and `YES` mode used + * @throws cudf::logic_error if column datatype and Element type mismatch. + * + * @tparam Element The type of elements in the column + * @param column The column to iterate + * @return Iterator that returns column elements and the validity of the + * element as a thrust::optional + */ +template +auto make_optional_iterator(column_device_view const& column, contains_nulls::YES) +{ + return column.optional_begin(contains_nulls::YES{}); +} + +/** + * @brief Constructs an optional iterator over a column's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * make_optional_iterator with mode `NO` means that the column has no null values, + * therefore the optional will always contain a value. 
+ * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * + * @throws cudf::logic_error if column datatype and Element type mismatch. + * + * @tparam Element The type of elements in the column + * @param column The column to iterate + * @return Iterator that returns column elements and the validity of the + * element in a thrust::optional + */ +template +auto make_optional_iterator(column_device_view const& column, contains_nulls::NO) +{ + return column.optional_begin(contains_nulls::NO{}); +} + /** * @brief Constructs a pair iterator over a column's values and its validity. * @@ -320,6 +448,81 @@ auto inline make_scalar_iterator(scalar const& scalar_value) scalar_value_accessor{scalar_value}); } +template +struct scalar_optional_accessor; + +/** + * @brief optional accessor of a maybe-nullable scalar + * + * The scalar_optional_accessor always returns a thrust::optional of the scalar. + * The validity of the optional is determined by the contains_nulls_mode template parameter + * which has the following modes: + * + * `DYNAMIC`: Defer nullability checks to runtime + * + * - When `with_nulls=true` the return value will be a `thrust::optional{scalar}` + * when scalar is valid, and `thrust::optional{}` when the scalar is invalid. + * + * - When `with_nulls=false` the return value will always be `thrust::optional{scalar}` + * + * `NO`: No null values will occur for this scalar, no checks will occur + * and `thrust::optional{scalar}` will always be returned. 
+ * + * `YES`: null values will occur for this scalar, + * and `thrust::optional{scalar}` will always be returned. + * + * @throws `cudf::logic_error` if scalar datatype and Element type mismatch. + * + * @tparam Element The type of return type of functor + */ +template +struct scalar_optional_accessor : public scalar_value_accessor { + using super_t = scalar_value_accessor; + using value_type = thrust::optional; + + scalar_optional_accessor(scalar const& scalar_value) + : scalar_value_accessor(scalar_value) + { + } + + /** + * @brief returns a thrust::optional. + * + * @throw `cudf::logic_error` if this function is called in host. + * + * @return a thrust::optional for the scalar value. + */ + CUDA_HOST_DEVICE_CALLABLE + const value_type operator()(size_type) const + { + if constexpr (std::is_same_v) { + return (super_t::dscalar.is_valid()) ? Element{super_t::dscalar.value()} + : value_type{thrust::nullopt}; + } + return Element{super_t::dscalar.value()}; + } +}; + +template +struct scalar_optional_accessor + : public scalar_value_accessor { + using super_t = scalar_value_accessor; + using value_type = thrust::optional; + bool has_nulls; + + scalar_optional_accessor(scalar const& scalar_value, bool with_nulls) + : scalar_value_accessor(scalar_value), has_nulls{with_nulls} + { + } + + CUDA_HOST_DEVICE_CALLABLE + const value_type operator()(size_type) const + { + return (has_nulls and !super_t::dscalar.is_valid()) ? value_type{thrust::nullopt} + : Element{super_t::dscalar.value()}; + } +}; + /** * @brief pair accessor for scalar. * The unary functor returns a pair of data of Element type and bool validity of the scalar. @@ -415,6 +618,163 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. 
+ * + * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing. + * + * make_optional_iterator with mode `DYNAMIC` defers the assumption of nullability to + * runtime, with the user stating on construction of the iterator if scalar has nulls. + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view, + * scalar const& scalar_value, + * bool col_has_nulls){ + * auto d_col = cudf::column_device_view::create(col_view); + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + cudf::contains_nulls::DYNAMIC{}, col_has_nulls); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + cudf::contains_nulls::DYNAMIC{}, scalar_value.is_valid()); + * //use iterators + * } + * \endcode + * + * @throws cudf::logic_error if the scalar is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if scalar datatype and Element type mismatch. + * + * @tparam Element The type of elements in the scalar + * @tparam has_nulls If the scalar value will have a null at runtime + * @param scalar_value The scalar to iterate + * @return Iterator that returns scalar elements and validity of the + * element in a thrust::optional + */ +template +auto inline make_optional_iterator(scalar const& scalar_value, + contains_nulls::DYNAMIC, + bool has_nulls) +{ + CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), + "the data type mismatch"); + return thrust::make_transform_iterator( + thrust::make_constant_iterator(0), + scalar_optional_accessor{scalar_value, has_nulls}); +} + +/** + * @brief Constructs an optional iterator over a scalar's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. 
+ *
+ * When the element of an iterator contextually converted to bool, the conversion returns true
+ * if the object contains a value and false if it does not contain a value.
+ *
+ * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing.
+ *
+ * make_optional_iterator with mode `YES` means that the scalar supports nulls and
+ * potentially has null values, therefore the optional might not contain a value
+ * (it will be empty when the scalar is invalid).
+ *
+ * Example:
+ *
+ * \code{.cpp}
+ * template
+ * void some_function(cudf::column_view const& col_view, scalar const& scalar_value){
+ * auto d_col = cudf::column_device_view::create(col_view);
+ * if constexpr(any_nulls) {
+ * auto column_iterator = cudf::detail::make_optional_iterator(d_col,
+ * cudf::contains_nulls::YES{});
+ * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value,
+ * cudf::contains_nulls::YES{});
+ * //use iterators
+ * } else {
+ * auto column_iterator = cudf::detail::make_optional_iterator(d_col,
+ * cudf::contains_nulls::NO{});
+ * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value,
+ * cudf::contains_nulls::NO{});
+ * //use iterators
+ * }
+ * }
+ * \endcode
+ *
+ * @throws cudf::logic_error if the scalar is not nullable, and `YES` mode used
+ * @throws cudf::logic_error if scalar datatype and Element type mismatch.
+ * + * @tparam Element The type of elements in the scalar + * @param scalar_value The scalar to iterate + * @return Iterator that returns scalar elements and the validity of the + * element in a thrust::optional + */ +template +auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::YES) +{ + CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), + "the data type mismatch"); + return thrust::make_transform_iterator( + thrust::make_constant_iterator(0), + scalar_optional_accessor{scalar_value}); +} + +/** + * @brief Constructs an optional iterator over a scalar's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing. + * + * make_optional_iterator with mode `NO` means that the scalar has no null values, + * therefore the optional will always contain a value. + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view, scalar const& scalar_value){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(any_nulls) { + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::YES{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + * cudf::contains_nulls::YES{}); + * //use iterators + * } else { + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::NO{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + * cudf::contains_nulls::NO{}); + * //use iterators + * } + * } + * \endcode + * + * @throws cudf::logic_error if scalar datatype and Element type mismatch. 
+ * + * @tparam Element The type of elements in the scalar + * @param scalar_value The scalar to iterate + * @return Iterator that returns scalar elements and the validity of the + * element in a thrust::optional + */ +template +auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::NO) +{ + CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), + "the data type mismatch"); + return thrust::make_transform_iterator( + thrust::make_constant_iterator(0), + scalar_optional_accessor{scalar_value}); +} + /** * @brief Constructs a constant device pair iterator over a scalar's value and its validity. * diff --git a/cpp/include/cudf/detail/merge.cuh b/cpp/include/cudf/detail/merge.cuh index 06f9bfc5034..a938a3a053a 100644 --- a/cpp/include/cudf/detail/merge.cuh +++ b/cpp/include/cudf/detail/merge.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,12 +30,12 @@ enum class side : bool { LEFT, RIGHT }; * @brief Tagged index type: `thrust::get<0>` indicates left/right side, * `thrust::get<1>` indicates the row index */ -using index_type = thrust::tuple; +using index_type = thrust::pair; /** * @brief Vector of `index_type` values. 
*/ -using index_vector = rmm::device_vector; +using index_vector = rmm::device_uvector; /** * @brief tagged_element_relational_comparator uses element_relational_comparator to provide @@ -80,11 +80,11 @@ struct tagged_element_relational_comparator { __device__ weak_ordering compare(index_type lhs_tagged_index, index_type rhs_tagged_index) const noexcept { - side l_side = thrust::get<0>(lhs_tagged_index); - side r_side = thrust::get<0>(rhs_tagged_index); + side const l_side = thrust::get<0>(lhs_tagged_index); + side const r_side = thrust::get<0>(rhs_tagged_index); - cudf::size_type l_indx = thrust::get<1>(lhs_tagged_index); - cudf::size_type r_indx = thrust::get<1>(rhs_tagged_index); + cudf::size_type const l_indx = thrust::get<1>(lhs_tagged_index); + cudf::size_type const r_indx = thrust::get<1>(rhs_tagged_index); column_device_view const* ptr_left_dview{l_side == side::LEFT ? &lhs : &rhs}; diff --git a/cpp/include/cudf/detail/replace/nulls.cuh b/cpp/include/cudf/detail/replace/nulls.cuh new file mode 100644 index 00000000000..1500bdfb0b8 --- /dev/null +++ b/cpp/include/cudf/detail/replace/nulls.cuh @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +#include + +namespace cudf { +namespace detail { + +using idx_valid_pair_t = thrust::tuple; + +/** + * @brief Functor used by `replace_nulls(replace_policy)` to determine the index to gather from in + * the result column. + * + * Binary functor passed to `inclusive_scan` or `inclusive_scan_by_key`. Arguments are a tuple of + * index and validity of a row. Returns a tuple of current index and a discarded boolean if current + * row is valid, otherwise a tuple of the nearest non-null row index and a discarded boolean. + */ +struct replace_policy_functor { + __device__ idx_valid_pair_t operator()(idx_valid_pair_t const& lhs, idx_valid_pair_t const& rhs) + { + return thrust::get<1>(rhs) ? thrust::make_tuple(thrust::get<0>(rhs), true) + : thrust::make_tuple(thrust::get<0>(lhs), true); + } +}; + +} // namespace detail +} // namespace cudf diff --git a/cpp/include/cudf/detail/rolling.hpp b/cpp/include/cudf/detail/rolling.hpp new file mode 100644 index 00000000000..2b06d11c5a9 --- /dev/null +++ b/cpp/include/cudf/detail/rolling.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +#include + +#include + +namespace cudf { +namespace detail { + +/** + * @copydoc std::unique_ptr rolling_window( + * column_view const& input, + * column_view const& preceding_window, + * column_view const& following_window, + * size_type min_periods, + * rolling_aggregation const& agg, + * rmm::mr::device_memory_resource* mr) + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr rolling_window( + column_view const& input, + column_view const& preceding_window, + column_view const& following_window, + size_type min_periods, + rolling_aggregation const& agg, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace detail +} // namespace cudf diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp new file mode 100644 index 00000000000..5691adecb5e --- /dev/null +++ b/cpp/include/cudf/detail/scan.hpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +namespace cudf { +namespace detail { + +/** + * @brief Computes the exclusive scan of a column. + * + * The null values are skipped for the operation, and if an input element + * at `i` is null, then the output element at `i` will also be null. 
+ * + * The identity value for the column type as per the aggregation type + * is used for the value of the first element in the output column. + * + * @throws cudf::logic_error if column data_type is not an arithmetic type. + * + * @param input The input column view for the scan + * @param agg unique_ptr to aggregation operator applied by the scan + * @param null_handling Exclude null values when computing the result if + * null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE. + * Any operation with a null results in a null. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @returns Column with scan results + */ +std::unique_ptr scan_exclusive(column_view const& input, + std::unique_ptr const& agg, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief Computes the inclusive scan of a column. + * + * The null values are skipped for the operation, and if an input element + * at `i` is null, then the output element at `i` will also be null. + * + * String columns are allowed with aggregation types Min and Max. + * + * @throws cudf::logic_error if column data_type is not an arithmetic type + * or string type but the `agg` is not Min or Max + * + * @param input The input column view for the scan + * @param agg unique_ptr to aggregation operator applied by the scan + * @param null_handling Exclude null values when computing the result if + * null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE. + * Any operation with a null results in a null. + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @param mr Device memory resource used to allocate the returned scalar's device memory + * @returns Column with scan results + */ +std::unique_ptr scan_inclusive(column_view const& input, + std::unique_ptr const& agg, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace cudf diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index d069ed06cae..410cd213618 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -296,7 +296,7 @@ struct column_scatterer_impl { // We still need to call `gather_bitmask` even when the source's children are not nullable, // as if the target's children have null_masks, those null_masks need to be updated after - // being scattered onto + // being scattered onto. auto const child_nullable = std::any_of(structs_src.child_begin(), structs_src.child_end(), [](auto const& col) { return col.nullable(); }) or @@ -315,9 +315,9 @@ struct column_scatterer_impl { mr); } - // Need to put the result column in a vector to call `gather_bitmask` + // Need to put the result column in a vector to call `gather_bitmask`. std::vector> result; - result.emplace_back(cudf::make_structs_column(source.size(), + result.emplace_back(cudf::make_structs_column(target.size(), std::move(output_struct_members), 0, rmm::device_buffer{0, stream, mr}, @@ -325,7 +325,7 @@ struct column_scatterer_impl { mr)); // Only gather bitmask from the target column for the rows that have not been scattered onto - // The bitmask from the source column will be gathered at the top level `scatter()` call + // The bitmask from the source column will be gathered at the top level `scatter()` call. 
if (target.nullable()) { auto const gather_map = scatter_to_gather_complement(scatter_map_begin, scatter_map_end, target.size(), stream); @@ -402,7 +402,7 @@ std::unique_ptr scatter( CUDF_EXPECTS(std::distance(scatter_map_begin, scatter_map_end) <= source.num_rows(), "scatter map size should be <= to number of rows in source"); - // Transform negative indices to index + target size + // Transform negative indices to index + target size. auto updated_scatter_map_begin = thrust::make_transform_iterator(scatter_map_begin, index_converter{target.num_rows()}); auto updated_scatter_map_end = @@ -425,7 +425,7 @@ std::unique_ptr
scatter( }); // We still need to call `gather_bitmask` even when the source columns are not nullable, - // as if the target has null_mask, that null_mask needs to be updated after scattering + // as if the target has null_mask, that null_mask needs to be updated after scattering. auto const nullable = std::any_of(source.begin(), source.end(), [](auto const& col) { return col.nullable(); }) or std::any_of(target.begin(), target.end(), [](auto const& col) { return col.nullable(); }); @@ -433,6 +433,25 @@ std::unique_ptr
scatter( auto const gather_map = scatter_to_gather( updated_scatter_map_begin, updated_scatter_map_end, target.num_rows(), stream); gather_bitmask(source, gather_map.begin(), result, gather_bitmask_op::PASSTHROUGH, stream, mr); + + // For struct columns, we need to superimpose the null_mask of the parent over the null_mask of + // the children. + std::for_each(result.begin(), result.end(), [=](auto& col) { + auto const col_view = col->view(); + if (col_view.type().id() == type_id::STRUCT and col_view.nullable()) { + auto const num_rows = col_view.size(); + auto const null_count = col_view.null_count(); + auto contents = col->release(); + + // Children null_mask will be superimposed during structs column construction. + col = cudf::make_structs_column(num_rows, + std::move(contents.children), + null_count, + std::move(*contents.null_mask), + stream, + mr); + } + }); } return std::make_unique
(std::move(result)); } diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index 5bc12fb0713..87823d71c6f 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -71,6 +71,7 @@ std::unique_ptr
drop_duplicates( std::vector const& keys, duplicate_keep_option keep, null_equality nulls_equal = null_equality::EQUAL, + null_order null_precedence = null_order::BEFORE, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh index 33c61414a1c..11dbba70c3f 100644 --- a/cpp/include/cudf/detail/utilities/cuda.cuh +++ b/cpp/include/cudf/detail/utilities/cuda.cuh @@ -134,7 +134,7 @@ cudf::size_type elements_per_thread(Kernel kernel, int num_sms = 0; CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device)); int per_thread = total_size / (max_blocks * num_sms * block_size); - return std::max(1, std::min(per_thread, max_per_thread)); // switch to std::clamp with C++17 + return std::clamp(per_thread, 1, max_per_thread); } /** diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh index 246817a5cb5..16b7da0a083 100644 --- a/cpp/include/cudf/detail/utilities/device_atomics.cuh +++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh @@ -42,9 +42,6 @@ namespace cudf { namespace detail { -// TODO: remove this if C++17 is supported. -// `static_assert` requires a string literal at C++14. 
-#define errmsg_cast "`long long int` has different size to `int64_t`" template __forceinline__ __device__ T_output type_reinterpret(T_input value) @@ -142,7 +139,7 @@ struct genericAtomicOperationImpl { __forceinline__ __device__ T operator()(T* addr, T const& update_value, Op op) { using T_int = unsigned long long int; - static_assert(sizeof(T) == sizeof(T_int), errmsg_cast); + static_assert(sizeof(T) == sizeof(T_int)); T old_value = *addr; T assumed{old_value}; @@ -210,7 +207,7 @@ struct genericAtomicOperationImpl { __forceinline__ __device__ T operator()(T* addr, T const& update_value, DeviceSum op) { using T_int = unsigned long long int; - static_assert(sizeof(T) == sizeof(T_int), errmsg_cast); + static_assert(sizeof(T) == sizeof(T_int)); T ret = atomicAdd(reinterpret_cast(addr), type_reinterpret(update_value)); return ret; } @@ -240,7 +237,7 @@ struct genericAtomicOperationImpl { __forceinline__ __device__ T operator()(T* addr, T const& update_value, DeviceMin op) { using T_int = long long int; - static_assert(sizeof(T) == sizeof(T_int), errmsg_cast); + static_assert(sizeof(T) == sizeof(T_int)); T ret = atomicMin(reinterpret_cast(addr), type_reinterpret(update_value)); return ret; } @@ -252,7 +249,7 @@ struct genericAtomicOperationImpl { __forceinline__ __device__ T operator()(T* addr, T const& update_value, DeviceMax op) { using T_int = long long int; - static_assert(sizeof(T) == sizeof(T_int), errmsg_cast); + static_assert(sizeof(T) == sizeof(T_int)); T ret = atomicMax(reinterpret_cast(addr), type_reinterpret(update_value)); return ret; } @@ -271,7 +268,7 @@ struct genericAtomicOperationImpl { __forceinline__ __device__ T operator()(T* addr, T const& update_value, DeviceAnd op) { using T_int = long long int; - static_assert(sizeof(T) == sizeof(T_int), errmsg_cast); + static_assert(sizeof(T) == sizeof(T_int)); T ret = atomicAnd(reinterpret_cast(addr), type_reinterpret(update_value)); return ret; } @@ -290,7 +287,7 @@ struct genericAtomicOperationImpl { 
__forceinline__ __device__ T operator()(T* addr, T const& update_value, DeviceOr op) { using T_int = long long int; - static_assert(sizeof(T) == sizeof(T_int), errmsg_cast); + static_assert(sizeof(T) == sizeof(T_int)); T ret = atomicOr(reinterpret_cast(addr), type_reinterpret(update_value)); return ret; } @@ -309,7 +306,7 @@ struct genericAtomicOperationImpl { __forceinline__ __device__ T operator()(T* addr, T const& update_value, DeviceXor op) { using T_int = long long int; - static_assert(sizeof(T) == sizeof(T_int), errmsg_cast); + static_assert(sizeof(T) == sizeof(T_int)); T ret = atomicXor(reinterpret_cast(addr), type_reinterpret(update_value)); return ret; } @@ -400,7 +397,7 @@ struct typesAtomicCASImpl { __forceinline__ __device__ T operator()(T* addr, T const& compare, T const& update_value) { using T_int = unsigned long long int; - static_assert(sizeof(T) == sizeof(T_int), errmsg_cast); + static_assert(sizeof(T) == sizeof(T_int)); T_int ret = atomicCAS(reinterpret_cast(addr), type_reinterpret(compare), @@ -503,6 +500,28 @@ __forceinline__ __device__ T atomicAdd(T* address, T val) return cudf::genericAtomicOperation(address, val, cudf::DeviceSum{}); } +/** + * @brief Overloads for `atomicMul` + * reads the `old` located at the `address` in global or shared memory, + * computes (old * val), and stores the result back to memory at the same + * address. These three operations are performed in one atomic transaction. + * + * The supported cudf types for `atomicMul` are: + * int8_t, int16_t, int32_t, int64_t, float, double, and bool + * + * All types are implemented by `atomicCAS`. 
+ * + * @param[in] address The address of old value in global or shared memory + * @param[in] val The value to be multiplied + * + * @returns The old value at `address` + */ +template +__forceinline__ __device__ T atomicMul(T* address, T val) +{ + return cudf::genericAtomicOperation(address, val, cudf::DeviceProduct{}); +} + /** * @brief Overloads for `atomicMin` * reads the `old` located at the `address` in global or shared memory, diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 7f3c05134e2..6eab13ae9af 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -91,21 +91,21 @@ void CUDA_DEVICE_CALLABLE md5_process(TKey const& key, md5_intermediate_data* ha // 64 bytes for the number of byt es processed in a given step constexpr int md5_chunk_size = 64; if (hash_state->buffer_length + len < md5_chunk_size) { - thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length); + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); hash_state->buffer_length += len; } else { uint32_t copylen = md5_chunk_size - hash_state->buffer_length; - thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); md5_hash_step(hash_state); while (len > md5_chunk_size + copylen) { - thrust::copy_n(thrust::seq, data + copylen, md5_chunk_size, hash_state->buffer); + std::memcpy(hash_state->buffer, data + copylen, md5_chunk_size); md5_hash_step(hash_state); copylen += md5_chunk_size; } - thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer); + std::memcpy(hash_state->buffer, data + copylen, len - copylen); hash_state->buffer_length = len - copylen; } } @@ -146,7 +146,7 @@ void CUDA_DEVICE_CALLABLE uint32ToLowercaseHexString(uint32_t num, char* destina x |= 
0x3030303030303030; x += offsets; - thrust::copy_n(thrust::seq, reinterpret_cast(&x), 8, destination); + std::memcpy(destination, reinterpret_cast(&x), 8); } struct MD5ListHasher { @@ -211,20 +211,20 @@ MD5ListHasher::operator()(column_device_view data_col, hash_state->message_length += len; if (hash_state->buffer_length + len < 64) { - thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length); + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); hash_state->buffer_length += len; } else { uint32_t copylen = 64 - hash_state->buffer_length; - thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); md5_hash_step(hash_state); while (len > 64 + copylen) { - thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer); + std::memcpy(hash_state->buffer, data + copylen, 64); md5_hash_step(hash_state); copylen += 64; } - thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer); + std::memcpy(hash_state->buffer, data + copylen, len - copylen); hash_state->buffer_length = len - copylen; } } @@ -262,10 +262,9 @@ struct MD5Hash { thrust::fill_n(thrust::seq, hash_state->buffer, md5_chunk_size - message_length_size, 0x00); } - thrust::copy_n(thrust::seq, - reinterpret_cast(&full_length), - message_length_size, - hash_state->buffer + md5_chunk_size - message_length_size); + std::memcpy(hash_state->buffer + md5_chunk_size - message_length_size, + reinterpret_cast(&full_length), + message_length_size); md5_hash_step(hash_state); #pragma unroll @@ -323,20 +322,20 @@ void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view co hash_state->message_length += len; if (hash_state->buffer_length + len < 64) { - thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length); + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); 
hash_state->buffer_length += len; } else { uint32_t copylen = 64 - hash_state->buffer_length; - thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); md5_hash_step(hash_state); while (len > 64 + copylen) { - thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer); + std::memcpy(hash_state->buffer, data + copylen, 64); md5_hash_step(hash_state); copylen += 64; } - thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer); + std::memcpy(hash_state->buffer, data + copylen, len - copylen); hash_state->buffer_length = len - copylen; } } @@ -549,6 +548,20 @@ hash_value_type CUDA_DEVICE_CALLABLE MurmurHash3_32::operator()(double c return this->compute_floating_point(key); } +template <> +hash_value_type CUDA_DEVICE_CALLABLE +MurmurHash3_32::operator()(numeric::decimal32 const& key) const +{ + return this->compute(key.value()); +} + +template <> +hash_value_type CUDA_DEVICE_CALLABLE +MurmurHash3_32::operator()(numeric::decimal64 const& key) const +{ + return this->compute(key.value()); +} + template <> hash_value_type CUDA_DEVICE_CALLABLE MurmurHash3_32::operator()(cudf::list_view const& key) const diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 030d2c331c5..1e735719400 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -14,20 +14,70 @@ * limitations under the License. */ +#pragma once + /** * @brief Convenience factories for creating device vectors from host spans * @file vector_factories.hpp */ +#include #include #include #include #include +#include + namespace cudf { namespace detail { +/** + * @brief Asynchronously construct a `device_uvector` and set all elements to zero. + * + * @note This function does not synchronize `stream`. 
+ * + * @tparam T The type of the data to copy + * @param size The number of elements in the created vector + * @param stream The stream on which to allocate memory and perform the memset + * @param mr The memory resource to use for allocating the returned device_uvector + * @return A device_uvector containing zeros + */ +template +rmm::device_uvector make_zeroed_device_uvector_async( + std::size_t size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + rmm::device_uvector ret(size, stream, mr); + CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); + return ret; +} + +/** + * @brief Synchronously construct a `device_uvector` and set all elements to zero. + * + * @note This function synchronizes `stream`. + * + * @tparam T The type of the data to copy + * @param size The number of elements in the created vector + * @param stream The stream on which to allocate memory and perform the memset + * @param mr The memory resource to use for allocating the returned device_uvector + * @return A device_uvector containing zeros + */ +template +rmm::device_uvector make_zeroed_device_uvector_sync( + std::size_t size, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + rmm::device_uvector ret(size, stream, mr); + CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); + stream.synchronize(); + return ret; +} + /** * @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a * `host_span` @@ -43,7 +93,7 @@ namespace detail { template rmm::device_uvector make_device_uvector_async( host_span source_data, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { rmm::device_uvector ret(source_data.size(), stream, mr); @@ -74,7 +124,7 @@ 
template >::value>* = nullptr> rmm::device_uvector make_device_uvector_async( Container const& c, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { return make_device_uvector_async(host_span{c}, stream, mr); @@ -127,7 +177,7 @@ template < nullptr> rmm::device_uvector make_device_uvector_async( Container const& c, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { return make_device_uvector_async( @@ -231,6 +281,181 @@ rmm::device_uvector make_device_uvector_sync( return make_device_uvector_sync(device_span{c}, stream, mr); } +// Utility function template to allow copying to either a thrust::host_vector or std::vector +template +OutContainer make_vector_async(device_span v, rmm::cuda_stream_view stream) +{ + OutContainer result(v.size()); + CUDA_TRY(cudaMemcpyAsync( + result.data(), v.data(), v.size() * sizeof(T), cudaMemcpyDeviceToHost, stream.value())); + return result; +} + +/** + * @brief Asynchronously construct a `std::vector` containing a copy of data from a + * `device_span` + * + * @note This function does not synchronize `stream`. + * + * @tparam T The type of the data to copy + * @param source_data The device data to copy + * @param stream The stream on which to perform the copy + * @return The data copied to the host + */ +template +std::vector make_std_vector_async(device_span v, rmm::cuda_stream_view stream) +{ + return make_vector_async>(v, stream); +} + +/** + * @brief Asynchronously construct a `std::vector` containing a copy of data from a device + * container + * + * @note This function synchronizes `stream`. 
+ * + * @tparam Container The type of the container to copy from + * @tparam T The type of the data to copy + * @param c The input device container from which to copy + * @param stream The stream on which to perform the copy + * @return The data copied to the host + */ +template < + typename Container, + std::enable_if_t< + std::is_convertible>::value>* = + nullptr> +std::vector make_std_vector_async(Container const& c, + rmm::cuda_stream_view stream) +{ + return make_std_vector_async(device_span{c}, stream); +} + +/** + * @brief Synchronously construct a `std::vector` containing a copy of data from a + * `device_span` + * + * @note This function does a synchronize on `stream`. + * + * @tparam T The type of the data to copy + * @param source_data The device data to copy + * @param stream The stream on which to perform the copy + * @return The data copied to the host + */ +template +std::vector make_std_vector_sync(device_span v, rmm::cuda_stream_view stream) +{ + auto result = make_std_vector_async(v, stream); + stream.synchronize(); + return result; +} + +/** + * @brief Synchronously construct a `std::vector` containing a copy of data from a device + * container + * + * @note This function synchronizes `stream`. + * + * @tparam Container The type of the container to copy from + * @tparam T The type of the data to copy + * @param c The input device container from which to copy + * @param stream The stream on which to perform the copy + * @return The data copied to the host + */ +template < + typename Container, + std::enable_if_t< + std::is_convertible>::value>* = + nullptr> +std::vector make_std_vector_sync( + Container const& c, rmm::cuda_stream_view stream = rmm::cuda_stream_default) +{ + return make_std_vector_sync(device_span{c}, stream); +} + +/** + * @brief Asynchronously construct a `thrust::host_vector` containing a copy of data from a + * `device_span` + * + * @note This function does not synchronize `stream`. 
+ * + * @tparam T The type of the data to copy + * @param source_data The device data to copy + * @param stream The stream on which to perform the copy + * @return The data copied to the host + */ +template +thrust::host_vector make_host_vector_async(device_span v, rmm::cuda_stream_view stream) +{ + return make_vector_async>(v, stream); +} + +/** + * @brief Asynchronously construct a `thrust::host_vector` containing a copy of data from a device + * container + * + * @note This function does not synchronize `stream`. + * + * @tparam Container The type of the container to copy from + * @tparam T The type of the data to copy + * @param c The input device container from which to copy + * @param stream The stream on which to perform the copy + * @return The data copied to the host + */ +template < + typename Container, + std::enable_if_t< + std::is_convertible>::value>* = + nullptr> +thrust::host_vector make_host_vector_async( + Container const& c, rmm::cuda_stream_view stream) +{ + return make_host_vector_async(device_span{c}, stream); +} + +/** + * @brief Synchronously construct a `thrust::host_vector` containing a copy of data from a + * `device_span` + * + * @note This function does a synchronize on `stream`. + * + * @tparam T The type of the data to copy + * @param source_data The device data to copy + * @param stream The stream on which to perform the copy + * @return The data copied to the host + */ +template +thrust::host_vector make_host_vector_sync( + device_span v, rmm::cuda_stream_view stream = rmm::cuda_stream_default) +{ + auto result = make_host_vector_async(v, stream); + stream.synchronize(); + return result; +} + +/** + * @brief Synchronously construct a `thrust::host_vector` containing a copy of data from a device + * container + * + * @note This function synchronizes `stream`.
+ * + * @tparam Container The type of the container to copy from + * @tparam T The type of the data to copy + * @param c The input device container from which to copy + * @param stream The stream on which to perform the copy + * @return The data copied to the host + */ +template < + typename Container, + std::enable_if_t< + std::is_convertible>::value>* = + nullptr> +thrust::host_vector make_host_vector_sync( + Container const& c, rmm::cuda_stream_view stream = rmm::cuda_stream_default) +{ + return make_host_vector_sync(device_span{c}, stream); +} + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index c685837ae2b..11ce9199c2d 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -25,7 +25,6 @@ #include #include -#include #include namespace cudf { diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index 952075b1703..d7bc9e02eff 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -31,20 +31,8 @@ //! `fixed_point` and supporting types namespace numeric { -/** \cond HIDDEN_SYMBOLS */ -// This is a wrapper struct that enforces "strong typing" -// at the construction site of the type. No implicit -// conversions will be allowed and you will need to use the -// name of the type alias (i.e. scale_type{0}) -template -struct strong_typedef { - T _t; - CUDA_HOST_DEVICE_CALLABLE explicit constexpr strong_typedef(T t) : _t(t) {} - CUDA_HOST_DEVICE_CALLABLE operator T() const { return _t; } -}; -/** \endcond */ -using scale_type = strong_typedef; +enum scale_type : int32_t {}; /** * @brief Scoped enumerator to use when constructing `fixed_point` @@ -76,8 +64,7 @@ namespace detail { * * https://simple.wikipedia.org/wiki/Exponentiation_by_squaring
* Note: this is the iterative equivalent of the recursive definition (faster)
- * Quick-bench: http://quick-bench.com/Wg7o7HYQC9FW5M0CO0wQAjSwP_Y
- * `exponent` comes from `using scale_type = strong_typedef`
+ * Quick-bench: http://quick-bench.com/Wg7o7HYQC9FW5M0CO0wQAjSwP_Y * * @tparam Rep Representation type for return type * @tparam Base The base to be exponentiated @@ -106,14 +93,6 @@ CUDA_HOST_DEVICE_CALLABLE Rep ipow(T exponent) return square * extra; } -/** @brief Helper function to negate strongly typed scale_type - * - * @param scale The scale to be negated - * @return The negated scale - */ -CUDA_HOST_DEVICE_CALLABLE -auto negate(scale_type const& scale) { return scale_type{-scale}; } - /** @brief Function that performs a `right shift` scale "times" on the `val` * * Note: perform this operation when constructing with positive scale @@ -128,7 +107,7 @@ auto negate(scale_type const& scale) { return scale_type{-scale}; } template CUDA_HOST_DEVICE_CALLABLE constexpr T right_shift(T const& val, scale_type const& scale) { - return val / ipow(scale._t); + return val / ipow(static_cast(scale)); } /** @brief Function that performs a `left shift` scale "times" on the `val` @@ -145,7 +124,7 @@ CUDA_HOST_DEVICE_CALLABLE constexpr T right_shift(T const& val, scale_type const template CUDA_HOST_DEVICE_CALLABLE constexpr T left_shift(T const& val, scale_type const& scale) { - return val * ipow(-scale._t); + return val * ipow(static_cast(-scale)); } /** @brief Function that performs a `right` or `left shift` @@ -197,7 +176,7 @@ template ::value>* = nullptr> explicit constexpr operator U() const { - return detail::shift(static_cast(_value), detail::negate(_scale)); + return detail::shift(static_cast(_value), scale_type{-_scale}); } /** @@ -302,7 +281,7 @@ class fixed_point { { // Don't cast to U until converting to Rep because in certain cases casting to U before shifting // will result in integer overflow (i.e. 
if U = int32_t, Rep = int64_t and _value > 2 billion) - return static_cast(detail::shift(_value, detail::negate(_scale))); + return static_cast(detail::shift(_value, scale_type{-_scale})); } CUDA_HOST_DEVICE_CALLABLE operator scaled_integer() const diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index 19f87873873..85c469f58f8 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -17,10 +17,13 @@ #pragma once #include +#include +#include #include #include #include +#include #include #include @@ -222,6 +225,62 @@ class groupby { host_span requests, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** + * @brief Performs grouped shifts for specified values. + * + * In `j`th column, for each group, `i`th element is determined by the `i - offsets[j]`th + * element of the group. If `i - offsets[j] < 0 or >= group_size`, the value is determined by + * @p fill_values[j]. + * + * @note The first returned table stores the keys passed to the groupby object. Row `i` of the key + * table corresponds to the group labels of row `i` in the shifted columns. The key order in + * each group matches the input order. The order of each group is arbitrary. The group order + * in successive calls to `groupby::shifts` may be different. 
+ * + * Example: + * @code{.pseudo} + * keys: {1 4 1 3 4 4 1} + * {1 2 1 3 2 2 1} + * values: {3 9 1 4 2 5 7} + * {"a" "c" "bb" "ee" "z" "x" "d"} + * offset: {2, -1} + * fill_value: {@, @} + * result (group order may be different): + * keys: {3 1 1 1 4 4 4} + * {3 1 1 1 2 2 2} + * values: {@ @ @ 3 @ @ 9} + * {@ "bb" "d" @ "z" "x" @} + * + * ------------------------------------------------- + * keys: {1 4 1 3 4 4 1} + * {1 2 1 3 2 2 1} + * values: {3 9 1 4 2 5 7} + * {"a" "c" "bb" "ee" "z" "x" "d"} + * offset: {-2, 1} + * fill_value: {-1, "42"} + * result (group order may be different): + * keys: {3 1 1 1 4 4 4} + * {3 1 1 1 2 2 2} + * values: {-1 7 -1 -1 5 -1 -1} + * {"42" "42" "a" "bb" "42" "c" "z"} + * + * @endcode + * + * @param values Table whose columns are to be shifted + * @param offsets The offsets by which to shift the input + * @param fill_values Fill values for indeterminable outputs + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * @return Pair containing the tables with each group's key and the columns shifted + * + * @throws cudf::logic_error if @p fill_value[i] dtype does not match @p values[i] dtype for + * `i`th column + */ + std::pair, std::unique_ptr
> shift( + table_view const& values, + host_span offsets, + std::vector> const& fill_values, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief The grouped data corresponding to a groupby operation on a set of values. * @@ -251,6 +310,46 @@ class groupby { groups get_groups(cudf::table_view values = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** + * @brief Performs grouped replace nulls on @p value + * + * For each `value[i] == NULL` in group `j`, `value[i]` is replaced with the first non-null value + * in group `j` that precedes or follows `value[i]`. If a non-null value is not found in the + * specified direction, `value[i]` is left NULL. + * + * The returned pair contains a column of the sorted keys and the result column. In result column, + * values of the same group are in contiguous memory. In each group, the order of values maintain + * their original order. The order of groups are not guaranteed. + * + * Example: + * @code{.pseudo} + * + * //Inputs: + * keys: {3 3 1 3 1 3 4} + * {2 2 1 2 1 2 5} + * values: {3 4 7 @ @ @ @} + * {@ @ @ "x" "tt" @ @} + * replace_policies: {FORWARD, BACKWARD} + * + * //Outputs (group orders may be different): + * keys: {3 3 3 3 1 1 4} + * {2 2 2 2 1 1 5} + * result: {3 4 4 4 7 7 @} + * {"x" "x" "x" @ "tt" "tt" @} + * @endcode + * + * @param[in] values A table whose column null values will be replaced. + * @param[in] replace_policies Specify the position of replacement values relative to null values, + * one for each column + * @param[in] mr Device memory resource used to allocate device memory of the returned column. + * + * @return Pair that contains a table with the sorted keys and the result column + */ + std::pair, std::unique_ptr
> replace_nulls( + table_view const& values, + host_span replace_policies, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + private: table_view _keys; ///< Keys that determine grouping null_policy _include_null_keys{null_policy::EXCLUDE}; ///< Include rows in keys diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index 0fb5002a953..73bff0b36e5 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include +#include namespace cudf { /** @@ -29,18 +30,18 @@ namespace cudf { * @brief Computes the hash value of each row in the input set of columns. * * @param input The table of columns to hash - * @param initial_hash Optional vector of initial hash values for each column. - * If this vector is empty then each element will be hashed as-is. + * @param initial_hash Optional host_span of initial hash values for each column. + * If this span is empty then each element will be hashed as-is. * @param mr Device memory resource used to allocate the returned column's device memory. 
* * @returns A column where each row is the hash of a column from the input */ std::unique_ptr hash( table_view const& input, - hash_id hash_function = hash_id::HASH_MURMUR3, - std::vector const& initial_hash = {}, - uint32_t seed = DEFAULT_HASH_SEED, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + hash_id hash_function = hash_id::HASH_MURMUR3, + cudf::host_span initial_hash = {}, + uint32_t seed = DEFAULT_HASH_SEED, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 9dbde1432aa..bbe0eb0eaac 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -35,8 +35,8 @@ namespace cudf { /** * @brief Convert a DLPack DLTensor into a cudf table * - * The `device_type` of the DLTensor must be `kDLGPU`, `kDLCPU`, or - * `kDLCPUPinned`, and `device_id` must match the current device. The `ndim` + * The `device_type` of the DLTensor must be `kDLCPU`, `kDLCuda`, or + * `kDLCUDAHost`, and `device_id` must match the current device. The `ndim` * must be set to 1 or 2. The `dtype` must have 1 lane and the bitsize must * match a supported `cudf::data_type`. 
* diff --git a/cpp/include/cudf/io/detail/avro.hpp b/cpp/include/cudf/io/detail/avro.hpp index 40090dbc438..4310d0e7c4b 100644 --- a/cpp/include/cudf/io/detail/avro.hpp +++ b/cpp/include/cudf/io/detail/avro.hpp @@ -43,22 +43,26 @@ class reader { * * @param filepaths Paths to the files containing the input dataset * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit reader(std::vector const &filepaths, avro_reader_options const &options, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr); /** * @brief Constructor from an array of datasources * * @param sources Input `datasource` objects to read the dataset from * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit reader(std::vector> &&sources, avro_reader_options const &options, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr); /** * @brief Destructor explicitly-declared to avoid inlined in header diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp index 7790c2ceee1..8ec2818c2ca 100644 --- a/cpp/include/cudf/io/detail/csv.hpp +++ b/cpp/include/cudf/io/detail/csv.hpp @@ -38,22 +38,26 @@ class reader { * * @param filepaths Paths to the files containing the input dataset * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit reader(std::vector const &filepaths, csv_reader_options 
const &options, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr); /** * @brief Constructor from an array of datasources * * @param sources Input `datasource` objects to read the dataset from * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit reader(std::vector> &&sources, csv_reader_options const &options, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr); /** * @brief Destructor explicitly-declared to avoid inlined in header @@ -83,14 +87,15 @@ class writer { * * @param sinkp The data sink to write the data to * @param options Settings for controlling writing behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ writer(std::unique_ptr sinkp, csv_writer_options const &options, - rmm::mr::device_memory_resource *mr = - rmm::mr::get_current_device_resource()); // cannot provide definition here (because - // _impl is incomplete, hence unique_ptr has - // not enough sizeof() info) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr); // cannot provide definition here (because + // _impl is incomplete hence unique_ptr has + // not enough sizeof() info) /** * @brief Destructor explicitly-declared to avoid inlined in header diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp index 2176381879a..6ed93dc5c25 100644 --- a/cpp/include/cudf/io/detail/json.hpp +++ b/cpp/include/cudf/io/detail/json.hpp @@ -51,22 +51,26 @@ class reader { * * @param filepaths Paths to the files containing the input dataset * @param options Settings for 
controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit reader(std::vector const &filepaths, json_reader_options const &options, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr); /** * @brief Constructor from an array of datasources * * @param sources Input `datasource` objects to read the dataset from * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit reader(std::vector> &&sources, json_reader_options const &options, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr); /** * @brief Destructor explicitly-declared to avoid inlined in header diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index b8b6bc79159..ab26c01db74 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -52,22 +52,26 @@ class reader { * * @param filepaths Paths to the files containing the input dataset * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit reader(std::vector const& filepaths, orc_reader_options const& options, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Constructor from an array of datasources * * @param sources Input `datasource` objects to read the dataset from * @param options Settings for controlling reading 
behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit reader(std::vector>&& sources, orc_reader_options const& options, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Destructor explicitly declared to avoid inlining in header @@ -101,14 +105,14 @@ class writer { * @param sink The data sink to write the data to * @param options Settings for controlling writing behavior * @param mode Option to write at once or in chunks - * @param mr Device memory resource to use for device memory allocation * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation */ explicit writer(std::unique_ptr sink, orc_writer_options const& options, - SingleWriteMode mode = SingleWriteMode::NO, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(), - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + SingleWriteMode mode, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Constructor with chunked writer options. 
@@ -116,14 +120,14 @@ class writer { * @param sink The data sink to write the data to * @param options Settings for controlling writing behavior * @param mode Option to write at once or in chunks - * @param mr Device memory resource to use for device memory allocation * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation */ explicit writer(std::unique_ptr sink, chunked_orc_writer_options const& options, - SingleWriteMode mode = SingleWriteMode::YES, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(), - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + SingleWriteMode mode, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Destructor explicitly declared to avoid inlining in header diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 2c946dae748..d95af7a11da 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -54,22 +54,26 @@ class reader { * * @param filepaths Paths to the files containing the input dataset * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit reader(std::vector const& filepaths, parquet_reader_options const& options, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Constructor from an array of datasources * * @param sources Input `datasource` objects to read the dataset from * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit 
reader(std::vector>&& sources, parquet_reader_options const& options, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Destructor explicitly-declared to avoid inlined in header @@ -103,14 +107,14 @@ class writer { * @param sink The data sink to write the data to * @param options Settings for controlling writing behavior * @param mode Option to write at once or in chunks - * @param mr Device memory resource to use for device memory allocation * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation */ explicit writer(std::unique_ptr sink, parquet_writer_options const& options, - SingleWriteMode mode = SingleWriteMode::YES, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(), - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + SingleWriteMode mode, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Constructor for writer to handle chunked parquet options. 
@@ -118,16 +122,16 @@ class writer { * @param sink The data sink to write the data to * @param options Settings for controlling writing behavior for chunked writer * @param mode Option to write at once or in chunks - * @param mr Device memory resource to use for device memory allocation * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation * * @return A parquet-compatible blob that contains the data for all rowgroups in the list */ explicit writer(std::unique_ptr sink, chunked_parquet_writer_options const& options, - SingleWriteMode mode = SingleWriteMode::NO, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(), - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + SingleWriteMode mode, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Destructor explicitly-declared to avoid inlined in header diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 6be82b4968c..28e51351730 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -63,12 +63,6 @@ class orc_reader_options { // Cast timestamp columns to a specific type data_type _timestamp_type{type_id::EMPTY}; - // Whether to convert decimals to float64 - bool _decimals_as_float64 = true; - // For decimals as int, optional forced decimal scale; - // -1 is auto (column scale), >=0: number of fractional digits - size_type _forced_decimals_scale = -1; - friend orc_reader_options_builder; /** @@ -134,16 +128,6 @@ class orc_reader_options { */ data_type get_timestamp_type() const { return _timestamp_type; } - /** - * @brief Whether to convert decimals to float64. - */ - bool is_enabled_decimals_as_float64() const { return _decimals_as_float64; } - - /** - * @brief Returns whether decimal scale is inferred or forced to have limited fractional digits. 
- */ - size_type get_forced_decimals_scale() const { return _forced_decimals_scale; } - // Setters /** @@ -207,20 +191,6 @@ class orc_reader_options { * @param type Type of timestamp. */ void set_timestamp_type(data_type type) { _timestamp_type = type; } - - /** - * @brief Enable/Disable conversion of decimals to float64. - * - * @param val Boolean value to enable/disable. - */ - void set_decimals_as_float64(bool val) { _decimals_as_float64 = val; } - - /** - * @brief Sets whether decimal scale is inferred or forced to have limited fractional digits. - * - * @param val Length of fractional digits. - */ - void set_forced_decimals_scale(size_type val) { _forced_decimals_scale = val; } }; class orc_reader_options_builder { @@ -325,30 +295,6 @@ class orc_reader_options_builder { return *this; } - /** - * @brief Enable/Disable conversion of decimals to float64. - * - * @param val Boolean value to enable/disable. - * @return this for chaining. - */ - orc_reader_options_builder& decimals_as_float64(bool val) - { - options._decimals_as_float64 = val; - return *this; - } - - /** - * @brief Sets whether decimal scale is inferred or forced to have limited fractional digits. - * - * @param val Length of fractional digits. - * @return this for chaining. - */ - orc_reader_options_builder& forced_decimals_scale(size_type val) - { - options._forced_decimals_scale = val; - return *this; - } - /** * @brief move orc_reader_options member once it's built. */ diff --git a/cpp/include/cudf/io/orc_metadata.hpp b/cpp/include/cudf/io/orc_metadata.hpp index 906df3f1005..807fab2e85c 100644 --- a/cpp/include/cudf/io/orc_metadata.hpp +++ b/cpp/include/cudf/io/orc_metadata.hpp @@ -23,6 +23,8 @@ #include +#include +#include #include namespace cudf { @@ -61,21 +63,9 @@ struct raw_orc_statistics { raw_orc_statistics read_raw_orc_statistics(source_info const& src_info); /** - * @brief Enumerator for types of column statistics that can be included in `column_statistics`. 
- * - * The statistics type depends on the column data type. + * @brief Monostate type alias for the statistics variant. */ -enum class statistics_type { - NONE, - INT, - DOUBLE, - STRING, - BUCKET, - DECIMAL, - DATE, - BINARY, - TIMESTAMP, -}; +using no_statistics = std::monostate; /** * @brief Base class for column statistics that include optional minimum and maximum. @@ -84,13 +74,8 @@ enum class statistics_type { */ template struct minmax_statistics { - std::unique_ptr _minimum; - std::unique_ptr _maximum; - - auto has_minimum() const { return _minimum != nullptr; } - auto has_maximum() const { return _maximum != nullptr; } - auto minimum() const { return _minimum.get(); } - auto maximum() const { return _maximum.get(); } + std::optional minimum; + std::optional maximum; }; /** @@ -100,24 +85,19 @@ struct minmax_statistics { */ template struct sum_statistics { - std::unique_ptr _sum; - - auto has_sum() const { return _sum != nullptr; } - auto sum() const { return _sum.get(); } + std::optional sum; }; /** * @brief Statistics for integral columns. */ struct integer_statistics : minmax_statistics, sum_statistics { - static constexpr statistics_type type = statistics_type::INT; }; /** * @brief Statistics for floating point columns. */ struct double_statistics : minmax_statistics, sum_statistics { - static constexpr statistics_type type = statistics_type::DOUBLE; }; /** @@ -128,7 +108,6 @@ struct double_statistics : minmax_statistics, sum_statistics { * Note: According to ORC specs, the sum should be signed, but pyarrow uses unsigned value */ struct string_statistics : minmax_statistics, sum_statistics { - static constexpr statistics_type type = statistics_type::STRING; }; /** @@ -137,34 +116,26 @@ struct string_statistics : minmax_statistics, sum_statistics _count; - - auto count(size_t index) const { return &_count.at(index); } + std::vector count; }; /** * @brief Statistics for decimal columns. 
*/ struct decimal_statistics : minmax_statistics, sum_statistics { - static constexpr statistics_type type = statistics_type::DECIMAL; }; /** * @brief Statistics for date(time) columns. */ -struct date_statistics : minmax_statistics { - static constexpr statistics_type type = statistics_type::DATE; -}; +using date_statistics = minmax_statistics; /** * @brief Statistics for binary columns. * * The `sum` is the total number of bytes across all elements. */ -struct binary_statistics : sum_statistics { - static constexpr statistics_type type = statistics_type::BINARY; -}; +using binary_statistics = sum_statistics; /** * @brief Statistics for timestamp columns. @@ -173,14 +144,8 @@ struct binary_statistics : sum_statistics { * the UNIX epoch. The `minimum_utc` and `maximum_utc` are the same values adjusted to UTC. */ struct timestamp_statistics : minmax_statistics { - static constexpr statistics_type type = statistics_type::TIMESTAMP; - std::unique_ptr _minimum_utc; - std::unique_ptr _maximum_utc; - - auto has_minimum_utc() const { return _minimum_utc != nullptr; } - auto has_maximum_utc() const { return _maximum_utc != nullptr; } - auto minimum_utc() const { return _minimum_utc.get(); } - auto maximum_utc() const { return _maximum_utc.get(); } + std::optional minimum_utc; + std::optional maximum_utc; }; namespace orc { @@ -196,40 +161,20 @@ struct column_statistics; * All columns can have the `number_of_values` statistics. Depending on the data type, a column can * have additional statistics, accessible through `type_specific_stats` accessor. 
*/ -class column_statistics { - private: - std::unique_ptr _number_of_values; - statistics_type _type = statistics_type::NONE; - void* _type_specific_stats = nullptr; - - public: - column_statistics() = default; - column_statistics(cudf::io::orc::column_statistics&& other); - - column_statistics& operator=(column_statistics&&) noexcept; - column_statistics(column_statistics&&) noexcept; - - auto has_number_of_values() const { return _number_of_values != nullptr; } - auto number_of_values() const { return _number_of_values.get(); } - - auto type() const { return _type; } - - /** - * @brief Returns a non-owning pointer to the type-specific statistics of the given type. - * - * Returns null if the requested statistics type does not match the type of the currently held - * type-specific statistics. - * - * @tparam T the statistics type - */ - template - T const* type_specific_stats() const - { - if (T::type != _type) return nullptr; - return static_cast(_type_specific_stats); - } - - ~column_statistics(); +struct column_statistics { + std::optional number_of_values; + std::variant + type_specific_stats; + + column_statistics(cudf::io::orc::column_statistics&& detail_statistics); }; /** diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 7cb3db1eb30..178e46a0c5c 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -392,6 +392,17 @@ class column_in_metadata { std::vector children; public: + /** + * @brief Get the children of this column metadata + * + * @return this for chaining + */ + column_in_metadata& add_child(column_in_metadata const& child) + { + children.push_back(child); + return *this; + } + /** * @brief Set the name of this column * diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index 5a2c913d4c3..428a4195bf8 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -424,13 +424,13 @@ std::unique_ptr> left_anti_join( * TableB: {{1, 2, 3}, {1, 2, 5}} * left_on: 
{0} * right_on: {1} - * Result: {{0}, {1}} + * Result: {{0}} * * TableA: {{0, 1, 2}, {1, 2, 5}} * TableB: {{1, 2, 3}} * left_on: {0} * right_on: {0} - * Result: { {0} {1} } + * Result: { {0}, {1} } * @endcode * * @throw cudf::logic_error if number of elements in `left_on` or `right_on` diff --git a/cpp/include/cudf/lists/combine.hpp b/cpp/include/cudf/lists/combine.hpp new file mode 100644 index 00000000000..a9407ed57ca --- /dev/null +++ b/cpp/include/cudf/lists/combine.hpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace cudf { +namespace lists { +/** + * @addtogroup lists_combine + * @{ + * @file + */ + +/* + * @brief Flag to specify whether a null list element will be ignored from concatenation, or the + * entire concatenation result involving null list elements will be a null element. + */ +enum class concatenate_null_policy { IGNORE, NULLIFY_OUTPUT_ROW }; + +/** + * @brief Row-wise concatenating multiple lists columns into a single lists column. + * + * The output column is generated by concatenating the elements within each row of the input + * table. If any row of the input table contains null elements, the concatenation process will + * either ignore those null elements, or will simply set the entire resulting row to be a null + * element. 
+ * + * @code{.pseudo} + * s1 = [{0, 1}, {2, 3, 4}, {5}, {}, {6, 7}] + * s2 = [{8}, {9}, {}, {10, 11, 12}, {13, 14, 15, 16}] + * r = lists::concatenate_rows(s1, s2) + * r is now [{0, 1, 8}, {2, 3, 4, 9}, {5}, {10, 11, 12}, {6, 7, 13, 14, 15, 16}] + * @endcode + * + * @throws cudf::logic_error if any column of the input table is not a lists columns. + * @throws cudf::logic_error if any lists column contains nested typed entry. + * @throws cudf::logic_error if all lists columns do not have the same entry type. + * + * @param input Table of lists to be concatenated. + * @param null_policy The parameter to specify whether a null list element will be ignored from + * concatenation, or any concatenation involving a null element will result in a null list. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return A new column in which each row is a list resulted from concatenating all list elements in + * the corresponding row of the input table. + */ +std::unique_ptr concatenate_rows( + table_view const& input, + concatenate_null_policy null_policy = concatenate_null_policy::IGNORE, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Concatenating multiple lists on the same row of a lists column into a single list. + * + * Given a lists column where each row in the column is a list of lists of entries, an output lists + * column is generated by concatenating all the list elements at the same row together. If any row + * contains null list elements, the concatenation process will either ignore those null elements, or + * will simply set the entire resulting row to be a null element. 
+ * + * @code{.pseudo} + * l = [ [{1, 2}, {3, 4}, {5}], [{6}, {}, {7, 8, 9}] ] + * r = lists::concatenate_list_elements(l); + * r is [ {1, 2, 3, 4, 5}, {6, 7, 8, 9} ] + * @endcode + * + * @throws cudf::logic_error if the input column is not at least two-level depth lists column (i.e., + * each row must be a list of list). + * @throws cudf::logic_error if the input lists column contains nested typed entries that are not + * lists. + * + * @param input The lists column containing lists of list elements to concatenate. + * @param null_policy The parameter to specify whether a null list element will be ignored from + * concatenation, or any concatenation involving a null element will result in a null list. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return A new column in which each row is a list resulted from concatenating all list elements in + * the corresponding row of the input lists column. + */ +std::unique_ptr concatenate_list_elements( + column_view const& input, + concatenate_null_policy null_policy = concatenate_null_policy::IGNORE, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** @} */ // end of group +} // namespace lists +} // namespace cudf diff --git a/cpp/include/cudf/lists/detail/combine.hpp b/cpp/include/cudf/lists/detail/combine.hpp new file mode 100644 index 00000000000..9f28074173a --- /dev/null +++ b/cpp/include/cudf/lists/detail/combine.hpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +namespace cudf { +namespace lists { +namespace detail { +/** + * @copydoc cudf::lists::concatenate_rows + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr concatenate_rows( + table_view const& input, + concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @copydoc cudf::lists::concatenate_list_elements + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr concatenate_list_elements( + column_view const& input, + concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace detail +} // namespace lists +} // namespace cudf diff --git a/cpp/include/cudf/lists/detail/copying.hpp b/cpp/include/cudf/lists/detail/copying.hpp index cfa1980e665..3760294f079 100644 --- a/cpp/include/cudf/lists/detail/copying.hpp +++ b/cpp/include/cudf/lists/detail/copying.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ namespace detail { * @param start Index to first list to select in the column * @param end One past the index to last list to select in the column * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocatet the returned column's device memory. + * @param mr Device memory resource used to allocate the returned column's device memory. 
* @return New lists column of size (end - start) */ std::unique_ptr copy_slice(lists_column_view const& lists, @@ -47,6 +47,7 @@ std::unique_ptr copy_slice(lists_column_view const& lists, size_type end, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + } // namespace detail } // namespace lists } // namespace cudf diff --git a/cpp/include/cudf/lists/detail/interleave_columns.hpp b/cpp/include/cudf/lists/detail/interleave_columns.hpp new file mode 100644 index 00000000000..7ae90779fdc --- /dev/null +++ b/cpp/include/cudf/lists/detail/interleave_columns.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +namespace cudf { +namespace lists { +namespace detail { + +/** + * @brief Returns a single column by interleaving rows of the given table of list elements. + * + * @code{.pseudo} + * s1 = [{0, 1}, {2, 3, 4}, {5}, {}, {6, 7}] + * s2 = [{8}, {9}, {}, {10, 11, 12}, {13, 14, 15, 16}] + * r = lists::interleave_columns(s1, s2) + * r is now [{0, 1}, {8}, {2, 3, 4}, {9}, {5}, {}, {}, {10, 11, 12}, {6, 7}, {13, 14, 15, 16}] + * @endcode + * + * @throws cudf::logic_error if any column of the input table is not a lists columns. + * @throws cudf::logic_error if any lists column contains nested typed entry. + * @throws cudf::logic_error if all lists columns do not have the same entry type. 
+ * + * @param input Table containing lists columns to interleave. + * @param has_null_mask A boolean flag indicating that the input columns have a null mask. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return The interleaved columns as a single column. + */ +std::unique_ptr interleave_columns( + table_view const& input, + bool has_null_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace detail +} // namespace lists +} // namespace cudf diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index 8e2ecdf49a7..dac67545748 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -32,6 +34,8 @@ #include #include +#include +#include #include @@ -46,8 +50,8 @@ namespace { * also holding a reference to the list column. * * Analogous to the list_view, this class is default constructable, - * and can thus be stored in rmm::device_vector. It is used to represent - * the results of a `scatter()` operation; a device_vector may hold + * and can thus be stored in rmm::device_uvector. It is used to represent + * the results of a `scatter()` operation; a device_uvector may hold * several instances of unbound_list_view, each with a flag indicating * whether it came from the scatter source or target. Each instance * may later be "bound" to the appropriate source/target column, to @@ -131,7 +135,7 @@ struct unbound_list_view { } private: - // Note: Cannot store reference to list column, because of storage in device_vector. + // Note: Cannot store reference to list column, because of storage in device_uvector. 
// Only keep track of whether this list row came from the source or target of scatter. label_type _label{ @@ -140,19 +144,22 @@ struct unbound_list_view { size_type _size{}; // Number of elements in *this* list row. }; +template rmm::device_uvector list_vector_from_column( unbound_list_view::label_type label, cudf::detail::lists_column_device_view const& lists_column, + IndexIterator index_begin, + IndexIterator index_end, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto n_rows = lists_column.size(); + auto n_rows = thrust::distance(index_begin, index_end); auto vector = rmm::device_uvector(n_rows, stream, mr); thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(n_rows), + index_begin, + index_end, vector.begin(), [label, lists_column] __device__(size_type row_index) { return unbound_list_view{label, lists_column, row_index}; @@ -203,43 +210,6 @@ std::pair construct_child_nullmask( mr); } -#ifndef NDEBUG -void print(std::string const& msg, column_view const& col, rmm::cuda_stream_view stream) -{ - if (col.type().id() != type_id::INT32) { - std::cout << "[Cannot print non-INT32 column.]" << std::endl; - return; - } - - std::cout << msg << " = ["; - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - col.size(), - [c = col.template data()] __device__(auto const& i) { printf("%d,", c[i]); }); - std::cout << "]" << std::endl; -} - -void print(std::string const& msg, - rmm::device_uvector const& scatter, - rmm::cuda_stream_view stream) -{ - std::cout << msg << " == ["; - - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - scatter.size(), - [s = scatter.begin()] __device__(auto const& i) { - auto si = s[i]; - printf("%s[%d](%d), ", - (si.label() == unbound_list_view::label_type::SOURCE ? 
"S" : "T"), - si.row_index(), - si.size()); - }); - std::cout << "]" << std::endl; -} -#endif // NDEBUG - /** * @brief (type_dispatch endpoint) Functor that constructs the child column result * of `scatter()`ing a list column. @@ -247,7 +217,7 @@ void print(std::string const& msg, * The protocol is as follows: * * Inputs: - * 1. list_vector: A device_vector of unbound_list_view, with each element + * 1. list_vector: A device_uvector of unbound_list_view, with each element * indicating the position, size, and which column the list * row came from. * 2. list_offsets: The offsets column for the (outer) lists column, each offset @@ -336,69 +306,46 @@ struct list_child_constructor { auto const num_child_rows{ cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; - auto const child_null_mask = + auto child_null_mask = source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() ? construct_child_nullmask( list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) : std::make_pair(rmm::device_buffer{}, 0); -#ifndef NDEBUG - print("list_offsets ", list_offsets, stream); - print("source_lists.child() ", source_lists_column_view.child(), stream); - print("source_lists.offsets() ", source_lists_column_view.offsets(), stream); - print("target_lists.child() ", target_lists_column_view.child(), stream); - print("target_lists.offsets() ", target_lists_column_view.offsets(), stream); - print("scatter_rows ", list_vector, stream); -#endif // NDEBUG - - auto child_column = cudf::make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, + auto child_column = cudf::make_fixed_width_column(source_lists_column_view.child().type(), num_child_rows, - child_null_mask.first, + std::move(child_null_mask.first), child_null_mask.second, stream, mr); - auto copy_child_values_for_list_index = [d_scattered_lists = - list_vector.begin(), // unbound_list_view* - d_child_column = - child_column->mutable_view().data(), - 
d_offsets = list_offsets.template data(), - source_lists, - target_lists] __device__(auto const& row_index) { - auto const unbound_list_row = d_scattered_lists[row_index]; - auto const actual_list_row = unbound_list_row.bind_to_column(source_lists, target_lists); - auto const& bound_column = - (unbound_list_row.label() == unbound_list_view::label_type::SOURCE ? source_lists - : target_lists); - auto const list_begin_offset = - bound_column.offsets().template element(unbound_list_row.row_index()); - auto const list_end_offset = - bound_column.offsets().template element(unbound_list_row.row_index() + 1); - -#ifndef NDEBUG - printf( - "%d: Unbound == %s[%d](%d), Bound size == %d, calc_begin==%d, calc_end=%d, calc_size=%d\n", - row_index, - (unbound_list_row.label() == unbound_list_view::label_type::SOURCE ? "S" : "T"), - unbound_list_row.row_index(), - unbound_list_row.size(), - actual_list_row.size(), - list_begin_offset, - list_end_offset, - list_end_offset - list_begin_offset); -#endif // NDEBUG - - // Copy all elements in this list row, to "appropriate" offset in child-column. 
- auto const destination_start_offset = d_offsets[row_index]; - thrust::for_each_n(thrust::seq, - thrust::make_counting_iterator(0), - actual_list_row.size(), - [actual_list_row, d_child_column, destination_start_offset] __device__( - auto const& list_element_index) { - d_child_column[destination_start_offset + list_element_index] = - actual_list_row.template element(list_element_index); - }); - }; + auto copy_child_values_for_list_index = + [d_scattered_lists = list_vector.begin(), // unbound_list_view* + d_child_column = child_column->mutable_view().data(), + d_offsets = list_offsets.template data(), + source_lists, + target_lists] __device__(auto const& row_index) { + auto const unbound_list_row = d_scattered_lists[row_index]; + auto const actual_list_row = unbound_list_row.bind_to_column(source_lists, target_lists); + auto const& bound_column = + (unbound_list_row.label() == unbound_list_view::label_type::SOURCE ? source_lists + : target_lists); + auto const list_begin_offset = + bound_column.offsets().template element(unbound_list_row.row_index()); + auto const list_end_offset = + bound_column.offsets().template element(unbound_list_row.row_index() + 1); + + // Copy all elements in this list row, to "appropriate" offset in child-column. + auto const destination_start_offset = d_offsets[row_index]; + thrust::for_each_n(thrust::seq, + thrust::make_counting_iterator(0), + actual_list_row.size(), + [actual_list_row, d_child_column, destination_start_offset] __device__( + auto const& list_element_index) { + d_child_column[destination_start_offset + list_element_index] = + actual_list_row.template element(list_element_index); + }); + }; // For each list-row, copy underlying elements to the child column. 
thrust::for_each_n(rmm::exec_policy(stream), @@ -431,6 +378,8 @@ struct list_child_constructor { auto const num_child_rows{ cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; + if (num_child_rows == 0) { return make_empty_column(data_type{type_id::STRING}); } + auto string_views = rmm::device_uvector(num_child_rows, stream); auto populate_string_views = [d_scattered_lists = list_vector.begin(), // unbound_list_view* @@ -521,6 +470,11 @@ struct list_child_constructor { auto const num_child_rows{ cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; + if (num_child_rows == 0) { + // make an empty lists column using the input child type + return empty_like(source_lists_column_view.child()); + } + auto child_list_views = rmm::device_uvector(num_child_rows, stream, mr); // Function to convert from parent list_device_view instances to child list_device_views. @@ -641,7 +595,7 @@ struct list_child_constructor { std::make_unique(structs_list_offsets, stream, mr), std::make_unique(structs_member, stream, mr), structs_list_null_count, - rmm::device_buffer(structs_list_nullmask), + rmm::device_buffer(structs_list_nullmask, stream), stream, mr); }; @@ -704,64 +658,46 @@ struct list_child_constructor { void assert_same_data_type(column_view const& lhs, column_view const& rhs) { CUDF_EXPECTS(lhs.type().id() == rhs.type().id(), "Mismatched Data types."); - CUDF_EXPECTS(lhs.num_children() == rhs.num_children(), "Mismatched number of child columns."); + // Empty string column has no children + CUDF_EXPECTS(lhs.type().id() == type_id::STRING or lhs.num_children() == rhs.num_children(), + "Mismatched number of child columns."); for (int i{0}; i < lhs.num_children(); ++i) { assert_same_data_type(lhs.child(i), rhs.child(i)); } } -} // namespace - /** - * @brief Scatters lists into a copy of the target column - * according to a scatter map. 
+ * @brief General implementation of scattering into list column * - * The scatter is performed according to the scatter iterator such that row - * `scatter_map[i]` of the output column is replaced by the source list-row. - * All other rows of the output column equal corresponding rows of the target table. + * Scattering `source` into `target` according to `scatter_map`. + * The view order of `source` and `target` can be specified by + * `source_vector` and `target_vector` respectively. * - * If the same index appears more than once in the scatter map, the result is - * undefined. - * - * The caller must update the null mask in the output column. - * - * @tparam SourceIterator must produce list_view objects * @tparam MapIterator must produce index values within the target column. * + * @param source_vector A vector of `unbound_list_view` into source column + * @param target_vector A vector of `unbound_list_view` into target column + * @param scatter_map_begin Start iterator of scatter map + * @param scatter_map_end End iterator of scatter map + * @param source Source column view + * @param target Target column view * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory * @return New lists column. 
*/ template -std::unique_ptr scatter( - column_view const& source, +std::unique_ptr scatter_impl( + rmm::device_uvector const& source_vector, + rmm::device_uvector& target_vector, MapIterator scatter_map_begin, MapIterator scatter_map_end, + column_view const& source, column_view const& target, - rmm::cuda_stream_view stream = 0, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto const num_rows = target.size(); - - if (num_rows == 0) { return cudf::empty_like(target); } - - auto const child_column_type = lists_column_view(target).child().type(); - assert_same_data_type(source, target); - using lists_column_device_view = cudf::detail::lists_column_device_view; - using unbound_list_view = cudf::lists::detail::unbound_list_view; - - auto const source_device_view = column_device_view::create(source, stream); - auto const source_vector = list_vector_from_column(unbound_list_view::label_type::SOURCE, - lists_column_device_view(*source_device_view), - stream, - mr); - - auto const target_device_view = column_device_view::create(target, stream); - auto target_vector = list_vector_from_column(unbound_list_view::label_type::TARGET, - lists_column_device_view(*target_device_view), - stream, - mr); + auto const child_column_type = lists_column_view(target).child().type(); // Scatter. thrust::scatter(rmm::exec_policy(stream), @@ -792,7 +728,7 @@ std::unique_ptr scatter( auto null_mask = target.has_nulls() ? copy_bitmask(target, stream, mr) : rmm::device_buffer{0, stream, mr}; - return cudf::make_lists_column(num_rows, + return cudf::make_lists_column(target.size(), std::move(offsets_column), std::move(child_column), cudf::UNKNOWN_NULL_COUNT, @@ -801,6 +737,143 @@ std::unique_ptr scatter( mr); } +} // namespace + +/** + * @brief Scatters lists into a copy of the target column + * according to a scatter map. 
+ * + * The scatter is performed according to the scatter iterator such that row + * `scatter_map[i]` of the output column is replaced by the source list-row. + * All other rows of the output column equal corresponding rows of the target table. + * + * If the same index appears more than once in the scatter map, the result is + * undefined. + * + * The caller must update the null mask in the output column. + * + * @tparam MapIterator must produce index values within the target column. + * + * @param source Source column view + * @param scatter_map_begin Start iterator of scatter map + * @param scatter_map_end End iterator of scatter map + * @param target Target column view + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New lists column. + */ +template +std::unique_ptr scatter( + column_view const& source, + MapIterator scatter_map_begin, + MapIterator scatter_map_end, + column_view const& target, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto const num_rows = target.size(); + if (num_rows == 0) { return cudf::empty_like(target); } + + auto const source_device_view = column_device_view::create(source, stream); + auto const scatter_map_size = thrust::distance(scatter_map_begin, scatter_map_end); + auto const source_vector = + list_vector_from_column(unbound_list_view::label_type::SOURCE, + cudf::detail::lists_column_device_view(*source_device_view), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(scatter_map_size), + stream, + mr); + + auto const target_device_view = column_device_view::create(target, stream); + auto target_vector = + list_vector_from_column(unbound_list_view::label_type::TARGET, + cudf::detail::lists_column_device_view(*target_device_view), + thrust::make_counting_iterator(0), + 
thrust::make_counting_iterator(num_rows), + stream, + mr); + + return scatter_impl( + source_vector, target_vector, scatter_map_begin, scatter_map_end, source, target, stream, mr); +} + +/** + * @brief Scatters list scalar (a single row) into a copy of the target column + * according to a scatter map. + * + * Returns a copy of the target column where every row specified in the `scatter_map` + * is replaced by the row value. + * + * If the same index appears more than once in the scatter map, the result is + * undefined. + * + * The caller must update the null mask in the output column. + * + * @tparam MapIterator must produce index values within the target column. + * + * @param slr Source scalar, specifying row data + * @param scatter_map_begin Start iterator of scatter map + * @param scatter_map_end End iterator of scatter map + * @param target Target column view + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New lists column. + */ +template +std::unique_ptr scatter( + scalar const& slr, + MapIterator scatter_map_begin, + MapIterator scatter_map_end, + column_view const& target, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto const num_rows = target.size(); + if (num_rows == 0) { return cudf::empty_like(target); } + + auto lv = static_cast(&slr); + bool slr_valid = slr.is_valid(stream); + rmm::device_buffer null_mask = + slr_valid ? 
cudf::detail::create_null_mask(1, mask_state::UNALLOCATED, stream, mr) + : cudf::detail::create_null_mask(1, mask_state::ALL_NULL, stream, mr); + auto offset_column = make_numeric_column( + data_type{type_to_id()}, 2, mask_state::UNALLOCATED, stream, mr); + thrust::sequence(rmm::exec_policy(stream), + offset_column->mutable_view().begin(), + offset_column->mutable_view().end(), + 0, + lv->view().size()); + auto wrapped = column_view(data_type{type_id::LIST}, + 1, + nullptr, + static_cast(null_mask.data()), + slr_valid ? 0 : 1, + 0, + {offset_column->view(), lv->view()}); + + auto const source_device_view = column_device_view::create(wrapped, stream); + auto const scatter_map_size = thrust::distance(scatter_map_begin, scatter_map_end); + auto const source_vector = + list_vector_from_column(unbound_list_view::label_type::SOURCE, + cudf::detail::lists_column_device_view(*source_device_view), + thrust::make_constant_iterator(0), + thrust::make_constant_iterator(0) + scatter_map_size, + stream, + mr); + + auto const target_device_view = column_device_view::create(target, stream); + auto target_vector = + list_vector_from_column(unbound_list_view::label_type::TARGET, + cudf::detail::lists_column_device_view(*target_device_view), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + stream, + mr); + + return scatter_impl( + source_vector, target_vector, scatter_map_begin, scatter_map_end, wrapped, target, stream, mr); +} + } // namespace detail } // namespace lists } // namespace cudf diff --git a/cpp/include/cudf/lists/list_device_view.cuh b/cpp/include/cudf/lists/list_device_view.cuh index 4f207474526..802639f2393 100644 --- a/cpp/include/cudf/lists/list_device_view.cuh +++ b/cpp/include/cudf/lists/list_device_view.cuh @@ -40,10 +40,10 @@ class list_device_view { cudf_assert(row_index >= 0 && row_index < lists_column.size() && row_index < offsets.size() && "row_index out of bounds"); - begin_offset = offsets.element(row_index); + 
begin_offset = offsets.element(row_index + lists_column.offset()); cudf_assert(begin_offset >= 0 && begin_offset <= lists_column.child().size() && "begin_offset out of bounds."); - _size = offsets.element(row_index + 1) - begin_offset; + _size = offsets.element(row_index + 1 + lists_column.offset()) - begin_offset; } ~list_device_view() = default; diff --git a/cpp/include/cudf/lists/lists_column_device_view.cuh b/cpp/include/cudf/lists/lists_column_device_view.cuh index 187b9c2cf6a..d8f082c9a42 100644 --- a/cpp/include/cudf/lists/lists_column_device_view.cuh +++ b/cpp/include/cudf/lists/lists_column_device_view.cuh @@ -75,6 +75,12 @@ class lists_column_device_view { */ CUDA_DEVICE_CALLABLE bool is_null(size_type idx) const { return underlying.is_null(idx); } + /** + * @brief Fetches the offset of the underlying column_device_view, + * in case it is a sliced/offset column. + */ + CUDA_DEVICE_CALLABLE size_type offset() const { return underlying.offset(); } + private: column_device_view underlying; }; diff --git a/cpp/include/cudf/lists/lists_column_factories.hpp b/cpp/include/cudf/lists/lists_column_factories.hpp new file mode 100644 index 00000000000..bdf06cfa9e7 --- /dev/null +++ b/cpp/include/cudf/lists/lists_column_factories.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace cudf { +namespace lists { +namespace detail { + +/** + * @brief Internal API to construct a lists column from a `list_scalar`, for public + * use, use `cudf::make_column_from_scalar`. + * + * @param[in] value The `list_scalar` to construct from + * @param[in] size The number of rows for the output column. + * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @param[in] mr Device memory resource used to allocate the returned column's device memory. + */ +std::unique_ptr make_lists_column_from_scalar( + list_scalar const& value, + size_type size, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace detail +} // namespace lists +} // namespace cudf diff --git a/cpp/include/cudf/rolling.hpp b/cpp/include/cudf/rolling.hpp index 44a64a01c5e..4fb1b4a7319 100644 --- a/cpp/include/cudf/rolling.hpp +++ b/cpp/include/cudf/rolling.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -58,7 +59,7 @@ std::unique_ptr rolling_window( size_type preceding_window, size_type following_window, size_type min_periods, - std::unique_ptr const& agg, + rolling_aggregation const& agg, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -67,7 +68,7 @@ std::unique_ptr rolling_window( * size_type preceding_window, * size_type following_window, * size_type min_periods, - * std::unique_ptr const& agg, + * rolling_aggregation const& agg, * rmm::mr::device_memory_resource* mr) * * @param default_outputs A column of per-row default values to be returned instead @@ -80,7 +81,7 @@ std::unique_ptr rolling_window( size_type preceding_window, size_type following_window, size_type min_periods, - std::unique_ptr const& agg, + rolling_aggregation const& agg, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -119,6 +120,7 @@ struct 
window_bounds { { } }; + /** * @brief Applies a grouping-aware, fixed-size rolling window function to the values in a column. * @@ -195,7 +197,7 @@ std::unique_ptr grouped_rolling_window( size_type preceding_window, size_type following_window, size_type min_periods, - std::unique_ptr const& aggr, + rolling_aggregation const& aggr, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -205,7 +207,7 @@ std::unique_ptr grouped_rolling_window( * size_type preceding_window, * size_type following_window, * size_type min_periods, - * std::unique_ptr const& aggr, + * rolling_aggregation const& aggr, * rmm::mr::device_memory_resource* mr) */ std::unique_ptr grouped_rolling_window( @@ -214,7 +216,7 @@ std::unique_ptr grouped_rolling_window( window_bounds preceding_window, window_bounds following_window, size_type min_periods, - std::unique_ptr const& aggr, + rolling_aggregation const& aggr, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -224,7 +226,7 @@ std::unique_ptr grouped_rolling_window( * size_type preceding_window, * size_type following_window, * size_type min_periods, - * std::unique_ptr const& aggr, + * rolling_aggregation const& aggr, * rmm::mr::device_memory_resource* mr) * * @param default_outputs A column of per-row default values to be returned instead @@ -238,7 +240,7 @@ std::unique_ptr grouped_rolling_window( size_type preceding_window, size_type following_window, size_type min_periods, - std::unique_ptr const& aggr, + rolling_aggregation const& aggr, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -249,7 +251,7 @@ std::unique_ptr grouped_rolling_window( * size_type preceding_window, * size_type following_window, * size_type min_periods, - * std::unique_ptr const& aggr, + * rolling_aggregation const& aggr, * rmm::mr::device_memory_resource* mr) */ std::unique_ptr grouped_rolling_window( @@ -259,12 +261,12 @@ std::unique_ptr grouped_rolling_window( 
window_bounds preceding_window, window_bounds following_window, size_type min_periods, - std::unique_ptr const& aggr, + rolling_aggregation const& aggr, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Applies a grouping-aware, timestamp-based rolling window function to the values in a - *column. + * column. * * Like `rolling_window()`, this function aggregates values in a window around each * element of a specified `input` column. It differs from `rolling_window()` in two respects: @@ -353,20 +355,40 @@ std::unique_ptr grouped_time_range_rolling_window( size_type preceding_window_in_days, size_type following_window_in_days, size_type min_periods, - std::unique_ptr const& aggr, + rolling_aggregation const& aggr, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @copydoc std::unique_ptr grouped_time_range_rolling_window( - * table_view const& group_keys, - * column_view const& timestamp_column, - * cudf::order const& timestamp_order, - * column_view const& input, - * size_type preceding_window_in_days, - * size_type following_window_in_days, - * size_type min_periods, - * std::unique_ptr const& aggr, - * rmm::mr::device_memory_resource* mr) + * @brief Applies a grouping-aware, timestamp-based rolling window function to the values in a + * column. + * + * @copydetails std::unique_ptr grouped_time_range_rolling_window( + * table_view const& group_keys, + * column_view const& timestamp_column, + * cudf::order const& timestamp_order, + * column_view const& input, + * size_type preceding_window_in_days, + * size_type following_window_in_days, + * size_type min_periods, + * rolling_aggregation const& aggr, + * rmm::mr::device_memory_resource* mr) + * + * The `preceding_window_in_days` and `following_window_in_days` support "unbounded" windows, + * if set to `window_bounds::unbounded()`. 
+ * + * @param[in] group_keys The (pre-sorted) grouping columns + * @param[in] timestamp_column The (pre-sorted) timestamps for each row + * @param[in] timestamp_order The order (ASCENDING/DESCENDING) in which the timestamps are sorted + * @param[in] input The input column (to be aggregated) + * @param[in] preceding_window_in_days Possibly unbounded time-interval in the backward direction, + * specified as a `window_bounds` + * @param[in] following_window_in_days Possibly unbounded time-interval in the forward direction, + * specified as a `window_bounds` + * @param[in] min_periods Minimum number of observations in window required to have a value, + * otherwise element `i` is null. + * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.) + * + * @returns A nullable output column containing the rolling window results */ std::unique_ptr grouped_time_range_rolling_window( table_view const& group_keys, @@ -376,7 +398,126 @@ std::unique_ptr grouped_time_range_rolling_window( window_bounds preceding_window_in_days, window_bounds following_window_in_days, size_type min_periods, - std::unique_ptr const& aggr, + rolling_aggregation const& aggr, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Applies a grouping-aware, value range-based rolling window function to the values in a + * column. + * + * This function aggregates rows in a window around each element of a specified `input` column. + * The window is determined based on the values of an ordered `orderby` column, and on the values + * of a `preceding` and `following` scalar representing an inclusive range of orderby column values. + * + * 1. The elements of the `input` column are grouped into distinct groups (e.g. the result of a + * groupby), determined by the corresponding values of the columns under `group_keys`. The + * window-aggregation cannot cross the group boundaries. + * 2. 
Within a group, with all rows sorted by the `orderby` column, the aggregation window + * for a row at index `i` is determined as follows: + * a) If `orderby` is ASCENDING, aggregation window for row `i` includes all `input` rows at + * index `j` such that: + * @code{.pseudo} + * (orderby[i] - preceding) <= orderby[j] <= orderby[i] + following + * @endcode + * b) If `orderby` is DESCENDING, aggregation window for row `i` includes all `input` rows at + * index `j` such that: + * @code{.pseudo} + * (orderby[i] + preceding) >= orderby[j] >= orderby[i] - following + * @endcode + * + * Note: This method requires that the rows are presorted by the group keys and orderby column + * values. + * + * The window intervals are specified as scalar values appropriate for the orderby column. + * Currently, only the following combinations of `orderby` column type and range types + * are supported: + * 1. If `orderby` column is a TIMESTAMP, the `preceding`/`following` windows are specified + * in terms of `DURATION` scalars of the same resolution. + * E.g. For `orderby` column of type `TIMESTAMP_SECONDS`, the intervals may only be + * `DURATION_SECONDS`. Durations of higher resolution (e.g. `DURATION_NANOSECONDS`) + * or lower (e.g. `DURATION_DAYS`) cannot be used. + * 2. If the `orderby` column is an integral type (e.g. `INT32`), the `preceding`/`following` + * should be the exact same type (`INT32`). + * + * @code{.pseudo} + * Example: Consider a motor-racing statistics dataset, containing the following columns: + * 1. driver_name: (STRING) Name of the car driver + * 2. num_overtakes: (INT32) Number of times the driver overtook another car in a lap + * 3. lap_number: (INT32) The number of the lap + * + * The `grouped_range_rolling_window()` function allows one to calculate the total number of overtakes + * each driver made within any 3 lap window of each entry: + * 1. Group/partition the dataset by `driver_name` (This is the group_keys argument.) + * 2. 
Sort each group by the `lap_number` (i.e. This is the orderby_column.) + * 3. Calculate the SUM(num_overtakes) over a window (preceding=1, following=1) + * + * For the following input: + * + * [ // driver_name, num_overtakes, lap_number + * { "bottas", 1, 1 }, + * { "hamilton", 2, 1 }, + * { "bottas", 2, 2 }, + * { "bottas", 1, 3 }, + * { "hamilton", 3, 1 }, + * { "hamilton", 8, 2 }, + * { "bottas", 5, 7 }, + * { "bottas", 6, 8 }, + * { "hamilton", 4, 4 } + * ] + * + * Partitioning (grouping) by `driver_name`, and ordering by `lap_number` yields the following + * `num_overtakes` vector (with 2 groups, one for each distinct `driver_name`): + * + * lap_number: [ 1, 2, 3, 7, 8, 1, 1, 2, 4 ] + * num_overtakes: [ 1, 2, 1, 5, 6, 2, 3, 8, 4 ] + * <-----bottas------>|<----hamilton---> + * + * The SUM aggregation is applied, with 1 preceding, and 1 following, with a minimum of 1 + * period. The aggregation window is thus 3 (laps) wide, yielding the following output column: + * + * Results: [ 3, 4, 3, 11, 11, 13, 13, 13, 4 ] + * + * @endcode + * + * Note: The number of rows participating in each window might vary, based on the index within the + * group, datestamp, and `min_periods`. Apropos: + * 1. results[0] considers 2 values, because it is at the beginning of its group, and has no + * preceding values. + * 2. results[5] considers 3 values, despite being at the beginning of its group. It must include 2 + * following values, based on its orderby_column value. + * + * Each aggregation operation cannot cross group boundaries. + * + * The type of the returned column depends on the input column type `T`, and the aggregation: + * 1. COUNT returns `INT32` columns + * 2. MIN/MAX returns `T` columns + * 3. SUM returns the promoted type for T. Sum on `INT32` yields `INT64`. + * 4. MEAN returns FLOAT64 columns + * 5. COLLECT returns columns of type `LIST`. + * + * LEAD/LAG/ROW_NUMBER are undefined for range queries. 
+ * + * @param[in] group_keys The (pre-sorted) grouping columns + * @param[in] orderby_column The (pre-sorted) order-by column, for range comparisons + * @param[in] order The order (ASCENDING/DESCENDING) in which the order-by column is sorted + * @param[in] input The input column (to be aggregated) + * @param[in] preceding The interval value in the backward direction + * @param[in] following The interval value in the forward direction. + * @param[in] min_periods Minimum number of observations in window required to have a value, + * otherwise element `i` is null. + * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.) + * + * @returns A nullable output column containing the rolling window results + */ +std::unique_ptr grouped_range_rolling_window( + table_view const& group_keys, + column_view const& orderby_column, + cudf::order const& order, + column_view const& input, + range_window_bounds const& preceding, + range_window_bounds const& following, + size_type min_periods, + rolling_aggregation const& aggr, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -418,7 +559,7 @@ std::unique_ptr rolling_window( column_view const& preceding_window, column_view const& following_window, size_type min_periods, - std::unique_ptr const& agg, + rolling_aggregation const& agg, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/rolling/range_window_bounds.hpp b/cpp/include/cudf/rolling/range_window_bounds.hpp new file mode 100644 index 00000000000..0c86bd3cf86 --- /dev/null +++ b/cpp/include/cudf/rolling/range_window_bounds.hpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf { + +/** + * @brief Abstraction for window boundary sizes, to be used with + * `grouped_range_rolling_window()`. + * + * Similar to `window_bounds` in `grouped_rolling_window()`, `range_window_bounds` + * represents window boundaries for use with `grouped_range_rolling_window()`. + * A window may be specified as either of the following: + * 1. A fixed-width numeric scalar value. E.g. + * a) A `DURATION_DAYS` scalar, for use with a `TIMESTAMP_DAYS` orderby column + * b) An `INT32` scalar, for use with an `INT32` orderby column + * 2. "unbounded", indicating that the bounds stretch to the first/last + * row in the group. + */ +struct range_window_bounds { + public: + /** + * @brief Factory method to construct a bounded window boundary. + * + * @param value Finite window boundary + * + */ + static range_window_bounds get(scalar const&); + + /** + * @brief Factory method to construct an unbounded window boundary. + * + * @param type The datatype of the window boundary + */ + static range_window_bounds unbounded(data_type type); + + /** + * @brief Whether or not the window is unbounded + * + * @return true If window is unbounded + * @return false If window is of finite bounds + */ + bool is_unbounded() const { return _is_unbounded; } + + /** + * @brief Returns the underlying scalar value for the bounds + */ + scalar const& range_scalar() const { return *_range_scalar; } + + range_window_bounds(range_window_bounds const&) = + default; // Required to return (by copy) from functions. 
+ range_window_bounds() = default; // Required for use as return types from dispatch functors. + + private: + const bool _is_unbounded{true}; + std::shared_ptr _range_scalar{nullptr}; // To enable copy construction/assignment. + + range_window_bounds(bool is_unbounded_, std::unique_ptr range_scalar_); +}; + +} // namespace cudf diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index ded833f4ca0..6938ad5feaa 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -15,20 +15,15 @@ */ #pragma once +#include +#include #include #include -#include - -#include #include #include #include -#include -#include -#include - /** * @file * @brief Class definitions for cudf::scalar @@ -50,16 +45,27 @@ namespace cudf { */ class scalar { public: - virtual ~scalar() = default; - scalar(scalar&& other) = default; - scalar(scalar const& other) = default; + virtual ~scalar() = default; + scalar(scalar&& other) = default; + scalar& operator=(scalar const& other) = delete; scalar& operator=(scalar&& other) = delete; + /** + * @brief Construct a new scalar object by deep copying another. + * + * @param[in] other The scalar to copy. + * @param[in] stream CUDA stream used for device memory operations. + * @param[in] mr Device memory resource to use for device memory allocation + */ + scalar(scalar const& other, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Returns the scalar's logical value type */ - data_type type() const noexcept { return _type; } + data_type type() const noexcept; /** * @brief Updates the validity of the value @@ -67,10 +73,7 @@ class scalar { * @param is_valid true: set the value to valid. false: set it to null * @param stream CUDA stream used for device memory operations. 
*/ - void set_valid(bool is_valid, rmm::cuda_stream_view stream = rmm::cuda_stream_default) - { - _is_valid.set_value(is_valid, stream); - } + void set_valid(bool is_valid, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** * @brief Indicates whether the scalar contains a valid value @@ -81,20 +84,17 @@ class scalar { * @return true Value is valid * @return false Value is invalid/null */ - bool is_valid(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - return _is_valid.value(stream); - } + bool is_valid(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; /** * @brief Returns a raw pointer to the validity bool in device memory */ - bool* validity_data() { return _is_valid.data(); } + bool* validity_data(); /** * @brief Returns a const raw pointer to the validity bool in device memory */ - bool const* validity_data() const { return _is_valid.data(); } + bool const* validity_data() const; protected: data_type _type{type_id::EMPTY}; ///< Logical type of value in the scalar @@ -116,10 +116,7 @@ class scalar { scalar(data_type type, bool is_valid = false, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : _type(type), _is_valid(is_valid, stream, mr) - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); }; namespace detail { @@ -130,53 +127,57 @@ class fixed_width_scalar : public scalar { public: using value_type = T; - ~fixed_width_scalar() = default; - fixed_width_scalar(fixed_width_scalar&& other) = default; - fixed_width_scalar(fixed_width_scalar const& other) = default; + ~fixed_width_scalar() = default; + fixed_width_scalar(fixed_width_scalar&& other) = default; + fixed_width_scalar& operator=(fixed_width_scalar const& other) = delete; fixed_width_scalar& operator=(fixed_width_scalar&& other) = delete; + /** + * @brief Construct a new fixed-width scalar object by deep copying another. 
+ * + * @param[in] other The scalar to copy. + * @param[in] stream CUDA stream used for device memory operations. + * @param[in] mr Device memory resource to use for device memory allocation + */ + fixed_width_scalar(fixed_width_scalar const& other, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Set the value of the scalar * * @param value New value of scalar * @param stream CUDA stream used for device memory operations. */ - void set_value(T value, rmm::cuda_stream_view stream = rmm::cuda_stream_default) - { - _data.set_value(value, stream); - this->set_valid(true, stream); - } + void set_value(T value, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** * @brief Implicit conversion operator to get the value of the scalar on the host */ - explicit operator value_type() const { return this->value(0); } + explicit operator value_type() const; /** * @brief Get the value of the scalar * * @param stream CUDA stream used for device memory operations. 
*/ - T value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - return _data.value(stream); - } + T value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; /** * @brief Returns a raw pointer to the value in device memory */ - T* data() { return _data.data(); } + T* data(); /** * @brief Returns a const raw pointer to the value in device memory */ - T const* data() const { return _data.data(); } + T const* data() const; protected: rmm::device_scalar _data{}; ///< device memory containing the value - fixed_width_scalar() : scalar(data_type(type_to_id())) {} + fixed_width_scalar(); /** * @brief Construct a new fixed width scalar object @@ -189,10 +190,7 @@ class fixed_width_scalar : public scalar { fixed_width_scalar(T value, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : scalar(data_type(type_to_id()), is_valid, stream, mr), _data(value, stream, mr) - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new fixed width scalar object from existing device memory. 
@@ -205,11 +203,7 @@ class fixed_width_scalar : public scalar { fixed_width_scalar(rmm::device_scalar&& data, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : scalar(data_type(type_to_id()), is_valid, stream, mr), - _data{std::forward>(data)} - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); }; } // namespace detail @@ -224,13 +218,24 @@ class numeric_scalar : public detail::fixed_width_scalar { static_assert(is_numeric(), "Unexpected non-numeric type."); public: - numeric_scalar() = default; - ~numeric_scalar() = default; - numeric_scalar(numeric_scalar&& other) = default; - numeric_scalar(numeric_scalar const& other) = default; + numeric_scalar() = default; + ~numeric_scalar() = default; + numeric_scalar(numeric_scalar&& other) = default; + numeric_scalar& operator=(numeric_scalar const& other) = delete; numeric_scalar& operator=(numeric_scalar&& other) = delete; + /** + * @brief Construct a new numeric scalar object by deep copying another. + * + * @param[in] other The scalar to copy. + * @param[in] stream CUDA stream used for device memory operations. 
+ * @param[in] mr Device memory resource to use for device memory allocation + */ + numeric_scalar(numeric_scalar const& other, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Construct a new numeric scalar object * @@ -242,10 +247,7 @@ class numeric_scalar : public detail::fixed_width_scalar { numeric_scalar(T value, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : detail::fixed_width_scalar(value, is_valid, stream, mr) - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new numeric scalar object from existing device memory. @@ -258,10 +260,7 @@ class numeric_scalar : public detail::fixed_width_scalar { numeric_scalar(rmm::device_scalar&& data, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : detail::fixed_width_scalar(std::forward>(data), is_valid, stream, mr) - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); }; /** @@ -274,15 +273,27 @@ class fixed_point_scalar : public scalar { static_assert(is_fixed_point(), "Unexpected non-fixed_point type."); public: - using rep_type = typename T::rep; + using rep_type = typename T::rep; + using value_type = T; + + fixed_point_scalar(); + ~fixed_point_scalar() = default; + fixed_point_scalar(fixed_point_scalar&& other) = default; - fixed_point_scalar() : scalar(data_type(type_to_id())){}; - ~fixed_point_scalar() = default; - fixed_point_scalar(fixed_point_scalar&& other) = default; - fixed_point_scalar(fixed_point_scalar const& other) = default; fixed_point_scalar& operator=(fixed_point_scalar const& other) = delete; fixed_point_scalar& operator=(fixed_point_scalar&& other) = delete; + /** + * 
@brief Construct a new fixed_point scalar object by deep copying another. + * + * @param[in] other The scalar to copy. + * @param[in] stream CUDA stream used for device memory operations. + * @param[in] mr Device memory resource to use for device memory allocation + */ + fixed_point_scalar(fixed_point_scalar const& other, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Construct a new fixed_point scalar object from already shifted value and scale * @@ -296,11 +307,7 @@ class fixed_point_scalar : public scalar { numeric::scale_type scale, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : scalar{data_type{type_to_id(), static_cast(scale)}, is_valid, stream, mr}, - _data{value} - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new fixed_point scalar object from a value and default 0-scale @@ -313,10 +320,7 @@ class fixed_point_scalar : public scalar { fixed_point_scalar(rep_type value, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : scalar{data_type{type_to_id(), 0}, is_valid, stream, mr}, _data{value} - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new fixed_point scalar object from a fixed_point number @@ -329,10 +333,7 @@ class fixed_point_scalar : public scalar { fixed_point_scalar(T value, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : scalar{data_type{type_to_id(), value.scale()}, is_valid, stream, mr}, _data{value.value()} - { - } + rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource()); /** * @brief Construct a new fixed_point scalar object from existing device memory. @@ -347,42 +348,31 @@ class fixed_point_scalar : public scalar { numeric::scale_type scale, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : scalar{data_type{type_to_id(), scale}, is_valid, stream, mr}, - _data{std::forward>(data)} - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Get the value of the scalar * * @param stream CUDA stream used for device memory operations. */ - rep_type value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - return _data.value(stream); - } + rep_type value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; /** * @brief Get the decimal32 or decimal64 * * @param stream CUDA stream used for device memory operations. */ - T fixed_point_value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - using namespace numeric; - return T{scaled_integer{_data.value(stream), scale_type{type().scale()}}}; - } + T fixed_point_value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; /** * @brief Returns a raw pointer to the value in device memory */ - rep_type* data() { return _data.data(); } + rep_type* data(); /** * @brief Returns a const raw pointer to the value in device memory */ - rep_type const* data() const { return _data.data(); } + rep_type const* data() const; protected: rmm::device_scalar _data{}; ///< device memory containing the value @@ -395,13 +385,24 @@ class string_scalar : public scalar { public: using value_type = cudf::string_view; - string_scalar() : scalar(data_type(type_id::STRING)) {} - ~string_scalar() = default; - string_scalar(string_scalar&& other) = default; - string_scalar(string_scalar const& other) = default; + string_scalar(); + ~string_scalar() = default; + 
string_scalar(string_scalar&& other) = default; + string_scalar& operator=(string_scalar const& other) = delete; string_scalar& operator=(string_scalar&& other) = delete; + /** + * @brief Construct a new string scalar object by deep copying another string_scalar. + * + * @param[in] other The other string_scalar to copy + * @param[in] stream CUDA stream used for device memory operations. + * @param[in] mr Device memory resource to use for device memory allocation + */ + string_scalar(string_scalar const& other, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Construct a new string scalar object * @@ -413,10 +414,7 @@ class string_scalar : public scalar { string_scalar(std::string const& string, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : scalar(data_type(type_id::STRING), is_valid), _data(string.data(), string.size(), stream, mr) - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new string scalar object from string_view @@ -449,7 +447,7 @@ class string_scalar : public scalar { /** * @brief Implicit conversion operator to get the value of the scalar in a host std::string */ - explicit operator std::string() const { return this->to_string(0); } + explicit operator std::string() const; /** * @brief Get the value of the scalar in a host std::string @@ -468,12 +466,12 @@ class string_scalar : public scalar { /** * @brief Returns the size of the string in bytes */ - size_type size() const { return _data.size(); } + size_type size() const; /** * @brief Returns a raw pointer to the string in device memory */ - const char* data() const { return static_cast(_data.data()); } + const char* data() const; protected: rmm::device_buffer _data{}; ///< device memory containing the string @@ -490,13 
+488,24 @@ class chrono_scalar : public detail::fixed_width_scalar { static_assert(is_chrono(), "Unexpected non-chrono type"); public: - chrono_scalar() = default; - ~chrono_scalar() = default; - chrono_scalar(chrono_scalar&& other) = default; - chrono_scalar(chrono_scalar const& other) = default; + chrono_scalar() = default; + ~chrono_scalar() = default; + chrono_scalar(chrono_scalar&& other) = default; + chrono_scalar& operator=(chrono_scalar const& other) = delete; chrono_scalar& operator=(chrono_scalar&& other) = delete; + /** + * @brief Construct a new chrono scalar object by deep copying another. + * + * @param[in] other The scalar to copy. + * @param[in] stream CUDA stream used for device memory operations. + * @param[in] mr Device memory resource to use for device memory allocation + */ + chrono_scalar(chrono_scalar const& other, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Construct a new chrono scalar object * @@ -508,10 +517,7 @@ class chrono_scalar : public detail::fixed_width_scalar { chrono_scalar(T value, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : detail::fixed_width_scalar(value, is_valid, stream, mr) - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new chrono scalar object from existing device memory. 
@@ -524,18 +530,29 @@ class chrono_scalar : public detail::fixed_width_scalar { chrono_scalar(rmm::device_scalar&& data, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : detail::fixed_width_scalar(std::forward>(data), is_valid, stream, mr) - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); }; template -struct timestamp_scalar : chrono_scalar { +class timestamp_scalar : public chrono_scalar { + public: static_assert(is_timestamp(), "Unexpected non-timestamp type"); using chrono_scalar::chrono_scalar; + using rep_type = typename T::rep; - timestamp_scalar() = default; + timestamp_scalar() = default; + timestamp_scalar(timestamp_scalar&& other) = default; + + /** + * @brief Construct a new timestamp scalar object by deep copying another. + * + * @param[in] other The scalar to copy. + * @param[in] stream CUDA stream used for device memory operations. + * @param[in] mr Device memory resource to use for device memory allocation + */ + timestamp_scalar(timestamp_scalar const& other, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new timestamp scalar object from a duration that is @@ -551,23 +568,34 @@ struct timestamp_scalar : chrono_scalar { timestamp_scalar(Duration2 const& value, bool is_valid, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : chrono_scalar(T{typename T::duration{value}}, is_valid, stream, mr) - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Return the duration in number of ticks since the UNIX epoch. 
*/ - typename T::rep ticks_since_epoch() { return this->value().time_since_epoch().count(); } + rep_type ticks_since_epoch(); }; template -struct duration_scalar : chrono_scalar { +class duration_scalar : public chrono_scalar { + public: static_assert(is_duration(), "Unexpected non-duration type"); using chrono_scalar::chrono_scalar; + using rep_type = typename T::rep; - duration_scalar() = default; + duration_scalar() = default; + duration_scalar(duration_scalar&& other) = default; + + /** + * @brief Construct a new duration scalar object by deep copying another. + * + * @param[in] other The scalar to copy. + * @param[in] stream CUDA stream used for device memory operations. + * @param[in] mr Device memory resource to use for device memory allocation + */ + duration_scalar(duration_scalar const& other, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a new duration scalar object from tick counts @@ -577,18 +605,130 @@ struct duration_scalar : chrono_scalar { * @param stream CUDA stream used for device memory operations. * @param mr Device memory resource to use for device memory allocation */ - duration_scalar(typename T::rep value, + duration_scalar(rep_type value, bool is_valid, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : chrono_scalar(T{value}, is_valid, stream, mr) - { - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Return the duration in number of ticks. 
*/ - typename T::rep count() { return this->value().count(); } + rep_type count(); }; + +/** + * @brief An owning class to represent a list value in device memory + */ +class list_scalar : public scalar { + public: + list_scalar(); + ~list_scalar() = default; + list_scalar(list_scalar&& other) = default; + + list_scalar& operator=(list_scalar const& other) = delete; + list_scalar& operator=(list_scalar&& other) = delete; + + /** + * @brief Construct a new list scalar object by deep copying another. + * + * @param[in] other The scalar to copy. + * @param[in] stream CUDA stream used for device memory operations. + * @param[in] mr Device memory resource to use for device memory allocation + */ + list_scalar(list_scalar const& other, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Construct a new list scalar object from column_view + * + * The input column_view is copied. + * + * @param data The column data to copy. + * @param is_valid Whether the value held by the scalar is valid + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource to use for device memory allocation + */ + list_scalar(cudf::column_view const& data, + bool is_valid = true, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Construct a new list scalar object from existing column. + * + * @param data The column to take ownership of + * @param is_valid Whether the value held by the scalar is valid + * @param stream CUDA stream used for device memory operations. 
+ * @param mr Device memory resource to use for device memory allocation + */ + list_scalar(cudf::column&& data, + bool is_valid = true, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Returns a non-owning, immutable view to underlying device data + */ + column_view view() const; + + private: + cudf::column _data; +}; + +/** + * @brief An owning class to represent a struct value in device memory + */ +class struct_scalar : public scalar { + public: + struct_scalar(); + ~struct_scalar() = default; + struct_scalar(struct_scalar&& other) = default; + struct_scalar(struct_scalar const& other) = default; + struct_scalar& operator=(struct_scalar const& other) = delete; + struct_scalar& operator=(struct_scalar&& other) = delete; + + /** + * @brief Construct a new struct scalar object from table_view + * + * The input table_view is deep-copied. + * + * @param data The table data to copy. + * @param is_valid Whether the value held by the scalar is valid + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource to use for device memory allocation + */ + struct_scalar(table_view const& data, + bool is_valid = true, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Construct a new struct scalar object from a host_span of column_views + * + * The input column_views are deep-copied. + * + * @param data The column_views to copy. + * @param is_valid Whether the value held by the scalar is valid + * @param stream CUDA stream used for device memory operations. 
+ * @param mr Device memory resource to use for device memory allocation + */ + struct_scalar(host_span data, + bool is_valid = true, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Returns a non-owning, immutable view to underlying device data + */ + table_view view() const; + + private: + table _data; + + void init(bool is_valid, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + void superimpose_nulls(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +}; + /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index a0a0a22091e..b96a8c65a04 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -156,5 +156,45 @@ std::unique_ptr make_fixed_point_scalar( return std::make_unique>(value, scale, true, stream, mr); } +/** + * @brief Construct scalar using the given column of elements + * + * @param elements Elements of the list + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. + */ +std::unique_ptr make_list_scalar( + column_view elements, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Construct a struct scalar using the given table_view. + * + * The columns must have 1 row. + * + * @param data The columnar data to store in the scalar object + * @param stream CUDA stream used for device memory operations. 
+ * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. + */ +std::unique_ptr make_struct_scalar( + table_view const& data, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Construct a struct scalar using the given span of column views. + * + * The columns must have 1 row. + * + * @param data The columnar data to store in the scalar object + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. + */ +std::unique_ptr make_struct_scalar( + host_span data, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp index f47b4515b3a..192be4fb6a9 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/stream_compaction.hpp @@ -227,8 +227,9 @@ enum class duplicate_keep_option { * @param[in] input input table_view to copy only unique rows * @param[in] keys vector of indices representing key columns from `input` * @param[in] keep keep first entry, last entry, or no entries if duplicates found - * @param[in] nulls_equal flag to denote nulls are equal if null_equality::EQUAL, - * nulls are not equal if null_equality::UNEQUAL + * @param[in] nulls_equal flag to denote nulls are equal if null_equality::EQUAL, nulls are not + * equal if null_equality::UNEQUAL + * @param[in] null_precedence flag to denote nulls should appear before or after non-null items * @param[in] mr Device memory resource used to allocate the returned table's device * memory * * @@ -239,6 +240,7 @@ std::unique_ptr
drop_duplicates( std::vector const& keys, duplicate_keep_option keep, null_equality nulls_equal = null_equality::EQUAL, + null_order null_precedence = null_order::BEFORE, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp index 49f824b3805..3e069de2f0f 100644 --- a/cpp/include/cudf/strings/combine.hpp +++ b/cpp/include/cudf/strings/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -30,52 +31,29 @@ namespace strings { */ /** - * @brief Row-wise concatenates the given list of strings columns and - * returns a single strings column result. - * - * Each new string is created by concatenating the strings from the same - * row delimited by the separator provided. - * - * Any row with a null entry will result in the corresponding output - * row to be null entry unless a narep string is specified to be used - * in its place. - * - * The number of strings in the columns provided must be the same. - * - * @code{.pseudo} - * Example: - * s1 = ['aa', null, '', 'aa'] - * s2 = ['', 'bb', 'bb', null] - * r1 = concatenate([s1,s2]) - * r1 is ['aa', null, 'bb', null] - * r2 = concatenate([s1,s2],':','_') - * r2 is ['aa:', '_:bb', ':bb', 'aa:_'] - * @endcode - * - * @throw cudf::logic_error if input columns are not all strings columns. - * @throw cudf::logic_error if separator is not valid. - * - * @param strings_columns List of string columns to concatenate. - * @param separator String that should inserted between each string from each row. - * Default is an empty string. - * @param narep String that should be used in place of any null strings - * found in any column. 
Default of invalid-scalar means any null entry in any column will - * produces a null result for that row. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column with concatenated results. + * @brief Setting for specifying how separators are added with + * null strings elements. */ -std::unique_ptr concatenate( - table_view const& strings_columns, - string_scalar const& separator = string_scalar(""), - string_scalar const& narep = string_scalar("", false), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +enum class separator_on_nulls { + YES, ///< Always add separators between elements + NO ///< Do not add separators if an element is null +}; + +/** + * @brief Setting for specifying what will be output from `join_list_elements` when an input list + * is empty. + */ +enum class output_if_empty_list { + EMPTY_STRING, ///< Empty list will result in empty string + NULL_ELEMENT ///< Empty list will result in a null +}; /** * @brief Concatenates all strings in the column into one new string delimited * by an optional separator string. * * This returns a column with one string. Any null entries are ignored unless - * the narep parameter specifies a replacement string. + * the @p narep parameter specifies a replacement string. 
* * @code{.pseudo} * Example: @@ -110,11 +88,9 @@ std::unique_ptr join_strings( * * - If row separator for a given row is null, output column for that row is null, unless * there is a valid @p separator_narep - * - If all column values for a given row is null, output column for that row is null, unless - * there is a valid @p col_narep - * - null column values for a given row are skipped, if the column replacement isn't valid - * - The separator is only applied between two valid column values - * - If valid @p separator_narep and @p col_narep are provided, the output column is always + * - The separator is applied between two output row values if the @p separate_nulls + * is `YES` or only between valid rows if @p separate_nulls is `NO`. + * - If @p separator_narep and @p col_narep are both valid, the output column is always * non nullable * * @code{.pseudo} @@ -123,16 +99,25 @@ std::unique_ptr join_strings( * c1 = [null, 'cc', 'dd', null, null, 'gg'] * c2 = ['bb', '', null, null, null, 'hh'] * sep = ['::', '%%', '^^', '!', '*', null] - * out0 = concatenate([c0, c1, c2], sep) - * out0 is ['aa::bb', 'cc%%', '^^dd', 'ee', null, null] + * out = concatenate({c0, c1, c2}, sep) + * // all rows have at least one null or sep[i]==null + * out is [null, null, null, null, null, null] * * sep_rep = '+' - * out1 = concatenate([c0, c1, c2], sep, sep_rep) - * out1 is ['aa::bb', 'cc%%', '^^dd', 'ee', null, 'ff+gg+hh'] + * out = concatenate({c0, c1, c2}, sep, sep_rep) + * // all rows with at least one null output as null + * out is [null, null, null, null, null, 'ff+gg+hh'] + * + * col_narep = '-' + * sep_na = non-valid scalar + * out = concatenate({c0, c1, c2}, sep, sep_na, col_narep) + * // only the null entry in the sep column produces a null row + * out is ['aa::-::bb', '-%%cc%%', '^^dd^^-', 'ee!-!-', '-*-*-', null] * - * col_rep = '-' - * out2 = concatenate([c0, c1, c2], sep, invalid_sep_rep, col_rep) - * out2 is ['aa::-::bb', '-%%cc%%', '^^dd^^-', 'ee!-!-', '-*-*-', null] + * 
col_narep = '' + * out = concatenate({c0, c1, c2}, sep, sep_rep, col_narep, separator_on_nulls::NO) + * // parameter suppresses separator for null rows + * out is ['aa::bb', 'cc%%', '^^dd', 'ee', '', 'ff+gg+hh'] * @endcode * * @throw cudf::logic_error if no input columns are specified - table view is empty @@ -148,6 +133,8 @@ std::unique_ptr join_strings( * @param col_narep String that should be used in place of any null strings * found in any column. Default of invalid-scalar means no null column value replacements. * Default is an invalid string. + * @param separate_nulls If YES, then the separator is included for null rows + * if `col_narep` is valid. * @param mr Resource for allocating device memory. * @return New column with concatenated results. */ @@ -156,8 +143,184 @@ std::unique_ptr concatenate( strings_column_view const& separators, string_scalar const& separator_narep = string_scalar("", false), string_scalar const& col_narep = string_scalar("", false), + separator_on_nulls separate_nulls = separator_on_nulls::YES, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Row-wise concatenates the given list of strings columns and + * returns a single strings column result. + * + * Each new string is created by concatenating the strings from the same + * row delimited by the separator provided. + * + * Any row with a null entry will result in the corresponding output + * row to be null entry unless a narep string is specified to be used + * in its place. + * + * If @p separate_nulls is set to `NO` and @p narep is valid then + * separators are not added to the output between null elements. + * Otherwise, separators are always added if @p narep is valid. + * + * More than one column must be specified in the input @p strings_columns + * table.
+ * + * @code{.pseudo} + * Example: + * s1 = ['aa', null, '', 'dd'] + * s2 = ['', 'bb', 'cc', null] + * out = concatenate({s1, s2}) + * out is ['aa', null, 'cc', null] + * + * out = concatenate({s1, s2}, ':', '_') + * out is ['aa:', '_:bb', ':cc', 'dd:_'] + * + * out = concatenate({s1, s2}, ':', '', separator_on_nulls::NO) + * out is ['aa:', 'bb', ':cc', 'dd'] + * @endcode + * + * @throw cudf::logic_error if input columns are not all strings columns. + * @throw cudf::logic_error if separator is not valid. + * @throw cudf::logic_error if only one column is specified + * + * @param strings_columns List of string columns to concatenate. + * @param separator String that should be inserted between each string from each row. + * Default is an empty string. + * @param narep String that should be used in place of any null strings + * found in any column. Default of invalid-scalar means any null entry in any column will + * produce a null result for that row. + * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New column with concatenated results. + */ +std::unique_ptr concatenate( + table_view const& strings_columns, + string_scalar const& separator = string_scalar(""), + string_scalar const& narep = string_scalar("", false), + separator_on_nulls separate_nulls = separator_on_nulls::YES, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Given a lists column of strings (each row is a list of strings), concatenates the strings + * within each row and returns a single strings column result. + * + * Each new string is created by concatenating the strings from the same row (same list element) + * delimited by the row separator provided in the @p separators strings column. + * + * A null list row will always result in a null string in the output row.
Any non-null list row + * having a null element will result in the corresponding output row to be null unless a valid + * @p string_narep scalar is provided to be used in its place. Any null row in the @p separators + * column will also result in a null output row unless a valid @p separator_narep scalar is provided + * to be used in place of the null separators. + * + * If @p separate_nulls is set to `NO` and @p string_narep is valid then separators are not added to + * the output between null elements. Otherwise, separators are always added if @p string_narep is + * valid. + * + * If @p empty_list_policy is set to `EMPTY_STRING`, any row that is an empty list will result in + * an empty output string. Otherwise, the output will be a null. + * + * In the special case when the input list row contains all null elements, the output will be the + * same as in case of empty input list regardless of @p string_narep and @p separate_nulls values. + * + * @code{.pseudo} + * Example: + * s = [ ['aa', 'bb', 'cc'], null, ['', 'dd'], ['ee', null], ['ff', 'gg'] ] + * sep = ['::', '%%', '!', '*', null] + * + * out = join_list_elements(s, sep) + * out is ['aa::bb::cc', null, '!dd', null, null] + * + * out = join_list_elements(s, sep, ':', '_') + * out is ['aa::bb::cc', null, '!dd', 'ee*_', 'ff:gg'] + * + * out = join_list_elements(s, sep, ':', '', separator_on_nulls::NO) + * out is ['aa::bb::cc', null, '!dd', 'ee', 'ff:gg'] + * @endcode + * + * @throw cudf::logic_error if input column is not lists of strings column. + * @throw cudf::logic_error if the number of rows from `separators` and `lists_strings_column` do + * not match + * + * @param lists_strings_column Column containing lists of strings to concatenate. + * @param separators Strings column that provides separators for concatenation. 
+ * @param separator_narep String that should be used to replace null separator, default is an + * invalid-scalar denoting that rows containing null separator will result in null string in + * the corresponding output rows. + * @param string_narep String that should be used to replace null strings in any non-null list row, + * default is an invalid-scalar denoting that list rows containing null strings will result + * in null string in the corresponding output rows. + * @param separate_nulls If YES, then the separator is included for null rows if `string_narep` is valid. + * @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will + * result in an empty string. Otherwise, it will result in a null. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New strings column with concatenated results. + */ +std::unique_ptr join_list_elements( + const lists_column_view& lists_strings_column, + const strings_column_view& separators, + string_scalar const& separator_narep = string_scalar("", false), + string_scalar const& string_narep = string_scalar("", false), + separator_on_nulls separate_nulls = separator_on_nulls::YES, + output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Given a lists column of strings (each row is a list of strings), concatenates the strings + * within each row and returns a single strings column result. + * + * Each new string is created by concatenating the strings from the same row (same list element) + * delimited by the @p separator provided. + * + * A null list row will always result in a null string in the output row. Any non-null list row + * having a null element will result in the corresponding output row to be null unless a + * @p narep string is specified to be used in its place.
+ * + * If @p separate_nulls is set to `NO` and @p narep is valid then separators are not added to the + * output between null elements. Otherwise, separators are always added if @p narep is valid. + * + * If @p empty_list_policy is set to `EMPTY_STRING`, any row that is an empty list will result in + * an empty output string. Otherwise, the output will be a null. + * + * In the special case when the input list row contains all null elements, the output will be the + * same as in case of empty input list regardless of @p narep and @p separate_nulls values. + * + * @code{.pseudo} + * Example: + * s = [ ['aa', 'bb', 'cc'], null, ['', 'dd'], ['ee', null], ['ff'] ] + * + * out = join_list_elements(s) + * out is ['aabbcc', null, 'dd', null, 'ff'] + * + * out = join_list_elements(s, ':', '_') + * out is ['aa:bb:cc', null, ':dd', 'ee:_', 'ff'] + * + * out = join_list_elements(s, ':', '', separator_on_nulls::NO) + * out is ['aa:bb:cc', null, ':dd', 'ee', 'ff'] + * @endcode + * + * @throw cudf::logic_error if input column is not lists of strings column. + * @throw cudf::logic_error if separator is not valid. + * + * @param lists_strings_column Column containing lists of strings to concatenate. + * @param separator String that should be inserted between strings of each list row, default is an + * empty string. + * @param narep String that should be used to replace null strings in any non-null list row, default + * is an invalid-scalar denoting that list rows containing null strings will result in null + * string in the corresponding output rows. + * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. + * @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will result + * in an empty string. Otherwise, it will result in a null. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New strings column with concatenated results.
+ */ +std::unique_ptr join_list_elements( + const lists_column_view& lists_strings_column, + string_scalar const& separator = string_scalar(""), + string_scalar const& narep = string_scalar("", false), + separator_on_nulls separate_nulls = separator_on_nulls::YES, + output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of doxygen group } // namespace strings } // namespace cudf diff --git a/cpp/include/cudf/strings/detail/combine.hpp b/cpp/include/cudf/strings/detail/combine.hpp index ed783ca996c..d6bdf398886 100644 --- a/cpp/include/cudf/strings/detail/combine.hpp +++ b/cpp/include/cudf/strings/detail/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -32,11 +33,13 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr concatenate(table_view const& strings_columns, - string_scalar const& separator, - string_scalar const& narep, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr concatenate( + table_view const& strings_columns, + string_scalar const& separator, + string_scalar const& narep, + separator_on_nulls separate_nulls = separator_on_nulls::YES, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @copydoc join_strings(table_view const&,string_scalar const&,string_scalar @@ -44,11 +47,12 @@ std::unique_ptr concatenate(table_view const& strings_columns, * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr join_strings(strings_column_view const& strings, - string_scalar const& separator, - string_scalar const& narep, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr join_strings( + strings_column_view const& strings, + string_scalar const& separator, + string_scalar const& narep, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail } // namespace strings diff --git a/cpp/include/cudf/strings/detail/copy_if_else.cuh b/cpp/include/cudf/strings/detail/copy_if_else.cuh index 176a548da4d..bffcb5c1a31 100644 --- a/cpp/include/cudf/strings/detail/copy_if_else.cuh +++ b/cpp/include/cudf/strings/detail/copy_if_else.cuh @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -73,8 +74,7 @@ std::unique_ptr copy_if_else( stream, mr); size_type null_count = valid_mask.second; - rmm::device_buffer null_mask{0, stream, mr}; - if (null_count) null_mask = valid_mask.first; + auto null_mask = (null_count > 0) ? 
std::move(valid_mask.first) : rmm::device_buffer{}; // build offsets column auto offsets_transformer = [lhs_begin, rhs_begin, filter_fn] __device__(size_type idx) { @@ -92,8 +92,9 @@ std::unique_ptr copy_if_else( auto d_offsets = offsets_column->view().template data(); // build chars column - size_type bytes = thrust::device_pointer_cast(d_offsets)[strings_count]; - auto chars_column = create_chars_child_column(strings_count, null_count, bytes, stream, mr); + auto const bytes = + cudf::detail::get_value(offsets_column->view(), strings_count, stream); + auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr); auto d_chars = chars_column->mutable_view().template data(); // fill in chars thrust::for_each_n( diff --git a/cpp/include/cudf/strings/detail/copy_range.cuh b/cpp/include/cudf/strings/detail/copy_range.cuh index c5d87258b7a..c0fa74c4662 100644 --- a/cpp/include/cudf/strings/detail/copy_range.cuh +++ b/cpp/include/cudf/strings/detail/copy_range.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -180,10 +181,10 @@ std::unique_ptr copy_range( auto p_offsets = thrust::device_pointer_cast(p_offsets_column->view().template data()); - auto chars_bytes = p_offsets[target.size()]; - - auto p_chars_column = strings::detail::create_chars_child_column( - target.size(), null_count, chars_bytes, stream, mr); + auto const chars_bytes = + cudf::detail::get_value(p_offsets_column->view(), target.size(), stream); + auto p_chars_column = + strings::detail::create_chars_child_column(target.size(), chars_bytes, stream, mr); // copy to the chars column diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index 988fa552100..86f79881408 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -65,7 +65,7 @@ std::unique_ptr gather_chars(StringIterator strings_begin, auto const output_count = std::distance(map_begin, map_end); if (output_count == 0) return make_empty_column(data_type{type_id::INT8}); - auto chars_column = create_chars_child_column(output_count, 0, chars_bytes, stream, mr); + auto chars_column = create_chars_child_column(output_count, chars_bytes, stream, mr); auto const d_chars = chars_column->mutable_view().template data(); auto gather_chars_fn = [strings_begin, map_begin, offsets] __device__(size_type out_idx) -> char { diff --git a/cpp/include/cudf/strings/detail/json.hpp b/cpp/include/cudf/strings/detail/json.hpp index e6a0b49f102..85094175572 100644 --- a/cpp/include/cudf/strings/detail/json.hpp +++ b/cpp/include/cudf/strings/detail/json.hpp @@ -32,6 +32,7 @@ namespace detail { std::unique_ptr get_json_object( cudf::strings_column_view const& col, cudf::string_scalar const& json_path, + get_json_object_options options, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git 
a/cpp/include/cudf/strings/detail/merge.cuh b/cpp/include/cudf/strings/detail/merge.cuh index caac0579085..8d893a120dc 100644 --- a/cpp/include/cudf/strings/detail/merge.cuh +++ b/cpp/include/cudf/strings/detail/merge.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,10 +19,10 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -82,9 +82,9 @@ std::unique_ptr merge(strings_column_view const& lhs, auto d_offsets = offsets_column->view().template data(); // create the chars column - size_type bytes = thrust::device_pointer_cast(d_offsets)[strings_count]; - auto chars_column = - strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); + auto const bytes = + cudf::detail::get_value(offsets_column->view(), strings_count, stream); + auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr); // merge the strings auto d_chars = chars_column->mutable_view().template data(); thrust::for_each_n(rmm::exec_policy(stream), diff --git a/cpp/include/cudf/strings/detail/modify_strings.cuh b/cpp/include/cudf/strings/detail/modify_strings.cuh deleted file mode 100644 index 6feaa039bab..00000000000 --- a/cpp/include/cudf/strings/detail/modify_strings.cuh +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -namespace cudf { -namespace strings { -namespace detail { - -/** - * @brief Generic string modification in two passes: 1st pass probes for memory load requirements; - * 2nd pass executes string modification. - * - * @tparam device_probe_functor Functor for probing memory requirements; - * must implement `__device__ int32_t operator()(size_type idx) const` - * @tparam device_execute_functor Functor for executing string modification; must - * implement `__device__ int32_t operator()(size_type idx)` - * @tparam ...Types Types of possible additional arguments to be forwarded - * to the probe / execute functors (pre-condition: must both take the same trailling pack of - * arguments, in addition to their required args) - * - * @param strings Number Column of strings to apply the modifications on; - * it is not modified in place; rather a new column is returned instead - * @param stream CUDA stream used for device memory operations and kernel launches. - * (cannot be a default argument because of the variadic pack); - * @param mr Device memory resource used to allocate the returned column's device memory. 
- * (cannot be a default argument because of the variadic pack); - * @param ...args Additional arguments to be forwarded to - * the probe / execute constructors (can be empty); - * @return modified strings column - */ -template -std::unique_ptr modify_strings(strings_column_view const& strings, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr, - Types&&... args) -{ - auto strings_count = strings.size(); - if (strings_count == 0) return detail::make_empty_strings_column(stream, mr); - - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - size_type null_count = strings.null_count(); - - // copy null mask - rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr); - // get the lookup tables used for case conversion - - device_probe_functor d_probe_fctr{d_column, std::forward(args)...}; - - // build offsets column -- calculate the size of each output string - auto offsets_transformer_itr = cudf::detail::make_counting_transform_iterator(0, d_probe_fctr); - auto offsets_column = detail::make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto offsets_view = offsets_column->view(); - auto d_new_offsets = - offsets_view.template data(); // not sure why this requires `.template` and the next - // one (`d_chars = ...`) doesn't - - // build the chars column -- convert characters based on case_flag parameter - size_type bytes = thrust::device_pointer_cast(d_new_offsets)[strings_count]; - auto chars_column = - strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); - auto chars_view = chars_column->mutable_view(); - auto d_chars = chars_view.data(); - - device_execute_functor d_execute_fctr{ - d_column, d_new_offsets, d_chars, std::forward(args)...}; - - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - d_execute_fctr); - - 
return make_strings_column(strings_count, - std::move(offsets_column), - std::move(chars_column), - null_count, - std::move(null_mask), - stream, - mr); -} - -} // namespace detail -} // namespace strings -} // namespace cudf diff --git a/cpp/include/cudf/strings/detail/replace.hpp b/cpp/include/cudf/strings/detail/replace.hpp index b1c6b9a6f0b..820168ce3de 100644 --- a/cpp/include/cudf/strings/detail/replace.hpp +++ b/cpp/include/cudf/strings/detail/replace.hpp @@ -78,10 +78,22 @@ std::unique_ptr replace( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @copydoc cudf::strings::replace(strings_column_view const&, string_scalar const&, - * rmm::mr::device_memory_resource*) + * @brief Replaces any null string entries with the given string. * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * This returns a strings column with no null entries. + * + * @code{.pseudo} + * Example: + * s = ["hello", nullptr, "goodbye"] + * r = replace_nulls(s,"**") + * r is now ["hello", "**", "goodbye"] + * @endcode + * + * @param strings Strings column for this operation. + * @param repl Replacement string for null entries. Default is empty string. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New strings column. */ std::unique_ptr replace_nulls( strings_column_view const& strings, diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index 92cf537454c..166deb6560d 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -14,17 +14,15 @@ * limitations under the License. 
*/ -#include - #include #include #include #include #include +#include #include #include -#include #include #include @@ -115,7 +113,7 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, } else { // this approach is 2-3x faster for a large number of smaller string lengths auto chars_column = - strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); + strings::detail::create_chars_child_column(strings_count, bytes, stream, mr); auto d_chars = chars_column->mutable_view().template data(); auto copy_chars = [d_chars] __device__(auto item) { string_index_pair const str = thrust::get<0>(item); @@ -185,9 +183,8 @@ std::unique_ptr make_strings_column(CharIterator chars_begin, [] __device__(auto offset) { return static_cast(offset); }); // build chars column - auto chars_column = - strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); - auto chars_view = chars_column->mutable_view(); + auto chars_column = strings::detail::create_chars_child_column(strings_count, bytes, stream, mr); + auto chars_view = chars_column->mutable_view(); thrust::copy(rmm::exec_policy(stream), chars_begin, chars_end, chars_view.data()); return make_strings_column(strings_count, diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh index ba903c87485..68ebb5dbe19 100644 --- a/cpp/include/cudf/strings/detail/utilities.cuh +++ b/cpp/include/cudf/strings/detail/utilities.cuh @@ -17,11 +17,15 @@ #include #include +#include +#include +#include #include #include #include +#include #include #include @@ -87,6 +91,196 @@ std::unique_ptr child_offsets_from_string_iterator( return make_offsets_child_column(begin, begin + num_strings, stream, mr); } +/** + * @brief Copies input string data into a buffer and increments the pointer by the number of bytes + * copied. + * + * @param buffer Device buffer to copy to. + * @param input Data to copy from. 
+ * @param bytes Number of bytes to copy. + * @return Pointer to the end of the output buffer after the copy. + */ +__device__ inline char* copy_and_increment(char* buffer, const char* input, size_type bytes) +{ + memcpy(buffer, input, bytes); + return buffer + bytes; +} + +/** + * @brief Copies input string data into a buffer and increments the pointer by the number of bytes + * copied. + * + * @param buffer Device buffer to copy to. + * @param d_string String to copy. + * @return Pointer to the end of the output buffer after the copy. + */ +__device__ inline char* copy_string(char* buffer, const string_view& d_string) +{ + return copy_and_increment(buffer, d_string.data(), d_string.size_bytes()); +} + +/** + * @brief Creates child offsets and chars columns by applying the template function that + * can be used for computing the output size of each string as well as create the output. + * + * @tparam SizeAndExecuteFunction Function must accept an index and return a size. + * It must also have members d_offsets and d_chars which are set to + * memory containing the offsets and chars columns during write. + * + * @param size_and_exec_fn This is called twice. Once for the output size of each string. + * After that, the d_offsets and d_chars are set and this is called again to fill in the + * chars memory. + * @param exec_size Number of rows for executing the `size_and_exec_fn` function. + * @param strings_count Number of strings. + * @param mr Device memory resource used to allocate the returned columns' device memory. + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @return offsets child column and chars child column for a strings column + */ +template +auto make_strings_children( + SizeAndExecuteFunction size_and_exec_fn, + size_type exec_size, + size_type strings_count, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto offsets_column = make_numeric_column( + data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); + auto offsets_view = offsets_column->mutable_view(); + auto d_offsets = offsets_view.template data(); + size_and_exec_fn.d_offsets = d_offsets; + + // This is called twice -- once for offsets and once for chars. + // Reducing the number of places size_and_exec_fn is inlined speeds up compile time. + auto for_each_fn = [exec_size, stream](SizeAndExecuteFunction& size_and_exec_fn) { + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + exec_size, + size_and_exec_fn); + }; + + // Compute the offsets values + for_each_fn(size_and_exec_fn); + thrust::exclusive_scan( + rmm::exec_policy(stream), d_offsets, d_offsets + strings_count + 1, d_offsets); + + // Now build the chars column + auto const bytes = cudf::detail::get_value(offsets_view, strings_count, stream); + std::unique_ptr chars_column = + create_chars_child_column(strings_count, bytes, stream, mr); + + // Execute the function fn again to fill the chars column. + // Note that if the output chars column has zero size, the function fn should not be called to + // avoid accidentally overwriting the offsets. + if (bytes > 0) { + size_and_exec_fn.d_chars = chars_column->mutable_view().template data(); + for_each_fn(size_and_exec_fn); + } + + return std::make_pair(std::move(offsets_column), std::move(chars_column)); +} + +/** + * @brief Creates child offsets and chars columns by applying the template function that + * can be used for computing the output size of each string as well as create the output. 
+ * + * @tparam SizeAndExecuteFunction Function must accept an index and return a size. + * It must also have members d_offsets and d_chars which are set to + * memory containing the offsets and chars columns during write. + * + * @param size_and_exec_fn This is called twice. Once for the output size of each string. + * After that, the d_offsets and d_chars are set and this is called again to fill in the + * chars memory. + * @param strings_count Number of strings. + * @param mr Device memory resource used to allocate the returned columns' device memory. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @return offsets child column and chars child column for a strings column + */ +template +auto make_strings_children( + SizeAndExecuteFunction size_and_exec_fn, + size_type strings_count, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + return make_strings_children(size_and_exec_fn, strings_count, strings_count, stream, mr); +} + +/** + * @brief Creates child offsets, chars columns and null mask, null count of a strings column by + * applying the template function that can be used for computing the output size of each string as + * well as create the output. + * + * @tparam SizeAndExecuteFunction Function must accept an index and return a size. + * It must have members `d_offsets`, `d_chars`, and `d_validities` which are set to memory + * containing the offsets column, chars column and string validities during write. + * + * @param size_and_exec_fn This is called twice. Once for the output size of each string, which is + * written into the `d_offsets` array. After that, `d_chars` is set and this + * is called again to fill in the chars memory. The `d_validities` array may + * be modified to set the value `0` for the corresponding rows that contain + * null string elements. 
+ * @param exec_size Range for executing the function `size_and_exec_fn`. + * @param strings_count Number of strings. + * @param mr Device memory resource used to allocate the returned columns' device memory. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @return offsets child column, chars child column, null_mask, and null_count for a strings column. + */ +template +std::tuple, std::unique_ptr, rmm::device_buffer, size_type> +make_strings_children_with_null_mask( + SizeAndExecuteFunction size_and_exec_fn, + size_type exec_size, + size_type strings_count, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto offsets_column = make_numeric_column( + data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); + auto offsets_view = offsets_column->mutable_view(); + auto d_offsets = offsets_view.template data(); + size_and_exec_fn.d_offsets = d_offsets; + + auto validities = rmm::device_uvector(strings_count, stream); + size_and_exec_fn.d_validities = validities.begin(); + + // This is called twice: once for offsets and validities, and once for chars + auto for_each_fn = [exec_size, stream](SizeAndExecuteFunction& size_and_exec_fn) { + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + exec_size, + size_and_exec_fn); + }; + + // Compute the string sizes (storing in `d_offsets`) and string validities + for_each_fn(size_and_exec_fn); + + // Compute the offsets from string sizes + thrust::exclusive_scan( + rmm::exec_policy(stream), d_offsets, d_offsets + strings_count + 1, d_offsets); + + // Now build the chars column + auto const bytes = cudf::detail::get_value(offsets_view, strings_count, stream); + auto chars_column = create_chars_child_column(strings_count, bytes, stream, mr); + + // Execute the function fn again to fill the chars column. 
+ // Note that if the output chars column has zero size, the function fn should not be called to + // avoid accidentally overwriting the offsets. + if (bytes > 0) { + size_and_exec_fn.d_chars = chars_column->mutable_view().template data(); + for_each_fn(size_and_exec_fn); + } + + // Finally compute null mask and null count from the validities array + auto [null_mask, null_count] = cudf::detail::valid_if( + validities.begin(), validities.end(), thrust::identity{}, stream, mr); + + return std::make_tuple(std::move(offsets_column), + std::move(chars_column), + null_count > 0 ? std::move(null_mask) : rmm::device_buffer{}, + null_count); +} + // This template is a thin wrapper around per-context singleton objects. // It maintains a single object for each CUDA context. template diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp index a5db4d55001..4eff3f2dafc 100644 --- a/cpp/include/cudf/strings/detail/utilities.hpp +++ b/cpp/include/cudf/strings/detail/utilities.hpp @@ -30,7 +30,6 @@ namespace detail { * This will return the properly sized column to be filled in by the caller. * * @param strings_count Number of strings in the column. - * @param null_count Number of null string entries in the column. * @param bytes Number of bytes for the chars column. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. 
@@ -38,7 +37,6 @@ namespace detail { */ std::unique_ptr create_chars_child_column( size_type strings_count, - size_type null_count, size_type bytes, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/strings/json.hpp b/cpp/include/cudf/strings/json.hpp index b39e4a2027c..9081fa23eec 100644 --- a/cpp/include/cudf/strings/json.hpp +++ b/cpp/include/cudf/strings/json.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,8 @@ #include +#include + namespace cudf { namespace strings { @@ -26,6 +28,76 @@ namespace strings { * @file */ +/** + * @brief Settings for `get_json_object()`. + */ +class get_json_object_options { + // allow single quotes to represent strings in JSON + bool allow_single_quotes = false; + + // individual string values are returned with quotes stripped. + bool strip_quotes_from_single_strings = true; + + public: + /** + * @brief Default constructor. + */ + explicit get_json_object_options() = default; + + /** + * @brief Returns true/false depending on whether single-quotes for representing strings + * are allowed. + */ + CUDA_HOST_DEVICE_CALLABLE bool get_allow_single_quotes() const { return allow_single_quotes; } + + /** + * @brief Returns true/false depending on whether individually returned string values have + * their quotes stripped. + * + * When set to true, if the return value for a given row is an individual string + * (not an object, or an array of strings), strip the quotes from the string and return only the + * contents of the string itself. 
Example: + * + * @code{.pseudo} + * + * With strip_quotes_from_single_strings OFF: + * Input = {"a" : "b"} + * Query = $.a + * Output = "b" + * + * With strip_quotes_from_single_strings ON: + * Input = {"a" : "b"} + * Query = $.a + * Output = b + * + * @endcode + */ + CUDA_HOST_DEVICE_CALLABLE bool get_strip_quotes_from_single_strings() const + { + return strip_quotes_from_single_strings; + } + + /** + * @brief Set whether single-quotes for strings are allowed. + * + * @param _allow_single_quotes bool indicating desired behavior. + */ + void set_allow_single_quotes(bool _allow_single_quotes) + { + allow_single_quotes = _allow_single_quotes; + } + + /** + * @brief Set whether individually returned string values have their quotes stripped. + * + * @param _strip_quotes_from_single_strings bool indicating desired behavior. + */ + void set_strip_quotes_from_single_strings(bool _strip_quotes_from_single_strings) + { + strip_quotes_from_single_strings = _strip_quotes_from_single_strings; + } +}; + /** * @brief Apply a JSONPath string to all rows in an input strings column. * @@ -37,12 +109,14 @@ namespace strings { * * @param col The input strings column. Each row must contain a valid json string * @param json_path The JSONPath string to be applied to each row + * @param options Options for controlling the behavior of the function * @param mr Resource for allocating device memory. 
* @return New strings column containing the retrieved json object strings */ std::unique_ptr get_json_object( cudf::strings_column_view const& col, cudf::string_scalar const& json_path, + get_json_object_options options = get_json_object_options{}, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/replace.hpp b/cpp/include/cudf/strings/replace.hpp index 8f0957d1020..e9091b88b08 100644 --- a/cpp/include/cudf/strings/replace.hpp +++ b/cpp/include/cudf/strings/replace.hpp @@ -151,28 +151,6 @@ std::unique_ptr replace( strings_column_view const& repls, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Replaces any null string entries with the given string. - * - * This returns a strings column with no null entries. - * - * @code{.pseudo} - * Example: - * s = ["hello", nullptr, "goodbye"] - * r = replace_nulls(s,"**") - * r is now ["hello", "**", "goodbye"] - * @endcode - * - * @param strings Strings column for this operation. - * @param repl Replacement string for null entries. Default is empty string. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. 
- */ -std::unique_ptr replace_nulls( - strings_column_view const& strings, - string_scalar const& repl = string_scalar(""), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** @} */ // end of doxygen group } // namespace strings } // namespace cudf diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index 4bcb46e4655..f5ab2046441 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -92,20 +92,6 @@ __device__ inline size_type string_view::length() const { if (_length == UNKNOWN_STRING_LENGTH) _length = strings::detail::characters_in_string(_data, _bytes); - if (_length && (_char_width == UNKNOWN_CHAR_WIDTH)) { - uint8_t const* ptr = reinterpret_cast(data()); - auto const first = strings::detail::bytes_in_utf8_byte(*ptr); - // see if they are all the same width - _char_width = (thrust::find_if(thrust::seq, - ptr, - ptr + size_bytes(), - [first](auto ch) { - auto width = strings::detail::bytes_in_utf8_byte(ch); - return (width != 0) && (width != first); - })) == (ptr + size_bytes()) - ? 
first - : VARIABLE_CHAR_WIDTH; - } return _length; } @@ -251,7 +237,7 @@ __device__ inline size_type string_view::byte_offset(size_type pos) const size_type offset = 0; const char* sptr = _data; const char* eptr = sptr + _bytes; - if (_char_width > 0) return pos * _char_width; + if (length() == size_bytes()) return pos; while ((pos > 0) && (sptr < eptr)) { size_type charbytes = strings::detail::bytes_in_utf8_byte(static_cast(*sptr++)); if (charbytes) --pos; @@ -408,7 +394,7 @@ __device__ inline string_view string_view::substr(size_type pos, size_type lengt __device__ inline size_type string_view::character_offset(size_type bytepos) const { - if (_char_width > 0) return bytepos / _char_width; + if (length() == size_bytes()) return bytepos; return strings::detail::characters_in_string(data(), bytepos); } diff --git a/cpp/include/cudf/strings/string_view.hpp b/cpp/include/cudf/strings/string_view.hpp index 667a25c7641..4b1a901d72f 100644 --- a/cpp/include/cudf/strings/string_view.hpp +++ b/cpp/include/cudf/strings/string_view.hpp @@ -36,13 +36,6 @@ using char_utf8 = uint32_t; ///< UTF-8 characters are 1-4 bytes */ constexpr cudf::size_type UNKNOWN_STRING_LENGTH{-1}; -/** - * @brief The char width is initialized to this value as a place-holder. - * - * The byte-width of the characters in a string is computed on-demand. - */ -constexpr int8_t UNKNOWN_CHAR_WIDTH{-1}; - /** * @brief This value is assigned to the _char_width member if the string * contains characters of different widths. @@ -314,7 +307,7 @@ class string_view { /** * @brief Default constructor represents an empty string. */ - CUDA_HOST_DEVICE_CALLABLE string_view() : _data(""), _bytes(0), _length(0), _char_width(0) {} + CUDA_HOST_DEVICE_CALLABLE string_view() : _data(""), _bytes(0), _length(0) {} /** * @brief Create instance from existing device char array. @@ -323,7 +316,7 @@ class string_view { * @param bytes Number of bytes in data array. 
*/ CUDA_HOST_DEVICE_CALLABLE string_view(const char* data, size_type bytes) - : _data(data), _bytes(bytes), _length(UNKNOWN_STRING_LENGTH), _char_width(UNKNOWN_CHAR_WIDTH) + : _data(data), _bytes(bytes), _length(UNKNOWN_STRING_LENGTH) { } @@ -334,10 +327,9 @@ class string_view { string_view& operator=(string_view&&) = default; private: - const char* _data{}; ///< Pointer to device memory contain char array for this string - size_type _bytes{}; ///< Number of bytes in _data for this string - mutable size_type _length{}; ///< Number of characters in this string (computed) - mutable int8_t _char_width{}; ///< Number of bytes per character if uniform width (computed) + const char* _data{}; ///< Pointer to device memory contain char array for this string + size_type _bytes{}; ///< Number of bytes in _data for this string + mutable size_type _length{}; ///< Number of characters in this string (computed) /** * @brief Return the character position of the given byte offset. diff --git a/cpp/include/cudf/strings/strings_column_view.hpp b/cpp/include/cudf/strings/strings_column_view.hpp index 0c7270b3ba8..4d3c2dcdc56 100644 --- a/cpp/include/cudf/strings/strings_column_view.hpp +++ b/cpp/include/cudf/strings/strings_column_view.hpp @@ -19,7 +19,7 @@ #include #include -#include +#include /** * @file @@ -86,23 +86,6 @@ class strings_column_view : private column_view { //! Strings column APIs. namespace strings { -/** - * @brief Prints the strings to stdout. - * - * @param strings Strings instance for this operation. - * @param start Index of first string to print. - * @param end Index of last string to print. Specify -1 for all strings. - * @param max_width Maximum number of characters to print per string. - * Specify -1 to print all characters. - * @param delimiter The chars to print between each string. - * Default is new-line character. 
- */ -void print(strings_column_view const& strings, - size_type start = 0, - size_type end = -1, - size_type max_width = -1, - const char* delimiter = "\n"); - /** * @brief Create output per Arrow strings format. * @@ -110,10 +93,10 @@ void print(strings_column_view const& strings, * * @param strings Strings instance for this operation. * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned device_vectors. + * @param mr Device memory resource used to allocate the returned device vectors. * @return Pair containing a vector of chars and a vector of offsets. */ -std::pair, rmm::device_vector> create_offsets( +std::pair, rmm::device_uvector> create_offsets( strings_column_view const& strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 61d714c5538..bec5299ab77 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -475,17 +475,19 @@ class row_hasher { // Hash the first column w/ the seed auto const initial_hash = hash_combiner(hash_value_type{0}, - type_dispatcher(_table.column(0).type(), - element_hasher_with_seed{_seed}, - _table.column(0), - row_index)); + type_dispatcher( + _table.column(0).type(), + element_hasher_with_seed{_seed}, + _table.column(0), + row_index)); // Hashes an element in a column auto hasher = [=](size_type column_index) { - return cudf::type_dispatcher(_table.column(column_index).type(), - element_hasher{}, - _table.column(column_index), - row_index); + return cudf::type_dispatcher( + _table.column(column_index).type(), + element_hasher{}, + _table.column(column_index), + row_index); }; // Hash each element and combine all the hash values together @@ -528,10 +530,11 @@ class row_hasher_initial_values { // 
Hashes an element in a column and combines with an initial value auto hasher = [=](size_type column_index) { - auto hash_value = cudf::type_dispatcher(_table.column(column_index).type(), - element_hasher{}, - _table.column(column_index), - row_index); + auto hash_value = + cudf::type_dispatcher(_table.column(column_index).type(), + element_hasher{}, + _table.column(column_index), + row_index); return hash_combiner(_initial_hash[column_index], hash_value); }; diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index a225e590f9a..1ff701c3b01 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -257,9 +257,19 @@ class mutable_table_view : public detail::table_view_base { mutable_table_view(std::vector const& views); }; -inline bool has_nulls(table_view view) +inline bool has_nulls(table_view const& view) { - return std::any_of(view.begin(), view.end(), [](column_view col) { return col.has_nulls(); }); + return std::any_of(view.begin(), view.end(), [](auto const& col) { return col.has_nulls(); }); +} + +inline bool has_nested_nulls(table_view const& input) +{ + return std::any_of(input.begin(), input.end(), [](auto const& col) { + return col.has_nulls() || + std::any_of(col.child_begin(), col.child_end(), [](auto const& child_col) { + return has_nested_nulls(table_view{{child_col}}); + }); + }); } /** diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index d7b6402fe4e..8116097e38e 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -61,6 +61,7 @@ class scalar; // clang-format off class list_scalar; +class struct_scalar; class string_scalar; template class numeric_scalar; template class fixed_point_scalar; @@ -74,8 +75,6 @@ template class timestamp_scalar_device_view; template class duration_scalar_device_view; // clang-format on -class struct_scalar; - class table; class table_view; class mutable_table_view; diff --git 
a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 999306d4ee7..52ad0648e23 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -119,6 +120,11 @@ struct is_host_span_supported_container< // thrust::host_vector> : std::true_type { }; +template +struct is_host_span_supported_container< // + std::basic_string, Alloc>> : std::true_type { +}; + template struct host_span : public cudf::detail::span_base> { using base = cudf::detail::span_base>; @@ -257,6 +263,17 @@ class base_2dspan { return {this->data() + flatten_index(row, 0, this->size()), this->size().second}; } + constexpr base_2dspan subspan(size_t first_row, size_t num_rows) const noexcept + { + return base_2dspan( + _data + flatten_index(first_row, 0, this->size()), num_rows, this->size().second); + } + + constexpr RowType flat_view() + { + return {this->data(), this->size().first * this->size().second}; + } + template typename OtherRowType, diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index aa5f554ad40..e2f5f6db624 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -140,7 +140,7 @@ constexpr inline bool is_numeric() struct is_numeric_impl { template - bool operator()() + constexpr bool operator()() { return is_numeric(); } @@ -181,7 +181,7 @@ constexpr inline bool is_index_type() struct is_index_type_impl { template - bool operator()() + constexpr bool operator()() { return is_index_type(); } @@ -218,7 +218,7 @@ constexpr inline bool is_unsigned() struct is_unsigned_impl { template - bool operator()() + constexpr bool operator()() { return is_unsigned(); } @@ -264,7 +264,7 @@ constexpr inline bool is_floating_point() struct is_floating_point_impl { template - bool operator()() + constexpr bool operator()() { return is_floating_point(); } @@ -332,7 +332,7 @@ constexpr inline bool 
is_timestamp() struct is_timestamp_impl { template - bool operator()() + constexpr bool operator()() { return is_timestamp(); } @@ -367,7 +367,7 @@ constexpr inline bool is_fixed_point() struct is_fixed_point_impl { template - bool operator()() + constexpr bool operator()() { return is_fixed_point(); } @@ -400,7 +400,7 @@ constexpr inline bool is_duration() struct is_duration_impl { template - bool operator()() + constexpr bool operator()() { return is_duration(); } @@ -435,7 +435,7 @@ constexpr inline bool is_chrono() struct is_chrono_impl { template - bool operator()() + constexpr bool operator()() { return is_chrono(); } @@ -488,7 +488,7 @@ constexpr inline bool is_dictionary() struct is_dictionary_impl { template - bool operator()() + constexpr bool operator()() { return is_dictionary(); } @@ -524,7 +524,7 @@ constexpr inline bool is_fixed_width() struct is_fixed_width_impl { template - bool operator()() + constexpr bool operator()() { return is_fixed_width(); } @@ -567,7 +567,7 @@ constexpr inline bool is_compound() struct is_compound_impl { template - bool operator()() + constexpr bool operator()() { return is_compound(); } @@ -609,7 +609,7 @@ constexpr inline bool is_nested() struct is_nested_impl { template - bool operator()() + constexpr bool operator()() { return is_nested(); } diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index 66710960296..b8b63b3be81 100644 --- a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -199,19 +199,24 @@ template <> inline std::pair, std::vector> to_host(column_view c) { auto strings_data = cudf::strings::create_offsets(strings_column_view(c)); - thrust::host_vector h_chars(strings_data.first); - thrust::host_vector h_offsets(strings_data.second); + thrust::host_vector h_chars(strings_data.first.size()); + thrust::host_vector h_offsets(strings_data.second.size()); + CUDA_TRY( + cudaMemcpy(h_chars.data(), 
strings_data.first.data(), h_chars.size(), cudaMemcpyDeviceToHost)); + CUDA_TRY(cudaMemcpy(h_offsets.data(), + strings_data.second.data(), + h_offsets.size() * sizeof(cudf::size_type), + cudaMemcpyDeviceToHost)); // build std::string vector from chars and offsets std::vector host_data; host_data.reserve(c.size()); - - // When C++17, replace this loop with std::adjacent_difference() - for (size_type idx = 0; idx < c.size(); ++idx) { - auto offset = h_offsets[idx]; - auto length = h_offsets[idx + 1] - offset; - host_data.push_back(std::string(h_chars.data() + offset, length)); - } + std::transform( + std::begin(h_offsets), + std::end(h_offsets) - 1, + std::begin(h_offsets) + 1, + std::back_inserter(host_data), + [&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); }); return {host_data, bitmask_to_host(c)}; } diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 7667254ffbf..74d22085b26 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -145,7 +145,7 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end) auto transform_begin = thrust::make_transform_iterator(begin, transformer); auto const size = cudf::distance(begin, end); auto const elements = thrust::host_vector(transform_begin, transform_begin + size); - return rmm::device_buffer{elements.data(), size * sizeof(ElementTo)}; + return rmm::device_buffer{elements.data(), size * sizeof(ElementTo), rmm::cuda_stream_default}; } /** @@ -171,7 +171,7 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end) auto transform_begin = thrust::make_transform_iterator(begin, transformer); auto const size = cudf::distance(begin, end); auto const elements = thrust::host_vector(transform_begin, transform_begin + size); - return rmm::device_buffer{elements.data(), size * sizeof(RepType)}; + return rmm::device_buffer{elements.data(), size * sizeof(RepType), 
rmm::cuda_stream_default}; } /** @@ -198,7 +198,7 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end) auto transformer_begin = thrust::make_transform_iterator(begin, to_rep); auto const size = cudf::distance(begin, end); auto const elements = thrust::host_vector(transformer_begin, transformer_begin + size); - return rmm::device_buffer{elements.data(), size * sizeof(RepType)}; + return rmm::device_buffer{elements.data(), size * sizeof(RepType), rmm::cuda_stream_default}; } /** @@ -245,7 +245,8 @@ rmm::device_buffer make_null_mask(ValidityIterator begin, ValidityIterator end) { auto null_mask = make_null_mask_vector(begin, end); return rmm::device_buffer{null_mask.data(), - null_mask.size() * sizeof(decltype(null_mask.front()))}; + null_mask.size() * sizeof(decltype(null_mask.front())), + rmm::cuda_stream_default}; } /** @@ -514,8 +515,10 @@ class fixed_point_column_wrapper : public detail::column_wrapper { auto const id = is_decimal32 ? type_id::DECIMAL32 : type_id::DECIMAL64; auto const data_type = cudf::data_type{id, static_cast(scale)}; - wrapped.reset( - new cudf::column{data_type, size, rmm::device_buffer{elements.data(), size * sizeof(Rep)}}); + wrapped.reset(new cudf::column{ + data_type, + size, + rmm::device_buffer{elements.data(), size * sizeof(Rep), rmm::cuda_stream_default}}); } /** @@ -577,11 +580,12 @@ class fixed_point_column_wrapper : public detail::column_wrapper { auto const id = is_decimal32 ? 
type_id::DECIMAL32 : type_id::DECIMAL64; auto const data_type = cudf::data_type{id, static_cast(scale)}; - wrapped.reset(new cudf::column{data_type, - size, - rmm::device_buffer{elements.data(), size * sizeof(Rep)}, - detail::make_null_mask(v, v + size), - cudf::UNKNOWN_NULL_COUNT}); + wrapped.reset(new cudf::column{ + data_type, + size, + rmm::device_buffer{elements.data(), size * sizeof(Rep), rmm::cuda_stream_default}, + detail::make_null_mask(v, v + size), + cudf::UNKNOWN_NULL_COUNT}); } /** @@ -1514,7 +1518,7 @@ class lists_column_wrapper : public detail::column_wrapper { std::move(offsets), std::move(data), v.size() <= 0 ? 0 : cudf::UNKNOWN_NULL_COUNT, - v.size() <= 0 ? rmm::device_buffer{0} + v.size() <= 0 ? rmm::device_buffer{} : cudf::test::detail::make_null_mask(v.begin(), v.end())); } @@ -1544,7 +1548,7 @@ class lists_column_wrapper : public detail::column_wrapper { size_type num_elements = offsets->size() == 0 ? 0 : offsets->size() - 1; wrapped = - make_lists_column(num_elements, std::move(offsets), std::move(c), 0, rmm::device_buffer{0}); + make_lists_column(num_elements, std::move(offsets), std::move(c), 0, rmm::device_buffer{}); } /** @@ -1776,7 +1780,7 @@ class structs_column_wrapper : public detail::column_wrapper { num_rows, std::move(child_columns), validity.size() <= 0 ? 0 : cudf::UNKNOWN_NULL_COUNT, - validity.size() <= 0 ? rmm::device_buffer{0} + validity.size() <= 0 ? 
rmm::device_buffer{} : detail::make_null_mask(validity.begin(), validity.end())); } diff --git a/cpp/include/cudf_test/iterator_utilities.hpp b/cpp/include/cudf_test/iterator_utilities.hpp index 297bcbf175c..f777ceed675 100644 --- a/cpp/include/cudf_test/iterator_utilities.hpp +++ b/cpp/include/cudf_test/iterator_utilities.hpp @@ -49,7 +49,7 @@ namespace test { * @return auto Validity iterator */ template -static auto iterator_with_null_at(Iter index_start, Iter index_end) +[[maybe_unused]] static auto iterator_with_null_at(Iter index_start, Iter index_end) { using index_type = typename std::iterator_traits::value_type; @@ -77,7 +77,7 @@ static auto iterator_with_null_at(Iter index_start, Iter index_end) * @param indices The indices for which the validity iterator must return `false` (i.e. null) * @return auto Validity iterator */ -static auto iterator_with_null_at(cudf::host_span const& indices) +[[maybe_unused]] static auto iterator_with_null_at(cudf::host_span indices) { return iterator_with_null_at(indices.begin(), indices.end()); } @@ -97,10 +97,24 @@ static auto iterator_with_null_at(cudf::host_span const& * @param index The index for which the validity iterator must return `false` (i.e. 
null) * @return auto Validity iterator */ -static auto iterator_with_null_at(cudf::size_type const& index) +[[maybe_unused]] static auto iterator_with_null_at(cudf::size_type index) { return iterator_with_null_at(std::vector{index}); } +/** + * @brief Bool iterator for marking all elements are null + * + * @return auto Validity iterator which always yields `false` + */ +[[maybe_unused]] static auto iterator_all_nulls() { return thrust::make_constant_iterator(false); } + +/** + * @brief Bool iterator for marking all elements are valid (non-null) + * + * @return auto Validity iterator which always yields `true` + */ +[[maybe_unused]] static auto iterator_no_null() { return thrust::make_constant_iterator(true); } + } // namespace test } // namespace cudf diff --git a/cpp/include/cudf_test/timestamp_utilities.cuh b/cpp/include/cudf_test/timestamp_utilities.cuh index 201b837e936..6cab8b92283 100644 --- a/cpp/include/cudf_test/timestamp_utilities.cuh +++ b/cpp/include/cudf_test/timestamp_utilities.cuh @@ -55,11 +55,10 @@ inline cudf::test::fixed_width_column_wrapper generate_timestamps(in auto lhs = start.time_since_epoch().count(); auto rhs = stop.time_since_epoch().count(); - // When C++17, auto [min, max] = std::minmax(lhs, rhs) - auto min = std::min(lhs, rhs); - auto max = std::max(lhs, rhs); - auto range = max - min; - auto iter = cudf::detail::make_counting_transform_iterator(0, [=](auto i) { + auto const min = std::min(lhs, rhs); + auto const max = std::max(lhs, rhs); + auto const range = max - min; + auto iter = cudf::detail::make_counting_transform_iterator(0, [=](auto i) { return cuda::std::chrono::floor( cuda::std::chrono::milliseconds(min + (range / count) * i)) .count(); diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp index 71c2b74b37b..a344173144d 100644 --- a/cpp/include/cudf_test/type_lists.hpp +++ b/cpp/include/cudf_test/type_lists.hpp @@ -25,6 +25,8 @@ #include #include +#include + #include #include @@ -79,10 
+81,10 @@ constexpr auto types_to_ids() template typename std::enable_if() && !cudf::is_timestamp_t::value, - std::vector>::type + thrust::host_vector>::type make_type_param_vector(std::initializer_list const& init_list) { - std::vector vec(init_list.size()); + thrust::host_vector vec(init_list.size()); std::transform(std::cbegin(init_list), std::cend(init_list), std::begin(vec), [](auto const& e) { if (std::is_unsigned::value) return static_cast(std::abs(e)); @@ -93,10 +95,11 @@ make_type_param_vector(std::initializer_list const& init_list) } template -typename std::enable_if::value, std::vector>::type +typename std::enable_if::value, + thrust::host_vector>::type make_type_param_vector(std::initializer_list const& init_list) { - std::vector vec(init_list.size()); + thrust::host_vector vec(init_list.size()); std::transform(std::cbegin(init_list), std::cend(init_list), std::begin(vec), [](auto const& e) { return TypeParam{typename TypeParam::duration{e}}; }); diff --git a/cpp/include/doxygen_groups.h b/cpp/include/doxygen_groups.h index f78ff98d49d..dda8ce87432 100644 --- a/cpp/include/doxygen_groups.h +++ b/cpp/include/doxygen_groups.h @@ -143,6 +143,7 @@ * @} * @defgroup lists_apis Lists * @{ + * @defgroup lists_combine Combining * @defgroup lists_extract Extracting * @defgroup lists_contains Searching * @defgroup lists_gather Gathering diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index e178f5a6280..d6b69e0bf73 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -15,7 +15,7 @@ #============================================================================= cmake_minimum_required(VERSION 3.18 FATAL_ERROR) -project(CUDA_KAFKA VERSION 0.19.0 LANGUAGES CXX) +project(CUDA_KAFKA VERSION 21.06.00 LANGUAGES CXX) ################################################################################################### # - Build options diff --git a/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake 
b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake index 1f7c15d4f75..5b0f31035c3 100644 --- a/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake +++ b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake @@ -14,22 +14,7 @@ # limitations under the License. #============================================================================= -function(cudfkafka_save_if_enabled var) - if(CUDF_KAFKA_${var}) - unset(${var} PARENT_SCOPE) - unset(${var} CACHE) - endif() -endfunction() - -function(cudfkafka_restore_if_enabled var) - if(CUDF_KAFKA_${var}) - set(${var} ON CACHE INTERNAL "" FORCE) - endif() -endfunction() - function(find_and_configure_cudf VERSION) - cudfkafka_save_if_enabled(BUILD_TESTS) - cudfkafka_save_if_enabled(BUILD_BENCHMARKS) CPMFindPackage(NAME cudf VERSION ${VERSION} GIT_REPOSITORY https://github.com/rapidsai/cudf.git @@ -38,9 +23,16 @@ function(find_and_configure_cudf VERSION) SOURCE_SUBDIR cpp OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF") - cudfkafka_restore_if_enabled(BUILD_TESTS) - cudfkafka_restore_if_enabled(BUILD_BENCHMARKS) + if(cudf_ADDED) + set(cudf_ADDED TRUE PARENT_SCOPE) + endif() endfunction() -set(CUDF_KAFKA_MIN_VERSION_cudf 0.19) -find_and_configure_cudf(${CUDF_KAFKA_MIN_VERSION_cudf}) +set(CUDA_KAFKA_MIN_VERSION_cudf "${CUDA_KAFKA_VERSION_MAJOR}.${CUDA_KAFKA_VERSION_MINOR}.00") +find_and_configure_cudf(${CUDA_KAFKA_MIN_VERSION_cudf}) + +if(cudf_ADDED) + # Since we are building cudf as part of ourselves we need + # to enable the CUDA language in the top-most scope + enable_language(CUDA) +endif() diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 3a044a42101..a878dbe1535 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -22,142 +22,491 @@ namespace cudf { -std::vector aggregation::get_simple_aggregations(data_type col_type) const +namespace detail { + +// simple_aggregations_collector 
---------------------------------------- + +std::vector> simple_aggregations_collector::visit( + data_type col_type, aggregation const& agg) +{ + std::vector> aggs; + aggs.push_back(agg.clone()); + return aggs; +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, sum_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, product_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, min_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, max_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, count_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, any_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, all_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, sum_of_squares_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, mean_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, var_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, std_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type 
col_type, median_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, quantile_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, argmax_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, argmin_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, nunique_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, nth_element_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, row_number_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, collect_list_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, collect_set_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, lead_lag_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, udf_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +// aggregation_finalizer ---------------------------------------- + +void aggregation_finalizer::visit(aggregation const& agg) {} + +void aggregation_finalizer::visit(sum_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(product_aggregation const& agg) +{ + 
visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(min_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(max_aggregation const& agg) { - return {this->kind}; + visit(static_cast(agg)); } -void aggregation::finalize(cudf::detail::aggregation_finalizer& finalizer) + +void aggregation_finalizer::visit(count_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(any_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(all_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(sum_of_squares_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(mean_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(var_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(std_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(median_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(quantile_aggregation const& agg) { - finalizer.visit(*this); + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(argmax_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(argmin_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(nunique_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(nth_element_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(row_number_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(collect_list_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(collect_set_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void 
aggregation_finalizer::visit(lead_lag_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(udf_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +} // namespace detail + +std::vector> aggregation::get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const +{ + return collector.visit(col_type, *this); } /// Factory to create a SUM aggregation -std::unique_ptr make_sum_aggregation() +template +std::unique_ptr make_sum_aggregation() { - return std::make_unique(aggregation::SUM); + return std::make_unique(); } +template std::unique_ptr make_sum_aggregation(); +template std::unique_ptr make_sum_aggregation(); + /// Factory to create a PRODUCT aggregation -std::unique_ptr make_product_aggregation() +template +std::unique_ptr make_product_aggregation() { - return std::make_unique(aggregation::PRODUCT); + return std::make_unique(); } +template std::unique_ptr make_product_aggregation(); + /// Factory to create a MIN aggregation -std::unique_ptr make_min_aggregation() +template +std::unique_ptr make_min_aggregation() { return std::make_unique(); } +template std::unique_ptr make_min_aggregation(); +template std::unique_ptr make_min_aggregation(); + /// Factory to create a MAX aggregation -std::unique_ptr make_max_aggregation() +template +std::unique_ptr make_max_aggregation() { return std::make_unique(); } +template std::unique_ptr make_max_aggregation(); +template std::unique_ptr make_max_aggregation(); + /// Factory to create a COUNT aggregation -std::unique_ptr make_count_aggregation(null_policy null_handling) +template +std::unique_ptr make_count_aggregation(null_policy null_handling) { auto kind = (null_handling == null_policy::INCLUDE) ? 
aggregation::COUNT_ALL : aggregation::COUNT_VALID; - return std::make_unique(kind); + return std::make_unique(kind); } +template std::unique_ptr make_count_aggregation( + null_policy null_handling); +template std::unique_ptr make_count_aggregation( + null_policy null_handling); + /// Factory to create a ANY aggregation -std::unique_ptr make_any_aggregation() +template +std::unique_ptr make_any_aggregation() { - return std::make_unique(aggregation::ANY); + return std::make_unique(); } +template std::unique_ptr make_any_aggregation(); + /// Factory to create a ALL aggregation -std::unique_ptr make_all_aggregation() +template +std::unique_ptr make_all_aggregation() { - return std::make_unique(aggregation::ALL); + return std::make_unique(); } +template std::unique_ptr make_all_aggregation(); + /// Factory to create a SUM_OF_SQUARES aggregation -std::unique_ptr make_sum_of_squares_aggregation() +template +std::unique_ptr make_sum_of_squares_aggregation() { - return std::make_unique(aggregation::SUM_OF_SQUARES); + return std::make_unique(); } +template std::unique_ptr make_sum_of_squares_aggregation(); + /// Factory to create a MEAN aggregation -std::unique_ptr make_mean_aggregation() +template +std::unique_ptr make_mean_aggregation() { return std::make_unique(); } +template std::unique_ptr make_mean_aggregation(); +template std::unique_ptr make_mean_aggregation(); + /// Factory to create a VARIANCE aggregation -std::unique_ptr make_variance_aggregation(size_type ddof) +template +std::unique_ptr make_variance_aggregation(size_type ddof) { return std::make_unique(ddof); -}; +} +template std::unique_ptr make_variance_aggregation(size_type ddof); + /// Factory to create a STD aggregation -std::unique_ptr make_std_aggregation(size_type ddof) +template +std::unique_ptr make_std_aggregation(size_type ddof) { return std::make_unique(ddof); -}; +} +template std::unique_ptr make_std_aggregation(size_type ddof); + /// Factory to create a MEDIAN aggregation -std::unique_ptr 
make_median_aggregation() +template +std::unique_ptr make_median_aggregation() { - // TODO I think this should just return a quantile_aggregation? - return std::make_unique(aggregation::MEDIAN); + return std::make_unique(); } +template std::unique_ptr make_median_aggregation(); + /// Factory to create a QUANTILE aggregation -std::unique_ptr make_quantile_aggregation(std::vector const& q, - interpolation i) +template +std::unique_ptr make_quantile_aggregation(std::vector const& q, interpolation i) { return std::make_unique(q, i); } -/// Factory to create a ARGMAX aggregation -std::unique_ptr make_argmax_aggregation() +template std::unique_ptr make_quantile_aggregation( + std::vector const& q, interpolation i); + +/// Factory to create an ARGMAX aggregation +template +std::unique_ptr make_argmax_aggregation() { - return std::make_unique(aggregation::ARGMAX); + return std::make_unique(); } -/// Factory to create a ARGMIN aggregation -std::unique_ptr make_argmin_aggregation() +template std::unique_ptr make_argmax_aggregation(); +template std::unique_ptr make_argmax_aggregation(); + +/// Factory to create an ARGMIN aggregation +template +std::unique_ptr make_argmin_aggregation() { - return std::make_unique(aggregation::ARGMIN); + return std::make_unique(); } -/// Factory to create a NUNIQUE aggregation -std::unique_ptr make_nunique_aggregation(null_policy null_handling) +template std::unique_ptr make_argmin_aggregation(); +template std::unique_ptr make_argmin_aggregation(); + +/// Factory to create an NUNIQUE aggregation +template +std::unique_ptr make_nunique_aggregation(null_policy null_handling) { return std::make_unique(null_handling); } -/// Factory to create a NTH_ELEMENT aggregation -std::unique_ptr make_nth_element_aggregation(size_type n, null_policy null_handling) +template std::unique_ptr make_nunique_aggregation( + null_policy null_handling); + +/// Factory to create an NTH_ELEMENT aggregation +template +std::unique_ptr make_nth_element_aggregation(size_type 
n, null_policy null_handling) { return std::make_unique(n, null_handling); } +template std::unique_ptr make_nth_element_aggregation( + size_type n, null_policy null_handling); + /// Factory to create a ROW_NUMBER aggregation -std::unique_ptr make_row_number_aggregation() +template +std::unique_ptr make_row_number_aggregation() { - return std::make_unique(aggregation::ROW_NUMBER); + return std::make_unique(); } +template std::unique_ptr make_row_number_aggregation(); +template std::unique_ptr make_row_number_aggregation(); + /// Factory to create a COLLECT_LIST aggregation -std::unique_ptr make_collect_list_aggregation(null_policy null_handling) +template +std::unique_ptr make_collect_list_aggregation(null_policy null_handling) { return std::make_unique(null_handling); } +template std::unique_ptr make_collect_list_aggregation( + null_policy null_handling); +template std::unique_ptr make_collect_list_aggregation( + null_policy null_handling); + /// Factory to create a COLLECT_SET aggregation -std::unique_ptr make_collect_set_aggregation(null_policy null_handling, - null_equality nulls_equal, - nan_equality nans_equal) +template +std::unique_ptr make_collect_set_aggregation(null_policy null_handling, + null_equality nulls_equal, + nan_equality nans_equal) { return std::make_unique(null_handling, nulls_equal, nans_equal); } +template std::unique_ptr make_collect_set_aggregation( + null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal); +template std::unique_ptr make_collect_set_aggregation( + null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal); + /// Factory to create a LAG aggregation -std::unique_ptr make_lag_aggregation(size_type offset) +template +std::unique_ptr make_lag_aggregation(size_type offset) { - return std::make_unique(aggregation::LAG, offset); + return std::make_unique(aggregation::LAG, offset); } +template std::unique_ptr make_lag_aggregation(size_type offset); +template std::unique_ptr 
make_lag_aggregation( + size_type offset); + /// Factory to create a LEAD aggregation -std::unique_ptr make_lead_aggregation(size_type offset) +template +std::unique_ptr make_lead_aggregation(size_type offset) { - return std::make_unique(aggregation::LEAD, offset); + return std::make_unique(aggregation::LEAD, offset); } +template std::unique_ptr make_lead_aggregation(size_type offset); +template std::unique_ptr make_lead_aggregation( + size_type offset); + /// Factory to create a UDF aggregation -std::unique_ptr make_udf_aggregation(udf_type type, - std::string const& user_defined_aggregator, - data_type output_type) +template +std::unique_ptr make_udf_aggregation(udf_type type, + std::string const& user_defined_aggregator, + data_type output_type) { - aggregation* a = + auto* a = new detail::udf_aggregation{type == udf_type::PTX ? aggregation::PTX : aggregation::CUDA, user_defined_aggregator, output_type}; - return std::unique_ptr(a); + return std::unique_ptr(a); } +template std::unique_ptr make_udf_aggregation( + udf_type type, std::string const& user_defined_aggregator, data_type output_type); +template std::unique_ptr make_udf_aggregation( + udf_type type, std::string const& user_defined_aggregator, data_type output_type); namespace detail { namespace { diff --git a/cpp/src/aggregation/result_cache.cpp b/cpp/src/aggregation/result_cache.cpp index f35c05349b0..b259e5965ef 100644 --- a/cpp/src/aggregation/result_cache.cpp +++ b/cpp/src/aggregation/result_cache.cpp @@ -50,11 +50,8 @@ std::unique_ptr result_cache::release_result(size_t col_idx, aggregation { CUDF_EXPECTS(has_result(col_idx, agg), "Result does not exist in cache"); - // unordered_map.extract() is a c++17 feature so we do this: - auto result_it = _cache[col_idx].find(agg); - std::unique_ptr result = std::move(result_it->second.second); - _cache[col_idx].erase(result_it); - return result; + auto result_it = _cache[col_idx].extract(agg); + return std::move(result_it.mapped().second); } } // namespace 
detail diff --git a/cpp/src/ast/linearizer.cpp b/cpp/src/ast/linearizer.cpp index cc70845e1ff..66a32ead35e 100644 --- a/cpp/src/ast/linearizer.cpp +++ b/cpp/src/ast/linearizer.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ #include -#include +#include #include #include #include diff --git a/cpp/src/ast/transform.cu b/cpp/src/ast/transform.cu index bc055d46869..43d3bde97c2 100644 --- a/cpp/src/ast/transform.cu +++ b/cpp/src/ast/transform.cu @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include #include @@ -61,27 +61,25 @@ namespace detail { * each thread. */ template -__launch_bounds__(max_block_size) __global__ - void compute_column_kernel(table_device_view const table, - const cudf::detail::fixed_width_scalar_device_view_base* literals, - mutable_column_device_view output_column, - const detail::device_data_reference* data_references, - const ast_operator* operators, - const cudf::size_type* operator_source_indices, - cudf::size_type num_operators, - cudf::size_type num_intermediates) +__launch_bounds__(max_block_size) __global__ void compute_column_kernel( + table_device_view const table, + device_span literals, + mutable_column_device_view output_column, + device_span data_references, + device_span operators, + device_span operator_source_indices, + cudf::size_type num_intermediates) { extern __shared__ std::int64_t intermediate_storage[]; auto thread_intermediate_storage = &intermediate_storage[threadIdx.x * num_intermediates]; - auto const start_idx = cudf::size_type(threadIdx.x + blockIdx.x * blockDim.x); - auto const stride = cudf::size_type(blockDim.x * gridDim.x); - auto const num_rows = table.num_rows(); + auto const start_idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); + auto const stride = static_cast(blockDim.x * gridDim.x); auto const evaluator = cudf::ast::detail::row_evaluator(table, literals, thread_intermediate_storage, &output_column); - for (cudf::size_type row_index = start_idx; row_index < num_rows; row_index 
+= stride) { + for (cudf::size_type row_index = start_idx; row_index < table.num_rows(); row_index += stride) { evaluate_row_expression( - evaluator, data_references, operators, operator_source_indices, num_operators, row_index); + evaluator, data_references, operators, operator_source_indices, row_index); } } @@ -90,40 +88,8 @@ std::unique_ptr compute_column(table_view const table, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // Linearize the AST - auto const expr_linearizer = linearizer(expr, table); - auto const data_references = expr_linearizer.data_references(); - auto const literals = expr_linearizer.literals(); - auto const operators = expr_linearizer.operators(); - auto const num_operators = cudf::size_type(operators.size()); - auto const operator_source_indices = expr_linearizer.operator_source_indices(); - auto const expr_data_type = expr_linearizer.root_data_type(); - - // Create ast_plan and device buffer - auto plan = ast_plan(); - plan.add_to_plan(data_references); - plan.add_to_plan(literals); - plan.add_to_plan(operators); - plan.add_to_plan(operator_source_indices); - auto const host_data_buffer = plan.get_host_data_buffer(); - auto const buffer_offsets = plan.get_offsets(); - auto const buffer_size = host_data_buffer.second; - auto device_data_buffer = - rmm::device_buffer(host_data_buffer.first.get(), buffer_size, stream, mr); - // To reduce overhead, we don't call a stream sync here. - // The stream is synced later when the table_device_view is created. 
- - // Create device pointers to components of plan - auto const device_data_buffer_ptr = static_cast(device_data_buffer.data()); - auto const device_data_references = reinterpret_cast( - device_data_buffer_ptr + buffer_offsets[0]); - auto const device_literals = - reinterpret_cast( - device_data_buffer_ptr + buffer_offsets[1]); - auto const device_operators = - reinterpret_cast(device_data_buffer_ptr + buffer_offsets[2]); - auto const device_operator_source_indices = - reinterpret_cast(device_data_buffer_ptr + buffer_offsets[3]); + auto const expr_linearizer = linearizer(expr, table); // Linearize the AST + auto const plan = ast_plan{expr_linearizer, stream, mr}; // Create ast_plan // Create table device view auto table_device = table_device_view::create(table, stream); @@ -131,7 +97,7 @@ std::unique_ptr compute_column(table_view const table, // Prepare output column auto output_column = cudf::make_fixed_width_column( - expr_data_type, table_num_rows, mask_state::UNALLOCATED, stream, mr); + expr_linearizer.root_data_type(), table_num_rows, mask_state::UNALLOCATED, stream, mr); auto mutable_output_device = cudf::mutable_column_device_view::create(output_column->mutable_view(), stream); @@ -155,12 +121,11 @@ std::unique_ptr compute_column(table_view const table, cudf::ast::detail::compute_column_kernel <<>>( *table_device, - device_literals, + plan._device_literals, *mutable_output_device, - device_data_references, - device_operators, - device_operator_source_indices, - num_operators, + plan._device_data_references, + plan._device_operators, + plan._device_operator_source_indices, num_intermediates); CHECK_CUDA(stream.value()); return output_column; diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu index 7d43524f608..2b24e0cfa3d 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cu +++ b/cpp/src/binaryop/compiled/binary_ops.cu @@ -123,7 +123,8 @@ struct binary_op { auto out_view = out->mutable_view(); auto out_itr = 
out_view.begin(); auto lhs_device_view = column_device_view::create(lhs, stream); - auto rhs_scalar = static_cast const&>(rhs); + using rhs_type = cudf::scalar_type_t; + auto rhs_scalar = rhs_type(static_cast(rhs), stream); auto rhs_scalar_view = get_scalar_device_view(rhs_scalar); if (lhs.has_nulls()) { auto lhs_itr = cudf::detail::make_null_replacement_iterator(*lhs_device_view, Lhs{}); diff --git a/cpp/src/bitmask/is_element_valid.cpp b/cpp/src/bitmask/is_element_valid.cpp new file mode 100644 index 00000000000..47870e01567 --- /dev/null +++ b/cpp/src/bitmask/is_element_valid.cpp @@ -0,0 +1,47 @@ + +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include + +namespace cudf { +namespace detail { + +bool is_element_valid_sync(column_view const& col_view, + size_type element_index, + rmm::cuda_stream_view stream) +{ + CUDF_EXPECTS(element_index >= 0 and element_index < col_view.size(), "invalid index."); + if (!col_view.nullable()) { return true; } + + bitmask_type word; + // null_mask() returns device ptr to bitmask without offset + size_type index = element_index + col_view.offset(); + CUDA_TRY(cudaMemcpyAsync(&word, + col_view.null_mask() + word_index(index), + sizeof(bitmask_type), + cudaMemcpyDeviceToHost, + stream.value())); + stream.synchronize(); + return static_cast(word & (bitmask_type{1} << intra_word_index(index))); +} + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index d30e5fc746a..3ee8e0a33a9 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -43,19 +43,8 @@ #include namespace cudf { -// Copy constructor -column::column(column const &other) - : _type{other._type}, - _size{other._size}, - _data{other._data}, - _null_mask{other._null_mask}, - _null_count{other._null_count} -{ - _children.reserve(other.num_children()); - for (auto const &c : other._children) { _children.emplace_back(std::make_unique(*c)); } -} -// Copy ctor w/ explicit stream/mr +// Copy ctor w/ optional stream/mr column::column(column const &other, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) @@ -165,14 +154,16 @@ void column::set_null_mask(rmm::device_buffer &&new_null_mask, size_type new_nul _null_count = new_null_count; } -void column::set_null_mask(rmm::device_buffer const &new_null_mask, size_type new_null_count) +void column::set_null_mask(rmm::device_buffer const &new_null_mask, + size_type new_null_count, + rmm::cuda_stream_view stream) { if (new_null_count > 0) { CUDF_EXPECTS(new_null_mask.size() >= cudf::bitmask_allocation_size_bytes(this->size()), "Column with null values 
must be nullable and the null mask \ buffer size should match the size of the column."); } - _null_mask = new_null_mask; // copy + _null_mask = rmm::device_buffer{new_null_mask, stream}; // copy _null_count = new_null_count; } diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index 03339c2e0a8..86059a72e8f 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -159,90 +158,6 @@ std::unique_ptr make_fixed_width_column(data_type type, /// clang-format on } -struct column_from_scalar_dispatch { - template - std::unique_ptr operator()(scalar const& value, - size_type size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - if (!value.is_valid()) - return make_fixed_width_column(value.type(), size, mask_state::ALL_NULL, stream, mr); - auto output_column = - make_fixed_width_column(value.type(), size, mask_state::UNALLOCATED, stream, mr); - auto view = output_column->mutable_view(); - detail::fill_in_place(view, 0, size, value, stream); - return output_column; - } -}; - -template <> -std::unique_ptr column_from_scalar_dispatch::operator()( - scalar const& value, - size_type size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - auto null_mask = detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr); - - if (!value.is_valid()) - return std::make_unique(value.type(), - size, - rmm::device_buffer{0, stream, mr}, - null_mask, - size); - - // Create a strings column_view with all nulls and no children. - // Since we are setting every row to the scalar, the fill() never needs to access - // any of the children in the strings column which would otherwise cause an exception. 
- column_view sc{ - data_type{type_id::STRING}, size, nullptr, static_cast(null_mask.data()), size}; - auto sv = static_cast const&>(value); - // fill the column with the scalar - auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr); - output->set_null_mask(rmm::device_buffer{0, stream, mr}, 0); // should be no nulls - return output; -} - -template <> -std::unique_ptr column_from_scalar_dispatch::operator()( - scalar const& value, - size_type size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - CUDF_FAIL("dictionary not supported when creating from scalar"); -} - -template <> -std::unique_ptr column_from_scalar_dispatch::operator()( - scalar const& value, - size_type size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - CUDF_FAIL("TODO"); -} - -template <> -std::unique_ptr column_from_scalar_dispatch::operator()( - scalar const& value, - size_type size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - CUDF_FAIL("TODO. struct_view currently not supported."); -} - -std::unique_ptr make_column_from_scalar(scalar const& s, - size_type size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - if (size == 0) return make_empty_column(s.type()); - return type_dispatcher(s.type(), column_from_scalar_dispatch{}, s, size, stream, mr); -} - std::unique_ptr make_dictionary_from_scalar(scalar const& s, size_type size, rmm::cuda_stream_view stream, diff --git a/cpp/src/column/column_factories.cu b/cpp/src/column/column_factories.cu new file mode 100644 index 00000000000..9168d47aaf7 --- /dev/null +++ b/cpp/src/column/column_factories.cu @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +namespace cudf { + +namespace { + +struct column_from_scalar_dispatch { + template + std::unique_ptr operator()(scalar const& value, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + if (size == 0) return make_empty_column(value.type()); + if (!value.is_valid()) + return make_fixed_width_column(value.type(), size, mask_state::ALL_NULL, stream, mr); + auto output_column = + make_fixed_width_column(value.type(), size, mask_state::UNALLOCATED, stream, mr); + auto view = output_column->mutable_view(); + detail::fill_in_place(view, 0, size, value, stream); + return output_column; + } +}; + +template <> +std::unique_ptr column_from_scalar_dispatch::operator()( + scalar const& value, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + if (size == 0) return make_empty_column(value.type()); + auto null_mask = detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr); + + if (!value.is_valid()) + return std::make_unique( + value.type(), size, rmm::device_buffer{}, std::move(null_mask), size); + + // Create a strings column_view with all nulls and no children. + // Since we are setting every row to the scalar, the fill() never needs to access + // any of the children in the strings column which would otherwise cause an exception. 
+ column_view sc{ + data_type{type_id::STRING}, size, nullptr, static_cast(null_mask.data()), size}; + auto sv = static_cast const&>(value); + // fill the column with the scalar + auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr); + output->set_null_mask(rmm::device_buffer{}, 0); // should be no nulls + return output; +} + +template <> +std::unique_ptr column_from_scalar_dispatch::operator()( + scalar const& value, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + CUDF_FAIL("dictionary not supported when creating from scalar"); +} + +template <> +std::unique_ptr column_from_scalar_dispatch::operator()( + scalar const& value, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + auto lv = static_cast(&value); + return lists::detail::make_lists_column_from_scalar(*lv, size, stream, mr); +} + +template <> +std::unique_ptr column_from_scalar_dispatch::operator()( + scalar const& value, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + if (size == 0) CUDF_FAIL("0-length struct column is unsupported."); + auto ss = static_cast const&>(value); + auto iter = thrust::make_constant_iterator(0); + + auto children = + detail::gather(ss.view(), iter, iter + size, out_of_bounds_policy::NULLIFY, stream, mr); + auto const is_valid = ss.is_valid(); + return make_structs_column(size, + std::move(children->release()), + is_valid ? 0 : size, + is_valid + ? 
rmm::device_buffer{} + : detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr), + stream, + mr); +} + +} // anonymous namespace + +std::unique_ptr make_column_from_scalar(scalar const& s, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return type_dispatcher(s.type(), column_from_scalar_dispatch{}, s, size, stream, mr); +} + +} // namespace cudf diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 1b948083982..6ba10bef396 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -57,9 +57,6 @@ auto create_device_views(host_span views, rmm::cuda_stream_vi column_device_view::create(std::declval(), std::declval())); auto device_view_owners = std::vector(views.size()); std::transform(views.begin(), views.end(), device_view_owners.begin(), [stream](auto const& col) { - // TODO creating this device view can invoke null count computation - // even though it isn't used. See this issue: - // https://github.com/rapidsai/cudf/issues/4368 return column_device_view::create(col, stream); }); @@ -70,10 +67,8 @@ auto create_device_views(host_span views, rmm::cuda_stream_vi device_view_owners.cend(), std::back_inserter(device_views), [](auto const& col) { return *col; }); - // TODO each of these device vector copies invoke stream synchronization - // which appears to add unnecessary overhead. 
See this issue: - // https://github.com/rapidsai/rmm/issues/120 - auto d_views = make_device_uvector_async(device_views); + + auto d_views = make_device_uvector_async(device_views, stream); // Compute the partition offsets auto offsets = thrust::host_vector(views.size() + 1); @@ -84,7 +79,7 @@ auto create_device_views(host_span views, rmm::cuda_stream_vi std::next(offsets.begin()), [](auto const& col) { return col.size(); }, thrust::plus{}); - auto d_offsets = make_device_uvector_async(offsets); + auto d_offsets = make_device_uvector_async(offsets, stream); auto const output_size = offsets.back(); return std::make_tuple( @@ -455,7 +450,8 @@ rmm::device_buffer concatenate_masks(host_span views, rmm::device_buffer null_mask = create_null_mask(total_element_count, mask_state::UNINITIALIZED, mr); - detail::concatenate_masks(views, static_cast(null_mask.data()), 0); + detail::concatenate_masks( + views, static_cast(null_mask.data()), rmm::cuda_stream_default); return null_mask; } diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 9a2f0f26f74..809390553a4 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -248,14 +248,12 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst, * the actual copy. * * @param num_src_bufs Total number of source buffers (N) - * @param num_partitions Number of partitions the each source buffer is split into (M) * @param src_bufs Input source buffers (N) * @param dst_bufs Desination buffers (N*M) * @param buf_info Information on the range of values to be copied for each destination buffer. */ template __global__ void copy_partition(int num_src_bufs, - int num_partitions, uint8_t** src_bufs, uint8_t** dst_bufs, dst_buf_info* buf_info) @@ -447,6 +445,13 @@ struct buf_info_functor { return {current + 1, offset_stack_pos + offset_depth}; } + template + std::enable_if_t::value, std::pair> + operator()(Args&&...) 
+ { + CUDF_FAIL("Unsupported type"); + } + private: std::pair add_null_buffer(column_view const& col, src_buf_info* current, @@ -599,17 +604,6 @@ std::pair buf_info_functor::operator() -std::pair buf_info_functor::operator()( - column_view const& col, - src_buf_info* current, - int offset_stack_pos, - int parent_offset_index, - int offset_depth) -{ - CUDF_FAIL("Unsupported type"); -} - template std::pair setup_source_buf_info(InputIter begin, InputIter end, @@ -660,10 +654,7 @@ BufInfo build_output_columns(InputIter begin, { auto current_info = info_begin; std::transform(begin, end, out_begin, [¤t_info, base_ptr](column_view const& src) { - // Use C++17 structured bindings - bitmask_type const* bitmask_ptr; - size_type null_count; - std::tie(bitmask_ptr, null_count) = [&]() { + auto [bitmask_ptr, null_count] = [&]() { if (src.nullable()) { auto const ptr = current_info->num_elements == 0 @@ -1024,9 +1015,9 @@ std::vector contiguous_split(cudf::table_view const& input, // copy. 1 block per buffer { - constexpr size_type block_size = 512; + constexpr size_type block_size = 256; copy_partition<<>>( - num_src_bufs, num_partitions, d_src_bufs, d_dst_bufs, d_dst_buf_info); + num_src_bufs, d_src_bufs, d_dst_bufs, d_dst_buf_info); } // DtoH dst info (to retrieve null counts) diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 50bf168037d..670c147aa7e 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -44,6 +45,79 @@ inline mask_state should_allocate_mask(mask_allocation_policy mask_alloc, bool m } } +/** + * @brief Functor to produce an empty column of the same type as the + * input scalar. + * + * In the case of nested types, full column hierarchy is preserved. 
+ */ +template +struct scalar_empty_like_functor_impl { + std::unique_ptr operator()(scalar const& input) + { + return cudf::make_empty_column(input.type()); + } +}; + +template <> +struct scalar_empty_like_functor_impl { + std::unique_ptr operator()(scalar const& input) + { + return cudf::strings::detail::make_empty_strings_column(rmm::cuda_stream_default, + rmm::mr::get_current_device_resource()); + } +}; + +template <> +struct scalar_empty_like_functor_impl { + std::unique_ptr operator()(scalar const& input) + { + auto ls = static_cast(&input); + + // TODO: add a manual constructor for lists_column_view. + column_view offsets{cudf::data_type{cudf::type_id::INT32}, 0, nullptr}; + std::vector children; + children.push_back(offsets); + children.push_back(ls->view()); + column_view lcv{cudf::data_type{cudf::type_id::LIST}, 0, nullptr, nullptr, 0, 0, children}; + + return empty_like(lcv); + } +}; + +template <> +struct scalar_empty_like_functor_impl { + std::unique_ptr operator()(scalar const& input) + { + auto ss = static_cast(&input); + + // TODO: add a manual constructor for structs_column_view + // TODO: add cudf::get_element() support for structs + cudf::table_view tbl = ss->view(); + std::vector children(tbl.begin(), tbl.end()); + column_view scv{cudf::data_type{cudf::type_id::STRUCT}, 0, nullptr, nullptr, 0, 0, children}; + + return empty_like(scv); + } +}; + +template <> +struct scalar_empty_like_functor_impl { + std::unique_ptr operator()(scalar const& input) + { + CUDF_FAIL("Dictionary scalars not supported"); + } +}; + +struct scalar_empty_like_functor { + template + std::unique_ptr operator()(scalar const& input) + { + scalar_empty_like_functor_impl func; + return func(input); + } +}; + } // namespace /* @@ -91,6 +165,15 @@ std::unique_ptr empty_like(column_view const& input) input.type(), 0, rmm::device_buffer{}, rmm::device_buffer{}, 0, std::move(children)); } +/* + * Initializes and returns an empty column of the same type as the `input`. 
+ */ +std::unique_ptr empty_like(scalar const& input) +{ + CUDF_FUNC_RANGE(); + return type_dispatcher(input.type(), detail::scalar_empty_like_functor{}, input); +}; + /* * Creates a table of empty columns with the same types as the `input_table` */ diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index fecf7d18d46..9f8e6f7bdcb 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -17,12 +17,14 @@ #include #include #include +#include #include #include +#include #include #include - #include + #include namespace cudf { @@ -38,11 +40,28 @@ struct copy_if_else_functor_impl { } }; +/** + * @brief Functor to fetch a device-view for the specified scalar/column_view. + */ +struct get_iterable_device_view { + template ::value)> + auto operator()(T const& input) + { + return cudf::column_device_view::create(input); + } + + template ::value)> + auto operator()(T const& input) + { + return &input; + } +}; + template struct copy_if_else_functor_impl()>> { template - std::unique_ptr operator()(Left const& lhs, - Right const& rhs, + std::unique_ptr operator()(Left const& lhs_h, + Right const& rhs_h, size_type size, bool left_nullable, bool right_nullable, @@ -50,6 +69,11 @@ struct copy_if_else_functor_impl rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + auto p_lhs = get_iterable_device_view{}(lhs_h); + auto p_rhs = get_iterable_device_view{}(rhs_h); + auto const& lhs = *p_lhs; + auto const& rhs = *p_rhs; + if (left_nullable) { if (right_nullable) { auto lhs_iter = cudf::detail::make_pair_iterator(lhs); @@ -81,8 +105,8 @@ struct copy_if_else_functor_impl template <> struct copy_if_else_functor_impl { template - std::unique_ptr operator()(Left const& lhs, - Right const& rhs, + std::unique_ptr operator()(Left const& lhs_h, + Right const& rhs_h, size_type size, bool left_nullable, bool right_nullable, @@ -92,6 +116,11 @@ struct copy_if_else_functor_impl { { using T = string_view; + auto p_lhs = get_iterable_device_view{}(lhs_h); + 
auto p_rhs = get_iterable_device_view{}(rhs_h); + auto const& lhs = *p_lhs; + auto const& rhs = *p_rhs; + if (left_nullable) { if (right_nullable) { auto lhs_iter = cudf::detail::make_pair_iterator(lhs); @@ -115,40 +144,111 @@ struct copy_if_else_functor_impl { }; /** - * @brief Specialization of copy_if_else_functor for list_views. + * @brief Functor to generate gather-map for LHS column + * + * If specified `Predicate` evaluates to `true` for index `i`, + * gather map must contain `i` (to select LHS[i]). + * If false, gather map must have `null_index`, so that a null + * is gathered in its place. */ -template <> -struct copy_if_else_functor_impl { - template - std::unique_ptr operator()(Left const& lhs, - Right const& rhs, - size_type size, - bool left_nullable, - bool right_nullable, - Filter filter, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +template +class lhs_gather_map_functor { + public: + lhs_gather_map_functor(Predicate predicate, size_type null_index) + : _pred(predicate), _null_index(null_index) { - CUDF_FAIL("copy_if_else not supported for list_view yet"); } + + size_type __device__ operator()(size_type i) const { return _pred(i) ? i : _null_index; } + + private: + Predicate _pred; + size_type _null_index; }; -template <> -struct copy_if_else_functor_impl { - template - std::unique_ptr operator()(Left const& lhs, - Right const& rhs, - size_type size, - bool left_nullable, - bool right_nullable, - Filter filter, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { - CUDF_FAIL("copy_if_else not supported for struct_view yet"); - } +/** + * @brief Adapter to negate predicates. + */ +template +class logical_not { + public: + explicit logical_not(Predicate predicate) : _pred{predicate} {} + + bool __device__ operator()(size_type i) const { return not _pred(i); } + + private: + Predicate _pred; }; +/** + * @brief Implementation of copy_if_else() with gather()/scatter(). 
+ * + * Currently supports only nested-type column_views. Scalars are not supported. + */ +template +std::unique_ptr scatter_gather_based_if_else(Left const& lhs, + Right const& rhs, + size_type size, + Filter is_left, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if constexpr (std::is_same::value && + std::is_same::value) { + auto const null_map_entry = size + 1; // Out of bounds index, for gather() to nullify. + + auto const gather_lhs = make_counting_transform_iterator( + size_type{0}, lhs_gather_map_functor{is_left, null_map_entry}); + + auto const lhs_gathered_columns = + cudf::detail::gather(table_view{std::vector{lhs}}, + gather_lhs, + gather_lhs + size, + out_of_bounds_policy::NULLIFY, + stream, + mr) + ->release(); + auto& lhs_partial_output = lhs_gathered_columns[0]; + + auto scatter_map_rhs = rmm::device_uvector{static_cast(size), stream}; + auto const scatter_map_end = thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(size_type{0}), + thrust::make_counting_iterator(size_type{size}), + scatter_map_rhs.begin(), + logical_not{is_left}); + + auto const scatter_src_rhs = cudf::detail::gather(table_view{std::vector{rhs}}, + scatter_map_rhs.begin(), + scatter_map_end, + out_of_bounds_policy::DONT_CHECK, + stream); + + auto result = cudf::detail::scatter( + table_view{std::vector{scatter_src_rhs->get_column(0).view()}}, + scatter_map_rhs.begin(), + scatter_map_end, + table_view{std::vector{lhs_partial_output->view()}}, + false, + stream, + mr); + + return std::move(result->release()[0]); + } + + // Bail out for Scalars. + // For nested types types, scatter/gather based copy_if_else() is not currently supported + // if either `lhs` or `rhs` is a scalar, partially because: + // 1. Struct scalars are not yet available. + // 2. List scalars do not yet support explosion to a full column. 
+ CUDF_FAIL("Scalars of nested types are not currently supported!"); + (void)lhs; + (void)rhs; + (void)size; + (void)is_left; + (void)stream; + (void)mr; +} + /** * @brief Functor called by the `type_dispatcher` to invoke copy_if_else on combinations * of column_view and scalar @@ -164,6 +264,12 @@ struct copy_if_else_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + if constexpr (std::is_same_v or std::is_same_v) { + (void)left_nullable; + (void)right_nullable; + return scatter_gather_based_if_else(lhs, rhs, size, filter, stream, mr); + } + copy_if_else_functor_impl copier{}; return copier(lhs, rhs, size, left_nullable, right_nullable, filter, stream, mr); } @@ -183,7 +289,7 @@ std::unique_ptr copy_if_else(Left const& lhs, CUDF_EXPECTS(boolean_mask.type() == data_type(type_id::BOOL8), "Boolean mask column must be of type type_id::BOOL8"); - if (boolean_mask.is_empty()) { return cudf::make_empty_column(lhs.type()); } + if (boolean_mask.is_empty()) { return cudf::empty_like(lhs); } auto bool_mask_device_p = column_device_view::create(boolean_mask); column_device_view bool_mask_device = *bool_mask_device_p; @@ -230,13 +336,7 @@ std::unique_ptr copy_if_else(column_view const& lhs, CUDF_EXPECTS(boolean_mask.size() == lhs.size(), "Boolean mask column must be the same size as lhs and rhs columns"); CUDF_EXPECTS(lhs.size() == rhs.size(), "Both columns must be of the size"); - return copy_if_else(*column_device_view::create(lhs), - *column_device_view::create(rhs), - lhs.has_nulls(), - rhs.has_nulls(), - boolean_mask, - stream, - mr); + return copy_if_else(lhs, rhs, lhs.has_nulls(), rhs.has_nulls(), boolean_mask, stream, mr); } std::unique_ptr copy_if_else(scalar const& lhs, @@ -247,13 +347,7 @@ std::unique_ptr copy_if_else(scalar const& lhs, { CUDF_EXPECTS(boolean_mask.size() == rhs.size(), "Boolean mask column must be the same size as rhs column"); - return copy_if_else(lhs, - *column_device_view::create(rhs), - !lhs.is_valid(), - 
rhs.has_nulls(), - boolean_mask, - stream, - mr); + return copy_if_else(lhs, rhs, !lhs.is_valid(), rhs.has_nulls(), boolean_mask, stream, mr); } std::unique_ptr copy_if_else(column_view const& lhs, @@ -264,13 +358,7 @@ std::unique_ptr copy_if_else(column_view const& lhs, { CUDF_EXPECTS(boolean_mask.size() == lhs.size(), "Boolean mask column must be the same size as lhs column"); - return copy_if_else(*column_device_view::create(lhs), - rhs, - lhs.has_nulls(), - !rhs.is_valid(), - boolean_mask, - stream, - mr); + return copy_if_else(lhs, rhs, lhs.has_nulls(), !rhs.is_valid(), boolean_mask, stream, mr); } std::unique_ptr copy_if_else(scalar const& lhs, diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu index f4ce9ea27ac..39a947d2ab9 100644 --- a/cpp/src/copying/copy_range.cu +++ b/cpp/src/copying/copy_range.cu @@ -90,17 +90,6 @@ struct out_of_place_copy_range_dispatch { cudf::column_view const& source; cudf::column_view const& target; - template ())> - std::unique_ptr operator()( - cudf::size_type source_begin, - cudf::size_type source_end, - cudf::size_type target_begin, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - { - CUDF_FAIL("Unsupported type for out of place copy."); - } - template ())> std::unique_ptr operator()( cudf::size_type source_begin, @@ -122,6 +111,13 @@ struct out_of_place_copy_range_dispatch { return p_ret; } + + template + std::enable_if_t(), std::unique_ptr> + operator()(Args...) 
+ { + CUDF_FAIL("Unsupported type for out of place copy."); + } }; template <> @@ -212,17 +208,6 @@ std::unique_ptr out_of_place_copy_range_dispatch::operator() -std::unique_ptr out_of_place_copy_range_dispatch::operator()( - cudf::size_type source_begin, - cudf::size_type source_end, - cudf::size_type target_begin, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FAIL("list_view type not supported"); -} - } // namespace namespace cudf { diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 446f9b0dda9..a4d863d204d 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +17,11 @@ #include #include #include +#include #include #include +#include +#include #include #include @@ -122,7 +125,22 @@ struct get_element_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { - CUDF_FAIL("get_element_functor not supported for list_view"); + bool valid = is_element_valid_sync(input, index, stream); + auto const child_col_idx = lists_column_view::child_column_index; + + if (valid) { + lists_column_view lcv(input); + // Make a copy of the row + auto row_slice_contents = + lists::detail::copy_slice(lcv, index, index + 1, stream, mr)->release(); + // Construct scalar with row data + return std::make_unique( + std::move(*row_slice_contents.children[child_col_idx]), valid, stream, mr); + } else { + auto empty_row_contents = empty_like(input)->release(); + return std::make_unique( + std::move(*empty_row_contents.children[child_col_idx]), valid, stream, mr); + } } template ()> *p = nullptr> @@ -156,12 +174,9 @@ struct get_element_functor { mr); } - template ::value> *p = nullptr> - 
std::unique_ptr operator()( - column_view const &input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + template + std::enable_if_t::value, std::unique_ptr> operator()( + Args &&...) { CUDF_FAIL("get_element_functor not supported for struct_view"); } diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp index 0d1bb5a8312..182e3ff0584 100644 --- a/cpp/src/copying/pack.cpp +++ b/cpp/src/copying/pack.cpp @@ -17,6 +17,8 @@ #include #include +#include + namespace cudf { namespace detail { @@ -216,7 +218,7 @@ table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data) packed_columns pack(cudf::table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::pack(input, 0, mr); + return detail::pack(input, rmm::cuda_stream_default, mr); } /** diff --git a/cpp/src/copying/sample.cu b/cpp/src/copying/sample.cu index db0984068cf..42dc9f76b18 100644 --- a/cpp/src/copying/sample.cu +++ b/cpp/src/copying/sample.cu @@ -25,7 +25,6 @@ #include -#include #include #include #include diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index cedac96cee6..a932957ada4 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -149,18 +149,15 @@ struct column_scalar_scatterer_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - CUDF_FAIL("scatter scalar to list_view not implemented"); + return lists::detail::scatter( + source, scatter_iter, scatter_iter + scatter_rows, target, stream, mr); } }; template struct column_scalar_scatterer_impl { - std::unique_ptr operator()(std::reference_wrapper const& source, - MapIterator scatter_iter, - size_type scatter_rows, - column_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + template + std::unique_ptr operator()(Args&&...) 
const { CUDF_FAIL("scatter scalar to struct_view not implemented"); } @@ -197,8 +194,8 @@ struct column_scalar_scatterer_impl { auto contents = new_indices->release(); auto indices_column = std::make_unique(indices_type, static_cast(output_size), - *(contents.data.release()), - rmm::device_buffer{0, stream, mr}, + std::move(*(contents.data.release())), + rmm::device_buffer{}, 0); // use the keys from the matched column std::unique_ptr keys_column(std::move(dict_target->release().children.back())); diff --git a/cpp/src/copying/segmented_shift.cu b/cpp/src/copying/segmented_shift.cu new file mode 100644 index 00000000000..6fc785a61c6 --- /dev/null +++ b/cpp/src/copying/segmented_shift.cu @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace cudf { +namespace detail { + +namespace { + +/** + * @brief Helper function to invoke general `copy_if_else` + */ +template +std::unique_ptr segmented_shift_rep_impl(PairIterator input_pair_iterator, + ScalarIterator fill_pair_iterator, + bool nullable, + size_type offset, + device_span segment_offsets, + data_type value_type, + size_type column_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (offset > 0) { + auto filter = [segment_offsets, offset] __device__(auto const& i) { + auto segment_bound_idx = + thrust::upper_bound(thrust::seq, segment_offsets.begin(), segment_offsets.end(), i) - 1; + return not(*segment_bound_idx <= i and i < *segment_bound_idx + offset); + }; + return copy_if_else(nullable, + input_pair_iterator, + input_pair_iterator + column_size, + fill_pair_iterator, + filter, + value_type, + stream, + mr); + } else { + auto filter = [segment_offsets, offset] __device__(auto const& i) { + auto segment_bound_idx = + thrust::upper_bound(thrust::seq, segment_offsets.begin(), segment_offsets.end(), i); + return not(*segment_bound_idx + offset <= i and i < *segment_bound_idx); + }; + return copy_if_else(nullable, + input_pair_iterator, + input_pair_iterator + column_size, + fill_pair_iterator, + filter, + value_type, + stream, + mr); + } +} + +/** + * @brief Helper function to invoke string specialization of `copy_if_else` + */ +template +std::unique_ptr segmented_shift_string_impl(PairIterator input_pair_iterator, + ScalarIterator fill_pair_iterator, + size_type offset, + device_span segment_offsets, + size_type column_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (offset > 0) { + auto filter = [segment_offsets, offset] __device__(auto const& i) { + auto segment_bound_idx = + thrust::upper_bound(thrust::seq, segment_offsets.begin(), segment_offsets.end(), i) 
- 1; + return not(*segment_bound_idx <= i and i < *segment_bound_idx + offset); + }; + return strings::detail::copy_if_else(input_pair_iterator, + input_pair_iterator + column_size, + fill_pair_iterator, + filter, + stream, + mr); + } else { + auto filter = [segment_offsets, offset] __device__(auto const& i) { + auto segment_bound_idx = + thrust::upper_bound(thrust::seq, segment_offsets.begin(), segment_offsets.end(), i); + return not(*segment_bound_idx + offset <= i and i < *segment_bound_idx); + }; + return strings::detail::copy_if_else(input_pair_iterator, + input_pair_iterator + column_size, + fill_pair_iterator, + filter, + stream, + mr); + } +} + +template +struct segmented_shift_functor { + template + std::unique_ptr operator()(Args&&...) + { + CUDF_FAIL("Unsupported type for segmented_shift."); + } +}; + +/** + * @brief Segmented shift specialization for representation layout compatible types. + */ +template +struct segmented_shift_functor()>> { + std::unique_ptr operator()(column_view const& segmented_values, + device_span segment_offsets, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto values_device_view = column_device_view::create(segmented_values, stream); + auto fill_pair_iterator = make_pair_iterator(fill_value); + bool nullable = not fill_value.is_valid() or segmented_values.nullable(); + + if (segmented_values.has_nulls()) { + auto input_pair_iterator = make_pair_iterator(*values_device_view) - offset; + return segmented_shift_rep_impl(input_pair_iterator, + fill_pair_iterator, + nullable, + offset, + segment_offsets, + segmented_values.type(), + segmented_values.size(), + stream, + mr); + } else { + auto input_pair_iterator = make_pair_iterator(*values_device_view) - offset; + return segmented_shift_rep_impl(input_pair_iterator, + fill_pair_iterator, + nullable, + offset, + segment_offsets, + segmented_values.type(), + segmented_values.size(), + stream, + mr); + } + 
} +}; + +/** + * @brief Segmented shift specialization for `string_view`. + */ +template <> +struct segmented_shift_functor { + std::unique_ptr operator()(column_view const& segmented_values, + device_span segment_offsets, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + using T = string_view; + + auto values_device_view = column_device_view::create(segmented_values, stream); + auto fill_pair_iterator = make_pair_iterator(fill_value); + if (segmented_values.has_nulls()) { + auto input_pair_iterator = make_pair_iterator(*values_device_view) - offset; + return segmented_shift_string_impl(input_pair_iterator, + fill_pair_iterator, + offset, + segment_offsets, + segmented_values.size(), + stream, + mr); + } else { + auto input_pair_iterator = make_pair_iterator(*values_device_view) - offset; + return segmented_shift_string_impl(input_pair_iterator, + fill_pair_iterator, + offset, + segment_offsets, + segmented_values.size(), + stream, + mr); + } + } +}; + +/** + * @brief Functor to instantiate the specializations for segmented shift and + * forward arguments. 
+ */ +struct segmented_shift_functor_forwarder { + template + std::unique_ptr operator()(column_view const& segmented_values, + device_span segment_offsets, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + segmented_shift_functor shifter; + return shifter(segmented_values, segment_offsets, offset, fill_value, stream, mr); + } +}; + +} // namespace + +std::unique_ptr segmented_shift(column_view const& segmented_values, + device_span segment_offsets, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (segmented_values.is_empty()) { return empty_like(segmented_values); } + if (offset == 0) { return std::make_unique(segmented_values); }; + + return type_dispatcher(segmented_values.type(), + segmented_shift_functor_forwarder{}, + segmented_values, + segment_offsets, + offset, + fill_value, + stream, + mr); +} + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index cf85bf51e80..ebeaf0e3b20 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -46,8 +46,7 @@ inline bool __device__ out_of_bounds(size_type size, size_type idx) struct shift_functor { template - std::enable_if_t(), std::unique_ptr> operator()( - Args&&... args) + std::enable_if_t(), std::unique_ptr> operator()(Args&&...) 
{ CUDF_FAIL("shift does not support non-fixed-width types."); } diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu index f32f351487a..e3d1ea88ece 100644 --- a/cpp/src/dictionary/add_keys.cu +++ b/cpp/src/dictionary/add_keys.cu @@ -63,6 +63,7 @@ std::unique_ptr add_keys( std::vector{0}, // only one key column duplicate_keep_option::KEEP_FIRST, null_equality::EQUAL, + null_order::BEFORE, stream, mr) ->release(); diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index cdf086e3f4a..4aa1e3e2278 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -169,18 +169,12 @@ struct dispatch_compute_indices { return result; } - template + template typename std::enable_if_t(), std::unique_ptr> - operator()(column_view const&, - column_view const&, - column_view const&, - offsets_pair const*, - size_type const*, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource*) + operator()(Args&&...) { - CUDF_FAIL("list_view as keys for dictionary not supported"); + CUDF_FAIL("dictionary concatenate not supported for this column type"); } }; @@ -213,6 +207,7 @@ std::unique_ptr concatenate(host_span columns, std::vector{0}, duplicate_keep_option::KEEP_FIRST, null_equality::EQUAL, + null_order::BEFORE, stream, mr) ->release(); diff --git a/cpp/src/dictionary/dictionary_factories.cu b/cpp/src/dictionary/dictionary_factories.cu index 73d1becf639..35e7d5fbc27 100644 --- a/cpp/src/dictionary/dictionary_factories.cu +++ b/cpp/src/dictionary/dictionary_factories.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -134,7 +134,7 @@ std::unique_ptr make_dictionary_column(std::unique_ptr keys, // If the types match, then just commandeer the column's data buffer. 
if (new_type.id() == indices_type) { return std::make_unique( - new_type, indices_size, *(contents.data.release()), rmm::device_buffer{0, stream, mr}, 0); + new_type, indices_size, std::move(*(contents.data.release())), rmm::device_buffer{}, 0); } // If the new type does not match, then convert the data. cudf::column_view cast_view{cudf::data_type{indices_type}, indices_size, contents.data->data()}; diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index f8f1d01b4a5..9b644f38794 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -85,7 +85,7 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, rmm::mr::device_memory_resource* mr) { if (input.is_empty()) { return cudf::empty_like(input.parent()); } - if (!input.has_nulls()) { return std::make_unique(input.parent()); } + if (!input.has_nulls()) { return std::make_unique(input.parent(), stream, mr); } CUDF_EXPECTS(input.keys().type() == replacement.keys().type(), "keys must match"); CUDF_EXPECTS(replacement.size() == input.size(), "column sizes must match"); @@ -118,7 +118,7 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, { if (input.is_empty()) { return cudf::empty_like(input.parent()); } if (!input.has_nulls() || !replacement.is_valid()) { - return std::make_unique(input.parent()); + return std::make_unique(input.parent(), stream, mr); } CUDF_EXPECTS(input.keys().type() == replacement.type(), "keys must match scalar type"); diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index 0aaf10707f4..5db12d75d62 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -45,7 +45,7 @@ struct dispatch_scalar_index { template ()>* = nullptr> - std::unique_ptr operator()(Args&&... args) + std::unique_ptr operator()(Args&&...) 
{ CUDF_FAIL("indices must be an integral type"); } @@ -89,33 +89,18 @@ struct find_index_fn { stream, mr); } - template ::value>* = nullptr> - std::unique_ptr operator()(dictionary_column_view const& input, - scalar const& key, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - CUDF_FAIL("dictionary column cannot be the keys column of another dictionary"); - } - - template ::value>* = nullptr> - std::unique_ptr operator()(dictionary_column_view const& input, - scalar const& key, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - CUDF_FAIL("list_view column cannot be the keys column of a dictionary"); - } template ::value>* = nullptr> - std::unique_ptr operator()(dictionary_column_view const& input, - scalar const& key, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + std::enable_if_t::value or + std::is_same::value or + std::is_same::value>* = nullptr> + std::unique_ptr operator()(dictionary_column_view const&, + scalar const&, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource*) const { - CUDF_FAIL("struct_view column cannot be the keys column of a dictionary"); + CUDF_FAIL( + "dictionary, list_view, and struct_view columns cannot be the keys column of a dictionary"); } }; @@ -151,12 +136,12 @@ struct find_insert_index_fn { std::enable_if_t::value or std::is_same::value or std::is_same::value>* = nullptr> - std::unique_ptr operator()(dictionary_column_view const& input, - scalar const& key, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + std::unique_ptr operator()(dictionary_column_view const&, + scalar const&, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource*) const { - CUDF_FAIL("column cannot be the keys for dictionary"); + CUDF_FAIL("dictionary, list_view, and struct_view columns cannot be the keys for a dictionary"); } }; diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 
f3f1ffcfdab..8f07c9cbbed 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -85,15 +85,12 @@ struct dispatch_compute_indices { return result; } - template + template typename std::enable_if_t(), std::unique_ptr> - operator()(dictionary_column_view const& input, - column_view const& new_keys, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + operator()(Args&&...) { - CUDF_FAIL("list_view dictionary set_keys not supported yet"); + CUDF_FAIL("dictionary set_keys not supported for this column type"); } }; @@ -115,6 +112,7 @@ std::unique_ptr set_keys( std::vector{0}, duplicate_keep_option::KEEP_FIRST, null_equality::EQUAL, + null_order::BEFORE, stream, mr) ->release(); diff --git a/cpp/src/filling/repeat.cu b/cpp/src/filling/repeat.cu index 0c4a4ae127e..28a21d92ef9 100644 --- a/cpp/src/filling/repeat.cu +++ b/cpp/src/filling/repeat.cu @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include @@ -72,38 +72,26 @@ struct count_accessor { } }; -struct compute_offsets { - cudf::column_view const* p_column = nullptr; +struct count_checker { + cudf::column_view const& count; template - std::enable_if_t::value, rmm::device_vector> operator()( - bool check_count, rmm::cuda_stream_view stream) + std::enable_if_t::value, void> operator()(rmm::cuda_stream_view stream) { // static_cast is necessary due to bool - if (check_count && static_cast(std::numeric_limits::max()) > - std::numeric_limits::max()) { - auto max = thrust::reduce(p_column->begin(), p_column->end(), 0, thrust::maximum()); + if (static_cast(std::numeric_limits::max()) > + std::numeric_limits::max()) { + auto max = thrust::reduce( + rmm::exec_policy(stream), count.begin(), count.end(), 0, thrust::maximum()); CUDF_EXPECTS(max <= std::numeric_limits::max(), - "count should not have values larger than size_type's limit."); + "count should not have values larger than size_type maximum."); } - rmm::device_vector offsets(p_column->size()); - 
thrust::inclusive_scan( - rmm::exec_policy(stream), p_column->begin(), p_column->end(), offsets.begin()); - if (check_count == true) { - CUDF_EXPECTS( - thrust::is_sorted(rmm::exec_policy(stream), offsets.begin(), offsets.end()) == true, - "count has negative values or the resulting table has more \ - rows than size_type's limit."); - } - - return offsets; } template - std::enable_if_t::value, rmm::device_vector> operator()( - bool check_count, rmm::cuda_stream_view stream) + std::enable_if_t::value, void> operator()(rmm::cuda_stream_view stream) { - CUDF_FAIL("count value should be a integral type."); + CUDF_FAIL("count value type should be integral."); } }; @@ -122,10 +110,21 @@ std::unique_ptr
repeat(table_view const& input_table, if (input_table.num_rows() == 0) { return cudf::empty_like(input_table); } - auto offsets = cudf::type_dispatcher(count.type(), compute_offsets{&count}, check_count, stream); + if (check_count) { cudf::type_dispatcher(count.type(), count_checker{count}, stream); } + + auto count_iter = cudf::detail::indexalator_factory::make_input_iterator(count); + + rmm::device_uvector offsets(count.size(), stream); + thrust::inclusive_scan( + rmm::exec_policy(stream), count_iter, count_iter + count.size(), offsets.begin()); + + if (check_count) { + CUDF_EXPECTS(thrust::is_sorted(rmm::exec_policy(stream), offsets.begin(), offsets.end()), + "count has negative values or the resulting table has too many rows."); + } - size_type output_size{offsets.back()}; - rmm::device_vector indices(output_size); + size_type output_size{offsets.back_element(stream)}; + rmm::device_uvector indices(output_size, stream); thrust::upper_bound(rmm::exec_policy(stream), offsets.begin(), offsets.end(), @@ -150,8 +149,8 @@ std::unique_ptr
repeat(table_view const& input_table, if ((input_table.num_rows() == 0) || (count == 0)) { return cudf::empty_like(input_table); } auto output_size = input_table.num_rows() * count; - auto map_begin = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [count] __device__(auto i) { return i / count; }); + auto map_begin = cudf::detail::make_counting_transform_iterator( + 0, [count] __device__(auto i) { return i / count; }); auto map_end = map_begin + output_size; return gather(input_table, map_begin, map_end, out_of_bounds_policy::DONT_CHECK, stream, mr); diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 34c57996af3..b265e1c3112 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -19,8 +19,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -33,6 +35,7 @@ #include #include +#include #include #include @@ -78,6 +81,44 @@ std::pair, std::vector> groupby::disp groupby::~groupby() = default; namespace { + +/** + * @brief Factory to construct empty result columns. + * + * Adds special handling for COLLECT_LIST/COLLECT_SET, because: + * 1. `make_empty_column()` does not support construction of nested columns. + * 2. Empty lists need empty child columns, to persist type information. + */ +struct empty_column_constructor { + column_view values; + + template + std::unique_ptr operator()() const + { + using namespace cudf; + using namespace cudf::detail; + + if constexpr (k == aggregation::Kind::COLLECT_LIST || k == aggregation::Kind::COLLECT_SET) { + return make_lists_column( + 0, make_empty_column(data_type{type_to_id()}), empty_like(values), 0, {}); + } + + // If `values` is LIST typed, and the aggregation results match the type, + // construct empty results based on `values`. + // Most generally, this applies if input type matches output type. + // + // Note: `target_type_t` is not recursive, and `ValuesType` does not consider children. 
+ // It is important that `COLLECT_LIST` and `COLLECT_SET` are handled before this + // point, because `COLLECT_LIST(LIST)` produces `LIST`, but `target_type_t` + // wouldn't know the difference. + if constexpr (std::is_same_v, ValuesType>) { + return empty_like(values); + } + + return make_empty_column(target_type(values.type(), k)); + } +}; + /// Make an empty table with appropriate types for requested aggs auto empty_results(host_span requests) { @@ -92,7 +133,8 @@ auto empty_results(host_span requests) request.aggregations.end(), std::back_inserter(results), [&request](auto const& agg) { - return make_empty_column(cudf::detail::target_type(request.values.type(), agg->kind)); + return cudf::detail::dispatch_type_and_aggregation( + request.values.type(), agg->kind, empty_column_constructor{request.values}); }); return aggregation_result{std::move(results)}; @@ -118,25 +160,6 @@ void verify_valid_requests(host_span requests) }); }), "Invalid type/aggregation combination."); - -// The aggregations listed in the lambda below will not work with a values column of type -// dictionary if this is compiled with nvcc/ptxas 10.2. -// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317&cp= -#if (__CUDACC_VER_MAJOR__ == 10) and (__CUDACC_VER_MINOR__ == 2) - CUDF_EXPECTS( - std::all_of( - requests.begin(), - requests.end(), - [](auto const& request) { - return std::all_of( - request.aggregations.begin(), request.aggregations.end(), [&request](auto const& agg) { - return (!cudf::is_dictionary(request.values.type()) || - !(agg->kind == aggregation::SUM or agg->kind == aggregation::MEAN or - agg->kind == aggregation::STD or agg->kind == aggregation::VARIANCE)); - }); - }), - "dictionary type not supported for this aggregation"); -#endif } } // namespace @@ -202,6 +225,35 @@ groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_re } } +std::pair, std::unique_ptr
> groupby::replace_nulls( + table_view const& values, + host_span replace_policies, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(_keys.num_rows() == values.num_rows(), + "Size mismatch between group labels and value."); + CUDF_EXPECTS(static_cast(replace_policies.size()) == values.num_columns(), + "Size mismatch between num_columns and replace_policies."); + + if (values.is_empty()) { return std::make_pair(empty_like(_keys), empty_like(values)); } + auto const stream = rmm::cuda_stream_default; + + auto const& group_labels = helper().group_labels(stream); + std::vector> results; + std::transform(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(values.num_columns()), + std::back_inserter(results), + [&](auto i) { + auto grouped_values = helper().grouped_values(values.column(i), stream); + return detail::group_replace_nulls( + grouped_values->view(), group_labels, replace_policies[i], stream, mr); + }); + + return std::make_pair(std::move(helper().sorted_keys(stream, mr)), + std::make_unique
(std::move(results))); +} + // Get the sort helper object detail::sort::sort_groupby_helper& groupby::helper() { @@ -211,5 +263,37 @@ detail::sort::sort_groupby_helper& groupby::helper() return *_helper; }; +std::pair, std::unique_ptr
> groupby::shift( + table_view const& values, + host_span offsets, + std::vector> const& fill_values, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(values.num_columns() == static_cast(fill_values.size()), + "Mismatch number of fill_values and columns."); + CUDF_EXPECTS( + std::all_of(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(values.num_columns()), + [&](auto i) { return values.column(i).type() == fill_values[i].get().type(); }), + "values and fill_value should have the same type."); + + auto stream = rmm::cuda_stream_default; + std::vector> results; + auto const& group_offsets = helper().group_offsets(stream); + std::transform( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(values.num_columns()), + std::back_inserter(results), + [&](size_type i) { + auto grouped_values = helper().grouped_values(values.column(i), stream); + return cudf::detail::segmented_shift( + grouped_values->view(), group_offsets, offsets[i], fill_values[i].get(), stream, mr); + }); + + return std::make_pair(helper().sorted_keys(stream, mr), + std::make_unique(std::move(results))); +} + } // namespace groupby } // namespace cudf diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 38aacbe59a7..31b48790861 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -55,33 +55,37 @@ namespace groupby { namespace detail { namespace hash { namespace { -// This is a temporary fix due to compiler bug and we can resort back to -// constexpr once cuda 10.2 becomes RAPIDS's minimum compiler version -#if 0 + /** * @brief List of aggregation operations that can be computed with a hash-based * implementation. 
*/ -constexpr std::array hash_aggregations{ - aggregation::SUM, aggregation::MIN, aggregation::MAX, - aggregation::COUNT_VALID, aggregation::COUNT_ALL, - aggregation::ARGMIN, aggregation::ARGMAX, - aggregation::SUM_OF_SQUARES, - aggregation::MEAN, aggregation::STD, aggregation::VARIANCE}; - -//Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL, +constexpr std::array hash_aggregations{aggregation::SUM, + aggregation::PRODUCT, + aggregation::MIN, + aggregation::MAX, + aggregation::COUNT_VALID, + aggregation::COUNT_ALL, + aggregation::ARGMIN, + aggregation::ARGMAX, + aggregation::SUM_OF_SQUARES, + aggregation::MEAN, + aggregation::STD, + aggregation::VARIANCE}; + +// Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL, // Compound: MEAN(SUM, COUNT_VALID), VARIANCE, STD(MEAN (SUM, COUNT_VALID), COUNT_VALID), // ARGMAX, ARGMIN -// FIXME(kn): adding SUM_OF_SQUARES causes ptxas compiler crash (<=CUDA 10.2) for more than 3 types! +// TODO replace with std::find in C++20 onwards. 
template -constexpr bool array_contains(std::array const& haystack, T needle) { - for (auto i = 0u; i < N; ++i) { - if (haystack[i] == needle) return true; +constexpr bool array_contains(std::array const& haystack, T needle) +{ + for (auto const& val : haystack) { + if (val == needle) return true; } return false; } -#endif /** * @brief Indicates whether the specified aggregation operation can be computed @@ -93,16 +97,67 @@ constexpr bool array_contains(std::array const& haystack, T needle) { */ bool constexpr is_hash_aggregation(aggregation::Kind t) { - // this is a temporary fix due to compiler bug and we can resort back to - // constexpr once cuda 10.2 becomes RAPIDS's minimum compiler version - // return array_contains(hash_aggregations, t); - return (t == aggregation::SUM) or (t == aggregation::MIN) or (t == aggregation::MAX) or - (t == aggregation::COUNT_VALID) or (t == aggregation::COUNT_ALL) or - (t == aggregation::ARGMIN) or (t == aggregation::ARGMAX) or - (t == aggregation::SUM_OF_SQUARES) or (t == aggregation::MEAN) or - (t == aggregation::STD) or (t == aggregation::VARIANCE); + return array_contains(hash_aggregations, t); } +class groupby_simple_aggregations_collector final + : public cudf::detail::simple_aggregations_collector { + public: + using cudf::detail::simple_aggregations_collector::visit; + + std::vector> visit(data_type col_type, + cudf::detail::min_aggregation const& agg) override + { + std::vector> aggs; + aggs.push_back(col_type.id() == type_id::STRING ? make_argmin_aggregation() + : make_min_aggregation()); + return aggs; + } + + std::vector> visit(data_type col_type, + cudf::detail::max_aggregation const& agg) override + { + std::vector> aggs; + aggs.push_back(col_type.id() == type_id::STRING ? 
make_argmax_aggregation() + : make_max_aggregation()); + return aggs; + } + + std::vector> visit( + data_type col_type, cudf::detail::mean_aggregation const& agg) override + { + CUDF_EXPECTS(is_fixed_width(col_type), "MEAN aggregation expects fixed width type"); + std::vector> aggs; + aggs.push_back(make_sum_aggregation()); + // COUNT_VALID + aggs.push_back(make_count_aggregation()); + + return aggs; + } + + std::vector> visit(data_type col_type, + cudf::detail::var_aggregation const& agg) override + { + std::vector> aggs; + aggs.push_back(make_sum_aggregation()); + // COUNT_VALID + aggs.push_back(make_count_aggregation()); + + return aggs; + } + + std::vector> visit(data_type col_type, + cudf::detail::std_aggregation const& agg) override + { + std::vector> aggs; + aggs.push_back(make_sum_aggregation()); + // COUNT_VALID + aggs.push_back(make_count_aggregation()); + + return aggs; + } +}; + template class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer { size_t col_idx; @@ -118,6 +173,8 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final rmm::cuda_stream_view stream; public: + using cudf::detail::aggregation_finalizer::visit; + hash_compound_agg_finalizer(size_t col_idx, column_view col, cudf::detail::result_cache* sparse_results, @@ -156,10 +213,9 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final } // Enables conversion of ARGMIN/ARGMAX into MIN/MAX - auto gather_argminmax(aggregation::Kind const& agg_kind) + auto gather_argminmax(aggregation const& agg) { - auto transformed_agg = std::make_unique(agg_kind); - auto arg_result = to_dense_agg_result(*transformed_agg); + auto arg_result = to_dense_agg_result(agg); // We make a view of ARG(MIN/MAX) result without a null mask and gather // using this map. 
The values in data buffer of ARG(MIN/MAX) result // corresponding to null values was initialized to ARG(MIN/MAX)_SENTINEL @@ -178,7 +234,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final stream, mr); return std::move(gather_argminmax->release()[0]); - }; + } // Declare overloads for each kind of aggregation to dispatch void visit(cudf::aggregation const& agg) override @@ -190,20 +246,24 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final void visit(cudf::detail::min_aggregation const& agg) override { if (dense_results->has_result(col_idx, agg)) return; - if (result_type.id() == type_id::STRING) - dense_results->add_result(col_idx, agg, gather_argminmax(aggregation::ARGMIN)); - else + if (result_type.id() == type_id::STRING) { + auto transformed_agg = make_argmin_aggregation(); + dense_results->add_result(col_idx, agg, gather_argminmax(*transformed_agg)); + } else { dense_results->add_result(col_idx, agg, to_dense_agg_result(agg)); + } } void visit(cudf::detail::max_aggregation const& agg) override { if (dense_results->has_result(col_idx, agg)) return; - if (result_type.id() == type_id::STRING) - dense_results->add_result(col_idx, agg, gather_argminmax(aggregation::ARGMAX)); - else + if (result_type.id() == type_id::STRING) { + auto transformed_agg = make_argmax_aggregation(); + dense_results->add_result(col_idx, agg, gather_argminmax(*transformed_agg)); + } else { dense_results->add_result(col_idx, agg, to_dense_agg_result(agg)); + } } void visit(cudf::detail::mean_aggregation const& agg) override @@ -262,19 +322,22 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final { if (dense_results->has_result(col_idx, agg)) return; auto var_agg = make_variance_aggregation(agg._ddof); - this->visit(*static_cast(var_agg.get())); + this->visit(*dynamic_cast(var_agg.get())); column_view variance = dense_results->get_result(col_idx, *var_agg); auto result = 
cudf::detail::unary_operation(variance, unary_operator::SQRT, stream, mr); dense_results->add_result(col_idx, agg, std::move(result)); } }; - // flatten aggs to filter in single pass aggs -std::tuple, std::vector> +std::tuple, + std::vector>, + std::vector> flatten_single_pass_aggs(host_span requests) { std::vector columns; + std::vector> aggs; std::vector agg_kinds; std::vector col_ids; @@ -283,24 +346,30 @@ flatten_single_pass_aggs(host_span requests) auto const& agg_v = request.aggregations; std::unordered_set agg_kinds_set; - auto insert_agg = [&](size_t i, column_view const& request_values, aggregation::Kind k) { - if (agg_kinds_set.insert(k).second) { - agg_kinds.push_back(k); - columns.push_back(request_values); - col_ids.push_back(i); - } - }; + auto insert_agg = + [&](size_t i, column_view const& request_values, std::unique_ptr&& agg) { + if (agg_kinds_set.insert(agg->kind).second) { + agg_kinds.push_back(agg->kind); + aggs.push_back(std::move(agg)); + columns.push_back(request_values); + col_ids.push_back(i); + } + }; auto values_type = cudf::is_dictionary(request.values.type()) ? 
cudf::dictionary_column_view(request.values).keys().type() : request.values.type(); for (auto&& agg : agg_v) { - for (auto const& agg_s : agg->get_simple_aggregations(values_type)) - insert_agg(i, request.values, agg_s); + groupby_simple_aggregations_collector collector; + + for (auto& agg_s : agg->get_simple_aggregations(values_type, collector)) { + insert_agg(i, request.values, std::move(agg_s)); + } } } - return std::make_tuple(table_view(columns), std::move(agg_kinds), std::move(col_ids)); + return std::make_tuple( + table_view(columns), std::move(agg_kinds), std::move(aggs), std::move(col_ids)); } /** @@ -428,17 +497,14 @@ void compute_single_pass_aggs(table_view const& keys, rmm::cuda_stream_view stream) { // flatten the aggs to a table that can be operated on by aggregate_row - table_view flattened_values; - std::vector aggs; - std::vector col_ids; - std::tie(flattened_values, aggs, col_ids) = flatten_single_pass_aggs(requests); + auto const [flattened_values, agg_kinds, aggs, col_ids] = flatten_single_pass_aggs(requests); // make table that will hold sparse results - table sparse_table = create_sparse_results_table(flattened_values, aggs, stream); + table sparse_table = create_sparse_results_table(flattened_values, agg_kinds, stream); // prepare to launch kernel to do the actual aggregation auto d_sparse_table = mutable_table_device_view::create(sparse_table, stream); auto d_values = table_device_view::create(flattened_values, stream); - rmm::device_vector d_aggs(aggs); + auto const d_aggs = cudf::detail::make_device_uvector_async(agg_kinds, stream); bool skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; @@ -452,15 +518,14 @@ void compute_single_pass_aggs(table_view const& keys, keys.num_rows(), *d_values, *d_sparse_table, - d_aggs.data().get(), + d_aggs.data(), static_cast(row_bitmask.data()), skip_key_rows_with_nulls}); // Add results back to sparse_results cache auto sparse_result_cols = sparse_table.release(); for 
(size_t i = 0; i < aggs.size(); i++) { // Note that the cache will make a copy of this temporary aggregation - auto agg = std::make_unique(aggs[i]); - sparse_results->add_result(col_ids[i], *agg, std::move(sparse_result_cols[i])); + sparse_results->add_result(col_ids[i], *aggs[i], std::move(sparse_result_cols[i])); } } diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 46185e07600..9d8f145a7c9 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -51,7 +51,7 @@ namespace detail { * memoised sorted and/or grouped values and re-using will save on computation * of these values. */ -struct aggregrate_result_functor final : store_result_functor { +struct aggregate_result_functor final : store_result_functor { using store_result_functor::store_result_functor; template void operator()(aggregation const& agg) @@ -61,7 +61,7 @@ struct aggregrate_result_functor final : store_result_functor { }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; @@ -76,7 +76,7 @@ void aggregrate_result_functor::operator()(aggregation } template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; @@ -87,7 +87,7 @@ void aggregrate_result_functor::operator()(aggregation c } template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; @@ -99,7 +99,19 @@ void aggregrate_result_functor::operator()(aggregation const& }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(col_idx, agg)) return; + + 
cache.add_result( + col_idx, + agg, + detail::group_product( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); +}; + +template <> +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; @@ -114,7 +126,7 @@ void aggregrate_result_functor::operator()(aggregation cons }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; @@ -129,7 +141,7 @@ void aggregrate_result_functor::operator()(aggregation cons }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; @@ -166,7 +178,7 @@ void aggregrate_result_functor::operator()(aggregation const& }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; @@ -203,7 +215,7 @@ void aggregrate_result_functor::operator()(aggregation const& }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; @@ -227,11 +239,11 @@ void aggregrate_result_functor::operator()(aggregation const& }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; - auto var_agg = static_cast(agg); + auto var_agg = dynamic_cast(agg); auto mean_agg = make_mean_aggregation(); auto count_agg = make_count_aggregation(); operator()(*mean_agg); @@ -250,11 +262,11 @@ void aggregrate_result_functor::operator()(aggregation co }; template <> -void 
aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; - auto std_agg = static_cast(agg); + auto std_agg = dynamic_cast(agg); auto var_agg = make_variance_aggregation(std_agg._ddof); operator()(*var_agg); column_view var_result = cache.get_result(col_idx, *var_agg); @@ -264,14 +276,14 @@ void aggregrate_result_functor::operator()(aggregation const& }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; auto count_agg = make_count_aggregation(); operator()(*count_agg); column_view group_sizes = cache.get_result(col_idx, *count_agg); - auto quantile_agg = static_cast(agg); + auto quantile_agg = dynamic_cast(agg); auto result = detail::group_quantiles(get_sorted_values(), group_sizes, @@ -285,7 +297,7 @@ void aggregrate_result_functor::operator()(aggregation co }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; @@ -305,11 +317,11 @@ void aggregrate_result_functor::operator()(aggregation cons }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; - auto nunique_agg = static_cast(agg); + auto nunique_agg = dynamic_cast(agg); auto result = detail::group_nunique(get_sorted_values(), helper.group_labels(stream), @@ -322,19 +334,20 @@ void aggregrate_result_functor::operator()(aggregation con }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; - auto nth_element_agg = static_cast(agg); + auto 
nth_element_agg = dynamic_cast(agg); auto count_agg = make_count_aggregation(nth_element_agg._null_handling); - if (count_agg->kind == aggregation::COUNT_VALID) + if (count_agg->kind == aggregation::COUNT_VALID) { operator()(*count_agg); - else if (count_agg->kind == aggregation::COUNT_ALL) + } else if (count_agg->kind == aggregation::COUNT_ALL) { operator()(*count_agg); - else + } else { CUDF_FAIL("Wrong count aggregation kind"); + } column_view group_sizes = cache.get_result(col_idx, *count_agg); cache.add_result(col_idx, @@ -351,37 +364,42 @@ void aggregrate_result_functor::operator()(aggregation } template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { auto null_handling = - static_cast(agg)._null_handling; - CUDF_EXPECTS(null_handling == null_policy::INCLUDE, - "null exclusion is not supported on groupby COLLECT_LIST aggregation."); + dynamic_cast(agg)._null_handling; + agg.do_hash(); if (cache.has_result(col_idx, agg)) return; - auto result = detail::group_collect( - get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr); + auto result = detail::group_collect(get_grouped_values(), + helper.group_offsets(stream), + helper.num_groups(stream), + null_handling, + stream, + mr); cache.add_result(col_idx, agg, std::move(result)); }; template <> -void aggregrate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { auto const null_handling = - static_cast(agg)._null_handling; - CUDF_EXPECTS(null_handling == null_policy::INCLUDE, - "null exclusion is not supported on groupby COLLECT_SET aggregation."); + dynamic_cast(agg)._null_handling; if (cache.has_result(col_idx, agg)) { return; } - auto const collect_result = detail::group_collect( - get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr); + auto const collect_result = 
detail::group_collect(get_grouped_values(), + helper.group_offsets(stream), + helper.num_groups(stream), + null_handling, + stream, + mr); auto const nulls_equal = - static_cast(agg)._nulls_equal; + dynamic_cast(agg)._nulls_equal; auto const nans_equal = - static_cast(agg)._nans_equal; + dynamic_cast(agg)._nans_equal; cache.add_result( col_idx, agg, @@ -403,7 +421,7 @@ std::pair, std::vector> groupby::sort for (size_t i = 0; i < requests.size(); i++) { auto store_functor = - detail::aggregrate_result_functor(i, requests[i].values, helper(), cache, stream, mr); + detail::aggregate_result_functor(i, requests[i].values, helper(), cache, stream, mr); for (size_t j = 0; j < requests[i].aggregations.size(); j++) { // TODO (dm): single pass compute all supported reductions cudf::detail::aggregation_dispatcher( diff --git a/cpp/src/groupby/sort/group_collect.cu b/cpp/src/groupby/sort/group_collect.cu index b7bcd05a72a..1e6a681af94 100644 --- a/cpp/src/groupby/sort/group_collect.cu +++ b/cpp/src/groupby/sort/group_collect.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,30 +17,101 @@ #include #include #include +#include #include +#include #include #include #include +#include + namespace cudf { namespace groupby { namespace detail { +/** + * @brief Purge null entries in grouped values, and adjust group offsets. + * + * @param values Grouped values to be purged + * @param offsets Offsets of groups' starting points + * @param num_groups Number of groups + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @param mr Device memory resource used to allocate the returned column's device memory + * @return Pair of null-eliminated grouped values and corresponding offsets + */ +std::pair, std::unique_ptr> purge_null_entries( + column_view const &values, + column_view const &offsets, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) +{ + auto values_device_view = column_device_view::create(values, stream); + + auto not_null_pred = [d_value = *values_device_view] __device__(auto i) { + return d_value.is_valid_nocheck(i); + }; + + // Purge null entries in grouped values. + auto null_purged_entries = + cudf::detail::copy_if(table_view{{values}}, not_null_pred, stream, mr)->release(); + + auto null_purged_values = std::move(null_purged_entries.front()); + + // Recalculate offsets after null entries are purged. + rmm::device_uvector null_purged_sizes(num_groups, stream); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_groups), + null_purged_sizes.begin(), + [d_offsets = offsets.template begin(), not_null_pred] __device__(auto i) { + return thrust::count_if(thrust::seq, + thrust::make_counting_iterator(d_offsets[i]), + thrust::make_counting_iterator(d_offsets[i + 1]), + not_null_pred); + }); + + auto null_purged_offsets = strings::detail::make_offsets_child_column( + null_purged_sizes.cbegin(), null_purged_sizes.cend(), stream, mr); + + return std::make_pair, std::unique_ptr>( + std::move(null_purged_values), std::move(null_purged_offsets)); +} + std::unique_ptr group_collect(column_view const &values, cudf::device_span group_offsets, size_type num_groups, + null_policy null_handling, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - rmm::device_buffer offsets_data( - group_offsets.data(), group_offsets.size() * sizeof(cudf::size_type), stream, mr); + auto [child_column, + offsets_column] = [null_handling, num_groups, &values, 
&group_offsets, stream, mr] { + auto offsets_column = make_numeric_column( + data_type(type_to_id()), num_groups + 1, mask_state::UNALLOCATED, stream, mr); + + thrust::copy(rmm::exec_policy(stream), + group_offsets.begin(), + group_offsets.end(), + offsets_column->mutable_view().template begin()); - auto offsets = std::make_unique( - cudf::data_type(cudf::type_to_id()), num_groups + 1, std::move(offsets_data)); + // If column of grouped values contains null elements, and null_policy == EXCLUDE, + // those elements must be filtered out, and offsets recomputed. + if (null_handling == null_policy::EXCLUDE && values.has_nulls()) { + return cudf::groupby::detail::purge_null_entries( + values, offsets_column->view(), num_groups, stream, mr); + } else { + return std::make_pair(std::make_unique(values, stream, mr), + std::move(offsets_column)); + } + }(); return make_lists_column(num_groups, - std::move(offsets), - std::make_unique(values, stream, mr), + std::move(offsets_column), + std::move(child_column), 0, rmm::device_buffer{0, stream, mr}, stream, diff --git a/cpp/src/groupby/sort/group_count_scan.cu b/cpp/src/groupby/sort/group_count_scan.cu index 4ad533aebdc..0caef47f0e3 100644 --- a/cpp/src/groupby/sort/group_count_scan.cu +++ b/cpp/src/groupby/sort/group_count_scan.cu @@ -23,7 +23,6 @@ #include #include -#include #include namespace cudf { diff --git a/cpp/src/groupby/sort/group_nth_element.cu b/cpp/src/groupby/sort/group_nth_element.cu index e6c10aa1056..c3d874f3b33 100644 --- a/cpp/src/groupby/sort/group_nth_element.cu +++ b/cpp/src/groupby/sort/group_nth_element.cu @@ -27,6 +27,9 @@ #include +#include +#include + namespace cudf { namespace groupby { namespace detail { @@ -45,7 +48,10 @@ std::unique_ptr group_nth_element(column_view const &values, if (num_groups == 0) { return empty_like(values); } - auto nth_index = rmm::device_vector(num_groups, values.size()); + auto nth_index = rmm::device_uvector(num_groups, stream); + // TODO: replace with async version + 
thrust::uninitialized_fill_n( + rmm::exec_policy(stream), nth_index.begin(), num_groups, values.size()); // nulls_policy::INCLUDE (equivalent to pandas nth(dropna=None) but return nulls for n if (null_handling == null_policy::INCLUDE || !values.has_nulls()) { diff --git a/cpp/src/groupby/sort/group_product.cu b/cpp/src/groupby/sort/group_product.cu new file mode 100644 index 00000000000..e9cf8611b58 --- /dev/null +++ b/cpp/src/groupby/sort/group_product.cu @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +namespace cudf { +namespace groupby { +namespace detail { +std::unique_ptr group_product(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto values_type = cudf::is_dictionary(values.type()) + ? dictionary_column_view(values).keys().type() + : values.type(); + return type_dispatcher(values_type, + reduce_functor{}, + values, + num_groups, + group_labels, + stream, + mr); +} + +} // namespace detail +} // namespace groupby +} // namespace cudf diff --git a/cpp/src/groupby/sort/group_quantiles.cu b/cpp/src/groupby/sort/group_quantiles.cu index c9f9e3cad9e..64ddc8f6b9d 100644 --- a/cpp/src/groupby/sort/group_quantiles.cu +++ b/cpp/src/groupby/sort/group_quantiles.cu @@ -14,21 +14,22 @@ * limitations under the License. 
*/ +#include +#include "group_reductions.hpp" + #include #include #include #include +#include #include #include #include -#include -#include #include -#include +#include #include -#include #include namespace cudf { @@ -153,7 +154,7 @@ std::unique_ptr group_quantiles(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - rmm::device_vector dv_quantiles(quantiles); + auto dv_quantiles = cudf::detail::make_device_uvector_async(quantiles, stream); auto values_type = cudf::is_dictionary(values.type()) ? dictionary_column_view(values).keys().type() diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index b69fe6a0291..7cc0aea8362 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,12 +24,24 @@ #include +/** @internal @file Internal API in this file are mostly segmented reduction operations on column, + * which are used in sort-based groupby aggregations. 
+ * + */ namespace cudf { namespace groupby { namespace detail { /** * @brief Internal API to calculate groupwise sum * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_sum = [7, -3, 4, ] + * @endcode + * * @param values Grouped values to get sum of * @param num_groups Number of groups * @param group_labels ID of group that the corresponding value belongs to @@ -42,9 +54,40 @@ std::unique_ptr group_sum(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Internal API to calculate groupwise product + * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_product = [6, 2, 4, ] + * @endcode + * + * @param values Grouped values to get product of + * @param num_groups Number of groups + * @param group_labels ID of group that the corresponding value belongs to + * @param mr Device memory resource used to allocate the returned column's device memory + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ */ +std::unique_ptr group_product(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Internal API to calculate groupwise minimum value * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_min = [1, -2, 4, ] + * @endcode + * * @param values Grouped values to get minimum from * @param num_groups Number of groups * @param group_labels ID of group that the corresponding value belongs to @@ -60,6 +103,14 @@ std::unique_ptr group_min(column_view const& values, /** * @brief Internal API to calculate groupwise maximum value * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_max = [4, -1, 4, ] + * @endcode + * * @param values Grouped values to get maximum from * @param num_groups Number of groups * @param group_labels ID of group that the corresponding value belongs to @@ -75,7 +126,15 @@ std::unique_ptr group_max(column_view const& values, /** * @brief Internal API to calculate group-wise indices of maximum values. * - * @param values Ungrouped values to get maximum value's index from + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_max = [2, 0, 0, ] + * @endcode + * + * @param values Grouped values to get maximum value's index from * @param num_groups Number of groups * @param group_labels ID of group that the corresponding value belongs to * @param key_sort_order Indices indicating sort order of groupby keys @@ -92,7 +151,15 @@ std::unique_ptr group_argmax(column_view const& values, /** * @brief Internal API to calculate group-wise indices of minimum values. 
* - * @param values Ungrouped values to get minimum value's index from + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_argmin = [1, 1, 0, ] + * @endcode + * + * @param values Grouped values to get minimum value's index from + * @param num_groups Number of groups * @param group_labels ID of group that the corresponding value belongs to * @param key_sort_order Indices indicating sort order of groupby keys @@ -110,6 +177,14 @@ std::unique_ptr group_argmin(column_view const& values, * @brief Internal API to calculate number of non-null values in each group of * @p values * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_count_valid = [3, 2, 1, 0] + * @endcode + * * @param values Grouped values to get valid count of * @param group_labels ID of group that the corresponding value belongs to * @param num_groups Number of groups ( unique values in @p group_labels ) @@ -125,6 +200,13 @@ std::unique_ptr group_count_valid(column_view const& values, /** * @brief Internal API to calculate number of values in each group of @p values * + * @code{.pseudo} + * group_offsets = [0, 3, 5, 7, 8] + * num_groups = 4 + * + * group_count_all = [3, 2, 2, 1] + * @endcode + * * @param group_offsets Offsets of groups' starting points within @p values * @param num_groups Number of groups ( unique values in @p group_labels ) * @param mr Device memory resource used to allocate the returned column's device memory @@ -138,6 +220,16 @@ std::unique_ptr group_count_all(cudf::device_span group /** * @brief Internal API to calculate groupwise variance * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * group_means = [2.333333, -1.5, 4.0, ] + * group_sizes = [3, 2, 2, 1] + * ddof = 1 + * + * group_var = [2.333333, 0.5, , ] + * @endcode + * * @param values Grouped values to get variance 
of * @param group_means Pre-calculated groupwise MEAN * @param group_sizes Number of valid elements per group @@ -158,6 +250,16 @@ std::unique_ptr group_var(column_view const& values, /** * @brief Internal API to calculate groupwise quantiles * + * @code{.pseudo} + * values = [1, 2, 4, -2, -1, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * group_sizes = [3, 2, 2, 1] + * num_groups = 4 + * quantiles = [0.25, 0.5] + * + * group_quantiles = [1.5, 2, -1.75, -1.5, 4, 4, , ] + * @endcode + * * @param values Grouped and sorted (within group) values to get quantiles from * @param group_sizes Number of valid elements per group * @param group_offsets Offsets of groups' starting points within @p values @@ -179,6 +281,16 @@ std::unique_ptr group_quantiles(column_view const& values, * @brief Internal API to calculate number of unique values in each group of * @p values * + * @code{.pseudo} + * values = [2, 4, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * group_offsets = [0, 3, 5, 7, 8] + * num_groups = 4 + * + * group_nunique(null_policy::EXCLUDE) = [2, 2, 1, 0] + * group_nunique(null_policy::INCLUDE) = [2, 2, 2, 1] + * @endcode + * * @param values Grouped and sorted (within group) values to get unique count of * @param group_labels ID of group that the corresponding value belongs to * @param num_groups Number of groups ( unique values in @p group_labels ) @@ -200,6 +312,17 @@ std::unique_ptr group_nunique(column_view const& values, /** * @brief Internal API to calculate nth values in each group of @p values * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_sizes = [3, 2, 2, 1] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * group_offsets = [0, 3, 5, 7, 8] + * num_groups = 4 + * + * group_nth_element(n=0, null_policy::EXCLUDE) = [2, -1, 4, ] + * group_nth_element(n=0, null_policy::INCLUDE) = [2, -1, , ] + * @endcode + * * @param values Grouped values to get nth value of * @param group_sizes Number of elements per group * @param 
group_labels ID of group that the corresponding value belongs to @@ -223,18 +346,32 @@ std::unique_ptr group_nth_element(column_view const& values, /** * @brief Internal API to collect grouped values into a lists column * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_offsets = [0, 3, 5, 7, 8] + * num_groups = 4 + * + * group_collect = [[2, 1, 4], [-1, -2], [, 4], []] + * @endcode + * * @param values Grouped values to collect * @param group_offsets Offsets of groups' starting points within @p values * @param num_groups Number of groups - * @param mr Device memory resource used to allocate the returned column's device memory + * @param null_handling Exclude nulls while collecting if null_policy::EXCLUDE, + * Include nulls if null_policy::INCLUDE. * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory */ std::unique_ptr group_collect(column_view const& values, cudf::device_span group_offsets, size_type num_groups, + null_policy null_handling, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** @endinternal + * + */ } // namespace detail } // namespace groupby } // namespace cudf diff --git a/cpp/src/groupby/sort/group_replace_nulls.cu b/cpp/src/groupby/sort/group_replace_nulls.cu new file mode 100644 index 00000000000..56e4cb83f71 --- /dev/null +++ b/cpp/src/groupby/sort/group_replace_nulls.cu @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +namespace cudf { +namespace groupby { +namespace detail { + +std::unique_ptr group_replace_nulls(cudf::column_view const& grouped_value, + device_span group_labels, + cudf::replace_policy replace_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + cudf::size_type size = grouped_value.size(); + + auto device_in = cudf::column_device_view::create(grouped_value); + auto index = thrust::make_counting_iterator(0); + auto valid_it = cudf::detail::make_validity_iterator(*device_in); + auto in_begin = thrust::make_zip_iterator(thrust::make_tuple(index, valid_it)); + + rmm::device_uvector gather_map(size, stream); + auto gm_begin = thrust::make_zip_iterator( + thrust::make_tuple(gather_map.begin(), thrust::make_discard_iterator())); + + auto func = cudf::detail::replace_policy_functor(); + thrust::equal_to eq; + if (replace_policy == cudf::replace_policy::PRECEDING) { + thrust::inclusive_scan_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.begin() + size, + in_begin, + gm_begin, + eq, + func); + } else { + auto gl_rbegin = thrust::make_reverse_iterator(group_labels.begin() + size); + auto in_rbegin = thrust::make_reverse_iterator(in_begin + size); + auto gm_rbegin = thrust::make_reverse_iterator(gm_begin + size); + thrust::inclusive_scan_by_key( + rmm::exec_policy(stream), gl_rbegin, gl_rbegin + size, in_rbegin, gm_rbegin, eq, func); + } + + auto output = cudf::detail::gather(cudf::table_view({grouped_value}), + gather_map.begin(), + gather_map.end(), + cudf::out_of_bounds_policy::DONT_CHECK, + stream, + mr); + + return std::move(output->release()[0]); +} + +} // namespace detail +} // namespace groupby +} // namespace cudf diff --git a/cpp/src/groupby/sort/group_scan_util.cuh 
b/cpp/src/groupby/sort/group_scan_util.cuh index 9f8614a61b4..53d05b0c48b 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -27,7 +27,6 @@ #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 63a68974d6b..67062658c39 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -40,14 +40,17 @@ struct reduce_functor { template static constexpr bool is_supported() { - if (K == aggregation::SUM) - return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); - else if (K == aggregation::MIN or K == aggregation::MAX) - return cudf::is_fixed_width() and is_relationally_comparable(); - else if (K == aggregation::ARGMIN or K == aggregation::ARGMAX) - return is_relationally_comparable(); - else - return false; + switch (K) { + case aggregation::SUM: + return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); + case aggregation::PRODUCT: return cudf::detail::is_product_supported(); + case aggregation::MIN: + case aggregation::MAX: + return cudf::is_fixed_width() and is_relationally_comparable(); + case aggregation::ARGMIN: + case aggregation::ARGMAX: return is_relationally_comparable(); + default: return false; + } } template @@ -62,7 +65,7 @@ struct reduce_functor { using OpType = cudf::detail::corresponding_operator_t; using ResultType = cudf::detail::target_type_t; - auto result_type = is_fixed_point() + auto result_type = is_fixed_point() ? data_type{type_to_id(), values.type().scale()} : data_type{type_to_id()}; diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index 53be019f73b..a882b33bcdf 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -14,32 +14,23 @@ * limitations under the License. 
*/ #include -#include -#include -#include #include #include -#include -#include #include -#include #include #include -#include #include +#include + +#include #include namespace cudf { +namespace detail { namespace { -// MD5 supported leaf data type check -bool md5_type_check(data_type dt) -{ - return !is_chrono(dt) && (is_fixed_width(dt) || (dt.id() == type_id::STRING)); -} - template std::vector to_leaf_columns(IterType iter_begin, IterType iter_end) { @@ -58,88 +49,6 @@ std::vector to_leaf_columns(IterType iter_begin, IterType iter_end) } // namespace -namespace detail { - -std::unique_ptr hash(table_view const& input, - hash_id hash_function, - std::vector const& initial_hash, - uint32_t seed, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - switch (hash_function) { - case (hash_id::HASH_MURMUR3): return murmur_hash3_32(input, initial_hash, stream, mr); - case (hash_id::HASH_MD5): return md5_hash(input, stream, mr); - case (hash_id::HASH_SERIAL_MURMUR3): - return serial_murmur_hash3_32(input, seed, stream, mr); - case (hash_id::HASH_SPARK_MURMUR3): - return serial_murmur_hash3_32(input, seed, stream, mr); - default: return nullptr; - } -} - -std::unique_ptr md5_hash(table_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - if (input.num_columns() == 0 || input.num_rows() == 0) { - const string_scalar string_128bit("d41d8cd98f00b204e9orig98ecf8427e"); - auto output = make_column_from_scalar(string_128bit, input.num_rows(), stream, mr); - return output; - } - - // Accepts string and fixed width columns, or single layer list columns holding those types - CUDF_EXPECTS( - std::all_of(input.begin(), - input.end(), - [](auto col) { - return md5_type_check(col.type()) || - (col.type().id() == type_id::LIST && md5_type_check(col.child(1).type())); - }), - "MD5 unsupported column type"); - - // Result column allocation and creation - auto begin = thrust::make_constant_iterator(32); - auto offsets_column 
= - cudf::strings::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr); - - auto chars_column = strings::detail::create_chars_child_column( - input.num_rows(), 0, input.num_rows() * 32, stream, mr); - auto chars_view = chars_column->mutable_view(); - auto d_chars = chars_view.data(); - - rmm::device_buffer null_mask{0, stream, mr}; - - auto const device_input = table_device_view::create(input, stream); - - // Hash each row, hashing each element sequentially left to right - thrust::for_each(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - [d_chars, device_input = *device_input] __device__(auto row_index) { - md5_intermediate_data hash_state; - MD5Hash hasher = MD5Hash{}; - for (int col_index = 0; col_index < device_input.num_columns(); col_index++) { - if (device_input.column(col_index).is_valid(row_index)) { - cudf::type_dispatcher(device_input.column(col_index).type(), - hasher, - device_input.column(col_index), - row_index, - &hash_state); - } - } - hasher.finalize(&hash_state, d_chars + (row_index * 32)); - }); - - return make_strings_column(input.num_rows(), - std::move(offsets_column), - std::move(chars_column), - 0, - std::move(null_mask), - stream, - mr); -} - template