From 1160ef198578e48f8fcced2fbedb661658781759 Mon Sep 17 00:00:00 2001
From: Thomas Li
Date: Wed, 31 Jul 2024 17:34:03 +0000
Subject: [PATCH] Squashed commit of the following:

commit 606d15e7260b553cbdb69f9ecd935c12ba94e430
Author: Thomas Li
Date: Wed Jul 31 14:30:48 2024 +0000

put back mistakenly removed CMakeLists.txt

commit feac68de39be09c1751d0ccc2bb5f93b1075ac8f
Author: Thomas Li
Date: Wed Jul 31 13:59:50 2024 +0000

rpath was the problem?

commit b2b68e14b9faa1dac0f2516667f65ecb5693a744
Author: Thomas Li
Date: Tue Jul 30 22:29:14 2024 +0000

maybe fix?

commit 5243eac8a90114e4fdf794760cb6b6029d9ba1a1
Author: Thomas Li
Date: Tue Jul 30 21:11:03 2024 +0000

fix cuda suffixing

commit acb31227d3ffb07e4a35be5d1c0ec6cbadbfe53d
Author: Thomas Li
Date: Tue Jul 30 20:29:52 2024 +0000

fixes

commit b2306df549ac5db08dc0d1b09df270137dacfe9d
Author: Thomas Li
Date: Tue Jul 30 20:08:13 2024 +0000

fixes

commit d6d91df1510a70d79fefacf8b57ca1caf027edf8
Merge: b7a2782f1a 7b3e73a7e3
Author: Thomas Li
Date: Tue Jul 30 19:32:18 2024 +0000

Merge branch 'branch-24.10' of github.com:rapidsai/cudf into setup-pylibcudf-package

commit 7b3e73a7e38b671db1387879cfa963fe61060c36
Merge: ce259fff66 dbf4bd02a8
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Tue Jul 30 13:14:19 2024 -0400

Merge pull request #16435 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit dbf4bd02a8fdccd1891edbc2d049c3ddddb234b3
Author: GALI PREM SAGAR
Date: Tue Jul 30 12:14:14 2024 -0500

Add docs about rmm modes in `cudf.pandas` (#16404)

This PR adds user-facing docs for rmm memory modes and prefetching.

---------

Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com>
Co-authored-by: Bradley Dice

commit ce259fff6641dd847883d535645c7c17c36fb7ec
Merge: b8bfe2c912 0f07b0bb5e
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Tue Jul 30 09:02:26 2024 -0400

Merge pull request #16433 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 0f07b0bb5e2cc89ca66e9d9639ff6ac961ec0471
Author: GALI PREM SAGAR
Date: Tue Jul 30 08:02:21 2024 -0500

Enable prefetching before `runpy` (#16427)

This PR enables prefetching before we execute the `runpy` module and script code.

commit b8bfe2c91234032cbe9b2549e46a08109e238c8a
Merge: d1be0b6dc0 5feeaf3827
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Tue Jul 30 09:02:06 2024 -0400

Merge pull request #16432 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 5feeaf3827bfd20755cdd0516ef0c6ba484a600c
Author: Richard (Rick) Zamora
Date: Tue Jul 30 08:02:01 2024 -0500

[Bug] Remove loud `NativeFile` deprecation noise for `read_parquet` from S3 (#16415)

Important follow-up to https://github.com/rapidsai/cudf/pull/16132.

Without this PR, using `dask_cudf.read_parquet("s3://...", ...)` will result in loud deprecation warnings after `compute`/`persist` is called. This is because dask will always pass `NativeFile` objects down to cudf. My fault for missing this earlier!
commit d1be0b6dc06fddd0b69fb69731281b16894cb132
Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon Jul 29 15:12:38 2024 -1000

Align CategoricalIndex APIs with pandas 2.x (#16369)

Mostly exposing methods that were available on the CategoricalColumn.

Authors:
- Matthew Roeschke (https://github.com/mroeschke)
- GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/16369

commit 368a34ca9fd7db1b6cfb6e7817978e3e4fcfb00b
Author: Bradley Dice
Date: Mon Jul 29 20:05:17 2024 -0500

Use RMM adaptor constructors instead of factories. (#16414)

This PR uses RMM memory resource adaptor constructors instead of factory functions. With CTAD, we do not need the factory and can use the constructor directly. The factory will be deprecated in https://github.com/rapidsai/rmm/pull/1626.

Authors:
- Bradley Dice (https://github.com/bdice)

Approvers:
- Nghia Truong (https://github.com/ttnghia)
- Jayjeet Chakraborty (https://github.com/JayjeetAtGithub)

URL: https://github.com/rapidsai/cudf/pull/16414
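A minimal sketch of the CTAD pattern described in #16414, using RMM's `statistics_resource_adaptor` as a representative adaptor (illustrative only, not code from that PR):

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/statistics_resource_adaptor.hpp>

int main()
{
  rmm::mr::cuda_memory_resource upstream;

  // Factory style, slated for deprecation in rmm#1626:
  // auto stats_mr = rmm::mr::make_statistics_adaptor(&upstream);

  // Constructor style: CTAD deduces
  // statistics_resource_adaptor<cuda_memory_resource> from the argument.
  rmm::mr::statistics_resource_adaptor stats_mr{&upstream};
  return 0;
}
```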
commit e8048f7f3d66433203651a6a603d4de1360ca5ca
Merge: f8eb63e499 bd302d773c
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Mon Jul 29 20:07:38 2024 -0400

Merge pull request #16431 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit bd302d773c50552531bc7f11f782f8ed876e8fab
Author: Nghia Truong <7416935+ttnghia@users.noreply.github.com>
Date: Mon Jul 29 17:07:33 2024 -0700

Make `prefetch_config::get` and `prefetch_config::set` thread-safe (#16425)

This adds multi-thread support for the `prefetch_config` getter and setter functions, avoiding the issue of the config map being corrupted in multi-threaded environments.

Closes https://github.com/rapidsai/cudf/issues/16426.

---------

Signed-off-by: Nghia Truong
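The guarded-config pattern #16425 describes, as a standalone sketch (the class and member names are illustrative; the actual libcudf implementation may differ): a mutex serializes access to the config map in both the getter and the setter.

```cpp
#include <map>
#include <mutex>
#include <string>

class prefetch_config_sketch {
 public:
  bool get(std::string const& key) const
  {
    std::lock_guard<std::mutex> lock(config_mutex_);  // serialize readers and writers
    auto const it = config_values_.find(key);
    return it != config_values_.end() && it->second;
  }

  void set(std::string const& key, bool value)
  {
    std::lock_guard<std::mutex> lock(config_mutex_);  // no concurrent map mutation
    config_values_[key] = value;
  }

 private:
  mutable std::mutex config_mutex_;
  std::map<std::string, bool> config_values_;
};
```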
commit f8eb63e499f94d583d715f5c1f5e6f234589be57
Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon Jul 29 12:39:19 2024 -1000

Align Index APIs with pandas 2.x (#16361)

Similar to https://github.com/rapidsai/cudf/pull/16310, the following APIs have been modified to adjust/add parameters:

* `to_flat_index`
* `isin`
* `unique`
* `transpose`

Authors:
- Matthew Roeschke (https://github.com/mroeschke)
- GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/16361

commit 743e16426c564d0ed0d7e3d9be5f67e4605c4f32
Author: James Lamb
Date: Mon Jul 29 14:19:43 2024 -0500

update some branch references in GitHub Actions configs (#16397)

Fixes some lingering references to `branch-24.08` in the `pr_issue_status_automation` CI workflow. This was missed when new branches were cut because that file ends in `.yml` and `update-version.sh` was only modifying files ending in `.yaml`. The corresponding `update-version.sh` changes were made in #16183 and are already on 24.10 thanks to forward merges.

https://github.com/rapidsai/cudf/blob/dc05a01f3fc0742c5fbbddd86a0f2007bfdc2050/ci/release/update-version.sh#L78

## Notes for Reviewers

I checked like this, and don't see any other missed references:

```shell
git grep -E '24\.8|24\.08|0\.39'
```

Authors:
- James Lamb (https://github.com/jameslamb)

Approvers:
- Kyle Edwards (https://github.com/KyleFromNVIDIA)

URL: https://github.com/rapidsai/cudf/pull/16397

commit 35796057b64e258713d4d89ba368837d30a1a9c5
Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon Jul 29 08:33:23 2024 -1000

Align misc DataFrame and MultiIndex methods with pandas 2.x (#16402)

The API changes in this PR are mostly adding implementations or adding missing keyword arguments (although they might not be implemented). The APIs affected are:

* `DataFrame.insert`
* `DataFrame.melt`
* `DataFrame.merge`
* `DataFrame.quantile`
* `DataFrame.cov`
* `DataFrame.corr`
* `DataFrame.median`
* `DataFrame.rolling`
* `DataFrame.resample`
* `DataFrame.dropna`
* `MultiIndex.from_tuple`
* `MultiIndex.from_frame`
* `MultiIndex.from_product`

Authors:
- Matthew Roeschke (https://github.com/mroeschke)
- GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/16402

commit 6e7624d6b31c93b0547590929ac63ed8e3a48d24
Author: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Mon Jul 29 14:06:51 2024 -0400

Add stream parameter to reshape APIs (#16410)

Adds a `stream` parameter to the reshape APIs:

- `cudf::interleave_columns`
- `cudf::tile`
- `cudf::byte_cast`

Found while working on #15983.

Authors:
- David Wendt (https://github.com/davidwendt)

Approvers:
- Bradley Dice (https://github.com/bdice)
- Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/16410
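Illustrative call shape for the new overloads in #16410, assuming the `stream` argument follows libcudf's usual convention of sitting before the memory resource and defaulting to the default stream:

```cpp
#include <cudf/column/column.hpp>
#include <cudf/reshape.hpp>
#include <cudf/table/table_view.hpp>
#include <rmm/cuda_stream.hpp>

#include <memory>

std::unique_ptr<cudf::column> interleave_on_stream(cudf::table_view const& input)
{
  rmm::cuda_stream stream;  // caller-owned, non-default stream
  // Work is enqueued on `stream` rather than the default stream.
  return cudf::interleave_columns(input, stream.view());
}
```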
- [ ] The documentation is up to date with these changes.

commit 1cea1eaf6c1e87e65729897dd9bbedc4bdc5e7ab
Author: Kyle Edwards
Date: Thu Jul 25 16:26:34 2024 -0400

Don't export bs_thread_pool (#16398)

## Description

cudf does not currently export any headers that depend on bs_thread_pool, and having it as a dependency is currently causing problems for consumers. Avoid exporting it since it's not needed.

## Checklist

- [ ] I am familiar with the [Contributing Guidelines](https://github.com/rapidsai/cudf/blob/HEAD/CONTRIBUTING.md).
- [ ] New or existing tests cover these changes.
- [ ] The documentation is up to date with these changes.

commit dc05a01f3fc0742c5fbbddd86a0f2007bfdc2050
Merge: fb2021fe82 e553295cfa
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Thu Jul 25 12:14:52 2024 -0400

Merge pull request #16396 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit e553295cfaf2f5bd1f539ee78d9a3a064e00e5f0
Author: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Thu Jul 25 11:14:47 2024 -0500

Require fixed width types for casting in `cudf-polars` (#16381)

Fixes a bug where numeric <-> string casts are not being properly rejected at the cudf-polars level.

Authors:
- https://github.com/brandon-b-miller

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16381

commit fb2021fe82724746ae1c58345ed37f7e7a0207ed
Merge: 673b96f6d1 f756e01a3c
Author: Ray Douglass <3107146+raydouglass@users.noreply.github.com>
Date: Thu Jul 25 11:06:30 2024 -0400

Merge pull request #16391 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit f756e01a3c5ff83421b1afb44460d9e5147a410e
Author: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Thu Jul 25 07:04:47 2024 -0700

Implement support for scan_ndjson in cudf-polars (#16263)

Implement support for scan_ndjson in cudf-polars.

Authors:
- Thomas Li (https://github.com/lithomas1)
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/16263

commit 673b96f6d15dbd5d8bcb22d612d3c324aa899e26
Merge: 5a3399bec8 4cc37896a5
Author: Jake Awe <50372925+AyodeAwe@users.noreply.github.com>
Date: Thu Jul 25 08:27:15 2024 -0500

Merge pull request #16393 from jameslamb/branch-24.10-merge-branch-24.08

Merge branch-24.08 into branch-24.10

commit d953676e9281125a5b8bd9be739c997611471771
Author: Robert Maynard
Date: Thu Jul 25 04:49:12 2024 -0400

Hide visibility of non public symbols (#15982)

Converts cudf over to a system of explicit markup of what symbols should be used by consumers. This is done by compiling with `-fvisibility=hidden` and explicit markup via `CUDF_EXPORT` of the components we want usable.

Due to issues with tests, a portion of the `include/` detail functions had to be marked as public API. More concerning, the tests leverage functions from `cpp/` that are never part of the installed headers. That set of files can be found at https://github.com/rapidsai/cudf/commit/16b365635ab0f86bb1cc6db5f036564e8290f3b1 and we should discuss how to restructure cudf to remove these.

Authors:
- Robert Maynard (https://github.com/robertmaynard)
- Bradley Dice (https://github.com/bdice)

Approvers:
- Bradley Dice (https://github.com/bdice)
- Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/15982
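The general mechanism behind `-fvisibility=hidden` plus export markup, as in #15982, sketched with a generic macro rather than cudf's actual `CUDF_EXPORT` definition:

```cpp
// Compile with: g++ -fPIC -fvisibility=hidden -shared mylib.cpp -o libmylib.so
#define MYLIB_EXPORT __attribute__((visibility("default")))

namespace mylib {

// Explicitly exported: part of the shared library's public ABI.
MYLIB_EXPORT int public_entry_point(int x) { return x + 1; }

// Hidden by -fvisibility=hidden: consumers cannot link against this.
int internal_helper(int x) { return x * 2; }

}  // namespace mylib
```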
commit 4aefcc7b2988346166b9a757fc837e93f6f0a3bb
Author: GALI PREM SAGAR
Date: Wed Jul 24 22:30:35 2024 -0500

Add ability to prefetch in `cudf.pandas` and change default to managed pool (#16296)

This PR adds the ability to prefetch in `cudf.pandas`, based on https://github.com/rapidsai/rmm/pull/1608/.

Authors:
- GALI PREM SAGAR (https://github.com/galipremsagar)
- Bradley Dice (https://github.com/bdice)

Approvers:
- Bradley Dice (https://github.com/bdice)
- Muhammad Haseeb (https://github.com/mhaseeb123)
- Vyas Ramasubramani (https://github.com/vyasr)
- Mark Harris (https://github.com/harrism)

URL: https://github.com/rapidsai/cudf/pull/16296

commit 6486bb928dfb0e1817b0604572e2f5789d05c596
Author: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Wed Jul 24 22:24:46 2024 -0400

Migrate lists/filtering to pylibcudf (#16184)

Part of #15162.

Authors:
- Matthew Murray (https://github.com/Matt711)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16184

commit a33f520b370d048a22de031294311c241ab23858
Author: David Gardner <96306125+dagardner-nv@users.noreply.github.com>
Date: Wed Jul 24 18:42:16 2024 -0700

Fix inconsistent usage of 'results' and 'records' in read-json.md (#15766)

* Fix inconsistent usage of 'results' and 'records' in `docs/cudf/source/user_guide/io/read-json.md`

Authors:
- David Gardner (https://github.com/dagardner-nv)
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- Bradley Dice (https://github.com/bdice)
- Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/15766

commit 5a3399bec868f44d13c003f172c665919096d8e8
Author: James Lamb
Date: Wed Jul 24 19:26:12 2024 -0500

fix [tool.setuptools] reference in custreamz config (#16365)

Noticed this warning in logs from #16183:

> _/python3.10/site-packages/setuptools/config/pyprojecttoml.py:70: _ToolsTypoInMetadata: Ignoring [tools.setuptools] in pyproject.toml, did you mean [tool.setuptools]?_

This fixes that.

## Notes for Reviewers

Intentionally targeting this at 24.10. This misconfiguration has been in `custreamz` since the 23.04 release ([git blame link](https://github.com/rapidsai/cudf/blame/e6d412cba7c23df7ee500c28257ed9281cea49b9/python/custreamz/pyproject.toml#L60)). I think the only effect might be that some test files are included in wheels when we don't want them to be. I don't think the fix for it needs to be rushed into 24.08.

I searched across RAPIDS in case this was copied from somewhere else... don't see any other instances of this typo that need to be fixed.

Authors:
- James Lamb (https://github.com/jameslamb)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16365

commit 4cc37896a5dff1e019f0dff8101f3a84a05fd5d8
Merge: 29ce5c529e a36dacb663
Author: James Lamb
Date: Wed Jul 24 18:54:56 2024 -0500

Merge branch-24.08 into branch-24.10

commit a36dacb66325e03d3264482d35a5cf7e0b6c7a37
Author: Lawrence Mitchell
Date: Thu Jul 25 00:31:40 2024 +0100

Make C++ compilation warning free after #16297 (#16379)

In https://github.com/rapidsai/cudf/pull/16297, we deprecated the use of `to_arrow` in favour of `to_arrow_host` and `to_arrow_device`. However, the scalar detail overload of `to_arrow` used the public table overload, so we get a warning when compiling internal libcudf code. Fix this by using the detail API, and fix a bug along the way where we were not passing through the arrow memory resource.

Authors:
- Lawrence Mitchell (https://github.com/wence-)

Approvers:
- David Wendt (https://github.com/davidwendt)
- Michael Schellenberger Costa (https://github.com/miscco)
- Vyas Ramasubramani (https://github.com/vyasr)
- Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/16379

commit ae4c7e3ce4fe100eb919ca00fa34461e44078ba9
Author: James Lamb
Date: Wed Jul 24 18:30:53 2024 -0500

split up CUDA-suffixed dependencies in dependencies.yaml (#16183)

Contributes to https://github.com/rapidsai/build-planning/issues/31. Follow-up to #15245.

RAPIDS DLFW builds prefer to build all RAPIDS packages together without CUDA suffixes, leading to the following set of requirements for `cudf` wheels built there:

* project name must be `cudf` (not `cudf-cu12`)
* all dependencies must be unsuffixed (e.g. `rmm` not `rmm-cu12`)
* the correct set of dependencies based on CUDA version must be expressed in the wheel metadata (e.g. `cubinlinker` and `ptxcompiler` on CUDA 11, `pynvjitlink` on CUDA 12)

To meet all 3 of those, this proposes decomposing CUDA-suffixed dependencies in `dependencies.yaml` into two lists, `cuda_suffixed="true"` and `cuda_suffixed="false"`. That'd allow DLFW builds to do the following to meet those requirements:

```shell
pip wheel \
  -C rapidsai.disable-cuda=true \
  -C rapidsai.matrix-entry="cuda=12.5;cuda_suffixed=false" \
  .
```

Authors:
- James Lamb (https://github.com/jameslamb)

Approvers:
- Bradley Dice (https://github.com/bdice)
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16183

commit 29ce5c529ea9ea18edc32ab905f1ef076f266008
Author: Michael Schellenberger Costa
Date: Thu Jul 25 01:29:41 2024 +0200

Fix some issues with deprecated / removed cccl facilities (#16377)

`cub::If` has been deprecated and should not be used; there is a better alternative in `cuda::std::conditional_t`.

`thrust::{binary, unary}_function` has been deprecated and does not serve a purpose, similar to the removed `std::{binary, unary}_function`. Rather than relying on the type aliases, one should use the `std::invoke` machinery.

Authors:
- Michael Schellenberger Costa (https://github.com/miscco)

Approvers:
- Bradley Dice (https://github.com/bdice)
- Nghia Truong (https://github.com/ttnghia)
- Bernhard Manfred Gruber (https://github.com/bernhardmgruber)

URL: https://github.com/rapidsai/cudf/pull/16377
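A before/after sketch of the two migrations named in #16377, on an illustrative functor rather than actual cudf code:

```cpp
#include <cuda/std/type_traits>

// Before: cub::If<UseDouble, double, float>::Type (deprecated).
// After: the libcu++ alias performs the same compile-time type selection.
template <bool UseDouble>
using accum_t = cuda::std::conditional_t<UseDouble, double, float>;

// Before: struct square : thrust::unary_function<float, float> { ... };
// After: a plain functor; argument/result types are deduced where needed
// via the std::invoke machinery instead of member typedefs.
struct square {
  __host__ __device__ float operator()(float x) const { return x * x; }
};
```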
commit a6b1cf1fa96d622626a9e4d99a5c71d33fb1bd49
Merge: 2eabe0de58 59f65843b8
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Wed Jul 24 19:10:33 2024 -0400

Merge pull request #16389 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 59f65843b80d967f743841aee8489b6ae63b269a
Author: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com>
Date: Wed Jul 24 16:10:28 2024 -0700

Gracefully CUDF_FAIL when `skip_rows > 0` in Chunked Parquet reader (#16385)

This PR must merge in cudf 24.08 to avoid unhandled exceptions. Gracefully CUDF_FAIL in the chunked parquet reader when `skip_rows > 0`, which may otherwise result in runtime errors like segfaults or an infinite loop. See #16186 for more information.

Authors:
- Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
- David Wendt (https://github.com/davidwendt)
- Vyas Ramasubramani (https://github.com/vyasr)
- Bradley Dice (https://github.com/bdice)
- Karthikeyan (https://github.com/karthikeyann)
- Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/16385
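The "fail gracefully" idiom referenced in #16385, assuming libcudf's `CUDF_EXPECTS` macro (which throws a `cudf::logic_error` carrying the given message when the condition is false); the function below is a hypothetical illustration, not the PR's code:

```cpp
#include <cudf/utilities/error.hpp>

void validate_chunked_reader_options(long skip_rows)
{
  // Throwing a well-formed exception is recoverable by the caller,
  // unlike the segfault or infinite loop the unchecked path could hit.
  CUDF_EXPECTS(skip_rows == 0,
               "skip_rows > 0 is not supported by the chunked Parquet reader");
}
```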
commit 2eabe0de584ff8c8ae6e82b1845309d5b01c4a98
Merge: 4624edf586 8bba6dfad2
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Wed Jul 24 18:16:08 2024 -0400

Merge pull request #16388 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 8bba6dfad239b4fd69a82acbc5dd7707ba576cce
Author: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Wed Jul 24 18:16:03 2024 -0400

Migrate lists/set_operations to pylibcudf (#16190)

Part of #15162.

Authors:
- Matthew Murray (https://github.com/Matt711)

Approvers:
- Thomas Li (https://github.com/lithomas1)

URL: https://github.com/rapidsai/cudf/pull/16190

commit 4624edf58683391529cd9d7b76ca2e45438655bf
Merge: 077457ee89 73937fbaba
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Wed Jul 24 16:42:06 2024 -0400

Merge pull request #16387 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 73937fbabaeea76665663ed23688b1cac61b7ee9
Author: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Wed Jul 24 16:42:00 2024 -0400

Migrate lists/filling to pylibcudf (#16189)

Part of #15162.

Authors:
- Matthew Murray (https://github.com/Matt711)
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- Thomas Li (https://github.com/lithomas1)
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16189

commit 077457ee89140e98c9e25849511b14410370f684
Merge: 17c1afbd93 8fcf72a787
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Wed Jul 24 13:06:35 2024 -0400

Merge pull request #16382 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 8fcf72a787acb0168c97d11b8ab9130146e9b37e
Author: Alessandro Bellina
Date: Wed Jul 24 12:06:29 2024 -0500
[JNI] Add setKernelPinnedCopyThreshold and setPinnedAllocationThreshold (#16288)

In 24.08 two new cuDF methods are being added, and the second method is still in flight (see https://github.com/rapidsai/cudf/pull/16206):

```
cudf::set_kernel_pinned_copy_threshold
cudf::set_allocate_host_as_pinned_threshold
```

We'd like to expose these methods in our JNI layer. I created a Cudf.java with the two static methods, and put the definitions in CudfJni.cpp.

Marked as draft since I need https://github.com/rapidsai/cudf/pull/16206 to merge, and we are still testing it.

Authors:
- Alessandro Bellina (https://github.com/abellina)
- Nghia Truong (https://github.com/ttnghia)

Approvers:
- Robert (Bobby) Evans (https://github.com/revans2)
- Jason Lowe (https://github.com/jlowe)

URL: https://github.com/rapidsai/cudf/pull/16288

commit 17c1afbd936989bdcdcdb5654c1cbc4dbe57cc7d
Merge: a0c58c766e 7191b74ce2
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Wed Jul 24 09:55:53 2024 -0400

Merge pull request #16380 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 7191b74ce244518f17ef65e701f5a262f1c5cf8a
Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed Jul 24 03:55:48 2024 -1000

Align Index __init__ APIs with pandas 2.x (#16362)

* It would be nice for `Index`'s constructor not to go through `IndexMeta.__call__`, but I think that would be a separate effort
* There were a couple of `verify_integrity` keyword arguments added that don't raise a `NotImplementedError` even though there's no support, but I don't think it's worth making this case fall back in `cudf.pandas`, as it's just a validation and won't affect further behavior with the object

Authors:
- Matthew Roeschke (https://github.com/mroeschke)
- GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/16362

commit a0c58c766e41525059e5a4e37ac5fce3a638468e
Merge: b66281c4fa 743264f6ac
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Wed Jul 24 06:32:36 2024 -0400

Merge pull request #16378 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 743264f6ac924fdbec58fad666f989b14b901a98
Author: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Wed Jul 24 05:32:31 2024 -0500

Warn on cuDF failure when `POLARS_VERBOSE` is true (#16308)

Just something quick to get us started here. Closes https://github.com/rapidsai/cudf/issues/16256

Authors:
- https://github.com/brandon-b-miller
- Lawrence Mitchell (https://github.com/wence-)

Approvers:
- Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/16308

commit b66281c4fa811431dec0cdc0d8222fba9e8e4088
Merge: f20205b2dc 62625f1bfc
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Wed Jul 24 03:42:08 2024 -0400

Merge pull request #16376 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 62625f1bfcdb980186a1afbec41e420fdb4a7075
Author: Matt Topol
Date: Wed Jul 24 03:42:03 2024 -0400

Host implementation of `to_arrow` using nanoarrow (#16297)

Adds the corresponding `to_arrow_host` functions for interop using `ArrowDeviceArray`. This includes updating the version of nanoarrow in use to pick up some bug fixes and features.
Authors:
- Matt Topol (https://github.com/zeroshade)
- Muhammad Haseeb (https://github.com/mhaseeb123)
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- Muhammad Haseeb (https://github.com/mhaseeb123)
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16297

commit f20205b2dc7a5e830b72386df378934c53da5043
Merge: bc748d67b5 8c1749b40e
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Wed Jul 24 01:19:15 2024 -0400

Merge pull request #16375 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 8c1749b40eaa983966ed3bece6bdd29a4316d18a
Author: Kyle Edwards
Date: Wed Jul 24 01:19:10 2024 -0400

Use rapids_cpm_bs_thread_pool() (#16360)

Authors:
- Kyle Edwards (https://github.com/KyleFromNVIDIA)

Approvers:
- Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16360

commit bc748d67b52de4cf1c876f9701644fdbf1d839e5
Merge: 6d9aff4b7d 75289c58f3
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Wed Jul 24 00:46:03 2024 -0400

Merge pull request #16374 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 75289c58f3d9ca11a51396e4adadfbd5f51856f5
Author: Bradley Dice
Date: Tue Jul 23 23:45:59 2024 -0500

Rename PrefetchConfig to prefetch_config. (#16358)

This PR addresses a comment requesting a rename of `PrefetchConfig` to `prefetch_config`. See https://github.com/rapidsai/cudf/pull/16020#discussion_r1686284151.

Authors:
- Bradley Dice (https://github.com/bdice)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)
- Shruti Shivakumar (https://github.com/shrshi)
- Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/16358

commit 6d9aff4b7dfd23db43d294dacdeaf6c52af2fc4b
Merge: dcf791c83e f0efc8b36a
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Tue Jul 23 20:17:10 2024 -0400

Merge pull request #16373 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit f0efc8b36a8f43cfa027966265dcea052bb5c45d
Author: Vukasin Milovanovic
Date: Tue Jul 23 17:17:05 2024 -0700

Modify `make_host_vector` and `make_device_uvector` factories to optionally use pinned memory and kernel copy (#16206)

Issue #15616

Modified `make_host_vector` functions to return `cudf::detail::host_vector`, which can use a pinned or a pageable memory resource. When pinned memory is used, the D2H copy is potentially done using a CUDA kernel. Also added factories to create `host_vector`s without device data. These are useful to replace uses of `std::vector` and `thrust::host_vector` when the data eventually gets copied to the GPU.

Added `is_device_accessible` to `host_span`. With this, `make_device_uvector` can optionally use the kernel for the H2D copy.

Modified `cudf::detail::host_vector` to be derived from `thrust::host_vector`, to avoid issues with implicit conversion from `std::vector`. Used `cudf::detail::host_vector` and its new factory functions wherever data ends up copied to the GPU.

Stopped using `thrust::copy_n` for the kernel copy path in `cuda_memcpy` because of an optimization that allows it to fall back to `cudaMemcpyAsync`. We now call a simple local kernel.
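A simplified, generic illustration of why pinned staging buffers matter for #16206 (plain CUDA runtime calls, not the `cudf::detail` factories described above): page-locked host memory lets the copy run asynchronously via DMA, while pageable memory forces an extra synchronous staging step.

```cpp
#include <cuda_runtime.h>

#include <cstddef>

int main()
{
  constexpr std::size_t n = 1 << 20;
  float* h_pinned = nullptr;
  float* d_data   = nullptr;

  cudaMallocHost(&h_pinned, n * sizeof(float));  // pinned (page-locked) host buffer
  cudaMalloc(&d_data, n * sizeof(float));

  cudaStream_t stream;
  cudaStreamCreate(&stream);

  // From pinned memory this copy can be truly asynchronous on `stream`.
  cudaMemcpyAsync(d_data, h_pinned, n * sizeof(float), cudaMemcpyHostToDevice, stream);
  cudaStreamSynchronize(stream);

  cudaStreamDestroy(stream);
  cudaFree(d_data);
  cudaFreeHost(h_pinned);
  return 0;
}
```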
Authors:
- Vukasin Milovanovic (https://github.com/vuule)

Approvers:
- Robert Maynard (https://github.com/robertmaynard)
- Yunsong Wang (https://github.com/PointKernel)
- Nghia Truong (https://github.com/ttnghia)
- Alessandro Bellina (https://github.com/abellina)

URL: https://github.com/rapidsai/cudf/pull/16206

commit dcf791c83e3ab87d57d94017ee7413d96f9e99a5
Merge: 7a09f809dc 39f256c339
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Tue Jul 23 20:03:22 2024 -0400

Merge pull request #16372 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 39f256c3397afc9c495cb819636abddb23f81dc0
Author: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue Jul 23 19:03:16 2024 -0500

Fall back to CPU for unsupported libcudf binaryops in cudf-polars (#16188)

This PR adds logic that should trigger CPU fallback for unsupported binary ops.

Authors:
- https://github.com/brandon-b-miller
- Lawrence Mitchell (https://github.com/wence-)

Approvers:
- Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/16188

commit 7a09f809dc5c8cf8d2663fae186e4d249893c888
Merge: a3aacd8915 cd711913d2
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Tue Jul 23 18:24:24 2024 -0400

Merge pull request #16370 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit cd711913d2312ba158e34f5c03784a7b07f1583a
Author: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Wed Jul 24 00:24:19 2024 +0200

Adds write-coalescing code path optimization to FST (#16143)

This PR adds an optimized code path to the finite-state transducer (FST) that will use a shared memory-backed write buffer for the translated output and translated output indexes, if the write buffer does not require allocating excessive amounts of shared memory (the current heuristic is 24 KB/CTA). Writes are first buffered in shared memory and then collaboratively written out using coalesced writes to global memory.

## Benchmark results

Numbers are for libcudf's FST_NVBENCH for a 1.073 GB input. FST outputs one token per input symbol. Benchmarks run on V100 with 900 GB/s theoretical peak BW. We compare the current FST implementation (old) to an FST implementation that uses write-coalescing to gmem (new).
|                  | OLD throughput (GB/s) | NEW throughput (GB/s) | relative performance |   | 1st kernel, per byte: bytes read/written | 2nd kernel, per byte: bytes read/written | expected SOL (GB/s) | achieved SOL (old) | achieved SOL (new) |
|------------------|-----------------------|-----------------------|----------------------|---|------------------------------------------|------------------------------------------|---------------------|--------------------|--------------------|
| full             | 15.7                  | 74.74                 | 476%                 |   | 1                                        | 6                                        | 102.86              | 15.26%             | 72.66%             |
| no out-indexes   | 39.123                | 105.8                 | 270%                 |   | 1                                        | 2                                        | 240.00              | 16.30%             | 44.08%             |
| no-output        | 229.27                | 178.92                | 78%                  |   | 1                                        | 1                                        | 360.00              | 63.69%             | 49.70%             |
| out-indexes-only | 24.95                 | 85.2                  | 341%                 |   | 1                                        | 5                                        | 120.00              | 20.79%             | 71.00%             |

Authors:
- Elias Stehle (https://github.com/elstehle)

Approvers:
- Shruti Shivakumar (https://github.com/shrshi)
- Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/16143
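The shared-memory write-buffer pattern from #16143 in its simplest form (a generic CUDA sketch of the technique, not the FST kernel itself): each thread stages its output in shared memory, then the block flushes to global memory so that consecutive threads store to consecutive addresses.

```cpp
constexpr int BLOCK_SIZE = 256;

__global__ void buffered_transform(int const* in, int* out, int n)
{
  __shared__ int write_buffer[BLOCK_SIZE];

  int const idx = blockIdx.x * blockDim.x + threadIdx.x;

  // Stage per-thread results in shared memory; irregular or scattered
  // writes are cheap here.
  write_buffer[threadIdx.x] = (idx < n) ? in[idx] * 2 : 0;
  __syncthreads();

  // Flush collaboratively: thread i writes element i, so the stores to
  // global memory coalesce into full transactions.
  if (idx < n) { out[idx] = write_buffer[threadIdx.x]; }
}
```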
commit a3aacd8915fa503ea4be8e1d7797a080e0427923
Merge: 2de9fa7bd8 ff30c02111
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Tue Jul 23 15:04:01 2024 -0400

Merge pull request #16366 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit ff30c0211109e14b1f6918fcc6c2e2b98f863a1f
Author: Nghia Truong <7416935+ttnghia@users.noreply.github.com>
Date: Tue Jul 23 12:03:55 2024 -0700

Fix compile warnings with `jni_utils.hpp` (#16336)

This fixes the compiler warnings with `jni_utils.hpp`, removing some `const` qualifiers that are redundant. Closes https://github.com/rapidsai/cudf/issues/16335.

Authors:
- Nghia Truong (https://github.com/ttnghia)

Approvers:
- Jason Lowe (https://github.com/jlowe)

URL: https://github.com/rapidsai/cudf/pull/16336

commit 2de9fa7bd821c7b1653340dfca4e6a1e9e216cc5
Merge: bc609fb648 e6d412cba7
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Tue Jul 23 07:03:33 2024 -0400

Merge pull request #16364 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit e6d412cba7c23df7ee500c28257ed9281cea49b9
Author: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue Jul 23 06:03:28 2024 -0500

Fall back when casting a timestamp to numeric in cudf-polars (#16232)

This PR adds logic that falls back to CPU when a cudf-polars query would cast a timestamp column to a numeric type, an unsupported operation in libcudf, which should fix a few polars tests. It could be cleaned up a bit with some of the utilities that will be added in https://github.com/rapidsai/cudf/pull/16150.

Authors:
- https://github.com/brandon-b-miller

Approvers:
- Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/16232

commit bc609fb6482e32152d64f3e9d34aaa4cb9b87cec
Merge: 023dba6fab c7b28ceeb4
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Tue Jul 23 06:28:20 2024 -0400

Merge pull request #16363 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit c7b28ceeb46d2b921e30f081a9ed97745c91ff9e
Author: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue Jul 23 05:28:13 2024 -0500

Add `drop_nulls` in `cudf-polars` (#16290)

Closes https://github.com/rapidsai/cudf/issues/16219

Authors:
- https://github.com/brandon-b-miller

Approvers:
- Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/16290

commit 023dba6fab1c00116b11ff10fc7536d4f9e78fcd
Merge: 4a0813b681 0cac2a9d68
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Mon Jul 22 17:18:26 2024 -0400

Merge pull request #16359 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 0cac2a9d68341a38721be16132ead14cf4a0d70b
Author: Shruti Shivakumar
Date: Mon Jul 22 14:18:21 2024 -0700

Remove size constraints on source files in batched JSON reading (#16162)

Addresses https://github.com/rapidsai/cudf/issues/16138

The batched multi-source JSON reader fails when the size of any of the input source buffers exceeds `INT_MAX` bytes. The goal of this PR is to remove this constraint by modifying the batching behavior of the reader. Instead of constructing batches that include entire source files, the batches are now constructed at the granularity of byte ranges of size at most `INT_MAX` bytes.

Authors:
- Shruti Shivakumar (https://github.com/shrshi)

Approvers:
- Vukasin Milovanovic (https://github.com/vuule)
- Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/16162
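The batching rule from #16162 as a standalone sketch: splitting a total byte count into ranges that each stay within `INT_MAX` bytes (the function is a hypothetical illustration, not the reader's code).

```cpp
#include <algorithm>
#include <climits>
#include <cstddef>
#include <utility>
#include <vector>

// Returns (offset, size) byte ranges, each at most INT_MAX bytes.
std::vector<std::pair<std::size_t, std::size_t>> make_byte_range_batches(
  std::size_t total_bytes)
{
  constexpr auto max_batch = static_cast<std::size_t>(INT_MAX);
  std::vector<std::pair<std::size_t, std::size_t>> batches;
  for (std::size_t offset = 0; offset < total_bytes; offset += max_batch) {
    batches.emplace_back(offset, std::min(max_batch, total_bytes - offset));
  }
  return batches;
}
```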
commit 4a0813b68158474b00d3e7c692310b62b48fe2fc
Merge: a4acaa7177 81e65ee312
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Mon Jul 22 16:18:45 2024 -0400

Merge pull request #16357 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 81e65ee312af5133ca2b98d52efaeb29c274a825
Author: GALI PREM SAGAR
Date: Mon Jul 22 15:18:40 2024 -0500

Fix docstring of `DataFrame.apply` (#16351)

This PR fixes the docstring of `DataFrame.apply`.

Authors:
- GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
- Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/16351

commit a4acaa717798a3a09a57ab333965c00666d9d808
Merge: 0868314b1d 996cb8d870
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Mon Jul 22 16:15:22 2024 -0400

Merge pull request #16356 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 996cb8d870b7b6153802bde670435e8cd3b8775d
Author: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Mon Jul 22 16:15:16 2024 -0400

Migrate lists/sorting to pylibcudf (#16179)

Part of #15162.

Authors:
- Matthew Murray (https://github.com/Matt711)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16179

commit 0868314b1d5f2ca31eb56f4fee5f75de42b22fbe
Merge: a3ebf3badd c14c8bf59f
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Mon Jul 22 15:04:01 2024 -0400

Merge pull request #16355 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit c14c8bf59fd1e97fe94c8dfd2db6df7f9a6c65ad
Author: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Mon Jul 22 12:03:56 2024 -0700

Implement parquet reading using pylibcudf in cudf-polars (#16346)

Replace cudf-classic with pylibcudf for parquet reading in cudf-polars.

Authors:
- Thomas Li (https://github.com/lithomas1)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16346

commit a3ebf3badd0c7375b3f24dd466d4db8fa127000e
Merge: edbb1bcd9c e0a00c1fcb
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Mon Jul 22 15:03:29 2024 -0400

Merge pull request #16354 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit e0a00c1fcb4b72b7abd29debe5b2f6b38081d39a
Author: Jayjeet Chakraborty
Date: Mon Jul 22 12:03:24 2024 -0700

Add `stream` param to list explode APIs (#16317)

Add `stream` param to list `explode*` APIs. Partially fixes https://github.com/rapidsai/cudf/issues/13744

Authors:
- Jayjeet Chakraborty (https://github.com/JayjeetAtGithub)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16317

commit edbb1bcd9c363876b79039caf7176270ee3eba03
Merge: b52ec0f436 e54b82c9f3
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Mon Jul 22 15:03:09 2024 -0400

Merge pull request #16353 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit e54b82c9f3499b35e7e789d41d2042a5d5a80810
Author: Mark Harris <783069+harrism@users.noreply.github.com>
Date: Tue Jul 23 05:03:04 2024 +1000

Use resource_ref for upstream in stream_checking_resource_adaptor (#16187)

As we move toward replacing all `device_memory_resource` pointers with `resource_ref`s, there are some places where changes can be made ahead of RMM to simplify required changes as RMM is refactored. In this PR I eliminate the unnecessary `Upstream` template parameter from `cudf_test::stream_checking_resource_adaptor`, and use a `device_async_resource` for the upstream resource. A similar change will be made to all RMM resource adaptors, but this one can be done without deprecations since it is just a test utility.

Authors:
- Mark Harris (https://github.com/harrism)
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16187
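The shape of the #16187 change, sketched on a made-up adaptor: the `Upstream` template parameter disappears and the adaptor holds a type-erased `rmm::device_async_resource_ref` instead of an `Upstream*`. The adaptor name and its two-method interface here are hypothetical, and the exact `resource_ref` call signatures are an assumption based on the cuda::mr interface:

```cpp
#include <rmm/resource_ref.hpp>

#include <cuda/stream_ref>

#include <cstddef>

class upstream_forwarding_adaptor {
 public:
  explicit upstream_forwarding_adaptor(rmm::device_async_resource_ref upstream)
    : upstream_{upstream}
  {
  }

  void* allocate(std::size_t bytes, cuda::stream_ref stream)
  {
    // Forward to whatever concrete resource the type-erased ref points at.
    return upstream_.allocate_async(bytes, alignof(std::max_align_t), stream);
  }

  void deallocate(void* ptr, std::size_t bytes, cuda::stream_ref stream)
  {
    upstream_.deallocate_async(ptr, bytes, alignof(std::max_align_t), stream);
  }

 private:
  rmm::device_async_resource_ref upstream_;
};
```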
Partially solves #13744

Authors:
- Jayjeet Chakraborty (https://github.com/JayjeetAtGithub)

Approvers:
- Mark Harris (https://github.com/harrism)
- Yunsong Wang (https://github.com/PointKernel)

URL: https://github.com/rapidsai/cudf/pull/16319

commit 0135e468808ccf7e8471e654bcd723eafb9c48c5
Merge: c53f9c54ac 135c99512e
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Mon Jul 22 10:13:37 2024 -0400

Merge pull request #16344 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 135c99512e5f7a2d38f6a870ad6883ccb39a3cce
Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon Jul 22 04:13:32 2024 -1000

Align Series APIs with pandas 2.x (#16333)

Similar to https://github.com/rapidsai/cudf/pull/16310, the following APIs have been modified to adjust/add parameters:

* `reindex`
* `reset_index`
* `add_suffix`
* `searchsorted`
* `clip`
* `mask`
* `shift`
* `dropna`
* `rename`
* `cov`
* `apply`
* `replace`

Authors:
- Matthew Roeschke (https://github.com/mroeschke)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/16333

commit c53f9c54ac9e4d25350f04ffcb41ceb5bca9bdb2
Merge: c636778de3 852b151002
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Mon Jul 22 09:48:23 2024 -0400

Merge pull request #16343 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 852b151002dc76e9f09d3529c80e4b589f1df9fc
Author: Lawrence Mitchell
Date: Mon Jul 22 14:48:18 2024 +0100

Fix issue in horizontal concat implementation in cudf-polars (#16271)

Shorter tables must be extended to the same length as the longest table.

Authors:
- Lawrence Mitchell (https://github.com/wence-)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16271

commit c636778de39491e24ace55d99dcfb29c574a20d2
Merge: dacc6c0baa e6537de747
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Fri Jul 19 23:10:44 2024 -0400

Merge pull request #16342 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit e6537de7474c91b4153542e6611c8a4e33a58caa
Author: Vyas Ramasubramani
Date: Fri Jul 19 20:10:40 2024 -0700

Experimental support for configurable prefetching (#16020)

This PR adds experimental support for prefetching managed memory at a select few points in libcudf. A new configuration object is introduced for handling whether prefetching is enabled or disabled, and whether to print debug information about pointers being prefetched. Prefetching control is managed on a per-API basis to enable profiling of the effects of prefetching different classes of data in different contexts.

Prefetching in this PR always occurs on the default stream, so it will trigger synchronization with any blocking streams that the user has created. Turning on prefetching and then passing non-blocking streams to any libcudf APIs will trigger undefined behavior.
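The CUDA primitive underneath the #16020 feature (a generic sketch, not libcudf's wrapper): a managed allocation is migrated to the GPU ahead of first use with `cudaMemPrefetchAsync`.

```cpp
#include <cuda_runtime.h>

#include <cstddef>

int main()
{
  constexpr std::size_t n = 1 << 20;
  float* data = nullptr;
  cudaMallocManaged(&data, n * sizeof(float));

  int device = 0;
  cudaGetDevice(&device);

  // Migrate the pages to `device` on the default stream, so a subsequent
  // kernel does not page-fault on first access.
  cudaMemPrefetchAsync(data, n * sizeof(float), device, 0);
  cudaDeviceSynchronize();

  cudaFree(data);
  return 0;
}
```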
Authors:
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- David Wendt (https://github.com/davidwendt)
- Kyle Edwards (https://github.com/KyleFromNVIDIA)
- Thomas Li (https://github.com/lithomas1)
- Muhammad Haseeb (https://github.com/mhaseeb123)

URL: https://github.com/rapidsai/cudf/pull/16020

commit dacc6c0baa47c89fe8e0d1c3d246bcc94a4b6416
Merge: 1ccdf15dd7 c5b96003ce
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Fri Jul 19 23:04:24 2024 -0400

Merge pull request #16341 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit c5b96003cef00b2635923d03edcd48a13821a61e
Author: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Fri Jul 19 20:04:19 2024 -0700

Migrate Parquet reader to pylibcudf (#16078)

xref #15162

Migrates the parquet reader (and chunked parquet reader) to pylibcudf. (Does not migrate the writers or the metadata reader yet.)

Authors:
- Thomas Li (https://github.com/lithomas1)
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)
- Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/16078

commit 1ccdf15dd736a1a08aa8f566a47ca0392ca33cac
Merge: 97e1bab151 26a3799d2f
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Fri Jul 19 22:49:07 2024 -0400

Merge pull request #16340 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 26a3799d2ff9ffb2aa72d63bb388b4bee70b3440
Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri Jul 19 16:49:01 2024 -1000

Make ColumnAccessor strictly require a mapping of columns (#16285)

`ColumnAccessor` had a default `data=None` argument and initialized an empty dict in the `__init__` if `data` was not passed. This PR now makes `data` a required argument.

Additionally, if `verify=True`, the `__init__` would call `as_column` on each `data.values()`, allowing non-`ColumnBase` inputs. This PR now avoids this call and makes the caller responsible for ensuring the inputs are `ColumnBase`s.

Also adds a few `verify=False` uses internally where we know we are passing columns from a libcudf op or reconstructing from another `ColumnAccessor`.

Authors:
- Matthew Roeschke (https://github.com/mroeschke)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16285

commit 97e1bab151184aa537edf39b7e838c07e07271a9
Merge: 5ad4c877ed 75335f6af5
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Fri Jul 19 21:21:32 2024 -0400

Merge pull request #16339 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 75335f6af51bde6be68c1fb0a6caa8030b9eda3e
Author: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com>
Date: Fri Jul 19 18:21:27 2024 -0700

Report number of rows per file read by PQ reader when no row selection and fix segfault in chunked PQ reader when skip_rows > 0 (#16195)

Closes #15389
Closes #16186

This PR adds the capability to calculate and report the number of rows read from each data source into the table returned by the Parquet reader (both chunked and normal). The returned vector of counts is only valid (non-empty) when row selection (AST filter) is not being used.

This PR also fixes a segfault in the chunked parquet reader when skip_rows > 0 and the number of passes > 1.
This segfault was being caused by a couple of arithmetic errors when computing the (start_row, num_row) for the row_group_info, pass, and column chunk descriptor structs. Both changes were added to this PR, as the changes and the gtests from the former work were needed to implement the segfault fix.

Authors:
- Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)
- Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/16195

commit 5ad4c877ed631094f358f87c003ee9b381e9e270
Merge: ebacf394d9 535db9b26e
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Fri Jul 19 20:28:20 2024 -0400

Merge pull request #16338 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 535db9b26ed1a57e4275f4a6f11b04ebeee21248
Author: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Fri Jul 19 17:28:14 2024 -0700

Deprecate Arrow support in I/O (#16132)

Contributes to https://github.com/rapidsai/cudf/issues/15193

Authors:
- Thomas Li (https://github.com/lithomas1)
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- Richard (Rick) Zamora (https://github.com/rjzamora)
- Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/16132

commit ebacf394d975fa5a0f65a7337d5587c9e8273902
Merge: b11cdf854d e169e8e427
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Fri Jul 19 19:36:08 2024 -0400

Merge pull request #16337 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit e169e8e4273e4d317e3f27c810c5b137dd75adb3
Author: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Fri Jul 19 16:36:03 2024 -0700

Implement read_csv in cudf-polars using pylibcudf (#16307)

Replace cudf-classic with pylibcudf for CSV reading in cudf-polars.

Authors:
- Thomas Li (https://github.com/lithomas1)
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/16307

commit b11cdf854d64e248d682ad2d8178f8ae08e34b3e
Merge: d82caec4e0 5dde41d7f7
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Fri Jul 19 19:08:41 2024 -0400

Merge pull request #16334 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 5dde41d7f7533180ecd355bac248a7ed18adcc10
Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri Jul 19 13:08:36 2024 -1000

Replace is_float/integer_dtype checks with .kind checks (#16261)

It appears these were called when we already had a dtype object, so we can instead simply check the `.kind` attribute.

Authors:
- Matthew Roeschke (https://github.com/mroeschke)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16261

commit d82caec4e04468b497f2d553221c6314c53f9d10
Merge: 3c3ee56637 4c46628eaf
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Fri Jul 19 18:51:12 2024 -0400

Merge pull request #16332 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 4c46628eaf7ba16a2a181ceb3311f315cd4932dc
Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri Jul 19 12:51:07 2024 -1000

Mark cudf._typing as a typing module in ruff (#16318)

Additionally breaks up the prior single line of `select` rules that are enabled.
Authors:
- Matthew Roeschke (https://github.com/mroeschke)

Approvers:
- Thomas Li (https://github.com/lithomas1)
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16318

commit 3c3ee56637116e07804f20efab46d4dd3aa7c4cf
Merge: 1cb07e0c29 7d3083254c
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Fri Jul 19 18:48:43 2024 -0400

Merge pull request #16331 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 7d3083254c0503b07f82af32188120f42acef860
Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri Jul 19 12:48:39 2024 -1000

Replace np.isscalar/issubdtype checks with is_scalar/.kind checks (#16275)

* `is_scalar` also handles `cudf.Scalar`s, which should be handled internally
* `issubdtype` can largely be replaced by checking the `.kind` attribute on the dtype

Authors:
- Matthew Roeschke (https://github.com/mroeschke)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16275

commit 1cb07e0c29c0b6acd1896ecef867afeca27a84c1
Merge: 52657b3375 57ed7fce67
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Fri Jul 19 18:25:01 2024 -0400

Merge pull request #16330 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit 57ed7fce6742abc96a8fd65216f032bad5937a2f
Author: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Fri Jul 19 17:24:55 2024 -0500

Add tests for `pylibcudf` binaryops (#15470)

This PR implements a more general approach to testing binaryops that originally came up in https://github.com/rapidsai/cudf/pull/15279. This PR can possibly supersede that one.

Authors:
- https://github.com/brandon-b-miller

Approvers:
- Lawrence Mitchell (https://github.com/wence-)
- Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/15470

commit 52657b3375c900a66b6ec5f8d7e1ebe37c38232f
Merge: 6be515506d ecc27a1140
Author: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Fri Jul 19 17:55:45 2024 -0400

Merge pull request #16329 from rapidsai/branch-24.08

Forward-merge branch-24.08 into branch-24.10

commit ecc27a1140c0c287091f6a1291dfaf7ccd82cb19
Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri Jul 19 11:55:40 2024 -1000

Align more DataFrame APIs with pandas (#16310)

I have a script that did some signature comparisons between the `pandas.DataFrame` and `cudf.DataFrame` APIs, and it appears some signatures have changed between the pandas 1.x and 2.x releases. The API changes in this PR are mostly adding implementations or adding missing keyword arguments (although they might not be implemented). The APIs affected are:

* `__init__`
* `__array__`
* `__arrow_c_stream__`
* `to_dict`
* `where`
* `add_prefix`
* `join`
* `apply`
* `to_records`
* `from_records`
* `unstack`
* `pct_change`
* `sort_values`

Marking as breaking as I ensured some added keywords are in the same positions as pandas and therefore might break users who are using purely positional arguments.
Authors: - Matthew Roeschke (https://github.com/mroeschke) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/16310 commit 6be515506d4a6f833e71ac67f16c2925f7b8576b Merge: fcaea56166 6e37afc7c9 Author: gpuCI <38199262+GPUtester@users.noreply.github.com> Date: Fri Jul 19 17:52:32 2024 -0400 Merge pull request #16328 from rapidsai/branch-24.08 Forward-merge branch-24.08 into branch-24.10 commit 6e37afc7c9e177b307c41950e52453bd5906af44 Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri Jul 19 11:52:27 2024 -1000 Make __bool__ raise for more cudf objects (#16311) To match pandas, this PR makes `DataFrame`, `MultiIndex`, and `RangeIndex` raise on `__bool__`. Authors: - Matthew Roeschke (https://github.com/mroeschke) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/16311 commit fcaea56166e2d8f8b1916d702ec8572a9e12b2be Merge: 051fadd250 910989eb8f Author: gpuCI <38199262+GPUtester@users.noreply.github.com> Date: Fri Jul 19 17:48:42 2024 -0400 Merge pull request #16327 from rapidsai/branch-24.08 Forward-merge branch-24.08 into branch-24.10 commit 910989eb8fb87b2e896aa032260705c27cce71e0 Author: Bradley Dice Date: Fri Jul 19 15:48:37 2024 -0600 Rename gather/scatter benchmarks to clarify coalesced behavior. (#16083) The benchmark names `coalesce_x` and `coalesce_o` are not very clear. This PR renames them to `coalesced` and `shuffled`. This was discussed with @GregoryKimball. Authors: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Karthikeyan (https://github.com/karthikeyann) - Mike Wilson (https://github.com/hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/16083 commit 051fadd2500bc20b90b74d662deec918ee27f299 Merge: ece86996ad fa0d89d9b4 Author: gpuCI <38199262+GPUtester@users.noreply.github.com> Date: Fri Jul 19 17:46:33 2024 -0400 Merge pull request #16326 from rapidsai/branch-24.08 Forward-merge branch-24.08 into branch-24.10 commit fa0d89d9b4b4152b919999b5f01b1e68407469c5 Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri Jul 19 11:46:28 2024 -1000 Clean unneeded/redundant dtype utils (#16309) * Replace `min_scalar_type` with `min_signed_type` (the former just called the latter) * Replace `numeric_normalize_types` with `find_common_dtype` followed by a column `astype` * Removed `_NUMPY_SCTYPES`, instead hardcoding the integer/floating types or using `np.integer`/`np.floating` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16309 commit ece86996ad69b1631e0da6f4dfb551cda38585a8 Merge: f47c891a2e 18f5fe0010 Author: gpuCI <38199262+GPUtester@users.noreply.github.com> Date: Fri Jul 19 17:41:47 2024 -0400 Merge pull request #16325 from rapidsai/branch-24.08 Forward-merge branch-24.08 into branch-24.10 commit 18f5fe0010fd42f604a340cd025a9ca9e122c6f5 Author: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Fri Jul 19 14:41:39 2024 -0700 Fix polars for 1.2.1 (#16316) I think Polars made a breaking change in a patch release.
At least the error we're getting looks like the error from https://github.com/pola-rs/polars/pull/17606. Authors: - Thomas Li (https://github.com/lithomas1) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16316 commit f47c891a2ea3a0de4bb0462d557531e046860fbb Merge: c61638cbeb 3df4ac2842 Author: gpuCI <38199262+GPUtester@users.noreply.github.com> Date: Fri Jul 19 16:46:23 2024 -0400 Merge pull request #16323 from rapidsai/branch-24.08 Forward-merge branch-24.08 into branch-24.10 commit 3df4ac28423b99e4dd88570da8d55e2e5af2e1bc Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri Jul 19 10:46:18 2024 -1000 Remove squeeze argument from groupby (#16312) In pandas, this argument was deprecated in pandas 1.x and removed in pandas 2.x. xref https://github.com/pandas-dev/pandas/pull/33218 It looks like this argument was never implemented in cudf, so to align with pandas, I think it should be OK to just remove it. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16312 commit c61638cbeb4eeb9ce9244508edbe33ccc301b07e Merge: f7e9d0c0f8 cb570fe6d7 Author: gpuCI <38199262+GPUtester@users.noreply.github.com> Date: Fri Jul 19 16:45:35 2024 -0400 Merge pull request #16322 from rapidsai/branch-24.08 Forward-merge branch-24.08 into branch-24.10 commit cb570fe6d7dc7ebdd6c8c030916ba27bef277b5e Author: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri Jul 19 10:45:30 2024 -1000 Deprecate dtype= parameter in reduction methods (#16313) In terms of pandas alignment, this argument doesn't exist in reduction ops. Additionally, the same result can easily be achieved by calling `astype` after the operation, and it appears libcudf does not support arbitrary casting to an output type. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16313 commit f7e9d0c0f829118f06054c2e03425d7ddf33767e Merge: f364fdcd44 dc62177a64 Author: gpuCI <38199262+GPUtester@users.noreply.github.com> Date: Fri Jul 19 15:17:46 2024 -0400 Merge pull request #16320 from rapidsai/branch-24.08 Forward-merge branch-24.08 into branch-24.10 commit dc62177a64a5fb4d6521f346ff0f44c2ede740f6 Author: Lawrence Mitchell Date: Fri Jul 19 20:17:42 2024 +0100 Preserve order in left join for cudf-polars (#16268) Unlike all other joins, polars provides an ordering guarantee for left joins. By default libcudf does not, so we need to order the gather maps in this case. While here, because this would otherwise require another hard-coding of `int32` for something that should be `size_type`, expose `type_to_id` in Cython and plumb it through. Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16268 commit f364fdcd44540b6d5403f1d08acbebfff4e78bd4 Author: Ray Douglass Date: Fri Jul 19 14:56:13 2024 -0400 DOC v24.10 Updates [skip ci] commit d5ab48d4f2586d2e45234463c1bbe877ce76afe8 Author: Kyle Edwards Date: Fri Jul 19 14:32:54 2024 -0400 Use workflow branch 24.08 again (#16314) After updating everything to CUDA 12.5.1, use `shared-workflows@branch-24.08` again.
Contributes to https://github.com/rapidsai/build-planning/issues/73 Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - James Lamb (https://github.com/jameslamb) - https://github.com/jakirkham URL: https://github.com/rapidsai/cudf/pull/16314 commit 2bbeee95ec338c30c0c876dc6a58376fbb0a5a06 Author: Ray Bell Date: Fri Jul 19 12:43:49 2024 -0400 DOC: use intersphinx mapping in pandas-compat ext (#15846) ~~If https://github.com/rapidsai/cudf/pull/15704 is merged~~ This PR changes the header in the admonition (pandas compat box) to be hyperlinked to the pandas docs instead of just text. See https://raybellwaves.github.io/compatsphinxext/compat.html, the docs of a minimal repo where I have been testing this. Authors: - Ray Bell (https://github.com/raybellwaves) - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/15846 commit 461ed33753545832da0ff13bf01cf922a651bf0a Merge: 9a713e3adb 752b1f32b1 Author: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Fri Jul 19 11:06:06 2024 -0500 Merge pull request #16315 from vyasr/branch-24.08-merge-branch-24.06 Branch 24.08 merge branch 24.06 commit 752b1f32b128b69847c8fc306f1c28ab7f91354b Merge: 9a713e3adb 781794bb52 Author: Vyas Ramasubramani Date: Fri Jul 19 15:04:17 2024 +0000 Merge branch 'branch-24.06' into branch-24.08-merge-branch-24.06 commit 9a713e3adb8abb1f41de0445b8ea896fdb48c560 Author: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Fri Jul 19 10:34:16 2024 -0400 Migrate lists/count_elements to pylibcudf (#16072) Part of #15162 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Thomas Li (https://github.com/lithomas1) URL: https://github.com/rapidsai/cudf/pull/16072 commit 8ff27ed5bcaf8fc5fc8d1f546dee30c59861c320 Author: Lawrence Mitchell Date: Fri Jul 19 15:15:20 2024 +0100 Support Literals in groupby-agg (#16218) To do this, we just need to collect the appropriate aggregation information and broadcast literals to the correct size. Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16218 commit debbef0bc12f523054740432983030dd0b24f9c4 Author: Lawrence Mitchell Date: Fri Jul 19 15:12:56 2024 +0100 Update vendored thread_pool implementation (#16210) Since we introduced the vendored thread_pool in #8752, upstream has introduced some new features, and in particular now uses condition variables/notifications to handle the case where there are no tasks in the queue (see the conceptual sketch below). This avoids the issue described in #16209, where the thread pool by default artificially introduces a delay of 1000 microseconds to all tasks whenever the task queue is emptied. - Closes #16209 Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Bradley Dice (https://github.com/bdice) - Robert Maynard (https://github.com/robertmaynard) URL: https://github.com/rapidsai/cudf/pull/16210 commit 781794bb52448f617351ed96441a8e2fdb765dd7 Author: Vyas Ramasubramani Date: Mon Jul 1 14:59:04 2024 -0700 Backport #16045 to 24.06 (#16102) Backporting #16045 for a patch release.
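A conceptual sketch of the thread-pool change described in #16210 above, written in Python as a stand-in for the vendored C++ implementation (none of these names come from the actual library): workers block on a condition variable and are notified when work arrives, rather than polling the queue with a fixed sleep.

```python
import threading
from collections import deque

class TinyPool:
    """Single-worker pool illustrating wait/notify instead of polling."""

    def __init__(self) -> None:
        self.tasks = deque()
        self.cv = threading.Condition()
        threading.Thread(target=self._run, daemon=True).start()

    def submit(self, fn) -> None:
        with self.cv:
            self.tasks.append(fn)
            self.cv.notify()  # wake a waiting worker immediately

    def _run(self) -> None:
        while True:
            with self.cv:
                while not self.tasks:
                    # Blocks until notified; no fixed per-task delay
                    # like a sleep-based polling loop would add.
                    self.cv.wait()
                fn = self.tasks.popleft()
            fn()  # run the task outside the lock
```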
--------- Co-authored-by: Paul Mattione <156858817+pmattione-nvidia@users.noreply.github.com> commit dfab1b589e5907b324dc1688f6dab862d194012c Author: Bradley Dice Date: Mon Jul 1 15:33:42 2024 -0500 Backport: Use size_t to allow large conditional joins (#16127) (#16133) Backports #16127 to 24.06 for inclusion in a hotfix release. --------- Co-authored-by: Vyas Ramasubramani commit e41242094092f9ed31fd4d04f8a30107c1ffb2ff Author: Vyas Ramasubramani Date: Mon Jul 1 11:24:52 2024 -0700 Backport #16038 to 24.06 (#16101) Backporting #16038 for a patch release. --------- Co-authored-by: Paul Mattione <156858817+pmattione-nvidia@users.noreply.github.com> commit 4e34a20a31fae2546f9cfbaa520d7561b80563c7 Author: Bradley Dice Date: Mon Jul 1 11:18:25 2024 -0500 Backport: Fix segfault in conditional join (#16094) (#16100) Backports #16094 to 24.06 for inclusion in a hotfix release. --- .../cuda11.8-conda/devcontainer.json | 6 +- .devcontainer/cuda11.8-pip/devcontainer.json | 6 +- .../cuda12.5-conda/devcontainer.json | 9 +- .devcontainer/cuda12.5-pip/devcontainer.json | 6 +- .github/workflows/build.yaml | 20 +- .github/workflows/pandas-tests.yaml | 2 +- .github/workflows/pr.yaml | 46 +- .../workflows/pr_issue_status_automation.yml | 6 +- .github/workflows/test.yaml | 22 +- README.md | 2 +- VERSION | 2 +- ci/build_python.sh | 7 + ci/cudf_pandas_scripts/pandas-tests/run.sh | 2 +- ci/release/update-version.sh | 5 +- ci/test_wheel_cudf_polars.sh | 2 +- .../all_cuda-118_arch-x86_64.yaml | 10 +- .../all_cuda-125_arch-x86_64.yaml | 10 +- cpp/CMakeLists.txt | 13 +- cpp/benchmarks/common/generate_input.cu | 2 +- cpp/benchmarks/copying/gather.cu | 6 +- cpp/benchmarks/copying/scatter.cu | 6 +- cpp/benchmarks/fixture/benchmark_fixture.hpp | 2 +- .../groupby/group_max_multithreaded.cpp | 10 +- cpp/benchmarks/io/fst.cu | 16 +- .../io/orc/orc_reader_multithreaded.cpp | 26 +- .../io/parquet/parquet_reader_multithread.cpp | 26 +- cpp/benchmarks/lists/copying/scatter_lists.cu | 6 +- cpp/cmake/thirdparty/get_nanoarrow.cmake | 4 +- cpp/cmake/thirdparty/get_thread_pool.cmake | 25 + .../thirdparty/patches/cccl_override.json | 5 + .../patches/cccl_symbol_visibility.diff | 27 + .../developer_guide/DEVELOPER_GUIDE.md | 27 +- cpp/doxygen/developer_guide/DOCUMENTATION.md | 6 +- cpp/examples/tpch/CMakeLists.txt | 4 + cpp/examples/tpch/q1.cpp | 2 +- cpp/examples/tpch/q10.cpp | 166 +++ cpp/examples/tpch/q5.cpp | 20 +- cpp/examples/tpch/q6.cpp | 2 +- cpp/examples/versions.cmake | 2 +- cpp/include/cudf/aggregation.hpp | 5 +- .../cudf/ast/detail/expression_parser.hpp | 11 +- .../ast/detail/expression_transformer.hpp | 10 +- cpp/include/cudf/ast/detail/operators.hpp | 4 +- cpp/include/cudf/ast/expressions.hpp | 4 +- cpp/include/cudf/binaryop.hpp | 31 +- cpp/include/cudf/column/column.hpp | 4 +- .../cudf/column/column_device_view.cuh | 4 +- cpp/include/cudf/column/column_factories.hpp | 4 +- cpp/include/cudf/column/column_view.hpp | 59 +- cpp/include/cudf/concatenate.hpp | 5 +- cpp/include/cudf/contiguous_split.hpp | 13 +- cpp/include/cudf/copying.hpp | 9 +- cpp/include/cudf/datetime.hpp | 5 +- .../cudf/detail/aggregation/aggregation.hpp | 4 +- .../cudf/detail/aggregation/result_cache.hpp | 6 +- cpp/include/cudf/detail/binaryop.hpp | 5 +- cpp/include/cudf/detail/concatenate.hpp | 5 +- cpp/include/cudf/detail/concatenate_masks.hpp | 5 +- cpp/include/cudf/detail/contiguous_split.hpp | 4 +- cpp/include/cudf/detail/copy.hpp | 4 +- cpp/include/cudf/detail/datetime.hpp | 4 +- cpp/include/cudf/detail/fill.hpp | 4 +- 
cpp/include/cudf/detail/gather.cuh | 6 +- cpp/include/cudf/detail/gather.hpp | 5 +- cpp/include/cudf/detail/groupby.hpp | 13 +- .../detail/groupby/group_replace_nulls.hpp | 4 +- .../cudf/detail/groupby/sort_helper.hpp | 12 +- cpp/include/cudf/detail/interop.hpp | 5 +- cpp/include/cudf/detail/is_element_valid.hpp | 7 +- cpp/include/cudf/detail/join.hpp | 9 +- cpp/include/cudf/detail/label_bins.hpp | 4 +- cpp/include/cudf/detail/merge.hpp | 6 +- cpp/include/cudf/detail/null_mask.cuh | 4 +- cpp/include/cudf/detail/null_mask.hpp | 12 +- cpp/include/cudf/detail/quantiles.hpp | 5 +- cpp/include/cudf/detail/repeat.hpp | 4 +- cpp/include/cudf/detail/replace.hpp | 4 +- cpp/include/cudf/detail/reshape.hpp | 8 +- cpp/include/cudf/detail/rolling.hpp | 4 +- cpp/include/cudf/detail/round.hpp | 4 +- cpp/include/cudf/detail/scan.hpp | 7 +- cpp/include/cudf/detail/scatter.hpp | 5 +- cpp/include/cudf/detail/search.hpp | 8 +- cpp/include/cudf/detail/sequence.hpp | 4 +- cpp/include/cudf/detail/sorting.hpp | 4 +- cpp/include/cudf/detail/stream_compaction.hpp | 4 +- cpp/include/cudf/detail/structs/utilities.hpp | 8 +- cpp/include/cudf/detail/tdigest/tdigest.hpp | 12 +- cpp/include/cudf/detail/timezone.hpp | 7 +- cpp/include/cudf/detail/transform.hpp | 5 +- cpp/include/cudf/detail/transpose.hpp | 5 +- cpp/include/cudf/detail/unary.hpp | 5 +- .../cudf/detail/utilities/alignment.hpp | 6 +- .../cudf/detail/utilities/cuda_memcpy.hpp | 8 +- .../cudf/detail/utilities/default_stream.hpp | 8 +- .../cudf/detail/utilities/host_memory.hpp | 51 + .../cudf/detail/utilities/host_vector.hpp | 31 +- .../cudf/detail/utilities/linked_column.hpp | 9 +- .../cudf/detail/utilities/stacktrace.hpp | 10 +- .../cudf/detail/utilities/stream_pool.hpp | 7 +- .../detail/utilities/vector_factories.hpp | 111 +- cpp/include/cudf/detail/valid_if.cuh | 2 +- .../cudf/dictionary/detail/concatenate.hpp | 10 +- cpp/include/cudf/dictionary/detail/encode.hpp | 10 +- cpp/include/cudf/dictionary/detail/merge.hpp | 10 +- .../cudf/dictionary/detail/replace.hpp | 10 +- cpp/include/cudf/dictionary/detail/search.hpp | 5 +- .../cudf/dictionary/detail/update_keys.hpp | 10 +- .../dictionary/dictionary_column_view.hpp | 6 +- .../cudf/dictionary/dictionary_factories.hpp | 17 +- cpp/include/cudf/dictionary/encode.hpp | 4 +- cpp/include/cudf/dictionary/search.hpp | 4 +- cpp/include/cudf/dictionary/update_keys.hpp | 4 +- cpp/include/cudf/filling.hpp | 5 +- cpp/include/cudf/fixed_point/fixed_point.hpp | 4 +- .../cudf/fixed_point/floating_conversion.hpp | 5 +- cpp/include/cudf/fixed_point/temporary.hpp | 4 +- cpp/include/cudf/groupby.hpp | 5 +- cpp/include/cudf/hashing.hpp | 5 +- cpp/include/cudf/hashing/detail/hashing.hpp | 10 +- cpp/include/cudf/interop.hpp | 88 +- cpp/include/cudf/interop/detail/arrow.hpp | 53 - cpp/include/cudf/io/arrow_io_source.hpp | 8 +- cpp/include/cudf/io/avro.hpp | 4 +- cpp/include/cudf/io/csv.hpp | 4 +- cpp/include/cudf/io/data_sink.hpp | 6 +- cpp/include/cudf/io/datasource.hpp | 7 +- cpp/include/cudf/io/detail/avro.hpp | 13 +- cpp/include/cudf/io/detail/csv.hpp | 13 +- cpp/include/cudf/io/detail/json.hpp | 7 +- cpp/include/cudf/io/detail/orc.hpp | 13 +- cpp/include/cudf/io/detail/parquet.hpp | 13 +- cpp/include/cudf/io/detail/tokenize_json.hpp | 5 +- cpp/include/cudf/io/detail/utils.hpp | 15 +- cpp/include/cudf/io/json.hpp | 8 +- cpp/include/cudf/io/orc.hpp | 14 +- cpp/include/cudf/io/orc_metadata.hpp | 5 +- cpp/include/cudf/io/orc_types.hpp | 10 +- cpp/include/cudf/io/parquet.hpp | 18 +- cpp/include/cudf/io/parquet_metadata.hpp | 5 +- 
cpp/include/cudf/io/text/byte_range_info.hpp | 5 +- .../cudf/io/text/data_chunk_source.hpp | 5 +- .../io/text/data_chunk_source_factories.hpp | 9 +- .../cudf/io/text/detail/bgzip_utils.hpp | 7 +- .../cudf/io/text/detail/multistate.hpp | 8 +- .../cudf/io/text/detail/tile_state.hpp | 6 +- cpp/include/cudf/io/text/detail/trie.hpp | 9 +- cpp/include/cudf/io/text/multibyte_split.hpp | 4 +- cpp/include/cudf/io/types.hpp | 11 +- cpp/include/cudf/join.hpp | 16 +- cpp/include/cudf/json/json.hpp | 5 +- cpp/include/cudf/labeling/label_bins.hpp | 4 +- cpp/include/cudf/lists/combine.hpp | 5 +- cpp/include/cudf/lists/contains.hpp | 5 +- cpp/include/cudf/lists/count_elements.hpp | 5 +- cpp/include/cudf/lists/detail/combine.hpp | 10 +- cpp/include/cudf/lists/detail/concatenate.hpp | 10 +- cpp/include/cudf/lists/detail/contains.hpp | 10 +- cpp/include/cudf/lists/detail/copying.hpp | 10 +- cpp/include/cudf/lists/detail/dremel.hpp | 17 +- cpp/include/cudf/lists/detail/extract.hpp | 10 +- cpp/include/cudf/lists/detail/gather.cuh | 3 + .../cudf/lists/detail/interleave_columns.hpp | 10 +- .../lists/detail/lists_column_factories.hpp | 10 +- cpp/include/cudf/lists/detail/reverse.hpp | 7 +- cpp/include/cudf/lists/detail/scatter.cuh | 10 +- .../cudf/lists/detail/set_operations.hpp | 6 +- cpp/include/cudf/lists/detail/sorting.hpp | 10 +- .../cudf/lists/detail/stream_compaction.hpp | 7 +- cpp/include/cudf/lists/explode.hpp | 12 +- cpp/include/cudf/lists/extract.hpp | 5 +- cpp/include/cudf/lists/filling.hpp | 6 +- cpp/include/cudf/lists/gather.hpp | 5 +- cpp/include/cudf/lists/list_device_view.cuh | 4 +- cpp/include/cudf/lists/list_view.hpp | 8 +- .../cudf/lists/lists_column_device_view.cuh | 8 +- cpp/include/cudf/lists/lists_column_view.hpp | 5 +- cpp/include/cudf/lists/reverse.hpp | 7 +- cpp/include/cudf/lists/set_operations.hpp | 8 +- cpp/include/cudf/lists/sorting.hpp | 5 +- cpp/include/cudf/lists/stream_compaction.hpp | 7 +- cpp/include/cudf/merge.hpp | 5 +- cpp/include/cudf/null_mask.hpp | 5 +- cpp/include/cudf/partitioning.hpp | 5 +- cpp/include/cudf/quantiles.hpp | 5 +- cpp/include/cudf/reduction.hpp | 5 +- .../cudf/reduction/detail/histogram.hpp | 7 +- .../cudf/reduction/detail/reduction.hpp | 7 +- .../reduction/detail/reduction_functions.hpp | 11 +- .../detail/segmented_reduction_functions.hpp | 11 +- cpp/include/cudf/replace.hpp | 5 +- cpp/include/cudf/reshape.hpp | 22 +- cpp/include/cudf/rolling.hpp | 5 +- .../cudf/rolling/range_window_bounds.hpp | 5 +- cpp/include/cudf/round.hpp | 5 +- cpp/include/cudf/scalar/scalar.hpp | 4 +- .../cudf/scalar/scalar_device_view.cuh | 6 +- cpp/include/cudf/scalar/scalar_factories.hpp | 4 +- cpp/include/cudf/search.hpp | 5 +- cpp/include/cudf/sorting.hpp | 5 +- cpp/include/cudf/stream_compaction.hpp | 5 +- cpp/include/cudf/strings/attributes.hpp | 4 +- cpp/include/cudf/strings/capitalize.hpp | 4 +- cpp/include/cudf/strings/case.hpp | 4 +- .../cudf/strings/char_types/char_cases.hpp | 8 +- .../cudf/strings/char_types/char_types.hpp | 4 +- .../strings/char_types/char_types_enum.hpp | 6 +- cpp/include/cudf/strings/combine.hpp | 4 +- cpp/include/cudf/strings/contains.hpp | 4 +- .../cudf/strings/convert/convert_booleans.hpp | 4 +- .../cudf/strings/convert/convert_datetime.hpp | 4 +- .../strings/convert/convert_durations.hpp | 4 +- .../strings/convert/convert_fixed_point.hpp | 4 +- .../cudf/strings/convert/convert_floats.hpp | 4 +- .../cudf/strings/convert/convert_integers.hpp | 4 +- .../cudf/strings/convert/convert_ipv4.hpp | 4 +- .../cudf/strings/convert/convert_lists.hpp | 4 +- 
.../cudf/strings/convert/convert_urls.hpp | 4 +- .../cudf/strings/detail/char_tables.hpp | 14 +- cpp/include/cudf/strings/detail/combine.hpp | 11 +- .../cudf/strings/detail/concatenate.hpp | 11 +- .../cudf/strings/detail/converters.hpp | 11 +- .../cudf/strings/detail/copy_range.hpp | 10 +- cpp/include/cudf/strings/detail/copying.hpp | 11 +- cpp/include/cudf/strings/detail/fill.hpp | 11 +- cpp/include/cudf/strings/detail/gather.cuh | 7 +- cpp/include/cudf/strings/detail/merge.hpp | 7 +- cpp/include/cudf/strings/detail/replace.hpp | 11 +- cpp/include/cudf/strings/detail/scan.hpp | 10 +- .../cudf/strings/detail/strings_children.cuh | 2 + cpp/include/cudf/strings/detail/utf8.hpp | 10 +- cpp/include/cudf/strings/detail/utilities.hpp | 11 +- cpp/include/cudf/strings/extract.hpp | 4 +- cpp/include/cudf/strings/find.hpp | 4 +- cpp/include/cudf/strings/find_multiple.hpp | 4 +- cpp/include/cudf/strings/findall.hpp | 4 +- cpp/include/cudf/strings/padding.hpp | 4 +- cpp/include/cudf/strings/regex/flags.hpp | 8 +- .../cudf/strings/regex/regex_program.hpp | 4 +- cpp/include/cudf/strings/repeat_strings.hpp | 4 +- cpp/include/cudf/strings/replace.hpp | 4 +- cpp/include/cudf/strings/replace_re.hpp | 4 +- cpp/include/cudf/strings/reverse.hpp | 4 +- cpp/include/cudf/strings/side_type.hpp | 8 +- cpp/include/cudf/strings/slice.hpp | 4 +- cpp/include/cudf/strings/split/partition.hpp | 4 +- cpp/include/cudf/strings/split/split.hpp | 4 +- cpp/include/cudf/strings/split/split_re.hpp | 4 +- cpp/include/cudf/strings/string_view.cuh | 5 +- cpp/include/cudf/strings/string_view.hpp | 6 +- .../cudf/strings/strings_column_view.hpp | 5 +- cpp/include/cudf/strings/strip.hpp | 4 +- cpp/include/cudf/strings/translate.hpp | 4 +- cpp/include/cudf/strings/wrap.hpp | 4 +- .../cudf/structs/detail/concatenate.hpp | 11 +- cpp/include/cudf/structs/detail/scan.hpp | 11 +- cpp/include/cudf/structs/struct_view.hpp | 6 +- .../structs/structs_column_device_view.cuh | 6 +- .../cudf/structs/structs_column_view.hpp | 6 +- .../cudf/table/experimental/row_operators.cuh | 4 +- cpp/include/cudf/table/row_operators.cuh | 4 +- cpp/include/cudf/table/table.hpp | 4 +- cpp/include/cudf/table/table_device_view.cuh | 6 +- .../cudf/tdigest/tdigest_column_view.hpp | 6 +- cpp/include/cudf/timezone.hpp | 6 +- cpp/include/cudf/transform.hpp | 5 +- cpp/include/cudf/transpose.hpp | 5 +- cpp/include/cudf/types.hpp | 6 +- cpp/include/cudf/unary.hpp | 5 +- cpp/include/cudf/utilities/bit.hpp | 4 +- cpp/include/cudf/utilities/default_stream.hpp | 7 +- cpp/include/cudf/utilities/error.hpp | 9 +- cpp/include/cudf/utilities/pinned_memory.hpp | 22 +- cpp/include/cudf/utilities/prefetch.hpp | 163 +++ cpp/include/cudf/utilities/span.hpp | 37 +- cpp/include/cudf/utilities/thread_pool.hpp | 381 ------ cpp/include/cudf/utilities/traits.cuh | 6 +- cpp/include/cudf/utilities/traits.hpp | 4 +- cpp/include/cudf/utilities/type_checks.hpp | 4 +- .../cudf/utilities/type_dispatcher.hpp | 6 +- cpp/include/cudf/wrappers/dictionary.hpp | 4 +- cpp/include/cudf/wrappers/durations.hpp | 6 +- cpp/include/cudf/wrappers/timestamps.hpp | 5 +- cpp/include/cudf_test/base_fixture.hpp | 5 +- cpp/include/cudf_test/column_utilities.hpp | 12 +- cpp/include/cudf_test/column_wrapper.hpp | 8 +- cpp/include/cudf_test/debug_utilities.hpp | 9 +- cpp/include/cudf_test/default_stream.hpp | 8 +- cpp/include/cudf_test/file_utilities.hpp | 3 +- .../cudf_test/io_metadata_utilities.hpp | 9 +- cpp/include/cudf_test/iterator_utilities.hpp | 7 +- cpp/include/cudf_test/print_utilities.cuh | 7 +- 
cpp/include/cudf_test/random.hpp | 5 +- .../stream_checking_resource_adaptor.hpp | 35 +- cpp/include/cudf_test/table_utilities.hpp | 9 +- cpp/include/cudf_test/tdigest_utilities.cuh | 7 +- cpp/include/cudf_test/testing_main.hpp | 9 +- cpp/include/cudf_test/timestamp_utilities.cuh | 5 +- cpp/include/cudf_test/type_list_utilities.hpp | 8 +- cpp/include/cudf_test/type_lists.hpp | 5 +- cpp/include/nvtext/byte_pair_encoding.hpp | 5 +- cpp/include/nvtext/detail/generate_ngrams.hpp | 4 +- cpp/include/nvtext/detail/load_hash_file.hpp | 4 +- cpp/include/nvtext/detail/tokenize.hpp | 4 +- cpp/include/nvtext/edit_distance.hpp | 5 +- cpp/include/nvtext/generate_ngrams.hpp | 5 +- cpp/include/nvtext/jaccard.hpp | 5 +- cpp/include/nvtext/minhash.hpp | 5 +- cpp/include/nvtext/ngrams_tokenize.hpp | 5 +- cpp/include/nvtext/normalize.hpp | 5 +- cpp/include/nvtext/replace.hpp | 5 +- cpp/include/nvtext/stemmer.hpp | 5 +- cpp/include/nvtext/subword_tokenize.hpp | 5 +- cpp/include/nvtext/tokenize.hpp | 5 +- cpp/src/aggregation/aggregation.cpp | 350 +++--- cpp/src/binaryop/binaryop.cpp | 7 +- cpp/src/binaryop/compiled/binary_ops.cu | 1 + cpp/src/bitmask/is_element_valid.cpp | 5 +- cpp/src/column/column_view.cpp | 42 + cpp/src/copying/concatenate.cu | 7 +- cpp/src/copying/contiguous_split.cu | 3 +- cpp/src/copying/purge_nonempty_nulls.cu | 1 + cpp/src/datetime/timezone.cpp | 6 +- cpp/src/dictionary/detail/concatenate.cu | 2 +- cpp/src/dictionary/dictionary_factories.cu | 13 +- cpp/src/dictionary/set_keys.cu | 1 + cpp/src/filling/calendrical_month_sequence.cu | 1 + cpp/src/interop/arrow_utilities.cpp | 31 + cpp/src/interop/arrow_utilities.hpp | 43 +- .../interop/decimal_conversion_utilities.cu | 70 ++ .../interop/decimal_conversion_utilities.cuh | 44 + cpp/src/interop/from_arrow_device.cu | 10 +- cpp/src/interop/from_arrow_host.cu | 2 +- cpp/src/interop/to_arrow.cu | 39 +- cpp/src/interop/to_arrow_device.cu | 104 +- cpp/src/interop/to_arrow_host.cu | 396 ++++++ cpp/src/interop/to_arrow_schema.cpp | 7 +- cpp/src/io/avro/reader_impl.cu | 8 +- cpp/src/io/comp/gpuinflate.hpp | 7 +- cpp/src/io/csv/reader_impl.cu | 44 +- cpp/src/io/fst/agent_dfa.cuh | 371 +++++- cpp/src/io/fst/dispatch_dfa.cuh | 7 +- cpp/src/io/fst/lookup_tables.cuh | 70 +- cpp/src/io/functions.cpp | 13 + cpp/src/io/json/json_column.cu | 4 +- cpp/src/io/json/json_normalization.cu | 26 +- cpp/src/io/json/nested_json.hpp | 18 +- cpp/src/io/json/nested_json_gpu.cu | 31 +- cpp/src/io/json/read_json.cu | 142 +-- cpp/src/io/json/read_json.hpp | 25 +- cpp/src/io/orc/reader_impl_decode.cu | 10 +- cpp/src/io/orc/stripe_enc.cu | 4 +- cpp/src/io/orc/writer_impl.cu | 50 +- cpp/src/io/orc/writer_impl.hpp | 9 +- .../io/parquet/compact_protocol_reader.hpp | 8 +- cpp/src/io/parquet/predicate_pushdown.cpp | 20 +- cpp/src/io/parquet/reader.cpp | 5 + cpp/src/io/parquet/reader_impl.cpp | 86 +- cpp/src/io/parquet/reader_impl.hpp | 31 +- cpp/src/io/parquet/reader_impl_chunking.cu | 131 +- cpp/src/io/parquet/reader_impl_chunking.hpp | 6 + cpp/src/io/parquet/reader_impl_helpers.cpp | 32 +- cpp/src/io/parquet/reader_impl_helpers.hpp | 20 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 29 +- cpp/src/io/parquet/writer_impl.cu | 67 +- cpp/src/io/utilities/base64_utilities.hpp | 8 +- cpp/src/io/utilities/data_casting.cu | 4 +- cpp/src/io/utilities/file_io_utilities.cpp | 6 +- cpp/src/io/utilities/file_io_utilities.hpp | 12 +- cpp/src/io/utilities/row_selection.hpp | 6 +- cpp/src/io/utilities/string_parsing.hpp | 5 +- cpp/src/io/utilities/trie.cuh | 6 +- cpp/src/jit/parser.hpp | 6 
+- cpp/src/join/hash_join.cu | 2 + cpp/src/lists/contains.cu | 1 + cpp/src/lists/copying/concatenate.cu | 1 + cpp/src/lists/copying/segmented_gather.cu | 1 + cpp/src/lists/dremel.cu | 6 +- cpp/src/lists/explode.cu | 29 +- cpp/src/lists/set_operations.cu | 1 + cpp/src/lists/stream_compaction/distinct.cu | 1 + cpp/src/merge/merge.cu | 1 + cpp/src/partitioning/round_robin.cu | 5 +- cpp/src/quantiles/quantile.cu | 1 + cpp/src/quantiles/quantiles.cu | 1 + cpp/src/quantiles/tdigest/tdigest.cu | 1 + cpp/src/reductions/minmax.cu | 3 +- cpp/src/reductions/scan/rank_scan.cu | 1 + cpp/src/reductions/scan/scan_inclusive.cu | 1 + cpp/src/reductions/segmented/reductions.cpp | 2 +- cpp/src/reshape/byte_cast.cu | 11 +- cpp/src/reshape/interleave_columns.cu | 4 +- cpp/src/reshape/tile.cu | 4 +- cpp/src/rolling/rolling.cu | 1 + cpp/src/scalar/scalar.cpp | 4 +- cpp/src/search/contains_column.cu | 1 + cpp/src/search/contains_scalar.cu | 2 + cpp/src/search/contains_table.cu | 1 + cpp/src/search/search_ordered.cu | 1 + cpp/src/strings/combine/join.cu | 6 +- cpp/src/strings/convert/convert_datetime.cu | 2 +- cpp/src/strings/convert/convert_durations.cu | 1 + cpp/src/strings/copying/concatenate.cu | 2 +- cpp/src/strings/filter_chars.cu | 2 +- cpp/src/strings/replace/multi_re.cu | 2 +- cpp/src/strings/strings_scalar_factories.cpp | 1 + cpp/src/strings/translate.cu | 2 +- cpp/src/strings/utilities.cu | 1 + cpp/src/table/row_operators.cu | 5 +- cpp/src/transform/one_hot_encode.cu | 1 + cpp/src/transform/row_bit_count.cu | 1 + cpp/src/utilities/cuda_memcpy.cu | 20 +- .../{pinned_memory.cpp => host_memory.cpp} | 86 +- cpp/src/utilities/prefetch.cpp | 95 ++ cpp/tests/CMakeLists.txt | 21 +- .../binaryop/binop-verify-input-test.cpp | 4 +- cpp/tests/interop/from_arrow_test.cpp | 9 + cpp/tests/interop/nanoarrow_utils.hpp | 9 +- cpp/tests/interop/to_arrow_device_test.cpp | 78 +- cpp/tests/interop/to_arrow_host_test.cpp | 1117 +++++++++++++++++ cpp/tests/interop/to_arrow_test.cpp | 10 + cpp/tests/io/fst/common.hpp | 4 +- cpp/tests/io/fst/fst_test.cu | 4 +- .../json_chunked_reader.cu} | 81 +- .../json_quote_normalization_test.cpp | 0 cpp/tests/io/{ => json}/json_test.cpp | 0 cpp/tests/io/{ => json}/json_tree.cpp | 6 +- .../io/{ => json}/json_type_cast_test.cu | 0 cpp/tests/io/json/json_utils.cuh | 105 ++ .../json_whitespace_normalization_test.cu | 0 cpp/tests/io/{ => json}/json_writer.cpp | 0 cpp/tests/io/{ => json}/nested_json_test.cpp | 0 cpp/tests/io/parquet_chunked_reader_test.cu | 394 ++++++ cpp/tests/io/parquet_reader_test.cpp | 203 +++ .../{json_tests.cpp => json_tests.cu} | 45 +- cpp/tests/streams/dictionary_test.cpp | 46 + cpp/tests/streams/interop_test.cpp | 9 + cpp/tests/streams/lists_test.cpp | 57 +- cpp/tests/streams/reshape_test.cpp | 47 + cpp/tests/strings/integers_tests.cpp | 4 +- cpp/tests/utilities/random_seed.cpp | 4 +- .../utilities_tests/pinned_memory_tests.cpp | 67 +- dependencies.yaml | 131 +- docs/cudf/source/conf.py | 6 + docs/cudf/source/cudf_pandas/how-it-works.md | 16 + .../source/developer_guide/documentation.md | 2 +- .../source/user_guide/api_docs/groupby.rst | 3 +- .../api_docs/pylibcudf/io/index.rst | 1 + .../api_docs/pylibcudf/io/parquet.rst | 6 + docs/cudf/source/user_guide/io/read-json.md | 6 +- java/ci/README.md | 4 +- java/pom.xml | 2 +- java/src/main/java/ai/rapids/cudf/Cudf.java | 36 + java/src/main/native/CMakeLists.txt | 1 + java/src/main/native/include/jni_utils.hpp | 20 +- java/src/main/native/src/CudfJni.cpp | 25 + java/src/main/native/src/RmmJni.cpp | 7 - 
java/src/main/native/src/TableJni.cpp | 5 +- .../main/native/src/aggregation128_utils.cu | 2 +- pyproject.toml | 64 +- python/cudf/CMakeLists.txt | 101 ++ python/cudf/cudf/_lib/lists.pyx | 92 +- python/cudf/cudf/_lib/parquet.pyx | 312 ++--- python/cudf/cudf/_lib/reduce.pyx | 15 +- python/cudf/cudf/_lib/types.pyx | 4 +- python/cudf/cudf/api/types.py | 2 +- python/cudf/cudf/core/_base_index.py | 54 +- python/cudf/cudf/core/_internals/where.py | 2 +- python/cudf/cudf/core/column/categorical.py | 130 +- python/cudf/cudf/core/column/column.py | 56 +- python/cudf/cudf/core/column/datetime.py | 11 +- python/cudf/cudf/core/column/decimal.py | 4 +- python/cudf/cudf/core/column/lists.py | 21 +- python/cudf/cudf/core/column/numerical.py | 70 +- .../cudf/cudf/core/column/numerical_base.py | 11 +- python/cudf/cudf/core/column/string.py | 16 +- python/cudf/cudf/core/column/timedelta.py | 7 +- python/cudf/cudf/core/column_accessor.py | 64 +- python/cudf/cudf/core/dataframe.py | 378 ++++-- python/cudf/cudf/core/dtypes.py | 9 +- python/cudf/cudf/core/frame.py | 29 +- python/cudf/cudf/core/groupby/groupby.py | 642 +++++++--- python/cudf/cudf/core/index.py | 227 +++- python/cudf/cudf/core/indexed_frame.py | 216 +++- python/cudf/cudf/core/indexing_utils.py | 8 +- python/cudf/cudf/core/join/_join_helpers.py | 29 +- python/cudf/cudf/core/multiindex.py | 56 +- python/cudf/cudf/core/resample.py | 12 +- python/cudf/cudf/core/reshape.py | 22 +- python/cudf/cudf/core/series.py | 256 +++- python/cudf/cudf/core/single_column_frame.py | 16 +- python/cudf/cudf/core/tools/numeric.py | 2 +- python/cudf/cudf/core/window/ewm.py | 77 +- python/cudf/cudf/core/window/rolling.py | 27 +- python/cudf/cudf/io/csv.py | 2 +- python/cudf/cudf/io/orc.py | 33 +- python/cudf/cudf/io/parquet.py | 44 +- python/cudf/cudf/pandas/__init__.py | 60 +- python/cudf/cudf/pandas/__main__.py | 14 +- python/cudf/cudf/pandas/_wrappers/pandas.py | 16 +- .../cudf/pandas/scripts/run-pandas-tests.sh | 2 +- python/cudf/cudf/testing/testing.py | 10 +- python/cudf/cudf/tests/test_categorical.py | 56 + .../cudf/cudf/tests/test_column_accessor.py | 190 ++- python/cudf/cudf/tests/test_csv.py | 7 +- python/cudf/cudf/tests/test_dataframe.py | 11 +- python/cudf/cudf/tests/test_dropna.py | 9 + python/cudf/cudf/tests/test_gcs.py | 3 +- python/cudf/cudf/tests/test_groupby.py | 25 + python/cudf/cudf/tests/test_index.py | 9 + python/cudf/cudf/tests/test_multiindex.py | 18 + python/cudf/cudf/tests/test_parquet.py | 24 +- python/cudf/cudf/tests/test_reductions.py | 15 +- python/cudf/cudf/tests/test_s3.py | 136 +- python/cudf/cudf/utils/dtypes.py | 58 +- python/cudf/cudf/utils/ioutils.py | 78 +- python/cudf/cudf/utils/utils.py | 26 + python/cudf/pyproject.toml | 5 +- python/cudf_kafka/pyproject.toml | 3 +- python/cudf_polars/cudf_polars/callback.py | 12 +- .../cudf_polars/containers/column.py | 3 +- .../cudf_polars/containers/dataframe.py | 12 - python/cudf_polars/cudf_polars/dsl/expr.py | 66 +- python/cudf_polars/cudf_polars/dsl/ir.py | 214 +++- .../cudf_polars/testing/asserts.py | 34 +- .../cudf_polars/cudf_polars/utils/dtypes.py | 41 +- .../cudf_polars/cudf_polars/utils/versions.py | 1 + python/cudf_polars/docs/overview.md | 2 +- python/cudf_polars/pyproject.toml | 3 +- .../tests/expressions/test_casting.py | 52 + .../tests/expressions/test_literal.py | 18 +- .../tests/expressions/test_numeric_binops.py | 14 +- .../tests/expressions/test_stringfunction.py | 6 +- python/cudf_polars/tests/test_config.py | 34 + python/cudf_polars/tests/test_drop_nulls.py | 65 + 
python/cudf_polars/tests/test_groupby.py | 28 + python/cudf_polars/tests/test_hconcat.py | 9 + python/cudf_polars/tests/test_join.py | 93 +- python/cudf_polars/tests/test_scan.py | 163 ++- python/cudf_polars/tests/utils/test_dtypes.py | 1 + python/custreamz/pyproject.toml | 7 +- python/dask_cudf/dask_cudf/io/parquet.py | 76 +- .../dask_cudf/dask_cudf/io/tests/test_s3.py | 92 +- python/dask_cudf/pyproject.toml | 7 +- python/pylibcudf/CMakeLists.txt | 6 +- python/pylibcudf/pylibcudf/CMakeLists.txt | 1 + python/pylibcudf/pylibcudf/__init__.pxd | 3 + python/pylibcudf/pylibcudf/__init__.py | 3 + python/pylibcudf/pylibcudf/binaryop.pxd | 8 + python/pylibcudf/pylibcudf/binaryop.pyx | 35 + python/pylibcudf/pylibcudf/column.pyx | 22 + python/pylibcudf/pylibcudf/experimental.pxd | 10 + python/pylibcudf/pylibcudf/experimental.pyx | 42 + python/pylibcudf/pylibcudf/expressions.pyx | 11 + python/pylibcudf/pylibcudf/io/CMakeLists.txt | 4 +- python/pylibcudf/pylibcudf/io/__init__.pxd | 2 +- python/pylibcudf/pylibcudf/io/__init__.py | 2 +- python/pylibcudf/pylibcudf/io/datasource.pyx | 10 +- python/pylibcudf/pylibcudf/io/parquet.pxd | 34 + python/pylibcudf/pylibcudf/io/parquet.pyx | 203 +++ python/pylibcudf/pylibcudf/io/types.pyx | 8 + python/pylibcudf/pylibcudf/join.pyx | 15 +- .../pylibcudf/pylibcudf/libcudf/binaryop.pxd | 39 +- .../pylibcudf/pylibcudf/libcudf/copying.pxd | 3 +- .../pylibcudf/libcudf/exception_handler.pxd | 69 + .../pylibcudf/libcudf/experimental.pxd | 16 + .../pylibcudf/libcudf/io/parquet.pxd | 79 +- .../pylibcudf/pylibcudf/libcudf/io/types.pxd | 1 + .../pylibcudf/libcudf/lists/contains.pxd | 3 +- .../libcudf/lists/count_elements.pxd | 2 +- .../pylibcudf/libcudf/lists/filling.pxd | 18 + .../libcudf/lists/set_operations.pxd | 36 + .../pylibcudf/libcudf/lists/sorting.pxd | 6 + .../libcudf/lists/stream_compaction.pxd | 7 +- .../libcudf/scalar/scalar_factories.pxd | 3 + .../libcudf/utilities/type_dispatcher.pxd | 7 + python/pylibcudf/pylibcudf/lists.pxd | 20 +- python/pylibcudf/pylibcudf/lists.pyx | 394 +++++- python/pylibcudf/pylibcudf/scalar.pxd | 4 + python/pylibcudf/pylibcudf/scalar.pyx | 18 + .../pylibcudf/pylibcudf/tests/common/utils.py | 73 +- python/pylibcudf/pylibcudf/tests/conftest.py | 15 + .../pylibcudf/tests/io/test_parquet.py | 108 ++ .../tests/io/test_source_sink_info.py | 21 +- .../pylibcudf/tests/test_binaryops.py | 785 ++++++++++++ .../pylibcudf/tests/test_column_factories.py | 3 +- .../tests/test_column_from_device.py | 51 - .../pylibcudf/pylibcudf/tests/test_copying.py | 3 +- python/pylibcudf/pylibcudf/tests/test_join.py | 3 +- .../pylibcudf/pylibcudf/tests/test_lists.py | 259 +++- .../pylibcudf/pylibcudf/tests/test_reshape.py | 3 +- .../pylibcudf/pylibcudf/tests/test_traits.py | 2 +- .../pylibcudf/tests/test_transform.py | 3 +- .../pylibcudf/pylibcudf/tests/test_unary.py | 2 +- python/pylibcudf/pylibcudf/types.pyx | 7 +- python/pylibcudf/pyproject.toml | 7 +- 596 files changed, 12207 insertions(+), 4023 deletions(-) create mode 100644 cpp/cmake/thirdparty/get_thread_pool.cmake create mode 100644 cpp/cmake/thirdparty/patches/cccl_symbol_visibility.diff create mode 100644 cpp/examples/tpch/q10.cpp create mode 100644 cpp/include/cudf/detail/utilities/host_memory.hpp delete mode 100644 cpp/include/cudf/interop/detail/arrow.hpp create mode 100644 cpp/include/cudf/utilities/prefetch.hpp delete mode 100644 cpp/include/cudf/utilities/thread_pool.hpp create mode 100644 cpp/src/interop/decimal_conversion_utilities.cu create mode 100644 
cpp/src/interop/decimal_conversion_utilities.cuh create mode 100644 cpp/src/interop/to_arrow_host.cu rename cpp/src/utilities/{pinned_memory.cpp => host_memory.cpp} (73%) create mode 100644 cpp/src/utilities/prefetch.cpp create mode 100644 cpp/tests/interop/to_arrow_host_test.cpp rename cpp/tests/io/{json_chunked_reader.cpp => json/json_chunked_reader.cu} (64%) rename cpp/tests/io/{ => json}/json_quote_normalization_test.cpp (100%) rename cpp/tests/io/{ => json}/json_test.cpp (100%) rename cpp/tests/io/{ => json}/json_tree.cpp (99%) rename cpp/tests/io/{ => json}/json_type_cast_test.cu (100%) create mode 100644 cpp/tests/io/json/json_utils.cuh rename cpp/tests/io/{ => json}/json_whitespace_normalization_test.cu (100%) rename cpp/tests/io/{ => json}/json_writer.cpp (100%) rename cpp/tests/io/{ => json}/nested_json_test.cpp (100%) rename cpp/tests/large_strings/{json_tests.cpp => json_tests.cu} (50%) create mode 100644 cpp/tests/streams/reshape_test.cpp create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst create mode 100644 java/src/main/java/ai/rapids/cudf/Cudf.java create mode 100644 python/cudf/CMakeLists.txt create mode 100644 python/cudf_polars/tests/expressions/test_casting.py create mode 100644 python/cudf_polars/tests/test_config.py create mode 100644 python/cudf_polars/tests/test_drop_nulls.py create mode 100644 python/pylibcudf/pylibcudf/experimental.pxd create mode 100644 python/pylibcudf/pylibcudf/experimental.pyx create mode 100644 python/pylibcudf/pylibcudf/io/parquet.pxd create mode 100644 python/pylibcudf/pylibcudf/io/parquet.pyx create mode 100644 python/pylibcudf/pylibcudf/libcudf/exception_handler.pxd create mode 100644 python/pylibcudf/pylibcudf/libcudf/experimental.pxd create mode 100644 python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd create mode 100644 python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd create mode 100644 python/pylibcudf/pylibcudf/libcudf/utilities/type_dispatcher.pxd create mode 100644 python/pylibcudf/pylibcudf/tests/io/test_parquet.py create mode 100644 python/pylibcudf/pylibcudf/tests/test_binaryops.py delete mode 100644 python/pylibcudf/pylibcudf/tests/test_column_from_device.py diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 8423fe21c29..7a1361e52c5 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.08-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 4945d6cf753..64d7cd54130 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": 
"rapidsai/devcontainers:24.08-cpp-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index b79c949999a..4d61427b2ce 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,20 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.08-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.10-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - // TODO: change this back to rapidsai/devcontainers - // once https://github.com/lithomas1/devcontainers/tree/pylibcudf - // is merged in - "ghcr.io/lithomas1/devcontainers/features/rapids-build-utils:24.8": {} + "ghcr.io/lithomas1/devcontainers/features/rapids-build-utils:24.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/lithomas1/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index 026eb540952..beab2940176 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.08-cpp-cuda12.5-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.10-cpp-cuda12.5-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 937080572ad..2fc39c06fad 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ 
-46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -101,7 +101,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -111,7 +111,7 @@ jobs: wheel-build-cudf-polars: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -123,7 +123,7 @@ jobs: wheel-publish-cudf-polars: needs: wheel-build-cudf-polars secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pandas-tests.yaml b/.github/workflows/pandas-tests.yaml index 1516cb09449..cf0c2b377dd 100644 --- a/.github/workflows/pandas-tests.yaml +++ b/.github/workflows/pandas-tests.yaml @@ -17,7 +17,7 @@ jobs: pandas-tests: # run the Pandas unit tests secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.9" and (.CUDA_VER | startswith("12.5.")) )) build_type: nightly diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 76645cb71c8..86314d3c9d3 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -35,41 +35,41 @@ jobs: - pandas-tests - pandas-tests-diff secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 with: build_type: pull-request enable_check_symbols: true conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 with: build_type: pull-request conda-python-cudf-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 with: build_type: pull-request script: "ci/test_python_cudf.sh" @@ -77,14 +77,14 @@ jobs: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 with: build_type: pull-request script: "ci/test_python_other.sh" 
conda-java-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -94,7 +94,7 @@ jobs: static-configure: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: pull-request # Use the wheel container so we can skip conda solves and since our @@ -104,7 +104,7 @@ jobs: conda-notebook-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -114,7 +114,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -124,28 +124,28 @@ jobs: wheel-build-pylibcudf: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: pull-request script: "ci/build_wheel_pylibcudf.sh" wheel-build-cudf: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: pull-request script: "ci/build_wheel_cudf.sh" wheel-tests-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: pull-request script: ci/test_wheel_cudf.sh wheel-build-cudf-polars: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -154,7 +154,7 @@ jobs: wheel-tests-cudf-polars: needs: wheel-build-cudf-polars secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -165,7 +165,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -174,7 +174,7 @@ jobs: wheel-tests-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -182,7 +182,7 @@ jobs: script: ci/test_wheel_dask_cudf.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.10 with: arch: '["amd64"]' cuda: '["12.5"]' @@ -193,7 +193,7 @@ jobs: unit-tests-cudf-pandas: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: pull-request @@ -202,7 +202,7 @@ jobs: # run the Pandas unit tests using PR branch needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.9" and (.CUDA_VER | startswith("12.5.")) )) build_type: pull-request @@ -212,7 +212,7 @@ jobs: pandas-tests-diff: # diff the results of running the Pandas unit tests and publish a job summary needs: pandas-tests - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: node_type: cpu4 build_type: pull-request diff --git a/.github/workflows/pr_issue_status_automation.yml b/.github/workflows/pr_issue_status_automation.yml index 2a8ebd30993..45e5191eb54 100644 --- a/.github/workflows/pr_issue_status_automation.yml +++ b/.github/workflows/pr_issue_status_automation.yml @@ -23,7 +23,7 @@ on: jobs: get-project-id: - uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-24.10 if: github.event.pull_request.state == 'open' secrets: inherit permissions: @@ -34,7 +34,7 @@ jobs: update-status: # This job sets the PR and its linked issues to "In Progress" status - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.10 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: @@ -50,7 +50,7 @@ jobs: update-sprint: # This job sets the PR and its linked issues to the current "Weekly Sprint" - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@cuda-12.5.1 + uses: 
rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-24.10 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 73f8d726e77..9feea050b19 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -25,7 +25,7 @@ jobs: enable_check_symbols: true conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -33,7 +33,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -45,7 +45,7 @@ jobs: run_script: "ci/test_cpp_memcheck.sh" static-configure: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: pull-request # Use the wheel container so we can skip conda solves and since our @@ -54,7 +54,7 @@ jobs: run_script: "ci/configure_cpp_static.sh" conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -64,7 +64,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -85,7 +85,7 @@ jobs: run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -97,7 +97,7 @@ jobs: run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -106,7 +106,7 @@ jobs: script: ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.5.1 + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -117,7 +117,7 @@ jobs: script: ci/test_wheel_dask_cudf.sh unit-tests-cudf-pandas: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.5.1 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/README.md b/README.md index 1ab6a2d7457..fd8b0365807 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ cuDF can be installed with conda (via [miniconda](https://docs.conda.io/projects ```bash conda install -c rapidsai -c conda-forge -c nvidia \ - cudf=24.08 python=3.11 cuda-version=12.5 + cudf=24.10 python=3.11 cuda-version=12.5 ``` We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD diff --git a/VERSION b/VERSION index ec8489fda92..7c7ba04436f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.08.00 +24.10.00 diff --git a/ci/build_python.sh b/ci/build_python.sh index 79e09432779..3705786f8bc 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -22,6 +22,13 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) # TODO: Remove `--no-test` flag once importing on a CPU # node works correctly # With boa installed conda build forwards to the boa builder + +# TODO: enable once conda recipes written for pylibcudf # RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ # --no-test \ # --channel "${CPP_CHANNEL}" \ # conda/recipes/pylibcudf + RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh index abde5e5d160..48ee4a05628 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/run.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -19,7 +19,7 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ mkdir -p "${RAPIDS_TESTS_DIR}" bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \ - -n 10 \ + -n 5 \ --tb=no \ -m "not slow" \ --max-worker-restart=3 \ diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index f629de64905..ad96aff3930 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -68,15 +68,18 @@ done # README.md update sed_runner "s/version == ${CURRENT_SHORT_TAG}/version == ${NEXT_SHORT_TAG}/g" README.md sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md +sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" python/cudf_polars/docs/overview.md +sed_runner "s/branch-${CURRENT_SHORT_TAG}/branch-${NEXT_SHORT_TAG}/g" python/cudf_polars/docs/overview.md # Libcudf examples update sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/versions.cmake # CI files -for FILE in .github/workflows/*.yaml; do +for FILE in .github/workflows/*.yaml .github/workflows/*.yml; do sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE}; done +sed_runner "s/branch-[0-9]\+\.[0-9]\+/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_cudf_polars.sh # Java files
NEXT_FULL_JAVA_TAG="${NEXT_SHORT_TAG}.${PATCH_PEP440}-SNAPSHOT" diff --git a/ci/test_wheel_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh index 900acd5d473..cc9f5788685 100755 --- a/ci/test_wheel_cudf_polars.sh +++ b/ci/test_wheel_cudf_polars.sh @@ -10,7 +10,7 @@ set -eou pipefail # files in cudf_polars/pylibcudf", rather than "are there changes # between upstream and this branch which touch cudf_polars/pylibcudf" # TODO: is the target branch exposed anywhere in an environment variable? -if [ -n "$(git diff --name-only origin/branch-24.08...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ]; +if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ]; then HAS_CHANGES=1 else diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index b8d73a01f96..b1a1cc3c68e 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -26,7 +26,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==24.8.*,>=0.0.0a0 +- dask-cuda==24.10.*,>=0.0.0a0 - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 @@ -43,10 +43,10 @@ dependencies: - libcufile=1.4.0.31 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 -- libkvikio==24.8.*,>=0.0.0a0 +- libkvikio==24.10.*,>=0.0.0a0 - libparquet==16.1.0.* - librdkafka>=1.9.0,<1.10.0a0 -- librmm==24.8.*,>=0.0.0a0 +- librmm==24.10.*,>=0.0.0a0 - make - moto>=4.0.8 - msgpack-python @@ -77,9 +77,9 @@ dependencies: - python>=3.9,<3.12 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.8.*,>=0.0.0a0 +- rapids-dask-dependency==24.10.*,>=0.0.0a0 - rich -- rmm==24.8.*,>=0.0.0a0 +- rmm==24.10.*,>=0.0.0a0 - s3fs>=2022.3.0 - scikit-build-core>=0.7.0 - scipy diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 3f5fae49cbb..1017b11779c 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -27,7 +27,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==24.8.*,>=0.0.0a0 +- dask-cuda==24.10.*,>=0.0.0a0 - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 @@ -42,10 +42,10 @@ dependencies: - libarrow==16.1.0.* - libcufile-dev - libcurand-dev -- libkvikio==24.8.*,>=0.0.0a0 +- libkvikio==24.10.*,>=0.0.0a0 - libparquet==16.1.0.* - librdkafka>=1.9.0,<1.10.0a0 -- librmm==24.8.*,>=0.0.0a0 +- librmm==24.10.*,>=0.0.0a0 - make - moto>=4.0.8 - msgpack-python @@ -75,9 +75,9 @@ dependencies: - python>=3.9,<3.12 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.8.*,>=0.0.0a0 +- rapids-dask-dependency==24.10.*,>=0.0.0a0 - rich -- rmm==24.8.*,>=0.0.0a0 +- rmm==24.10.*,>=0.0.0a0 - s3fs>=2022.3.0 - scikit-build-core>=0.7.0 - scipy diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 903cff27be4..310bc99b279 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -216,6 +216,8 @@ include(cmake/thirdparty/get_fmt.cmake) include(cmake/thirdparty/get_spdlog.cmake) # find nanoarrow include(cmake/thirdparty/get_nanoarrow.cmake) +# find thread_pool +include(cmake/thirdparty/get_thread_pool.cmake) # Workaround until https://github.com/rapidsai/rapids-cmake/issues/176 is resolved if(NOT BUILD_SHARED_LIBS) @@ -363,8 +365,10 @@ add_library( src/interop/dlpack.cpp src/interop/from_arrow.cu src/interop/arrow_utilities.cpp + src/interop/decimal_conversion_utilities.cu 
src/interop/to_arrow.cu src/interop/to_arrow_device.cu + src/interop/to_arrow_host.cu src/interop/from_arrow_device.cu src/interop/from_arrow_host.cu src/interop/from_arrow_stream.cu @@ -669,9 +673,10 @@ add_library( src/unary/null_ops.cu src/utilities/cuda_memcpy.cu src/utilities/default_stream.cpp + src/utilities/host_memory.cpp src/utilities/linked_column.cpp src/utilities/logger.cpp - src/utilities/pinned_memory.cpp + src/utilities/prefetch.cpp src/utilities/stacktrace.cpp src/utilities/stream_pool.cpp src/utilities/traits.cpp @@ -707,8 +712,10 @@ set_target_properties( CXX_STANDARD_REQUIRED ON # For std:: support of __int128_t. Can be removed once using cuda::std CXX_EXTENSIONS ON + CXX_VISIBILITY_PRESET hidden CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON + CUDA_VISIBILITY_PRESET hidden POSITION_INDEPENDENT_CODE ON INTERFACE_POSITION_INDEPENDENT_CODE ON ) @@ -804,7 +811,7 @@ add_dependencies(cudf jitify_preprocess_run) # Specify the target module library dependencies target_link_libraries( cudf - PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm + PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm $ PRIVATE $ cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio $ nanoarrow ) @@ -883,8 +890,10 @@ if(CUDF_BUILD_TESTUTIL) # set target compile options CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON + CXX_VISIBILITY_PRESET hidden CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON + CUDA_VISIBILITY_PRESET hidden POSITION_INDEPENDENT_CODE ON INTERFACE_POSITION_INDEPENDENT_CODE ON ) diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index 6df2cb44adc..0970003deb2 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -718,7 +718,7 @@ std::unique_ptr create_random_column(data_profi } template -struct clamp_down : public thrust::unary_function { +struct clamp_down { T max; clamp_down(T max) : max(max) {} __host__ __device__ T operator()(T x) const { return min(x, max); } diff --git a/cpp/benchmarks/copying/gather.cu b/cpp/benchmarks/copying/gather.cu index eeb0149fb3a..985166f7298 100644 --- a/cpp/benchmarks/copying/gather.cu +++ b/cpp/benchmarks/copying/gather.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -71,5 +71,5 @@ void BM_gather(benchmark::State& state) ->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \ ->UseManualTime(); -GBM_BENCHMARK_DEFINE(double_coalesce_x, double, true); -GBM_BENCHMARK_DEFINE(double_coalesce_o, double, false); +GBM_BENCHMARK_DEFINE(double_coalesced, double, true); +GBM_BENCHMARK_DEFINE(double_shuffled, double, false); diff --git a/cpp/benchmarks/copying/scatter.cu b/cpp/benchmarks/copying/scatter.cu index a521dc82739..c27480b69f4 100644 --- a/cpp/benchmarks/copying/scatter.cu +++ b/cpp/benchmarks/copying/scatter.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
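The `clamp_down` hunk above drops the `thrust::unary_function` base class, which newer Thrust/CCCL releases deprecate and remove; a plain functor is all `thrust::transform` needs, since the base only ever supplied `argument_type`/`result_type` typedefs. A minimal standalone sketch of the same pattern (the `clamp_to` name and sample values are illustrative, not part of this patch):

```c++
#include <thrust/device_vector.h>
#include <thrust/transform.h>

#include <cstdio>

// Plain functor: no thrust::unary_function base required.
template <typename T>
struct clamp_to {
  T max;
  __host__ __device__ T operator()(T x) const { return x < max ? x : max; }
};

int main()
{
  int const host[] = {1, 5, 9, 13};
  thrust::device_vector<int> v(host, host + 4);
  // Clamp every element to at most 8, in place.
  thrust::transform(v.begin(), v.end(), v.begin(), clamp_to<int>{8});
  std::printf("%d %d %d %d\n", int(v[0]), int(v[1]), int(v[2]), int(v[3]));
  return 0;
}
```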
@@ -74,5 +74,5 @@ void BM_scatter(benchmark::State& state) ->Ranges({{1 << 10, 1 << 25}, {1, 8}}) \ ->UseManualTime(); -SBM_BENCHMARK_DEFINE(double_coalesce_x, double, true); -SBM_BENCHMARK_DEFINE(double_coalesce_o, double, false); +SBM_BENCHMARK_DEFINE(double_coalesced, double, true); +SBM_BENCHMARK_DEFINE(double_shuffled, double, false); diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index 8c8d6756b00..8900899f9be 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -107,7 +107,7 @@ class memory_stats_logger { public: memory_stats_logger() : existing_mr(rmm::mr::get_current_device_resource()), - statistics_mr(rmm::mr::make_statistics_adaptor(existing_mr)) + statistics_mr(rmm::mr::statistics_resource_adaptor(existing_mr)) { rmm::mr::set_current_device_resource(&statistics_mr); } diff --git a/cpp/benchmarks/groupby/group_max_multithreaded.cpp b/cpp/benchmarks/groupby/group_max_multithreaded.cpp index 3b8faba618f..bf1a1a5fcf7 100644 --- a/cpp/benchmarks/groupby/group_max_multithreaded.cpp +++ b/cpp/benchmarks/groupby/group_max_multithreaded.cpp @@ -20,8 +20,8 @@ #include #include #include -#include +#include #include template @@ -58,7 +58,7 @@ void bench_groupby_max_multithreaded(nvbench::state& state, nvbench::type_list> requests(num_threads); for (auto& thread_requests : requests) { @@ -75,10 +75,8 @@ void bench_groupby_max_multithreaded(nvbench::state& state, nvbench::type_list(pda_out_tt), + cudf::io::fst::detail::make_translation_table(pda_out_tt), stream); state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); @@ -134,7 +136,9 @@ void BM_FST_JSON_no_outidx(nvbench::state& state) auto parser = cudf::io::fst::detail::make_fst( cudf::io::fst::detail::make_symbol_group_lut(pda_sgs), cudf::io::fst::detail::make_transition_table(pda_state_tt), - cudf::io::fst::detail::make_translation_table(pda_out_tt), + cudf::io::fst::detail::make_translation_table(pda_out_tt), stream); state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); @@ -171,7 +175,9 @@ void BM_FST_JSON_no_out(nvbench::state& state) auto parser = cudf::io::fst::detail::make_fst( cudf::io::fst::detail::make_symbol_group_lut(pda_sgs), cudf::io::fst::detail::make_transition_table(pda_state_tt), - cudf::io::fst::detail::make_translation_table(pda_out_tt), + cudf::io::fst::detail::make_translation_table(pda_out_tt), stream); state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); @@ -209,7 +215,9 @@ void BM_FST_JSON_no_str(nvbench::state& state) auto parser = cudf::io::fst::detail::make_fst( cudf::io::fst::detail::make_symbol_group_lut(pda_sgs), cudf::io::fst::detail::make_transition_table(pda_state_tt), - cudf::io::fst::detail::make_translation_table(pda_out_tt), + cudf::io::fst::detail::make_translation_table(pda_out_tt), stream); state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); diff --git a/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp b/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp index aa0ee39a179..e91bf06fdfa 100644 --- a/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp @@ -24,8 +24,8 @@ #include #include #include -#include +#include #include #include @@ -90,7 +90,7 @@ void BM_orc_multithreaded_read_common(nvbench::state& state, auto const num_threads = state.get_int64("num_threads"); auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads); - 
cudf::detail::thread_pool threads(num_threads); + BS::thread_pool threads(num_threads); auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types); std::vector source_info_vector; @@ -112,13 +112,11 @@ void BM_orc_multithreaded_read_common(nvbench::state& state, cudf::io::read_orc(read_opts, stream, rmm::mr::get_current_device_resource()); }; - threads.paused = true; - for (size_t i = 0; i < num_files; ++i) { - threads.submit(read_func, i); - } + threads.pause(); + threads.detach_sequence(decltype(num_files){0}, num_files, read_func); timer.start(); - threads.paused = false; - threads.wait_for_tasks(); + threads.unpause(); + threads.wait(); cudf::detail::join_streams(streams, cudf::get_default_stream()); timer.stop(); }); @@ -170,7 +168,7 @@ void BM_orc_multithreaded_read_chunked_common(nvbench::state& state, size_t const output_limit = state.get_int64("output_limit"); auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads); - cudf::detail::thread_pool threads(num_threads); + BS::thread_pool threads(num_threads); auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types); std::vector source_info_vector; std::transform(source_sink_vector.begin(), @@ -203,13 +201,11 @@ void BM_orc_multithreaded_read_chunked_common(nvbench::state& state, } while (reader.has_next()); }; - threads.paused = true; - for (size_t i = 0; i < num_files; ++i) { - threads.submit(read_func, i); - } + threads.pause(); + threads.detach_sequence(decltype(num_files){0}, num_files, read_func); timer.start(); - threads.paused = false; - threads.wait_for_tasks(); + threads.unpause(); + threads.wait(); cudf::detail::join_streams(streams, cudf::get_default_stream()); timer.stop(); }); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp index b4c8ed78ed8..9e76ebb71ab 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp @@ -23,10 +23,10 @@ #include #include #include -#include #include +#include #include #include @@ -93,7 +93,7 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state, auto const num_threads = state.get_int64("num_threads"); auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads); - cudf::detail::thread_pool threads(num_threads); + BS::thread_pool threads(num_threads); auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types); std::vector source_info_vector; @@ -114,13 +114,11 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state, cudf::io::read_parquet(read_opts, stream, rmm::mr::get_current_device_resource()); }; - threads.paused = true; - for (size_t i = 0; i < num_files; ++i) { - threads.submit(read_func, i); - } + threads.pause(); + threads.detach_sequence(decltype(num_files){0}, num_files, read_func); timer.start(); - threads.paused = false; - threads.wait_for_tasks(); + threads.unpause(); + threads.wait(); cudf::detail::join_streams(streams, cudf::get_default_stream()); timer.stop(); }); @@ -176,7 +174,7 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state, size_t const output_limit = state.get_int64("output_limit"); auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads); - cudf::detail::thread_pool threads(num_threads); + BS::thread_pool threads(num_threads); auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, 
d_types); std::vector source_info_vector; std::transform(source_sink_vector.begin(), @@ -207,13 +205,11 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state, } while (reader.has_next()); }; - threads.paused = true; - for (size_t i = 0; i < num_files; ++i) { - threads.submit(read_func, i); - } + threads.pause(); + threads.detach_sequence(decltype(num_files){0}, num_files, read_func); timer.start(); - threads.paused = false; - threads.wait_for_tasks(); + threads.unpause(); + threads.wait(); cudf::detail::join_streams(streams, cudf::get_default_stream()); timer.stop(); }); diff --git a/cpp/benchmarks/lists/copying/scatter_lists.cu b/cpp/benchmarks/lists/copying/scatter_lists.cu index dbc3234dabf..570decf410f 100644 --- a/cpp/benchmarks/lists/copying/scatter_lists.cu +++ b/cpp/benchmarks/lists/copying/scatter_lists.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -143,5 +143,5 @@ void BM_lists_scatter(::benchmark::State& state) ->Ranges({{1 << 10, 1 << 25}, {64, 2048}}) /* 1K-1B rows, 64-2048 elements */ \ ->UseManualTime(); -SBM_BENCHMARK_DEFINE(double_type_colesce_o, double, true); -SBM_BENCHMARK_DEFINE(double_type_colesce_x, double, false); +SBM_BENCHMARK_DEFINE(double_coalesced, double, true); +SBM_BENCHMARK_DEFINE(double_shuffled, double, false); diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake index 025bff7d8f0..8df1b431095 100644 --- a/cpp/cmake/thirdparty/get_nanoarrow.cmake +++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake @@ -17,11 +17,11 @@ function(find_and_configure_nanoarrow) # Currently we need to always build nanoarrow so we don't pickup a previous installed version set(CPM_DOWNLOAD_nanoarrow ON) rapids_cpm_find( - nanoarrow 0.5.0 + nanoarrow 0.6.0.dev GLOBAL_TARGETS nanoarrow CPM_ARGS GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git - GIT_TAG 11e73a8c85b45e3d49c8c541b4e1497a649fe03c + GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb GIT_SHALLOW FALSE OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" ) diff --git a/cpp/cmake/thirdparty/get_thread_pool.cmake b/cpp/cmake/thirdparty/get_thread_pool.cmake new file mode 100644 index 00000000000..777e16d9a4f --- /dev/null +++ b/cpp/cmake/thirdparty/get_thread_pool.cmake @@ -0,0 +1,25 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
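For context on the benchmark hunks above: `cudf::detail::thread_pool` is replaced by the vendored bshoshany (BS) thread pool, whose v4 API renames the pause/submit/wait cycle. A minimal sketch of the new pattern, assuming a BS::thread_pool v4 header with the opt-in pause feature (which the `rapids_cpm_bs_thread_pool` helper below is expected to enable):

```c++
#define BS_THREAD_POOL_ENABLE_PAUSE  // pause()/unpause() are opt-in in v4
#include <BS_thread_pool.hpp>

#include <cstddef>
#include <cstdio>

int main()
{
  BS::thread_pool threads(4);

  // Queue work while paused so a timer can exclude submission overhead,
  // mirroring the benchmark loops above.
  threads.pause();
  // detach_sequence(first, last, f) enqueues f(i) for each i in [first, last).
  threads.detach_sequence(std::size_t{0}, std::size_t{8},
                          [](std::size_t i) { std::printf("task %zu\n", i); });

  threads.unpause();  // release all queued tasks at once
  threads.wait();     // block until every task has finished
  return 0;
}
```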
+# ============================================================================= + +# Need to call rapids_cpm_bs_thread_pool to get support for an installed version of thread-pool and +# to support installing it ourselves +function(find_and_configure_thread_pool) + include(${rapids-cmake-dir}/cpm/bs_thread_pool.cmake) + + # Find or install thread-pool + rapids_cpm_bs_thread_pool() + +endfunction() + +find_and_configure_thread_pool() diff --git a/cpp/cmake/thirdparty/patches/cccl_override.json b/cpp/cmake/thirdparty/patches/cccl_override.json index 2f29578f7ae..dcf9c1139f9 100644 --- a/cpp/cmake/thirdparty/patches/cccl_override.json +++ b/cpp/cmake/thirdparty/patches/cccl_override.json @@ -3,6 +3,11 @@ "packages" : { "CCCL" : { "patches" : [ + { + "file" : "${current_json_dir}/cccl_symbol_visibility.diff", + "issue" : "Correct symbol visibility issues in libcudacxx [https://github.com/NVIDIA/cccl/pull/1832/]", + "fixed_in" : "2.6" + }, { "file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff", "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]", diff --git a/cpp/cmake/thirdparty/patches/cccl_symbol_visibility.diff b/cpp/cmake/thirdparty/patches/cccl_symbol_visibility.diff new file mode 100644 index 00000000000..f745d5fa314 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/cccl_symbol_visibility.diff @@ -0,0 +1,27 @@ +diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__config b/libcudacxx/include/cuda/std/detail/libcxx/include/__config +index e7c62c031b..5db861853a 100644 +--- a/libcudacxx/include/cuda/std/detail/libcxx/include/__config ++++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__config +@@ -1049,7 +1049,6 @@ typedef __char32_t char32_t; + # define _LIBCUDACXX_EXPORTED_FROM_ABI __declspec(dllimport) + # endif + +-# define _LIBCUDACXX_TYPE_VIS _LIBCUDACXX_DLL_VIS + # define _LIBCUDACXX_FUNC_VIS _LIBCUDACXX_DLL_VIS + # define _LIBCUDACXX_EXCEPTION_ABI _LIBCUDACXX_DLL_VIS + # define _LIBCUDACXX_HIDDEN +@@ -1448,14 +1447,6 @@ __sanitizer_annotate_contiguous_container(const void*, const void*, const void*, + # define _LIBCUDACXX_WEAK __attribute__((__weak__)) + # endif + +-// Redefine some macros for internal use +-# if defined(__cuda_std__) +-# undef _LIBCUDACXX_FUNC_VIS +-# define _LIBCUDACXX_FUNC_VIS _LIBCUDACXX_INLINE_VISIBILITY +-# undef _LIBCUDACXX_TYPE_VIS +-# define _LIBCUDACXX_TYPE_VIS +-# endif // __cuda_std__ +- + // Thread API + # ifndef _LIBCUDACXX_HAS_THREAD_API_EXTERNAL + # if defined(_CCCL_COMPILER_NVRTC) || defined(__EMSCRIPTEN__) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index 0d097541692..aa054ba93e9 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -52,15 +52,36 @@ header file in `cudf/cpp/include/cudf/`. For example, `cudf/cpp/include/cudf/cop contains the APIs for functions related to copying from one column to another. Note the `.hpp` file extension used to indicate a C++ header file. -Header files should use the `#pragma once` include guard. +External/public libcudf C++ API header files need to mark all symbols inside of them with `CUDF_EXPORT`. +This is done by placing the macro on the `namespace cudf` as seen below. Markup on namespaces +requires them not to be nested, so the `cudf` namespace must be kept by itself.
+ +```c++ + +#pragma once + +namespace CUDF_EXPORT cudf { +namespace lists { + +... + + +} // namespace lists +} // namespace CUDF_EXPORT cudf + +``` + The naming of external API headers should be consistent with the name of the folder that contains the source files that implement the API. For example, the implementation of the APIs found in `cudf/cpp/include/cudf/copying.hpp` are located in `cudf/src/copying`. Likewise, the unit tests for the APIs reside in `cudf/tests/copying/`. -Internal API headers containing `detail` namespace definitions that are used across translation -units inside libcudf should be placed in `include/cudf/detail`. +Internal API headers containing `detail` namespace definitions that are used across translation +units inside libcudf should be placed in `include/cudf/detail`. Just like the public C++ API headers, any +internal C++ API header requires `CUDF_EXPORT` markup on the `cudf` namespace so that the functions can be tested. + +All headers in cudf should use `#pragma once` for include guards. ## File extensions diff --git a/cpp/doxygen/developer_guide/DOCUMENTATION.md b/cpp/doxygen/developer_guide/DOCUMENTATION.md index b86f7db82b0..89376223baf 100644 --- a/cpp/doxygen/developer_guide/DOCUMENTATION.md +++ b/cpp/doxygen/developer_guide/DOCUMENTATION.md @@ -363,7 +363,7 @@ Here is an example of a doxygen description comment for a namespace declaration. * * This is the top-level namespace which contains all cuDF functions and types. */ - namespace cudf { + namespace CUDF_EXPORT cudf { A description comment should be included only once for each unique namespace declaration. Otherwise, if more than one description is found, doxygen aggregates the descriptions in an arbitrary order in the output pages. @@ -385,7 +385,7 @@ The existing groups have been carefully structured and named, so new groups shou When creating a new API, specify its group using the [\@ingroup](https://www.doxygen.nl/manual/commands.html#cmdingroup) tag and the group reference id from the [doxygen_groups.h](../include/doxygen_groups.h) file. - namespace cudf { + namespace CUDF_EXPORT cudf { /** * @brief ... @@ -401,7 +401,7 @@ When creating a new API, specify its group using the [\@ingroup](https://www.dox You can also use the \@addtogroup with a `@{ ... @}` pair to automatically include doxygen comment blocks as part of a group.
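Putting the `CUDF_EXPORT` and doxygen-group conventions together, a public header following this guide might look like the sketch below (the `do_something` declaration and group id are made up for illustration, and the include path for the macro is an assumption based on where libcudf keeps its export markup):

```c++
#pragma once

#include <cudf/utilities/export.hpp>  // assumed source of the CUDF_EXPORT macro

// The export markup goes on the non-nested `cudf` namespace; nested
// namespaces are opened separately inside it.
namespace CUDF_EXPORT cudf {
namespace lists {

/**
 * @addtogroup lists_illustrative
 * @{
 */

/**
 * @brief Illustrative declaration only, not a real libcudf API.
 */
void do_something();

/** @} */  // end of group

}  // namespace lists
}  // namespace CUDF_EXPORT cudf
```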
- namespace cudf { + namespace CUDF_EXPORT cudf { /** * @addtogroup transformation_fill * @{ diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt index 1b91d07e148..373a6d72d56 100644 --- a/cpp/examples/tpch/CMakeLists.txt +++ b/cpp/examples/tpch/CMakeLists.txt @@ -30,3 +30,7 @@ target_compile_features(tpch_q6 PRIVATE cxx_std_17) add_executable(tpch_q9 q9.cpp) target_link_libraries(tpch_q9 PRIVATE cudf::cudf) target_compile_features(tpch_q9 PRIVATE cxx_std_17) + +add_executable(tpch_q10 q10.cpp) +target_link_libraries(tpch_q10 PRIVATE cudf::cudf) +target_compile_features(tpch_q10 PRIVATE cxx_std_17) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 1bdf039da4a..fe03320b888 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -124,7 +124,7 @@ int main(int argc, char const** argv) auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); auto const shipdate_upper_literal = cudf::ast::literal(shipdate_upper); - auto lineitem_pred = std::make_unique( + auto const lineitem_pred = std::make_unique( cudf::ast::ast_operator::LESS_EQUAL, shipdate_ref, shipdate_upper_literal); // Read out the `lineitem` table from parquet file diff --git a/cpp/examples/tpch/q10.cpp b/cpp/examples/tpch/q10.cpp new file mode 100644 index 00000000000..94da46f6930 --- /dev/null +++ b/cpp/examples/tpch/q10.cpp @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../utilities/timer.hpp" +#include "utils.hpp" + +#include +#include +#include + +/** + * @file q10.cpp + * @brief Implement query 10 of the TPC-H benchmark. + * + * create view customer as select * from '/tables/scale-1/customer.parquet'; + * create view orders as select * from '/tables/scale-1/orders.parquet'; + * create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; + * create view nation as select * from '/tables/scale-1/nation.parquet'; + * + * select + * c_custkey, + * c_name, + * sum(l_extendedprice * (1 - l_discount)) as revenue, + * c_acctbal, + * n_name, + * c_address, + * c_phone, + * c_comment + * from + * customer, + * orders, + * lineitem, + * nation + * where + * c_custkey = o_custkey + * and l_orderkey = o_orderkey + * and o_orderdate >= date '1993-10-01' + * and o_orderdate < date '1994-01-01' + * and l_returnflag = 'R' + * and c_nationkey = n_nationkey + * group by + * c_custkey, + * c_name, + * c_acctbal, + * c_phone, + * n_name, + * c_address, + * c_comment + * order by + * revenue desc; + */ + +/** + * @brief Calculate the revenue column + * + * @param extendedprice The extended price column + * @param discount The discount column + * @param stream The CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. 
+ */ +[[nodiscard]] std::unique_ptr calc_revenue( + cudf::column_view const& extendedprice, + cudf::column_view const& discount, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) +{ + auto const one = cudf::numeric_scalar(1); + auto const one_minus_discount = + cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type(), stream, mr); + auto const revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto revenue = cudf::binary_operation(extendedprice, + one_minus_discount->view(), + cudf::binary_operator::MUL, + revenue_type, + stream, + mr); + return revenue; +} +int main(int argc, char const** argv) +{ + auto const args = parse_args(argc, argv); + + // Use a memory pool + auto resource = create_memory_resource(args.memory_resource_type); + rmm::mr::set_current_device_resource(resource.get()); + + cudf::examples::timer timer; + + // Define the column projection and filter predicate for the `orders` table + std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; + auto const o_orderdate_ref = cudf::ast::column_reference(std::distance( + orders_cols.begin(), std::find(orders_cols.begin(), orders_cols.end(), "o_orderdate"))); + auto o_orderdate_lower = + cudf::timestamp_scalar(days_since_epoch(1993, 10, 1), true); + auto const o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); + auto const o_orderdate_pred_lower = cudf::ast::operation( + cudf::ast::ast_operator::GREATER_EQUAL, o_orderdate_ref, o_orderdate_lower_limit); + auto o_orderdate_upper = + cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); + auto const o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); + auto const o_orderdate_pred_upper = + cudf::ast::operation(cudf::ast::ast_operator::LESS, o_orderdate_ref, o_orderdate_upper_limit); + auto const orders_pred = std::make_unique( + cudf::ast::ast_operator::LOGICAL_AND, o_orderdate_pred_lower, o_orderdate_pred_upper); + + auto const l_returnflag_ref = cudf::ast::column_reference(3); + auto r_scalar = cudf::string_scalar("R"); + auto const r_literal = cudf::ast::literal(r_scalar); + auto const lineitem_pred = std::make_unique( + cudf::ast::ast_operator::EQUAL, l_returnflag_ref, r_literal); + + // Read out the tables from parquet files + // while pushing down the column projections and filter predicates + auto const customer = read_parquet( + args.dataset_dir + "/customer.parquet", + {"c_custkey", "c_name", "c_nationkey", "c_acctbal", "c_address", "c_phone", "c_comment"}); + auto const orders = + read_parquet(args.dataset_dir + "/orders.parquet", orders_cols, std::move(orders_pred)); + auto const lineitem = + read_parquet(args.dataset_dir + "/lineitem.parquet", + {"l_extendedprice", "l_discount", "l_orderkey", "l_returnflag"}, + std::move(lineitem_pred)); + auto const nation = read_parquet(args.dataset_dir + "/nation.parquet", {"n_name", "n_nationkey"}); + + // Perform the joins + auto const join_a = apply_inner_join(customer, nation, {"c_nationkey"}, {"n_nationkey"}); + auto const join_b = apply_inner_join(lineitem, orders, {"l_orderkey"}, {"o_orderkey"}); + auto const joined_table = apply_inner_join(join_a, join_b, {"c_custkey"}, {"o_custkey"}); + + // Calculate and append the `revenue` column + auto revenue = + calc_revenue(joined_table->column("l_extendedprice"), joined_table->column("l_discount")); + (*joined_table).append(revenue, "revenue"); + + // Perform the groupby operation + auto const groupedby_table = 
apply_groupby( + joined_table, + groupby_context_t{ + {"c_custkey", "c_name", "c_acctbal", "c_phone", "n_name", "c_address", "c_comment"}, + { + {"revenue", {{cudf::aggregation::Kind::SUM, "revenue"}}}, + }}); + + // Perform the order by operation + auto const orderedby_table = + apply_orderby(groupedby_table, {"revenue"}, {cudf::order::DESCENDING}); + + timer.print_elapsed_millis(); + + // Write query result to a parquet file + orderedby_table->to_parquet("q10.parquet"); + return 0; +} diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index e56850b94d6..89396a6c968 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -44,14 +44,14 @@ * region * where * c_custkey = o_custkey - * and l_orderkey = o_orderkey - * and l_suppkey = s_suppkey - * and c_nationkey = s_nationkey - * and s_nationkey = n_nationkey - * and n_regionkey = r_regionkey - * and r_name = 'ASIA' - * and o_orderdate >= date '1994-01-01' - * and o_orderdate < date '1995-01-01' + * and l_orderkey = o_orderkey + * and l_suppkey = s_suppkey + * and c_nationkey = s_nationkey + * and s_nationkey = n_nationkey + * and n_regionkey = r_regionkey + * and r_name = 'ASIA' + * and o_orderdate >= date '1994-01-01' + * and o_orderdate < date '1995-01-01' * group by * n_name * order by @@ -109,7 +109,7 @@ int main(int argc, char const** argv) auto const o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); auto const o_orderdate_pred_upper = cudf::ast::operation(cudf::ast::ast_operator::LESS, o_orderdate_ref, o_orderdate_upper_limit); - auto orders_pred = std::make_unique( + auto const orders_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, o_orderdate_pred_lower, o_orderdate_pred_upper); // Define the column projection and filter predicate for the `region` table @@ -118,7 +118,7 @@ int main(int argc, char const** argv) region_cols.begin(), std::find(region_cols.begin(), region_cols.end(), "r_name"))); auto r_name_value = cudf::string_scalar("ASIA"); auto const r_name_literal = cudf::ast::literal(r_name_value); - auto region_pred = std::make_unique( + auto const region_pred = std::make_unique( cudf::ast::ast_operator::EQUAL, r_name_ref, r_name_literal); // Read out the tables from parquet files diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index f11b3d6ab3b..405b2ac73ca 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -84,7 +84,7 @@ int main(int argc, char const** argv) cudf::ast::ast_operator::GREATER_EQUAL, shipdate_ref, shipdate_lower_literal); auto const shipdate_pred_b = cudf::ast::operation(cudf::ast::ast_operator::LESS, shipdate_ref, shipdate_upper_literal); - auto lineitem_pred = std::make_unique( + auto const lineitem_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, shipdate_pred_a, shipdate_pred_b); auto lineitem = read_parquet(args.dataset_dir + "/lineitem.parquet", lineitem_cols, std::move(lineitem_pred)); diff --git a/cpp/examples/versions.cmake b/cpp/examples/versions.cmake index 144b3d3721b..44493011673 100644 --- a/cpp/examples/versions.cmake +++ b/cpp/examples/versions.cmake @@ -12,4 +12,4 @@ # the License. 
# ============================================================================= -set(CUDF_TAG branch-24.08) +set(CUDF_TAG branch-24.10) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 3c1023017be..f5f514d26d9 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -31,7 +32,7 @@ * individual function documentation to see what aggregations are supported. */ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup aggregation_factories * @{ @@ -770,4 +771,4 @@ template std::unique_ptr make_merge_tdigest_aggregation(int max_centroids = 1000); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/ast/detail/expression_parser.hpp b/cpp/include/cudf/ast/detail/expression_parser.hpp index 38f7ac5291f..da552d95421 100644 --- a/cpp/include/cudf/ast/detail/expression_parser.hpp +++ b/cpp/include/cudf/ast/detail/expression_parser.hpp @@ -29,9 +29,8 @@ #include #include -namespace cudf { -namespace ast { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace ast::detail { /** * @brief Node data reference types. @@ -328,8 +327,6 @@ class expression_parser { std::vector _literals; }; -} // namespace detail +} // namespace ast::detail -} // namespace ast - -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/ast/detail/expression_transformer.hpp b/cpp/include/cudf/ast/detail/expression_transformer.hpp index a6529c338e6..3af1663abf8 100644 --- a/cpp/include/cudf/ast/detail/expression_transformer.hpp +++ b/cpp/include/cudf/ast/detail/expression_transformer.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,8 @@ #include -namespace cudf::ast::detail { +namespace CUDF_EXPORT cudf { +namespace ast::detail { /** * @brief Base "visitor" pattern class with the `expression` class for expression transformer. 
* @@ -61,4 +62,7 @@ class expression_transformer { virtual ~expression_transformer() {} }; -} // namespace cudf::ast::detail + +} // namespace ast::detail + +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/ast/detail/operators.hpp b/cpp/include/cudf/ast/detail/operators.hpp index c483d459833..46507700e21 100644 --- a/cpp/include/cudf/ast/detail/operators.hpp +++ b/cpp/include/cudf/ast/detail/operators.hpp @@ -29,7 +29,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace ast { @@ -1233,4 +1233,4 @@ CUDF_HOST_DEVICE inline cudf::size_type ast_operator_arity(ast_operator op) } // namespace ast -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp index 918271e3e4f..4299ee5f20f 100644 --- a/cpp/include/cudf/ast/expressions.hpp +++ b/cpp/include/cudf/ast/expressions.hpp @@ -23,7 +23,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace ast { /** * @addtogroup expressions @@ -555,4 +555,4 @@ class column_name_reference : public expression { /** @} */ // end of group } // namespace ast -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 22dad11e109..51199bb5792 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -18,13 +18,14 @@ #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup transformation_binaryops @@ -290,6 +291,17 @@ cudf::data_type binary_operation_fixed_point_output_type(binary_operator op, namespace binops { +/** + * @brief Returns true if the binary operator is supported for the given input types. + * + * @param out The output data type + * @param lhs The left-hand cudf::data_type + * @param rhs The right-hand cudf::data_type + * @param op The binary operator + * @return true if the binary operator is supported for the given input types + */ +bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op); + /** * @brief Computes output valid mask for op between a column and a scalar * @@ -305,8 +317,13 @@ std::pair scalar_col_valid_mask_and( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); -namespace compiled { -namespace detail { +} // namespace binops + +/** @} */ // end of group +} // namespace CUDF_EXPORT cudf + +namespace CUDF_EXPORT cudf { +namespace binops::compiled::detail { /** * @brief struct binary operation using `NaN` aware sorting physical element comparators @@ -326,9 +343,5 @@ void apply_sorting_struct_binary_op(mutable_column_view& out, bool is_rhs_scalar, binary_operator op, rmm::cuda_stream_view stream); -} // namespace detail -} // namespace compiled -} // namespace binops - -/** @} */ // end of group -} // namespace cudf +} // namespace binops::compiled::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index 22db25bdc83..5d1d74c3f28 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -36,7 +36,7 @@ * @brief Class definition for cudf::column */ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief A container of nullable device data as a column of elements. 
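The `binops::is_supported_operation` declaration added to `cudf/binaryop.hpp` above makes operator dispatch queryable before any work is launched. A minimal sketch of calling it with the exact signature shown in that hunk (the type and operator choices are illustrative):

```c++
#include <cudf/binaryop.hpp>
#include <cudf/types.hpp>

// Ask libcudf whether INT32 ADD INT32 producing INT64 is supported before
// attempting the corresponding cudf::binary_operation call.
bool int32_add_supported()
{
  auto const out = cudf::data_type{cudf::type_id::INT64};
  auto const in  = cudf::data_type{cudf::type_id::INT32};
  return cudf::binops::is_supported_operation(out, in, in, cudf::binary_operator::ADD);
}
```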
@@ -332,4 +332,4 @@ class column { }; /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 787e9c2c479..89fe59bfeaa 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -44,7 +44,7 @@ * @brief Column device view class definitions */ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief Indicates the presence of nulls at compile-time or runtime. @@ -1527,4 +1527,4 @@ ColumnDeviceView* child_columns_to_device_array(ColumnViewIterator child_begin, } } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index dc4700576e6..c1f295b7ea8 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -27,7 +27,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup column_factories * @{ @@ -571,4 +571,4 @@ std::unique_ptr make_dictionary_from_scalar( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 134e835911f..3ef7bafe727 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,9 @@ #pragma once #include +#include #include +#include #include #include #include @@ -29,8 +31,7 @@ * @file column_view.hpp * @brief column view class definitions */ - -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** * @brief A non-owning, immutable view of device data as a column of elements, @@ -72,7 +73,7 @@ class column_view_base { CUDF_ENABLE_IF(std::is_same_v or is_rep_layout_compatible())> T const* head() const noexcept { - return static_cast(_data); + return static_cast(get_data()); } /** @@ -225,6 +226,17 @@ class column_view_base { [[nodiscard]] size_type offset() const noexcept { return _offset; } protected: + /** + * @brief Returns pointer to the base device memory allocation. + * + * The primary purpose of this function is to allow derived classes to + * override the fundamental properties of memory accesses without needing to + * change all of the different accessors for the underlying pointer. 
+ * + * @return Typed pointer to underlying data + */ + virtual void const* get_data() const noexcept { return _data; } + data_type _type{type_id::EMPTY}; ///< Element type size_type _size{}; ///< Number of elements void const* _data{}; ///< Pointer to device memory containing elements @@ -236,7 +248,7 @@ class column_view_base { ///< Enables zero-copy slicing column_view_base() = default; - ~column_view_base() = default; + virtual ~column_view_base() = default; column_view_base(column_view_base const&) = default; ///< Copy constructor column_view_base(column_view_base&&) = default; ///< Move constructor /** @@ -284,10 +296,6 @@ class column_view_base { size_type offset = 0); }; -class mutable_column_view_base : public column_view_base { - public: - protected: -}; } // namespace detail /** @@ -323,7 +331,7 @@ class column_view : public detail::column_view_base { #ifdef __CUDACC__ #pragma nv_exec_check_disable #endif - ~column_view() = default; + ~column_view() override = default; #ifdef __CUDACC__ #pragma nv_exec_check_disable #endif @@ -447,6 +455,18 @@ class column_view : public detail::column_view_base { return device_span(data(), size()); } + protected: + /** + * @brief Returns pointer to the base device memory allocation. + * + * The primary purpose of this function is to allow derived classes to + * override the fundamental properties of memory accesses without needing to + * change all of the different accessors for the underlying pointer. + * + * @return Typed pointer to underlying data + */ + void const* get_data() const noexcept override; + private: friend column_view bit_cast(column_view const& input, data_type type); @@ -478,7 +498,7 @@ class mutable_column_view : public detail::column_view_base { public: mutable_column_view() = default; - ~mutable_column_view(){ + ~mutable_column_view() override{ // Needed so that the first instance of the implicit destructor for any TU isn't 'constructed' // from a host+device function marking the implicit version also as host+device }; @@ -572,7 +592,7 @@ class mutable_column_view : public detail::column_view_base { } /** - * @brief Return first element (accounting for offset) when underlying data is + * @brief Return first element (accounting for offset) after underlying data is * casted to the specified type. * * This function does not participate in overload resolution if `is_rep_layout_compatible` is @@ -665,6 +685,18 @@ class mutable_column_view : public detail::column_view_base { */ operator column_view() const; + protected: + /** + * @brief Returns pointer to the base device memory allocation. + * + * The primary purpose of this function is to allow derived classes to + * override the fundamental properties of memory accesses without needing to + * change all of the different accessors for the underlying pointer. 
+ * + * @return Typed pointer to underlying data + */ + void const* get_data() const noexcept override; + private: friend mutable_column_view bit_cast(mutable_column_view const& input, data_type type); @@ -765,5 +797,6 @@ std::size_t shallow_hash(column_view const& input); * @return If `lhs` and `rhs` have equivalent shallow state */ bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs); + } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp index e7b55a2e6d0..0935bdf7def 100644 --- a/cpp/include/cudf/concatenate.hpp +++ b/cpp/include/cudf/concatenate.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -25,7 +26,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup copy_concatenate * @{ @@ -97,4 +98,4 @@ std::unique_ptr concatenate( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/contiguous_split.hpp b/cpp/include/cudf/contiguous_split.hpp index 0d4f20d1ef2..195dac25268 100644 --- a/cpp/include/cudf/contiguous_split.hpp +++ b/cpp/include/cudf/contiguous_split.hpp @@ -18,13 +18,14 @@ #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup copy_split @@ -124,8 +125,14 @@ std::vector contiguous_split( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); namespace detail { + +/** + * @brief A helper struct containing the state of contiguous_split, whether the caller + * is using the single-pass contiguous_split or chunked_pack. + * + */ struct contiguous_split_state; -}; +} // namespace detail /** * @brief Perform a chunked "pack" operation of the input `table_view` using a user provided @@ -338,4 +345,4 @@ table_view unpack(packed_columns const& input); table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data); /** @} */ -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index b17cafb05ab..3c44ff48fdf 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup column_copy @@ -913,7 +914,7 @@ bool may_have_nonempty_nulls(column_view const& input); * * @code{.pseudo} * auto const lists = lists_column_wrapper{ {0,1}, {2,3}, {4,5} }.release(); - * cudf::detail::set_null_mask(lists->null_mask(), 1, 2, false); + * cudf::set_null_mask(lists->null_mask(), 1, 2, false); * * lists[1] is now null, but the lists child column still stores `{2,3}`. * The lists column contents will be: @@ -929,7 +930,7 @@ bool may_have_nonempty_nulls(column_view const& input); * * @code{.pseudo} * auto const strings = strings_column_wrapper{ "AB", "CD", "EF" }.release(); - * cudf::detail::set_null_mask(strings->null_mask(), 1, 2, false); + * cudf::set_null_mask(strings->null_mask(), 1, 2, false); * * strings[1] is now null, but the strings column still stores `"CD"`. 
* The lists column contents will be: @@ -972,4 +973,4 @@ std::unique_ptr purge_nonempty_nulls( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 06b7d24f6cd..f7bed8bdc7e 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -28,7 +29,7 @@ * @brief DateTime column APIs. */ -namespace cudf { +namespace CUDF_EXPORT cudf { namespace datetime { /** * @addtogroup datetime_extract @@ -401,4 +402,4 @@ std::unique_ptr round_datetimes( /** @} */ // end of group } // namespace datetime -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 843414817e3..b257eef1e9e 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -26,7 +26,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { // Visitor pattern @@ -1674,4 +1674,4 @@ constexpr inline bool is_valid_aggregation() bool is_valid_aggregation(data_type source, aggregation::Kind k); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/aggregation/result_cache.hpp b/cpp/include/cudf/detail/aggregation/result_cache.hpp index 41eec156c47..ec5a511bb7c 100644 --- a/cpp/include/cudf/detail/aggregation/result_cache.hpp +++ b/cpp/include/cudf/detail/aggregation/result_cache.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { struct pair_column_aggregation_equal_to { bool operator()(std::pair const& lhs, @@ -66,4 +66,4 @@ class result_cache { }; } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp index de1fde8bc96..fe739327a08 100644 --- a/cpp/include/cudf/detail/binaryop.hpp +++ b/cpp/include/cudf/detail/binaryop.hpp @@ -17,11 +17,12 @@ #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { //! Inner interfaces and implementations namespace detail { @@ -77,4 +78,4 @@ std::unique_ptr binary_operation(column_view const& lhs, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/concatenate.hpp b/cpp/include/cudf/detail/concatenate.hpp index 3e039175542..1be269710b2 100644 --- a/cpp/include/cudf/detail/concatenate.hpp +++ b/cpp/include/cudf/detail/concatenate.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -26,7 +27,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { //! Inner interfaces and implementations namespace detail { /** @@ -48,4 +49,4 @@ std::unique_ptr
concatenate(host_span tables_to_concat, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/concatenate_masks.hpp b/cpp/include/cudf/detail/concatenate_masks.hpp index dd2fb471a7d..fc829361fde 100644 --- a/cpp/include/cudf/detail/concatenate_masks.hpp +++ b/cpp/include/cudf/detail/concatenate_masks.hpp @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -24,7 +25,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { //! Inner interfaces and implementations namespace detail { @@ -69,4 +70,4 @@ rmm::device_buffer concatenate_masks(host_span views, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/contiguous_split.hpp b/cpp/include/cudf/detail/contiguous_split.hpp index 1467ed1aa67..52c51daa917 100644 --- a/cpp/include/cudf/detail/contiguous_split.hpp +++ b/cpp/include/cudf/detail/contiguous_split.hpp @@ -23,7 +23,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -125,4 +125,4 @@ std::vector pack_metadata(table_view const& table, metadata_builder& builder); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp index f7430eb090d..2be432c0825 100644 --- a/cpp/include/cudf/detail/copy.hpp +++ b/cpp/include/cudf/detail/copy.hpp @@ -28,7 +28,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** * @brief Constructs a zero-copy `column_view`/`mutable_column_view` of the @@ -280,4 +280,4 @@ std::unique_ptr purge_nonempty_nulls(column_view const& input, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index a93c06d4371..95469de8ae6 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -23,7 +23,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace datetime { namespace detail { /** @@ -174,4 +174,4 @@ std::unique_ptr extract_quarter(cudf::column_view const& column, } // namespace detail } // namespace datetime -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/fill.hpp b/cpp/include/cudf/detail/fill.hpp index 6996cda6974..82c6af8b611 100644 --- a/cpp/include/cudf/detail/fill.hpp +++ b/cpp/include/cudf/detail/fill.hpp @@ -25,7 +25,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -52,4 +52,4 @@ std::unique_ptr fill(column_view const& input, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 5977c7341c1..41f5494f78f 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -518,7 +518,7 @@ struct column_gatherer_impl { * Positive indices are unchanged by this transformation. 
*/ template -struct index_converter : public thrust::unary_function { +struct index_converter { index_converter(size_type n_rows) : n_rows(n_rows) {} __device__ map_type operator()(map_type in) const { return ((in % n_rows) + n_rows) % n_rows; } @@ -571,13 +571,13 @@ void gather_bitmask(table_view const& source, not target[i]->nullable()) { auto const state = op == gather_bitmask_op::PASSTHROUGH ? mask_state::ALL_VALID : mask_state::UNINITIALIZED; - auto mask = detail::create_null_mask(target[i]->size(), state, stream, mr); + auto mask = cudf::create_null_mask(target[i]->size(), state, stream, mr); target[i]->set_null_mask(std::move(mask), 0); } } // Make device array of target bitmask pointers - std::vector target_masks(target.size()); + auto target_masks = make_host_vector(target.size(), stream); std::transform(target.begin(), target.end(), target_masks.begin(), [](auto const& col) { return col->mutable_view().null_mask(); }); diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp index 36824f56895..39cd43934e3 100644 --- a/cpp/include/cudf/detail/gather.hpp +++ b/cpp/include/cudf/detail/gather.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { @@ -84,4 +85,4 @@ std::unique_ptr
gather(table_view const& source_table, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/groupby.hpp b/cpp/include/cudf/detail/groupby.hpp index 5a8c9b0a27f..36eae05ce39 100644 --- a/cpp/include/cudf/detail/groupby.hpp +++ b/cpp/include/cudf/detail/groupby.hpp @@ -25,10 +25,8 @@ #include #include -namespace cudf { -namespace groupby { -namespace detail { -namespace hash { +namespace CUDF_EXPORT cudf { +namespace groupby::detail::hash { /** * @brief Indicates if a set of aggregation requests can be satisfied with a * hash-based groupby implementation. @@ -47,8 +45,5 @@ std::pair, std::vector> groupby( null_policy include_null_keys, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace hash - -} // namespace detail -} // namespace groupby -} // namespace cudf +} // namespace groupby::detail::hash +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp index 389c7952875..c0910b4d5ae 100644 --- a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp +++ b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp @@ -24,7 +24,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace groupby { namespace detail { @@ -45,4 +45,4 @@ std::unique_ptr group_replace_nulls(cudf::column_view const& grouped_val } // namespace detail } // namespace groupby -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp index 567efedb9b2..a411a890622 100644 --- a/cpp/include/cudf/detail/groupby/sort_helper.hpp +++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp @@ -25,10 +25,8 @@ #include #include -namespace cudf { -namespace groupby { -namespace detail { -namespace sort { +namespace CUDF_EXPORT cudf { +namespace groupby::detail::sort { /** * @brief Helper class for computing sort-based groupby * @@ -229,7 +227,5 @@ struct sort_groupby_helper { std::vector _null_precedence; ///< How to sort NULLs }; -} // namespace sort -} // namespace detail -} // namespace groupby -} // namespace cudf +} // namespace groupby::detail::sort +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 5b2b9b5e69d..0b9319ba663 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -34,12 +34,13 @@ #include #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -156,4 +157,4 @@ constexpr std::size_t max_precision() } } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/is_element_valid.hpp b/cpp/include/cudf/detail/is_element_valid.hpp index 72a85d42eb3..4b74d12f306 100644 --- a/cpp/include/cudf/detail/is_element_valid.hpp +++ b/cpp/include/cudf/detail/is_element_valid.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
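A note on the recurring change in these hunks: `namespace cudf {` becomes `namespace CUDF_EXPORT cudf {` so that declarations in the namespace keep default visibility even when libcudf is compiled with hidden symbol visibility. C++17 permits an attribute in a namespace definition, which is what makes this one-macro approach work. A minimal sketch of what such a macro could look like, assuming a GCC/Clang toolchain (the real definition lives in cudf/utilities/export.hpp and may differ):

    // Hypothetical reconstruction of an export macro like CUDF_EXPORT.
    #if defined(__GNUC__) || defined(__clang__)
    #define CUDF_EXPORT __attribute__((visibility("default")))
    #define CUDF_HIDDEN __attribute__((visibility("hidden")))
    #else
    #define CUDF_EXPORT
    #define CUDF_HIDDEN
    #endif

    // The attribute applies to the namespace as a whole, so symbols declared
    // inside remain visible under -fvisibility=hidden.
    namespace CUDF_EXPORT cudf {
    namespace detail {
    void usable_from_tests();  // exported despite living in a detail namespace
    }  // namespace detail
    }  // namespace CUDF_EXPORT cudf

The per-function `CUDF_EXPORT` markers added to some detail declarations below presumably serve the same purpose for individual symbols that tests and other components link against directly.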
@@ -18,10 +18,11 @@ #include #include +#include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -44,4 +45,4 @@ bool is_element_valid_sync(column_view const& col_view, rmm::cuda_stream_view stream); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index aabfff746ea..ff7da4462a2 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -34,15 +34,12 @@ // Forward declaration namespace cudf::experimental::row::equality { -class preprocessed_table; +class CUDF_EXPORT preprocessed_table; } -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { -// Forward declaration -class cuco_allocator; - constexpr int DEFAULT_JOIN_CG_SIZE = 2; enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN }; @@ -188,4 +185,4 @@ struct hash_join { rmm::device_async_resource_ref mr) const; }; } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/label_bins.hpp b/cpp/include/cudf/detail/label_bins.hpp index 9f6dcce448d..92a417b0132 100644 --- a/cpp/include/cudf/detail/label_bins.hpp +++ b/cpp/include/cudf/detail/label_bins.hpp @@ -27,7 +27,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { @@ -55,4 +55,4 @@ std::unique_ptr label_bins(column_view const& input, /** @} */ // end of group } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/merge.hpp b/cpp/include/cudf/detail/merge.hpp index 56ac0554403..72e34b76158 100644 --- a/cpp/include/cudf/detail/merge.hpp +++ b/cpp/include/cudf/detail/merge.hpp @@ -16,12 +16,14 @@ #pragma once +#include + #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -59,4 +61,4 @@ std::unique_ptr merge(std::vector const& tables_to_merg rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index e62675cbc8c..ae6db5409cc 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -430,7 +430,9 @@ std::vector segmented_count_bits(bitmask_type const* bitmask, if (num_segments == 0) { return std::vector{}; } // Construct a contiguous host buffer of indices and copy to device. - auto const h_indices = std::vector(indices_begin, indices_end); + auto h_indices = make_empty_host_vector::value_type>( + std::distance(indices_begin, indices_end), stream); + std::copy(indices_begin, indices_end, std::back_inserter(h_indices)); auto const d_indices = make_device_uvector_async(h_indices, stream, rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 04d8d663acb..67e3617d873 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -25,7 +25,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -63,6 +63,7 @@ void set_null_mask(bitmask_type* bitmask, * @param stream CUDA stream used for device memory operations and kernel launches. * @return The number of non-zero bits in the specified range. 
*/ +CUDF_EXPORT cudf::size_type count_set_bits(bitmask_type const* bitmask, size_type start, size_type stop, @@ -82,6 +83,7 @@ cudf::size_type count_set_bits(bitmask_type const* bitmask, * @param stream CUDA stream used for device memory operations and kernel launches. * @return The number of zero bits in the specified range. */ +CUDF_EXPORT cudf::size_type count_unset_bits(bitmask_type const* bitmask, size_type start, size_type stop, @@ -100,6 +102,7 @@ cudf::size_type count_unset_bits(bitmask_type const* bitmask, * @param[in] stream CUDA stream used for device memory operations and kernel launches. * @return A vector storing the number of non-zero bits in the specified ranges. */ +CUDF_EXPORT std::vector segmented_count_set_bits(bitmask_type const* bitmask, host_span indices, rmm::cuda_stream_view stream); @@ -117,6 +120,7 @@ std::vector segmented_count_set_bits(bitmask_type const* bitmask, * @param[in] stream CUDA stream used for device memory operations and kernel launches. * @return A vector storing the number of zero bits in the specified ranges. */ +CUDF_EXPORT std::vector segmented_count_unset_bits(bitmask_type const* bitmask, host_span indices, rmm::cuda_stream_view stream); @@ -137,6 +141,7 @@ std::vector segmented_count_unset_bits(bitmask_type const* bitmask, * @param[in] stream CUDA stream used for device memory operations and kernel launches. * @return The number of valid elements in the specified range. */ +CUDF_EXPORT cudf::size_type valid_count(bitmask_type const* bitmask, size_type start, size_type stop, @@ -169,6 +174,7 @@ cudf::size_type null_count(bitmask_type const* bitmask, * @param[in] stream CUDA stream used for device memory operations and kernel launches. * @return A vector storing the number of valid elements in each specified range. */ +CUDF_EXPORT std::vector segmented_valid_count(bitmask_type const* bitmask, host_span indices, rmm::cuda_stream_view stream); @@ -189,6 +195,7 @@ std::vector segmented_valid_count(bitmask_type const* bitmask, * @param[in] stream CUDA stream used for device memory operations and kernel launches. * @return A vector storing the number of null elements in each specified range. 
*/ +CUDF_EXPORT std::vector segmented_null_count(bitmask_type const* bitmask, host_span indices, rmm::cuda_stream_view stream); @@ -220,6 +227,7 @@ rmm::device_buffer copy_bitmask(column_view const& view, * * @param stream CUDA stream used for device memory operations and kernel launches */ +CUDF_EXPORT std::pair bitmask_and(host_span masks, host_span masks_begin_bits, size_type mask_size_bits, @@ -279,4 +287,4 @@ void set_all_valid_null_masks(column_view const& input, } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/quantiles.hpp b/cpp/include/cudf/detail/quantiles.hpp index 6c188d2ca68..23d5fb73ba3 100644 --- a/cpp/include/cudf/detail/quantiles.hpp +++ b/cpp/include/cudf/detail/quantiles.hpp @@ -18,11 +18,12 @@ #include #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -64,4 +65,4 @@ std::unique_ptr percentile_approx(tdigest::tdigest_column_view const& in rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/repeat.hpp b/cpp/include/cudf/detail/repeat.hpp index abb9e45a95c..e17f1b7c5fd 100644 --- a/cpp/include/cudf/detail/repeat.hpp +++ b/cpp/include/cudf/detail/repeat.hpp @@ -24,7 +24,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -51,4 +51,4 @@ std::unique_ptr
repeat(table_view const& input_table, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/replace.hpp b/cpp/include/cudf/detail/replace.hpp index 46203bdf2f0..e2bd729861b 100644 --- a/cpp/include/cudf/detail/replace.hpp +++ b/cpp/include/cudf/detail/replace.hpp @@ -24,7 +24,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** * @copydoc cudf::replace_nulls(column_view const&, column_view const&, @@ -102,4 +102,4 @@ std::unique_ptr normalize_nans_and_zeros(column_view const& input, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp index 7a1c3d6c4f0..68a856373bf 100644 --- a/cpp/include/cudf/detail/reshape.hpp +++ b/cpp/include/cudf/detail/reshape.hpp @@ -24,12 +24,10 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** * @copydoc cudf::tile - * - * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr
tile(table_view const& input, size_type count, @@ -38,12 +36,10 @@ std::unique_ptr
tile(table_view const& input, /** * @copydoc cudf::interleave_columns - * - * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr interleave_columns(table_view const& input, rmm::cuda_stream_view, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/rolling.hpp b/cpp/include/cudf/detail/rolling.hpp index ea6f38c421c..5bfa5679531 100644 --- a/cpp/include/cudf/detail/rolling.hpp +++ b/cpp/include/cudf/detail/rolling.hpp @@ -26,7 +26,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -49,4 +49,4 @@ std::unique_ptr rolling_window(column_view const& input, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/round.hpp b/cpp/include/cudf/detail/round.hpp index 1a9c5c82c65..ba3ef1c1ce7 100644 --- a/cpp/include/cudf/detail/round.hpp +++ b/cpp/include/cudf/detail/round.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { //! Inner interfaces and implementations namespace detail { @@ -39,4 +39,4 @@ std::unique_ptr round(column_view const& input, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp index 54c25d0157c..bd60309c5c3 100644 --- a/cpp/include/cudf/detail/scan.hpp +++ b/cpp/include/cudf/detail/scan.hpp @@ -17,11 +17,12 @@ #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -73,6 +74,7 @@ std::unique_ptr scan_exclusive(column_view const& input, * @param mr Device memory resource used to allocate the returned scalar's device memory. * @returns Column with scan results. */ +CUDF_EXPORT std::unique_ptr scan_inclusive(column_view const& input, scan_aggregation const& agg, null_policy null_handling, @@ -99,6 +101,7 @@ std::unique_ptr inclusive_rank_scan(column_view const& order_by, * @param mr Device memory resource used to allocate the returned column's device memory. * @return rank values. */ +CUDF_EXPORT std::unique_ptr inclusive_dense_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); @@ -117,4 +120,4 @@ std::unique_ptr inclusive_one_normalized_percent_rank_scan( column_view const& order_by, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp index 95ed6af8c3c..6691ddc5c09 100644 --- a/cpp/include/cudf/detail/scatter.hpp +++ b/cpp/include/cudf/detail/scatter.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -26,7 +27,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** * @brief Scatters the rows of the source table into a copy of the target table @@ -144,4 +145,4 @@ std::unique_ptr
boolean_mask_scatter( rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp index e60b18f4c8d..72e2cf074bc 100644 --- a/cpp/include/cudf/detail/search.hpp +++ b/cpp/include/cudf/detail/search.hpp @@ -25,7 +25,9 @@ #include #include -namespace cudf::detail { +namespace CUDF_EXPORT cudf { +namespace detail { + /** * @copydoc cudf::lower_bound * @@ -92,6 +94,7 @@ std::unique_ptr contains(column_view const& haystack, * @param mr Device memory resource used to allocate the returned vector * @return A vector of bools indicating if each row in `needles` has matching rows in `haystack` */ +CUDF_EXPORT rmm::device_uvector contains(table_view const& haystack, table_view const& needles, null_equality compare_nulls, @@ -99,4 +102,5 @@ rmm::device_uvector contains(table_view const& haystack, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace cudf::detail +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/sequence.hpp b/cpp/include/cudf/detail/sequence.hpp index a18a9d3b200..a08010a610f 100644 --- a/cpp/include/cudf/detail/sequence.hpp +++ b/cpp/include/cudf/detail/sequence.hpp @@ -23,7 +23,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** * @copydoc cudf::sequence(size_type size, scalar const& init, scalar const& step, @@ -65,4 +65,4 @@ std::unique_ptr calendrical_month_sequence(size_type size, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index 4ddba38a7e9..08cf329f199 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ b/cpp/include/cudf/detail/sorting.hpp @@ -26,7 +26,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -163,4 +163,4 @@ std::unique_ptr
stable_sort(table_view const& values, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index e3ef4190fd2..05194148a70 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -25,7 +25,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** * @copydoc cudf::drop_nulls(table_view const&, std::vector const&, @@ -148,4 +148,4 @@ cudf::size_type distinct_count(table_view const& input, rmm::cuda_stream_view stream); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index beedc009c84..7de68035b19 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -25,9 +25,8 @@ #include #include -#include - -namespace cudf::structs::detail { +namespace CUDF_EXPORT cudf { +namespace structs::detail { enum class column_nullability { MATCH_INCOMING, ///< generate a null column if the incoming column has nulls @@ -268,4 +267,5 @@ class flattened_table { */ bool contains_null_structs(column_view const& col); -} // namespace cudf::structs::detail +} // namespace structs::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/tdigest/tdigest.hpp b/cpp/include/cudf/detail/tdigest/tdigest.hpp index bfd12c18fff..10eb3d389c7 100644 --- a/cpp/include/cudf/detail/tdigest/tdigest.hpp +++ b/cpp/include/cudf/detail/tdigest/tdigest.hpp @@ -18,14 +18,14 @@ #include #include +#include #include #include #include -namespace cudf { -namespace tdigest { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace tdigest::detail { /** * @brief Generate a tdigest column from a grouped, sorted set of numeric input values. @@ -152,6 +152,7 @@ std::unique_ptr make_tdigest_column(size_type num_rows, * * @returns An empty tdigest column. */ +CUDF_EXPORT std::unique_ptr make_empty_tdigest_column(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); @@ -236,6 +237,5 @@ std::unique_ptr reduce_merge_tdigest(column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace tdigest -} // namespace cudf +} // namespace tdigest::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/timezone.hpp b/cpp/include/cudf/detail/timezone.hpp index 037164aa297..c7798ff60ed 100644 --- a/cpp/include/cudf/detail/timezone.hpp +++ b/cpp/include/cudf/detail/timezone.hpp @@ -16,11 +16,13 @@ #pragma once #include +#include #include #include -namespace cudf::detail { +namespace CUDF_EXPORT cudf { +namespace detail { /** * @copydoc cudf::make_timezone_transition_table(std::optional, std::string_view, @@ -34,4 +36,5 @@ std::unique_ptr
make_timezone_transition_table( rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); -} // namespace cudf::detail +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp index 47e13fa2e5e..02849ef023c 100644 --- a/cpp/include/cudf/detail/transform.hpp +++ b/cpp/include/cudf/detail/transform.hpp @@ -19,11 +19,12 @@ #include #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** * @copydoc cudf::transform @@ -112,4 +113,4 @@ std::unique_ptr segmented_row_bit_count(table_view const& t, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/transpose.hpp b/cpp/include/cudf/detail/transpose.hpp index 1f8effc8103..559b2c32996 100644 --- a/cpp/include/cudf/detail/transpose.hpp +++ b/cpp/include/cudf/detail/transpose.hpp @@ -18,11 +18,12 @@ #include #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** * @copydoc cudf::transpose @@ -34,4 +35,4 @@ std::pair, table_view> transpose(table_view const& input rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp index 5245cfdf079..bb05138bc8c 100644 --- a/cpp/include/cudf/detail/unary.hpp +++ b/cpp/include/cudf/detail/unary.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -26,7 +27,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** * @brief Creates a column of `type_id::BOOL8` elements by applying a predicate to every element @@ -101,4 +102,4 @@ std::unique_ptr is_not_nan(cudf::column_view const& input, rmm::device_async_resource_ref mr); } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/utilities/alignment.hpp b/cpp/include/cudf/detail/utilities/alignment.hpp index e52032fe104..2677eca34db 100644 --- a/cpp/include/cudf/detail/utilities/alignment.hpp +++ b/cpp/include/cudf/detail/utilities/alignment.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
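The detail headers in this patch share one convention worth spelling out: each `@copydoc`'d detail function mirrors a public API but takes `rmm::cuda_stream_view stream` (and usually `rmm::device_async_resource_ref mr`) as explicit, required parameters, while the public overload defaults them. A hedged sketch of the pattern, using `cudf::round` purely as an illustration; the forwarding body is assumed, not copied from the sources:

    namespace CUDF_EXPORT cudf {
    namespace detail {
    // Detail overload: stream and mr are explicit so internal callers can
    // compose work on their own streams.
    std::unique_ptr<column> round(column_view const& input,
                                  int32_t decimal_places,
                                  rounding_method method,
                                  rmm::cuda_stream_view stream,
                                  rmm::device_async_resource_ref mr);
    }  // namespace detail

    // Public overload: documented once, defaults filled in, thin forwarder.
    std::unique_ptr<column> round(column_view const& input,
                                  int32_t decimal_places,
                                  rounding_method method,
                                  rmm::device_async_resource_ref mr)
    {
      return detail::round(input, decimal_places, method,
                           cudf::get_default_stream(), mr);
    }
    }  // namespace CUDF_EXPORT cudf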
@@ -18,7 +18,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -43,4 +43,4 @@ T* align_ptr_for_type(void* destination) } } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/utilities/cuda_memcpy.hpp b/cpp/include/cudf/detail/utilities/cuda_memcpy.hpp index b66c461ab12..632d5a732ec 100644 --- a/cpp/include/cudf/detail/utilities/cuda_memcpy.hpp +++ b/cpp/include/cudf/detail/utilities/cuda_memcpy.hpp @@ -16,9 +16,12 @@ #pragma once +#include + #include -namespace cudf::detail { +namespace CUDF_EXPORT cudf { +namespace detail { enum class host_memory_kind : uint8_t { PINNED, PAGEABLE }; @@ -50,4 +53,5 @@ void cuda_memcpy_async( void cuda_memcpy( void* dst, void const* src, size_t size, host_memory_kind kind, rmm::cuda_stream_view stream); -} // namespace cudf::detail +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/utilities/default_stream.hpp b/cpp/include/cudf/detail/utilities/default_stream.hpp index fa438f142b7..f988355e6e0 100644 --- a/cpp/include/cudf/detail/utilities/default_stream.hpp +++ b/cpp/include/cudf/detail/utilities/default_stream.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,12 @@ #pragma once +#include + #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { @@ -33,4 +35,4 @@ extern rmm::cuda_stream_view const default_stream_value; } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/utilities/host_memory.hpp b/cpp/include/cudf/detail/utilities/host_memory.hpp new file mode 100644 index 00000000000..c6775a950c9 --- /dev/null +++ b/cpp/include/cudf/detail/utilities/host_memory.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +#include + +namespace cudf::detail { +/** + * @brief Get the memory resource to be used for pageable memory allocations. + * + * @return Reference to the pageable memory resource + */ +CUDF_EXPORT rmm::host_async_resource_ref get_pageable_memory_resource(); + +/** + * @brief Get the allocator to be used for the host memory allocation. 
+ * + * @param size The number of elements of type T to allocate + * @param stream The stream to use for the allocation + * @return The allocator to be used for the host memory allocation + */ +template +rmm_host_allocator get_host_allocator(std::size_t size, rmm::cuda_stream_view stream) +{ + if (size * sizeof(T) <= get_allocate_host_as_pinned_threshold()) { + return {get_pinned_memory_resource(), stream}; + } + return {get_pageable_memory_resource(), stream}; +} + +} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp index 2d14d0306cd..d4dd7b0d626 100644 --- a/cpp/include/cudf/detail/utilities/host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -28,7 +29,8 @@ #include #include // for bad_alloc -namespace cudf::detail { +namespace CUDF_EXPORT cudf { +namespace detail { /*! \p rmm_host_allocator is a CUDA-specific host memory allocator * that employs \c a `rmm::host_async_resource_ref` for allocation. @@ -61,6 +63,10 @@ class rmm_host_allocator { }; }; +template +inline constexpr bool contains_property = + (cuda::std::is_same_v || ... || false); + /*! \p rmm_host_allocator is a CUDA-specific host memory allocator * that employs \c `rmm::host_async_resource_ref` for allocation. * @@ -100,8 +106,12 @@ class rmm_host_allocator { /** * @brief Construct from a `cudf::host_async_resource_ref` */ - rmm_host_allocator(rmm::host_async_resource_ref _mr, rmm::cuda_stream_view _stream) - : mr(_mr), stream(_stream) + template + rmm_host_allocator(cuda::mr::async_resource_ref _mr, + rmm::cuda_stream_view _stream) + : mr(_mr), + stream(_stream), + _is_device_accessible{contains_property} { } @@ -173,15 +183,26 @@ class rmm_host_allocator { */ inline bool operator!=(rmm_host_allocator const& x) const { return !operator==(x); } + bool is_device_accessible() const { return _is_device_accessible; } + private: rmm::host_async_resource_ref mr; rmm::cuda_stream_view stream; + bool _is_device_accessible; }; /** * @brief A vector class with rmm host memory allocator */ template -using host_vector = thrust::host_vector>; +class host_vector : public thrust::host_vector> { + public: + using base = thrust::host_vector>; + + host_vector(rmm_host_allocator const& alloc) : base(alloc) {} + + host_vector(size_t size, rmm_host_allocator const& alloc) : base(size, alloc) {} +}; -} // namespace cudf::detail +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/utilities/linked_column.hpp b/cpp/include/cudf/detail/utilities/linked_column.hpp index 0feef0f1a44..0b388938754 100644 --- a/cpp/include/cudf/detail/utilities/linked_column.hpp +++ b/cpp/include/cudf/detail/utilities/linked_column.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,11 +18,13 @@ #include #include +#include #include #include -namespace cudf::detail { +namespace CUDF_EXPORT cudf { +namespace detail { struct linked_column_view; @@ -68,4 +70,5 @@ struct linked_column_view : public column_view_base { */ LinkedColVector table_to_linked_columns(table_view const& table); -} // namespace cudf::detail +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/utilities/stacktrace.hpp b/cpp/include/cudf/detail/utilities/stacktrace.hpp index c3ec9ce7a52..f54f5f3579a 100644 --- a/cpp/include/cudf/detail/utilities/stacktrace.hpp +++ b/cpp/include/cudf/detail/utilities/stacktrace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,9 +16,12 @@ #pragma once +#include + #include -namespace cudf::detail { +namespace CUDF_EXPORT cudf { +namespace detail { /** * @addtogroup utility_stacktrace * @{ @@ -44,4 +47,5 @@ std::string get_stacktrace(capture_last_stackframe capture_last_frame); /** @} */ // end of group -} // namespace cudf::detail +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/utilities/stream_pool.hpp b/cpp/include/cudf/detail/utilities/stream_pool.hpp index 64c1d4ae514..dfe028bc5b7 100644 --- a/cpp/include/cudf/detail/utilities/stream_pool.hpp +++ b/cpp/include/cudf/detail/utilities/stream_pool.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -23,7 +24,8 @@ #include #include -namespace cudf::detail { +namespace CUDF_EXPORT cudf { +namespace detail { class cuda_stream_pool { public: @@ -122,4 +124,5 @@ cuda_stream_pool& global_cuda_stream_pool(); */ void join_streams(host_span streams, rmm::cuda_stream_view stream); -} // namespace cudf::detail +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 20cb55bb1c7..a9d91cdeee1 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -21,9 +21,12 @@ * @file vector_factories.hpp */ +#include +#include #include #include #include +#include #include #include @@ -32,11 +35,9 @@ #include #include -#include - #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -100,11 +101,12 @@ rmm::device_uvector make_device_uvector_async(host_span source_data, rmm::device_async_resource_ref mr) { rmm::device_uvector ret(source_data.size(), stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync(ret.data(), - source_data.data(), - source_data.size() * sizeof(T), - cudaMemcpyDefault, - stream.value())); + auto const is_pinned = source_data.is_device_accessible(); + cuda_memcpy_async(ret.data(), + source_data.data(), + source_data.size() * sizeof(T), + is_pinned ? 
host_memory_kind::PINNED : host_memory_kind::PAGEABLE, + stream); return ret; } @@ -271,21 +273,11 @@ rmm::device_uvector make_device_uvector_sync( return make_device_uvector_sync(device_span{c}, stream, mr); } -// Utility function template to allow copying to either a thrust::host_vector or std::vector -template -OutContainer make_vector_async(device_span v, rmm::cuda_stream_view stream) -{ - OutContainer result(v.size()); - CUDF_CUDA_TRY(cudaMemcpyAsync( - result.data(), v.data(), v.size() * sizeof(T), cudaMemcpyDefault, stream.value())); - return result; -} - /** * @brief Asynchronously construct a `std::vector` containing a copy of data from a * `device_span` * - * @note This function does not synchronize `stream`. + * @note This function does not synchronize `stream` after the copy. * * @tparam T The type of the data to copy * @param source_data The device data to copy @@ -295,14 +287,17 @@ OutContainer make_vector_async(device_span v, rmm::cuda_stream_view str template std::vector make_std_vector_async(device_span v, rmm::cuda_stream_view stream) { - return make_vector_async>(v, stream); + std::vector result(v.size()); + CUDF_CUDA_TRY(cudaMemcpyAsync( + result.data(), v.data(), v.size() * sizeof(T), cudaMemcpyDefault, stream.value())); + return result; } /** * @brief Asynchronously construct a `std::vector` containing a copy of data from a device * container * - * @note This function synchronizes `stream`. + * @note This function synchronizes `stream` after the copy. * * @tparam Container The type of the container to copy from * @tparam T The type of the data to copy @@ -324,7 +319,7 @@ std::vector make_std_vector_async(Container cons * @brief Synchronously construct a `std::vector` containing a copy of data from a * `device_span` * - * @note This function does a synchronize on `stream`. + * @note This function does a synchronize on `stream` after the copy. * * @tparam T The type of the data to copy * @param source_data The device data to copy @@ -361,11 +356,46 @@ std::vector make_std_vector_sync(Container const return make_std_vector_sync(device_span{c}, stream); } +/** + * @brief Construct a `cudf::detail::host_vector` of the given size. + * + * @note The returned vector may be using a pinned memory resource. + * + * @tparam T The type of the vector data + * @param size The number of elements in the created vector + * @param stream The stream on which to allocate memory + * @return A host_vector of the given size + */ +template +host_vector make_host_vector(size_t size, rmm::cuda_stream_view stream) +{ + return host_vector(size, get_host_allocator(size, stream)); +} + +/** + * @brief Construct an empty `cudf::detail::host_vector` with the given capacity. + * + * @note The returned vector may be using a pinned memory resource. + * + * @tparam T The type of the vector data + * @param capacity Initial capacity of the vector + * @param stream The stream on which to allocate memory + * @return A host_vector with the given capacity + */ +template +host_vector make_empty_host_vector(size_t capacity, rmm::cuda_stream_view stream) +{ + auto result = host_vector(get_host_allocator(capacity, stream)); + result.reserve(capacity); + return result; +} + /** * @brief Asynchronously construct a `thrust::host_vector` containing a copy of data from a * `device_span` * - * @note This function does not synchronize `stream`. + * @note This function does not synchronize `stream` after the copy. The returned vector may be + * using a pinned memory resource. 
* * @tparam T The type of the data to copy * @param source_data The device data to copy @@ -373,16 +403,24 @@ std::vector make_std_vector_sync(Container const * @return The data copied to the host */ template -thrust::host_vector make_host_vector_async(device_span v, rmm::cuda_stream_view stream) +host_vector make_host_vector_async(device_span v, rmm::cuda_stream_view stream) { - return make_vector_async>(v, stream); + auto result = make_host_vector(v.size(), stream); + auto const is_pinned = result.get_allocator().is_device_accessible(); + cuda_memcpy_async(result.data(), + v.data(), + v.size() * sizeof(T), + is_pinned ? host_memory_kind::PINNED : host_memory_kind::PAGEABLE, + stream); + return result; } /** * @brief Asynchronously construct a `std::vector` containing a copy of data from a device * container * - * @note This function does not synchronize `stream`. + * @note This function does not synchronize `stream` after the copy. The returned vector may be + * using a pinned memory resource. * * @tparam Container The type of the container to copy from * @tparam T The type of the data to copy @@ -394,8 +432,8 @@ template < typename Container, std::enable_if_t< std::is_convertible_v>>* = nullptr> -thrust::host_vector make_host_vector_async( - Container const& c, rmm::cuda_stream_view stream) +host_vector make_host_vector_async(Container const& c, + rmm::cuda_stream_view stream) { return make_host_vector_async(device_span{c}, stream); } @@ -404,7 +442,8 @@ thrust::host_vector make_host_vector_async( * @brief Synchronously construct a `thrust::host_vector` containing a copy of data from a * `device_span` * - * @note This function does a synchronize on `stream`. + * @note This function does a synchronize on `stream` after the copy. The returned vector may be + * using a pinned memory resource. * * @tparam T The type of the data to copy * @param source_data The device data to copy @@ -412,7 +451,7 @@ thrust::host_vector make_host_vector_async( * @return The data copied to the host */ template -thrust::host_vector make_host_vector_sync(device_span v, rmm::cuda_stream_view stream) +host_vector make_host_vector_sync(device_span v, rmm::cuda_stream_view stream) { auto result = make_host_vector_async(v, stream); stream.synchronize(); @@ -423,7 +462,7 @@ thrust::host_vector make_host_vector_sync(device_span v, rmm::cuda_s * @brief Synchronously construct a `thrust::host_vector` containing a copy of data from a device * container * - * @note This function synchronizes `stream`. + * @note This function synchronizes `stream` after the copy. * * @tparam Container The type of the container to copy from * @tparam T The type of the data to copy @@ -435,8 +474,8 @@ template < typename Container, std::enable_if_t< std::is_convertible_v>>* = nullptr> -thrust::host_vector make_host_vector_sync( - Container const& c, rmm::cuda_stream_view stream) +host_vector make_host_vector_sync(Container const& c, + rmm::cuda_stream_view stream) { return make_host_vector_sync(device_span{c}, stream); } @@ -444,7 +483,7 @@ thrust::host_vector make_host_vector_sync( /** * @brief Asynchronously construct a pinned `cudf::detail::host_vector` of the given size * - * @note This function may not synchronize `stream`. + * @note This function may not synchronize `stream` after the copy. 
* * @tparam T The type of the vector data * @param size The number of elements in the created vector @@ -460,7 +499,7 @@ host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view strea /** * @brief Synchronously construct a pinned `cudf::detail::host_vector` of the given size * - * @note This function synchronizes `stream`. + * @note This function synchronizes `stream` after the copy. * * @tparam T The type of the vector data * @param size The number of elements in the created vector @@ -477,4 +516,4 @@ host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index 64a3c4edf78..56a2c76b741 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -97,7 +97,7 @@ std::pair valid_if(InputIterator begin, size_type size = thrust::distance(begin, end); - auto null_mask = detail::create_null_mask(size, mask_state::UNINITIALIZED, stream, mr); + auto null_mask = cudf::create_null_mask(size, mask_state::UNINITIALIZED, stream, mr); size_type null_count{0}; if (size > 0) { diff --git a/cpp/include/cudf/dictionary/detail/concatenate.hpp b/cpp/include/cudf/dictionary/detail/concatenate.hpp index 55f3825b3ec..0eb17aa06f4 100644 --- a/cpp/include/cudf/dictionary/detail/concatenate.hpp +++ b/cpp/include/cudf/dictionary/detail/concatenate.hpp @@ -23,9 +23,8 @@ #include #include -namespace cudf { -namespace dictionary { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace dictionary::detail { /** * @brief Returns a single column by vertically concatenating the given vector of * dictionary columns. @@ -42,6 +41,5 @@ std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace dictionary -} // namespace cudf +} // namespace dictionary::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/dictionary/detail/encode.hpp b/cpp/include/cudf/dictionary/detail/encode.hpp index 3b5a3bbab56..cc7ffbd397f 100644 --- a/cpp/include/cudf/dictionary/detail/encode.hpp +++ b/cpp/include/cudf/dictionary/detail/encode.hpp @@ -23,9 +23,8 @@ #include #include -namespace cudf { -namespace dictionary { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace dictionary::detail { /** * @brief Construct a dictionary column by dictionary encoding an existing column. * @@ -84,6 +83,5 @@ std::unique_ptr decode(dictionary_column_view const& dictionary_column, */ data_type get_indices_type_for_size(size_type keys_size); -} // namespace detail -} // namespace dictionary -} // namespace cudf +} // namespace dictionary::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/dictionary/detail/merge.hpp b/cpp/include/cudf/dictionary/detail/merge.hpp index c4229690ff5..a1777d412fe 100644 --- a/cpp/include/cudf/dictionary/detail/merge.hpp +++ b/cpp/include/cudf/dictionary/detail/merge.hpp @@ -22,9 +22,8 @@ #include #include -namespace cudf { -namespace dictionary { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace dictionary::detail { /** * @brief Merges two dictionary columns. 
@@ -47,6 +46,5 @@ std::unique_ptr merge(dictionary_column_view const& lcol, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace dictionary -} // namespace cudf +} // namespace dictionary::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/dictionary/detail/replace.hpp b/cpp/include/cudf/dictionary/detail/replace.hpp index 81a91d57169..1e1ee182fc5 100644 --- a/cpp/include/cudf/dictionary/detail/replace.hpp +++ b/cpp/include/cudf/dictionary/detail/replace.hpp @@ -23,9 +23,8 @@ #include #include -namespace cudf { -namespace dictionary { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace dictionary::detail { /** * @brief Create a new dictionary column by replacing nulls with values @@ -62,6 +61,5 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace dictionary -} // namespace cudf +} // namespace dictionary::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/dictionary/detail/search.hpp b/cpp/include/cudf/dictionary/detail/search.hpp index 2563b96b214..921acc258a9 100644 --- a/cpp/include/cudf/dictionary/detail/search.hpp +++ b/cpp/include/cudf/dictionary/detail/search.hpp @@ -18,11 +18,12 @@ #include #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace dictionary { namespace detail { @@ -63,4 +64,4 @@ std::unique_ptr get_insert_index(dictionary_column_view const& dictionar } // namespace detail } // namespace dictionary -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp index 9cdda773dbb..9eb812eb8ee 100644 --- a/cpp/include/cudf/dictionary/detail/update_keys.hpp +++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp @@ -24,9 +24,8 @@ #include #include -namespace cudf { -namespace dictionary { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace dictionary::detail { /** * @copydoc cudf::dictionary::add_keys(dictionary_column_view const&,column_view * const&,rmm::device_async_resource_ref) @@ -103,6 +102,5 @@ std::vector> match_dictionaries( std::pair>, std::vector> match_dictionaries( std::vector tables, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace dictionary -} // namespace cudf +} // namespace dictionary::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/dictionary/dictionary_column_view.hpp b/cpp/include/cudf/dictionary/dictionary_column_view.hpp index 9f2bc90c0b2..dc822fee38b 100644 --- a/cpp/include/cudf/dictionary/dictionary_column_view.hpp +++ b/cpp/include/cudf/dictionary/dictionary_column_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,7 +23,7 @@ * @brief Class definition for cudf::dictionary_column_view */ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup dictionary_classes * @{ @@ -124,4 +124,4 @@ class dictionary_column_view : private column_view { namespace dictionary { // defined here for doxygen output } -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/dictionary/dictionary_factories.hpp b/cpp/include/cudf/dictionary/dictionary_factories.hpp index 7cdfa3bf9e5..2f663c4af61 100644 --- a/cpp/include/cudf/dictionary/dictionary_factories.hpp +++ b/cpp/include/cudf/dictionary/dictionary_factories.hpp @@ -23,7 +23,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup column_factories Factories * @{ @@ -87,12 +87,17 @@ std::unique_ptr make_dictionary_column( * @param indices_column Indices to use for the new dictionary column. * @param null_mask Null mask for the output column. * @param null_count Number of nulls for the output column. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ -std::unique_ptr make_dictionary_column(std::unique_ptr keys_column, - std::unique_ptr indices_column, - rmm::device_buffer&& null_mask, - size_type null_count); +std::unique_ptr make_dictionary_column( + std::unique_ptr keys_column, + std::unique_ptr indices_column, + rmm::device_buffer&& null_mask, + size_type null_count, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a dictionary column by taking ownership of the provided keys @@ -122,4 +127,4 @@ std::unique_ptr make_dictionary_column( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/dictionary/encode.hpp b/cpp/include/cudf/dictionary/encode.hpp index 768e2be2b0d..9e68c947793 100644 --- a/cpp/include/cudf/dictionary/encode.hpp +++ b/cpp/include/cudf/dictionary/encode.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace dictionary { /** * @addtogroup dictionary_encode @@ -86,4 +86,4 @@ std::unique_ptr decode( /** @} */ // end of group } // namespace dictionary -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/dictionary/search.hpp b/cpp/include/cudf/dictionary/search.hpp index 1dff6dc1d5d..66275de33e9 100644 --- a/cpp/include/cudf/dictionary/search.hpp +++ b/cpp/include/cudf/dictionary/search.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace dictionary { /** * @addtogroup dictionary_search @@ -50,4 +50,4 @@ std::unique_ptr get_index( /** @} */ // end of group } // namespace dictionary -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/dictionary/update_keys.hpp b/cpp/include/cudf/dictionary/update_keys.hpp index ce7057359a1..c02e91f8d78 100644 --- a/cpp/include/cudf/dictionary/update_keys.hpp +++ b/cpp/include/cudf/dictionary/update_keys.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace dictionary { /** * @addtogroup dictionary_update @@ -169,4 +169,4 @@ std::vector> match_dictionaries( /** @} */ // end of group } // namespace dictionary -} // namespace cudf +} // namespace CUDF_EXPORT 
cudf diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp index 90139e8634a..054f1e859f4 100644 --- a/cpp/include/cudf/filling.hpp +++ b/cpp/include/cudf/filling.hpp @@ -18,13 +18,14 @@ #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup transformation_fill * @{ @@ -244,4 +245,4 @@ std::unique_ptr calendrical_month_sequence( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index c9cbc603226..ea2f5d4b6ca 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -30,7 +30,7 @@ #include /// `fixed_point` and supporting types -namespace numeric { +namespace CUDF_EXPORT numeric { /** * @addtogroup fixed_point_classes @@ -799,4 +799,4 @@ using decimal64 = fixed_point; ///< 64-bit decima using decimal128 = fixed_point<__int128_t, Radix::BASE_10>; ///< 128-bit decimal fixed point /** @} */ // end of group -} // namespace numeric +} // namespace CUDF_EXPORT numeric diff --git a/cpp/include/cudf/fixed_point/floating_conversion.hpp b/cpp/include/cudf/fixed_point/floating_conversion.hpp index f12177c6a4b..f0d50edccd1 100644 --- a/cpp/include/cudf/fixed_point/floating_conversion.hpp +++ b/cpp/include/cudf/fixed_point/floating_conversion.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -24,7 +25,7 @@ #include -namespace numeric { +namespace CUDF_EXPORT numeric { /** * @addtogroup floating_conversion @@ -1142,4 +1143,4 @@ CUDF_HOST_DEVICE inline FloatingType convert_integral_to_floating(Rep const& val } // namespace detail /** @} */ // end of group -} // namespace numeric +} // namespace CUDF_EXPORT numeric diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp index 17dba6c2452..2bafe235058 100644 --- a/cpp/include/cudf/fixed_point/temporary.hpp +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -24,7 +24,7 @@ #include #include -namespace numeric { +namespace CUDF_EXPORT numeric { namespace detail { template @@ -81,4 +81,4 @@ constexpr auto exp10(int32_t exponent) } } // namespace detail -} // namespace numeric +} // namespace CUDF_EXPORT numeric diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index 831ef68ed15..f7df9c1aa9b 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -31,7 +32,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { //! 
`groupby` APIs namespace groupby { namespace detail { @@ -420,4 +421,4 @@ class groupby { }; /** @} */ } // namespace groupby -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index 3c2f6dfe0d5..b8be2af6967 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -17,11 +17,12 @@ #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup column_hash @@ -187,4 +188,4 @@ std::unique_ptr xxhash_64( } // namespace hashing /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/hashing/detail/hashing.hpp b/cpp/include/cudf/hashing/detail/hashing.hpp index 77266ceb48f..1a459430346 100644 --- a/cpp/include/cudf/hashing/detail/hashing.hpp +++ b/cpp/include/cudf/hashing/detail/hashing.hpp @@ -24,9 +24,8 @@ #include #include -namespace cudf { -namespace hashing { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace hashing::detail { std::unique_ptr murmurhash3_x86_32(table_view const& input, uint32_t seed, @@ -109,9 +108,8 @@ constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs) return lhs ^ (rhs + 0x9e37'79b9'7f4a'7c15 + (lhs << 6) + (lhs >> 2)); } -} // namespace detail -} // namespace hashing -} // namespace cudf +} // namespace hashing::detail +} // namespace CUDF_EXPORT cudf // specialization of std::hash for cudf::data_type namespace std { diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 11f6ce2bad7..9a8f87b4a46 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -53,7 +54,7 @@ struct ArrowArray; struct ArrowArrayStream; -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup interop_dlpack * @{ @@ -136,6 +137,8 @@ struct column_metadata { * Converts the `cudf::table_view` to `arrow::Table` with the provided * metadata `column_names`. * + * @deprecated Since 24.08. Use cudf::to_arrow_host instead. + * * @throws cudf::logic_error if `column_names` size doesn't match with number of columns. * * @param input table_view that needs to be converted to arrow Table @@ -150,16 +153,19 @@ struct column_metadata { * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be * converted to Arrow decimal128 of the precision 38. */ -std::shared_ptr to_arrow(table_view input, - std::vector const& metadata = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); +[[deprecated("Use cudf::to_arrow_host")]] std::shared_ptr to_arrow( + table_view input, + std::vector const& metadata = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); /** * @brief Create `arrow::Scalar` from cudf scalar `input` * * Converts the `cudf::scalar` to `arrow::Scalar`. * + * @deprecated Since 24.08. + * * @param input scalar that needs to be converted to arrow Scalar * @param metadata Contains hierarchy of names of columns and children * @param stream CUDA stream used for device memory operations and kernel launches @@ -172,10 +178,11 @@ std::shared_ptr to_arrow(table_view input, * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be * converted to Arrow decimal128 of the precision 38. 
*/ -std::shared_ptr to_arrow(cudf::scalar const& input, - column_metadata const& metadata = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); +[[deprecated("Use cudf::to_arrow_host")]] std::shared_ptr to_arrow( + cudf::scalar const& input, + column_metadata const& metadata = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); /** * @brief typedef for a unique_ptr to an ArrowSchema with custom deleter @@ -329,15 +336,67 @@ unique_device_array_t to_arrow_device( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); +/** + * @brief Copy table view data to host and create `ArrowDeviceArray` for it + * + * Populates the C struct ArrowDeviceArray, copying the cudf data to the host. The + * returned ArrowDeviceArray will have a device_type of CPU and will have no ties + * to the memory referenced by the table view passed in. The deleter for the + * returned unique_ptr will call the release callback on the ArrowDeviceArray + * automatically. + * + * @note For decimals, since the precision is not stored for them in libcudf, it will + * be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type + * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision + * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be + * converted to Arrow decimal128 of precision 38. + * + * @param table Input table + * @param stream CUDA stream used for the device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + * @return ArrowDeviceArray generated from input table + */ +unique_device_array_t to_arrow_host( + cudf::table_view const& table, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Copy column view data to host and create `ArrowDeviceArray` for it + * + * Populates the C struct ArrowDeviceArray, copying the cudf data to the host. The + * returned ArrowDeviceArray will have a device_type of CPU and will have no ties + * to the memory referenced by the column view passed in. The deleter for the + * returned unique_ptr will call the release callback on the ArrowDeviceArray + * automatically. + * + * @note For decimals, since the precision is not stored for them in libcudf, it will + * be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type + * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision + * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be + * converted to Arrow decimal128 of precision 38. + * + * @param col Input column + * @param stream CUDA stream used for the device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + * @return ArrowDeviceArray generated from input column + */ +unique_device_array_t to_arrow_host( + cudf::column_view const& col, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + /** * @brief Create `cudf::table` from given arrow Table input * + * @deprecated Since 24.08. Use cudf::from_arrow_host instead. 
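A hedged usage sketch of the to_arrow_host overloads declared above (the function and its body are illustrative; the `array` member of ArrowDeviceArray matches the C Device Data Interface struct whose vendored copy is deleted later in this patch):

#include <cudf/interop.hpp>

void export_to_host(cudf::table_view const& tbl)
{
  // Copies the device data to host memory; the result has device_type CPU
  // and owns its buffers, so it has no ties to the input view's memory.
  cudf::unique_device_array_t out = cudf::to_arrow_host(tbl);
  // out->array is a plain ArrowArray that any Arrow C Data Interface
  // consumer can import; the unique_ptr deleter invokes the release callback.
}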
+ * * @param input arrow::Table that needs to be converted to `cudf::table` * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate `cudf::table` * @return cudf table generated from given arrow Table */ -std::unique_ptr<table> from_arrow( +[[deprecated("Use cudf::from_arrow_host")]] std::unique_ptr<table> from_arrow( arrow::Table const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); @@ -345,12 +404,17 @@ std::unique_ptr<table>
from_arrow( /** * @brief Create `cudf::scalar` from given arrow Scalar input * + * @deprecated Since 24.08. Use arrow's `MakeArrayFromScalar` on the + * input, followed by `ExportArray` to obtain something that can be + * consumed by `from_arrow_host`. Then use `cudf::get_element` to + * extract a device scalar from the column. + * * @param input `arrow::Scalar` that needs to be converted to `cudf::scalar` * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate `cudf::scalar` * @return cudf scalar generated from given arrow Scalar */ -std::unique_ptr from_arrow( +[[deprecated("See docstring for migration strategies")]] std::unique_ptr from_arrow( arrow::Scalar const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); @@ -585,4 +649,4 @@ unique_column_view_t from_arrow_device_column( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/interop/detail/arrow.hpp b/cpp/include/cudf/interop/detail/arrow.hpp deleted file mode 100644 index 906d48f636b..00000000000 --- a/cpp/include/cudf/interop/detail/arrow.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// from Arrow C Device Data Interface -// https://arrow.apache.org/docs/format/CDeviceDataInterface.html -#ifndef ARROW_C_DEVICE_DATA_INTERFACE -#define ARROW_C_DEVICE_DATA_INTERFACE - -// Device type for the allocated memory -using ArrowDeviceType = int32_t; - -// The Arrow spec specifies using macros rather than enums here to avoid being -// susceptible to changes in the underlying type chosen by the compiler, but -// clang-tidy doesn't like this. 
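A hedged sketch of the scalar migration path the @deprecated note above describes. `from_arrow_host_column` is assumed from the cudf interop API (it is not shown in this hunk), ARROW_DEVICE_CPU comes from the Arrow C Device Data Interface, and error handling is reduced to early returns:

#include <arrow/api.h>
#include <arrow/c/bridge.h>
#include <cudf/copying.hpp>  // cudf::get_element
#include <cudf/interop.hpp>

std::unique_ptr<cudf::scalar> scalar_from_arrow(arrow::Scalar const& input)
{
  // 1. Lift the scalar into a length-1 arrow::Array.
  auto arr = arrow::MakeArrayFromScalar(input, 1).ValueOrDie();
  // 2. Export it through the Arrow C Data Interface.
  ArrowSchema schema;
  ArrowArray array;
  if (!arrow::ExportArray(*arr, &array, &schema).ok()) { return nullptr; }
  // 3. Wrap as a CPU-resident ArrowDeviceArray and import as a cudf column.
  ArrowDeviceArray host_array{};
  host_array.array       = array;
  host_array.device_type = ARROW_DEVICE_CPU;
  auto col = cudf::from_arrow_host_column(&schema, &host_array);
  // 4. Extract row 0 back out as a device scalar.
  return cudf::get_element(col->view(), 0);
}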
-// NOLINTBEGIN -// CPU device, same as using ArrowArray directly -#define ARROW_DEVICE_CPU 1 -// CUDA GPU Device -#define ARROW_DEVICE_CUDA 2 -// Pinned CUDA CPU memory by cudaMallocHost -#define ARROW_DEVICE_CUDA_HOST 3 -// CUDA managed/unified memory allocated by cudaMallocManaged -#define ARROW_DEVICE_CUDA_MANAGED 13 -// NOLINTEND - -struct ArrowDeviceArray { - struct ArrowArray array; - int64_t device_id; - ArrowDeviceType device_type; - void* sync_event; - - // reserved bytes for future expansion - int64_t reserved[3]; -}; - -#endif // ARROW_C_DEVICE_DATA_INTERFACE diff --git a/cpp/include/cudf/io/arrow_io_source.hpp b/cpp/include/cudf/io/arrow_io_source.hpp index d7a48c34e12..ed5c839cbb4 100644 --- a/cpp/include/cudf/io/arrow_io_source.hpp +++ b/cpp/include/cudf/io/arrow_io_source.hpp @@ -18,6 +18,8 @@ #include "datasource.hpp" +#include + #include #include @@ -25,7 +27,8 @@ #include #include -namespace cudf::io { +namespace CUDF_EXPORT cudf { +namespace io { /** * @addtogroup io_datasources * @{ @@ -86,4 +89,5 @@ class arrow_io_source : public datasource { }; /** @} */ // end of group -} // namespace cudf::io +} // namespace io +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/avro.hpp b/cpp/include/cudf/io/avro.hpp index 8bc74eb574c..63f9ea3a624 100644 --- a/cpp/include/cudf/io/avro.hpp +++ b/cpp/include/cudf/io/avro.hpp @@ -28,7 +28,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { /** * @addtogroup io_readers @@ -221,4 +221,4 @@ table_with_metadata read_avro( /** @} */ // end of group } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index cc361f0918e..bbb4636a5a3 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -31,7 +31,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { /** @@ -1762,4 +1762,4 @@ void write_csv(csv_writer_options const& options, /** @} */ // end of group } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/data_sink.hpp b/cpp/include/cudf/io/data_sink.hpp index 69d8a388d45..e1eb9c042c7 100644 --- a/cpp/include/cudf/io/data_sink.hpp +++ b/cpp/include/cudf/io/data_sink.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { //! IO interfaces namespace io { @@ -209,4 +209,4 @@ class data_sink { /** @} */ // end of group } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp index 28263d466f3..b12fbe39a57 100644 --- a/cpp/include/cudf/io/datasource.hpp +++ b/cpp/include/cudf/io/datasource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -25,7 +26,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { //! 
IO interfaces namespace io { @@ -376,4 +377,4 @@ class datasource { /** @} */ // end of group } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/detail/avro.hpp b/cpp/include/cudf/io/detail/avro.hpp index fe9f935d2cc..13f695d6866 100644 --- a/cpp/include/cudf/io/detail/avro.hpp +++ b/cpp/include/cudf/io/detail/avro.hpp @@ -18,14 +18,13 @@ #include #include +#include #include #include -namespace cudf { -namespace io { -namespace detail { -namespace avro { +namespace CUDF_EXPORT cudf { +namespace io::detail::avro { /** * @brief Reads the entire dataset. @@ -42,7 +41,5 @@ table_with_metadata read_avro(std::unique_ptr&& source, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace avro -} // namespace detail -} // namespace io -} // namespace cudf +} // namespace io::detail::avro +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp index 2a70fa888f4..d4cad2f70fd 100644 --- a/cpp/include/cudf/io/detail/csv.hpp +++ b/cpp/include/cudf/io/detail/csv.hpp @@ -17,14 +17,13 @@ #pragma once #include +#include #include #include -namespace cudf { -namespace io { -namespace detail { -namespace csv { +namespace CUDF_EXPORT cudf { +namespace io::detail::csv { /** * @brief Reads the entire dataset. @@ -56,7 +55,5 @@ void write_csv(data_sink* sink, csv_writer_options const& options, rmm::cuda_stream_view stream); -} // namespace csv -} // namespace detail -} // namespace io -} // namespace cudf +} // namespace io::detail::csv +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp index 6ff1c12831b..42b10a78ce8 100644 --- a/cpp/include/cudf/io/detail/json.hpp +++ b/cpp/include/cudf/io/detail/json.hpp @@ -18,11 +18,13 @@ #include #include +#include #include #include -namespace cudf::io::json::detail { +namespace CUDF_EXPORT cudf { +namespace io::json::detail { /** * @brief Reads and returns the entire data set. 
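The rewrites above also collapse the old three-deep namespace blocks into C++17 nested namespace definitions. One detail worth spelling out: an attribute may only appear on a non-nested namespace definition, which is presumably why the patch opens `cudf` on its own and nests the rest. A minimal illustration (names arbitrary):

// Invalid: attributes are not allowed on a nested namespace definition.
// namespace CUDF_EXPORT cudf::io::detail::avro { }

// Valid: attribute on the single-level definition, C++17 shorthand inside.
namespace CUDF_EXPORT cudf {
namespace io::detail::avro {
void read();
}  // namespace io::detail::avro
}  // namespace CUDF_EXPORT cudf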
@@ -73,4 +75,5 @@ void normalize_single_quotes(datasource::owning_buffer void normalize_whitespace(datasource::owning_buffer>& indata, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace cudf::io::json::detail +} // namespace io::json::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 597ddd9cf0a..7538cf7d29c 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -29,12 +30,13 @@ #include #include -namespace cudf::io { +namespace CUDF_EXPORT cudf { +namespace io { // Forward declaration -class orc_reader_options; -class orc_writer_options; -class chunked_orc_writer_options; +class CUDF_EXPORT orc_reader_options; +class CUDF_EXPORT orc_writer_options; +class CUDF_EXPORT chunked_orc_writer_options; namespace orc::detail { @@ -183,4 +185,5 @@ class writer { }; } // namespace orc::detail -} // namespace cudf::io +} // namespace io +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 21c870cb75e..a6945e0b7ab 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -32,12 +33,13 @@ #include #include -namespace cudf::io { +namespace CUDF_EXPORT cudf { +namespace io { // Forward declaration -class parquet_reader_options; -class parquet_writer_options; -class chunked_parquet_writer_options; +class CUDF_EXPORT parquet_reader_options; +class CUDF_EXPORT parquet_writer_options; +class CUDF_EXPORT chunked_parquet_writer_options; namespace parquet::detail { @@ -257,4 +259,5 @@ class writer { */ parquet_metadata read_parquet_metadata(host_span const> sources); } // namespace parquet::detail -} // namespace cudf::io +} // namespace io +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/detail/tokenize_json.hpp b/cpp/include/cudf/io/detail/tokenize_json.hpp index d08c4e7c65a..715eb855daa 100644 --- a/cpp/include/cudf/io/detail/tokenize_json.hpp +++ b/cpp/include/cudf/io/detail/tokenize_json.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -117,7 +118,7 @@ enum token_t : PdaTokenT { NUM_TOKENS }; -namespace detail { +namespace CUDF_EXPORT detail { /** * @brief Parses the given JSON string and emits a sequence of tokens that demarcate relevant @@ -136,6 +137,6 @@ std::pair, rmm::device_uvector> ge rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail +} // namespace CUDF_EXPORT detail } // namespace cudf::io::json diff --git a/cpp/include/cudf/io/detail/utils.hpp b/cpp/include/cudf/io/detail/utils.hpp index 7bbda21858d..d0da9b410ce 100644 --- a/cpp/include/cudf/io/detail/utils.hpp +++ b/cpp/include/cudf/io/detail/utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,13 +16,14 @@ #pragma once -namespace cudf { -namespace io { -namespace detail { +#include + +namespace CUDF_EXPORT cudf { +namespace io::detail { /** * @brief Whether writer writes in chunks or all at once */ enum class single_write_mode : bool { YES, NO }; -} // namespace detail -} // namespace io -} // namespace cudf + +} // namespace io::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 7af90766ad0..0cb39d15cd5 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -30,7 +30,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { /** * @addtogroup io_readers @@ -333,14 +333,14 @@ class json_reader_options { * * @param offset Number of bytes of offset */ - void set_byte_range_offset(size_type offset) { _byte_range_offset = offset; } + void set_byte_range_offset(size_t offset) { _byte_range_offset = offset; } /** * @brief Set number of bytes to read. * * @param size Number of bytes to read */ - void set_byte_range_size(size_type size) { _byte_range_size = size; } + void set_byte_range_size(size_t size) { _byte_range_size = size; } /** * @brief Set delimiter separating records in JSON lines @@ -1024,4 +1024,4 @@ void write_json(json_writer_options const& options, /** @} */ // end of group } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 623c1d9fc72..8d484b15872 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -31,7 +32,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { /** * @addtogroup io_readers @@ -426,7 +427,7 @@ class chunked_orc_reader { * * This is added just to satisfy cython. */ - chunked_orc_reader() = default; + chunked_orc_reader(); /** * @brief Construct the reader from input/output size limits, output row granularity, along with @@ -1429,7 +1430,12 @@ class orc_chunked_writer { * @brief Default constructor, this should never be used. * This is added just to satisfy cython. */ - orc_chunked_writer() = default; + orc_chunked_writer(); + + /** + * @brief virtual destructor, Added so we don't leak detail types. + */ + ~orc_chunked_writer(); /** * @brief Constructor with chunked writer options @@ -1459,4 +1465,4 @@ class orc_chunked_writer { /** @} */ // end of group } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/orc_metadata.hpp b/cpp/include/cudf/io/orc_metadata.hpp index 35196a19349..3c6194bb721 100644 --- a/cpp/include/cudf/io/orc_metadata.hpp +++ b/cpp/include/cudf/io/orc_metadata.hpp @@ -23,12 +23,13 @@ #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { /** * @addtogroup io_types @@ -387,4 +388,4 @@ orc_metadata read_orc_metadata(source_info const& src_info, /** @} */ // end of group } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/orc_types.hpp b/cpp/include/cudf/io/orc_types.hpp index abd81d76579..f6c03814c9b 100644 --- a/cpp/include/cudf/io/orc_types.hpp +++ b/cpp/include/cudf/io/orc_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,9 +16,12 @@ #pragma once +#include + #include -namespace cudf::io::orc { +namespace CUDF_EXPORT cudf { +namespace io::orc { /** * @addtogroup io_types * @{ @@ -104,4 +107,5 @@ enum ProtofType : uint8_t { }; /** @} */ // end of group -} // namespace cudf::io::orc +} // namespace io::orc +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 4d98cae73a7..12897ac77ef 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,8 @@ #include #include -namespace cudf::io { +namespace CUDF_EXPORT cudf { +namespace io { /** * @addtogroup io_readers * @{ @@ -480,8 +482,9 @@ class chunked_parquet_reader { * @brief Default constructor, this should never be used. * * This is added just to satisfy cython. + * This is added to not leak detail API */ - chunked_parquet_reader() = default; + chunked_parquet_reader(); /** * @brief Constructor for chunked reader. @@ -1380,8 +1383,9 @@ class parquet_chunked_writer { /** * @brief Default constructor, this should never be used. * This is added just to satisfy cython. + * This is added to not leak detail API */ - parquet_chunked_writer() = default; + parquet_chunked_writer(); /** * @brief Constructor with chunked writer options @@ -1391,6 +1395,11 @@ class parquet_chunked_writer { */ parquet_chunked_writer(chunked_parquet_writer_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream()); + /** + * @brief Default destructor. + * This is added to not leak detail API + */ + ~parquet_chunked_writer(); /** * @brief Writes table to output. 
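The constructor and destructor changes above, for both the ORC and Parquet chunked readers and writers, replace in-header `= default` definitions with out-of-line ones. A minimal sketch of the idiom and why it avoids leaking detail types (illustrative names, not the cudf classes):

// widget.hpp -- public header; detail::impl stays incomplete here.
#include <memory>
namespace detail { class impl; }  // forward declaration only

class widget {
 public:
  widget();   // defined in widget.cpp, where detail::impl is complete
  ~widget();  // `= default` here would require detail::impl to be complete
              // wherever the destructor is used, pulling detail headers
              // into every includer
 private:
  std::unique_ptr<detail::impl> _impl;
};

// widget.cpp
// #include "detail/impl.hpp"
// widget::widget() : _impl{std::make_unique<detail::impl>()} {}
// widget::~widget() = default;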
@@ -1423,4 +1432,5 @@ class parquet_chunked_writer { /** @} */ // end of group -} // namespace cudf::io +} // namespace io +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/parquet_metadata.hpp b/cpp/include/cudf/io/parquet_metadata.hpp index e0c406c180c..dbb1fd03dca 100644 --- a/cpp/include/cudf/io/parquet_metadata.hpp +++ b/cpp/include/cudf/io/parquet_metadata.hpp @@ -22,13 +22,14 @@ #pragma once #include +#include #include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { /** * @addtogroup io_types @@ -270,4 +271,4 @@ parquet_metadata read_parquet_metadata(source_info const& src_info); /** @} */ // end of group } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/text/byte_range_info.hpp b/cpp/include/cudf/io/text/byte_range_info.hpp index 60ee867f058..7e9256be1d3 100644 --- a/cpp/include/cudf/io/text/byte_range_info.hpp +++ b/cpp/include/cudf/io/text/byte_range_info.hpp @@ -17,11 +17,12 @@ #pragma once #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { namespace text { /** @@ -113,4 +114,4 @@ byte_range_info create_byte_range_info_max(); } // namespace text } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/text/data_chunk_source.hpp b/cpp/include/cudf/io/text/data_chunk_source.hpp index 13aff4b3b8f..dd1d2331c1f 100644 --- a/cpp/include/cudf/io/text/data_chunk_source.hpp +++ b/cpp/include/cudf/io/text/data_chunk_source.hpp @@ -16,12 +16,13 @@ #pragma once +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { namespace text { @@ -120,4 +121,4 @@ class data_chunk_source { } // namespace text } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp index 046994d33cc..42d0540b386 100644 --- a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp +++ b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,12 +19,14 @@ #include #include #include +#include #include #include #include -namespace cudf::io::text { +namespace CUDF_EXPORT cudf { +namespace io::text { /** * @brief Creates a data source capable of producing device-buffered views of a datasource. 
@@ -84,4 +86,5 @@ std::unique_ptr make_source_from_bgzip_file(std::string_view */ std::unique_ptr make_source(cudf::string_scalar& data); -} // namespace cudf::io::text +} // namespace io::text +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/text/detail/bgzip_utils.hpp b/cpp/include/cudf/io/text/detail/bgzip_utils.hpp index 515bcf16de2..11eb4518210 100644 --- a/cpp/include/cudf/io/text/detail/bgzip_utils.hpp +++ b/cpp/include/cudf/io/text/detail/bgzip_utils.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -26,7 +27,8 @@ #include #include -namespace cudf::io::text::detail::bgzip { +namespace CUDF_EXPORT cudf { +namespace io::text::detail::bgzip { struct header { int block_size; @@ -109,4 +111,5 @@ void write_compressed_block(std::ostream& output_stream, host_span pre_size_subfields = {}, host_span post_size_subfields = {}); -} // namespace cudf::io::text::detail::bgzip +} // namespace io::text::detail::bgzip +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/text/detail/multistate.hpp b/cpp/include/cudf/io/text/detail/multistate.hpp index e4e47d8f010..32187b43d34 100644 --- a/cpp/include/cudf/io/text/detail/multistate.hpp +++ b/cpp/include/cudf/io/text/detail/multistate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,9 +16,11 @@ #pragma once +#include + #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { namespace text { namespace detail { @@ -125,4 +127,4 @@ constexpr multistate operator+(multistate const& lhs, multistate const& rhs) } // namespace detail } // namespace text } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/text/detail/tile_state.hpp b/cpp/include/cudf/io/text/detail/tile_state.hpp index aa9185b4983..3980a7fac02 100644 --- a/cpp/include/cudf/io/text/detail/tile_state.hpp +++ b/cpp/include/cudf/io/text/detail/tile_state.hpp @@ -16,12 +16,14 @@ #pragma once +#include + #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { namespace text { namespace detail { @@ -147,4 +149,4 @@ struct scan_tile_state_callback { } // namespace detail } // namespace text } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/text/detail/trie.hpp b/cpp/include/cudf/io/text/detail/trie.hpp index e0b9c7635e3..eee3fefc79f 100644 --- a/cpp/include/cudf/io/text/detail/trie.hpp +++ b/cpp/include/cudf/io/text/detail/trie.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -30,7 +31,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { namespace text { namespace detail { @@ -223,11 +224,11 @@ struct trie { match_length.emplace_back(0); - std::vector trie_nodes; auto token_counts = std::unordered_map(); + auto trie_nodes = cudf::detail::make_empty_host_vector(tokens.size(), stream); for (uint32_t i = 0; i < tokens.size(); i++) { - trie_nodes.emplace_back(trie_node{tokens[i], match_length[i], transitions[i]}); + trie_nodes.push_back(trie_node{tokens[i], match_length[i], transitions[i]}); token_counts[tokens[i]]++; } @@ -248,4 +249,4 @@ struct trie { } // namespace detail } // namespace text } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/text/multibyte_split.hpp 
b/cpp/include/cudf/io/text/multibyte_split.hpp index e29ab78ae46..8624a386d0f 100644 --- a/cpp/include/cudf/io/text/multibyte_split.hpp +++ b/cpp/include/cudf/io/text/multibyte_split.hpp @@ -27,7 +27,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace io { namespace text { /** @@ -120,4 +120,4 @@ std::unique_ptr<column> multibyte_split( } // namespace text } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 0c96268f6c7..3df737413fa 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -33,16 +33,16 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { //! IO interfaces namespace io { class data_sink; class datasource; } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf //! cuDF interfaces -namespace cudf { +namespace CUDF_EXPORT cudf { //! IO interfaces namespace io { /** @@ -277,6 +277,9 @@ struct column_name_info { struct table_metadata { std::vector<column_name_info> schema_info; //!< Detailed name information for the entire output hierarchy + std::vector<size_t> num_rows_per_source; //!< Number of rows read from each data source. + //!< Currently only computed for Parquet readers if no + //!< AST filters are being used. Empty vector otherwise. std::map<std::string, std::string> user_data; //!< Format-dependent metadata of the first input //!< file as key-values pairs (deprecated) std::vector<std::unordered_map<std::string, std::string>> @@ -1086,4 +1089,4 @@ class reader_column_schema { /** @} */ // end of group } // namespace io -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index ba485bd6372..f4139721475 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief Enum to indicate whether the distinct join table has nested columns or not @@ -43,13 +44,24 @@ enum class has_nested : bool { YES, NO }; // forward declaration namespace hashing::detail { + +/** + * @brief Forward declaration for our Murmur Hash 3 implementation + */ template class MurmurHash3_x86_32; } // namespace hashing::detail namespace detail { + +/** + * @brief Forward declaration for our hash join + */ template class hash_join; +/** + * @brief Forward declaration for our distinct hash join + */ template class distinct_hash_join; } // namespace detail @@ -1179,4 +1191,4 @@ std::size_t conditional_left_anti_join_size( ast::expression const& binary_predicate, rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/json/json.hpp b/cpp/include/cudf/json/json.hpp index 385e8e54bdc..48d5dcf7727 100644 --- a/cpp/include/cudf/json/json.hpp +++ b/cpp/include/cudf/json/json.hpp @@ -17,13 +17,14 @@ #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup json_object @@ -173,4 +174,4 @@ std::unique_ptr get_json_object( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/labeling/label_bins.hpp b/cpp/include/cudf/labeling/label_bins.hpp index 9091e31a9ea..7eb25134ca5 100644 --- a/cpp/include/cudf/labeling/label_bins.hpp +++ b/cpp/include/cudf/labeling/label_bins.hpp
@@ -24,7 +24,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup label_bins @@ -79,4 +79,4 @@ std::unique_ptr label_bins( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/combine.hpp b/cpp/include/cudf/lists/combine.hpp index 853562acfff..5a310e6651f 100644 --- a/cpp/include/cudf/lists/combine.hpp +++ b/cpp/include/cudf/lists/combine.hpp @@ -17,11 +17,12 @@ #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { //! Lists column APIs namespace lists { @@ -102,4 +103,4 @@ std::unique_ptr concatenate_list_elements( /** @} */ // end of group } // namespace lists -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/contains.hpp b/cpp/include/cudf/lists/contains.hpp index 060882555aa..cd0a216488c 100644 --- a/cpp/include/cudf/lists/contains.hpp +++ b/cpp/include/cudf/lists/contains.hpp @@ -17,11 +17,12 @@ #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace lists { /** * @addtogroup lists_contains @@ -182,4 +183,4 @@ std::unique_ptr index_of( /** @} */ // end of group } // namespace lists -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/count_elements.hpp b/cpp/include/cudf/lists/count_elements.hpp index 2b9f5aa5607..a6f2ea6e68a 100644 --- a/cpp/include/cudf/lists/count_elements.hpp +++ b/cpp/include/cudf/lists/count_elements.hpp @@ -17,11 +17,12 @@ #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace lists { /** * @addtogroup lists_elements @@ -58,4 +59,4 @@ std::unique_ptr count_elements( /** @} */ // end of lists_elements group } // namespace lists -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/combine.hpp b/cpp/include/cudf/lists/detail/combine.hpp index bd4c01bbb4b..07309da2814 100644 --- a/cpp/include/cudf/lists/detail/combine.hpp +++ b/cpp/include/cudf/lists/detail/combine.hpp @@ -21,9 +21,8 @@ #include -namespace cudf { -namespace lists { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace lists::detail { /** * @copydoc cudf::lists::concatenate_rows * @@ -44,6 +43,5 @@ std::unique_ptr concatenate_list_elements(column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace lists -} // namespace cudf +} // namespace lists::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/concatenate.hpp b/cpp/include/cudf/lists/detail/concatenate.hpp index d67958ef260..edfa3355dcd 100644 --- a/cpp/include/cudf/lists/detail/concatenate.hpp +++ b/cpp/include/cudf/lists/detail/concatenate.hpp @@ -24,9 +24,8 @@ #include #include -namespace cudf { -namespace lists { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace lists::detail { /** * @brief Returns a single column by concatenating the given vector of @@ -48,6 +47,5 @@ std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace lists -} // namespace cudf +} // namespace lists::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/contains.hpp b/cpp/include/cudf/lists/detail/contains.hpp index 638cc7afb81..1ca3651b55a 100644 --- a/cpp/include/cudf/lists/detail/contains.hpp +++ 
b/cpp/include/cudf/lists/detail/contains.hpp @@ -20,9 +20,8 @@ #include -namespace cudf { -namespace lists { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace lists::detail { /** * @copydoc cudf::lists::index_of(cudf::lists_column_view const&, @@ -71,6 +70,5 @@ std::unique_ptr contains(cudf::lists_column_view const& lists, cudf::column_view const& search_keys, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace lists -} // namespace cudf +} // namespace lists::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/copying.hpp b/cpp/include/cudf/lists/detail/copying.hpp index 18a70bba5e9..76154ae7064 100644 --- a/cpp/include/cudf/lists/detail/copying.hpp +++ b/cpp/include/cudf/lists/detail/copying.hpp @@ -20,9 +20,8 @@ #include #include -namespace cudf { -namespace lists { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace lists::detail { /** * @brief Returns a new lists column created from a subset of the @@ -49,6 +48,5 @@ std::unique_ptr copy_slice(lists_column_view const& lists, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace lists -} // namespace cudf +} // namespace lists::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index d36a4091947..96ee30dd261 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,10 +17,12 @@ #pragma once #include +#include #include -namespace cudf::detail { +namespace CUDF_EXPORT cudf { +namespace detail { /** * @brief Device view for `dremel_data`. 
@@ -31,8 +33,8 @@ struct dremel_device_view { size_type const* offsets; uint8_t const* rep_levels; uint8_t const* def_levels; - size_type const leaf_data_size; - uint8_t const max_def_level; + size_type leaf_data_size; + uint8_t max_def_level; }; /** @@ -45,8 +47,8 @@ struct dremel_data { rmm::device_uvector rep_level; rmm::device_uvector def_level; - size_type const leaf_data_size; - uint8_t const max_def_level; + size_type leaf_data_size; + uint8_t max_def_level; operator dremel_device_view() const { @@ -213,4 +215,5 @@ dremel_data get_comparator_data(column_view input, std::vector nullability, bool output_as_byte_array, rmm::cuda_stream_view stream); -} // namespace cudf::detail +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/extract.hpp b/cpp/include/cudf/lists/detail/extract.hpp index 6f983d44bc9..e14b93ff912 100644 --- a/cpp/include/cudf/lists/detail/extract.hpp +++ b/cpp/include/cudf/lists/detail/extract.hpp @@ -20,9 +20,8 @@ #include -namespace cudf { -namespace lists { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace lists::detail { /** * @copydoc cudf::lists::extract_list_element(lists_column_view, size_type, @@ -44,6 +43,5 @@ std::unique_ptr extract_list_element(lists_column_view lists_column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace lists -} // namespace cudf +} // namespace lists::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh index 0cd77556f33..294282d7caa 100644 --- a/cpp/include/cudf/lists/detail/gather.cuh +++ b/cpp/include/cudf/lists/detail/gather.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -276,6 +277,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column, * * @returns column with elements gathered based on `gather_data` */ +CUDF_EXPORT std::unique_ptr gather_list_nested(lists_column_view const& list, gather_data& gd, rmm::cuda_stream_view stream, @@ -293,6 +295,7 @@ std::unique_ptr gather_list_nested(lists_column_view const& list, * * @returns column with elements gathered based on `gather_data` */ +CUDF_EXPORT std::unique_ptr gather_list_leaf(column_view const& column, gather_data const& gd, rmm::cuda_stream_view stream, diff --git a/cpp/include/cudf/lists/detail/interleave_columns.hpp b/cpp/include/cudf/lists/detail/interleave_columns.hpp index 3aff93840a9..ae8caa853f3 100644 --- a/cpp/include/cudf/lists/detail/interleave_columns.hpp +++ b/cpp/include/cudf/lists/detail/interleave_columns.hpp @@ -21,9 +21,8 @@ #include #include -namespace cudf { -namespace lists { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace lists::detail { /** * @brief Returns a single column by interleaving rows of the given table of list elements. 
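A hedged illustration of the row-interleave semantics for list columns (values invented, not from the cudf docs):

// input table: col0 = [{0,1}, {4,5}]   col1 = [{2,3}, {6,7}]
// result:      [{0,1}, {2,3}, {4,5}, {6,7}]
// i.e. row i of every column is emitted before row i+1 of any column.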
@@ -50,6 +49,5 @@ std::unique_ptr interleave_columns(table_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace lists -} // namespace cudf +} // namespace lists::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/lists_column_factories.hpp b/cpp/include/cudf/lists/detail/lists_column_factories.hpp index 192aee8d811..18d66f15b1e 100644 --- a/cpp/include/cudf/lists/detail/lists_column_factories.hpp +++ b/cpp/include/cudf/lists/detail/lists_column_factories.hpp @@ -23,9 +23,8 @@ #include #include -namespace cudf { -namespace lists { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace lists::detail { /** * @brief Internal API to construct a lists column from a `list_scalar`, for public @@ -67,6 +66,5 @@ std::unique_ptr make_all_nulls_lists_column(size_type size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace lists -} // namespace cudf +} // namespace lists::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/reverse.hpp b/cpp/include/cudf/lists/detail/reverse.hpp index d099a0708b9..d10d7784e6c 100644 --- a/cpp/include/cudf/lists/detail/reverse.hpp +++ b/cpp/include/cudf/lists/detail/reverse.hpp @@ -16,10 +16,12 @@ #pragma once #include +#include #include -namespace cudf::lists::detail { +namespace CUDF_EXPORT cudf { +namespace lists::detail { /** * @copydoc cudf::lists::reverse @@ -29,4 +31,5 @@ std::unique_ptr reverse(lists_column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace cudf::lists::detail +} // namespace lists::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index c550ad5b94f..be76e456900 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -239,11 +239,11 @@ std::unique_ptr scatter(scalar const& slr, auto const num_rows = target.size(); if (num_rows == 0) { return cudf::empty_like(target); } - auto lv = static_cast(&slr); - bool slr_valid = slr.is_valid(stream); - rmm::device_buffer null_mask = - slr_valid ? cudf::detail::create_null_mask(1, mask_state::UNALLOCATED, stream, mr) - : cudf::detail::create_null_mask(1, mask_state::ALL_NULL, stream, mr); + auto lv = static_cast(&slr); + bool slr_valid = slr.is_valid(stream); + rmm::device_buffer null_mask = slr_valid + ? 
cudf::create_null_mask(1, mask_state::UNALLOCATED, stream, mr) + : cudf::create_null_mask(1, mask_state::ALL_NULL, stream, mr); auto offset_column = make_numeric_column(data_type{type_to_id<size_type>()}, 2, mask_state::UNALLOCATED, stream, mr); thrust::sequence(rmm::exec_policy_nosync(stream), diff --git a/cpp/include/cudf/lists/detail/set_operations.hpp b/cpp/include/cudf/lists/detail/set_operations.hpp index 8746b1ba62a..abfcef72d47 100644 --- a/cpp/include/cudf/lists/detail/set_operations.hpp +++ b/cpp/include/cudf/lists/detail/set_operations.hpp @@ -24,7 +24,8 @@ #include #include -namespace cudf::lists::detail { +namespace CUDF_EXPORT cudf { +namespace lists::detail { /** * @copydoc cudf::list::have_overlap @@ -75,4 +76,5 @@ std::unique_ptr<column> difference_distinct(lists_column_view const& lhs, rmm::device_async_resource_ref mr); /** @} */ // end of group -} // namespace cudf::lists::detail +} // namespace lists::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/sorting.hpp b/cpp/include/cudf/lists/detail/sorting.hpp index e428ea84ce6..8cbfbbae769 100644 --- a/cpp/include/cudf/lists/detail/sorting.hpp +++ b/cpp/include/cudf/lists/detail/sorting.hpp @@ -20,9 +20,8 @@ #include #include -namespace cudf { -namespace lists { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace lists::detail { /** * @copydoc cudf::lists::sort_lists @@ -46,6 +45,5 @@ std::unique_ptr<column> stable_sort_lists(lists_column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace lists -} // namespace cudf +} // namespace lists::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/detail/stream_compaction.hpp b/cpp/include/cudf/lists/detail/stream_compaction.hpp index f5e5b29bc8f..c11e07cd190 100644 --- a/cpp/include/cudf/lists/detail/stream_compaction.hpp +++ b/cpp/include/cudf/lists/detail/stream_compaction.hpp @@ -17,11 +17,13 @@ #include #include +#include #include #include -namespace cudf::lists::detail { +namespace CUDF_EXPORT cudf { +namespace lists::detail { /** * @copydoc cudf::lists::apply_boolean_mask(lists_column_view const&, lists_column_view const&, @@ -45,4 +47,5 @@ std::unique_ptr<column> distinct(lists_column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace cudf::lists::detail +} // namespace lists::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/explode.hpp b/cpp/include/cudf/lists/explode.hpp index 81d82dcfa09..a3375887815 100644 --- a/cpp/include/cudf/lists/explode.hpp +++ b/cpp/include/cudf/lists/explode.hpp @@ -25,7 +25,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup column_reshape * @{ @@ -66,6 +66,7 @@ namespace cudf { * * @param input_table Table to explode. * @param explode_column_idx Column index to explode inside the table. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * * @return A new table with explode_col exploded. @@ -73,6 +74,7 @@ std::unique_ptr<table> explode( table_view const& input_table, size_type explode_column_idx, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -109,6 +111,7 @@ std::unique_ptr<table> explode( * * @param input_table Table to explode. * @param explode_column_idx Column index to explode inside the table. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * * @return A new table with exploded value and position. The column order of return table is @@ -117,6 +120,7 @@ std::unique_ptr<table> explode( std::unique_ptr<table> explode_position( table_view const& input_table, size_type explode_column_idx, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -152,6 +156,7 @@ std::unique_ptr<table> explode_position( * * @param input_table Table to explode. * @param explode_column_idx Column index to explode inside the table. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * * @return A new table with explode_col exploded. @@ -159,6 +164,7 @@ std::unique_ptr<table> explode_position( std::unique_ptr<table> explode_outer( table_view const& input_table, size_type explode_column_idx, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -196,6 +202,7 @@ std::unique_ptr<table> explode_outer( * * @param input_table Table to explode. * @param explode_column_idx Column index to explode inside the table. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * * @return A new table with explode_col exploded. @@ -203,8 +210,9 @@ std::unique_ptr<table> explode_outer( std::unique_ptr<table>
explode_outer_position( table_view const& input_table, size_type explode_column_idx, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/extract.hpp b/cpp/include/cudf/lists/extract.hpp index 096d276fcfb..29a02308c66 100644 --- a/cpp/include/cudf/lists/extract.hpp +++ b/cpp/include/cudf/lists/extract.hpp @@ -18,11 +18,12 @@ #include #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace lists { /** * @addtogroup lists_extract @@ -113,4 +114,4 @@ std::unique_ptr extract_list_element( /** @} */ // end of group } // namespace lists -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/filling.hpp b/cpp/include/cudf/lists/filling.hpp index 1d840c76bf8..a1f3c37ad9e 100644 --- a/cpp/include/cudf/lists/filling.hpp +++ b/cpp/include/cudf/lists/filling.hpp @@ -25,7 +25,8 @@ #include -namespace cudf::lists { +namespace CUDF_EXPORT cudf { +namespace lists { /** * @addtogroup lists_filling * @{ @@ -113,4 +114,5 @@ std::unique_ptr sequences( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf::lists +} // namespace lists +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/gather.hpp b/cpp/include/cudf/lists/gather.hpp index a0d79c05098..6359e0488c9 100644 --- a/cpp/include/cudf/lists/gather.hpp +++ b/cpp/include/cudf/lists/gather.hpp @@ -19,11 +19,12 @@ #include #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace lists { /** * @addtogroup lists_gather @@ -80,4 +81,4 @@ std::unique_ptr segmented_gather( /** @} */ // end of group } // namespace lists -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/list_device_view.cuh b/cpp/include/cudf/lists/list_device_view.cuh index 170a20bd7f5..29b81135d64 100644 --- a/cpp/include/cudf/lists/list_device_view.cuh +++ b/cpp/include/cudf/lists/list_device_view.cuh @@ -25,7 +25,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief A non-owning, immutable view of device data that represents @@ -377,4 +377,4 @@ CUDF_HOST_DEVICE auto inline make_list_size_iterator(detail::lists_column_device return detail::make_counting_transform_iterator(0, list_size_functor{c}); } -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/list_view.hpp b/cpp/include/cudf/lists/list_view.hpp index a3f36a9330f..59ad9c9bcee 100644 --- a/cpp/include/cudf/lists/list_view.hpp +++ b/cpp/include/cudf/lists/list_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,12 +16,14 @@ */ #pragma once +#include + /** * @file list_view.hpp * @brief Class definition for cudf::list_view. 
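A hedged usage sketch of cudf::explode with the stream parameter added above (function name and column contents illustrative):

#include <cudf/lists/explode.hpp>
#include <rmm/cuda_stream_view.hpp>

std::unique_ptr<cudf::table> flatten(cudf::table_view const& t,
                                     cudf::size_type list_col,
                                     rmm::cuda_stream_view stream)
{
  // e.g. list column [[5,10,15],[20]] with companion column ["a","b"]
  //   -> [5,10,15,20] with ["a","a","a","b"]
  return cudf::explode(t, list_col, stream);
}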
*/ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief A non-owning, immutable view of device data that represents @@ -29,4 +31,4 @@ namespace cudf { */ class list_view {}; -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/lists_column_device_view.cuh b/cpp/include/cudf/lists/lists_column_device_view.cuh index 4d12ee1cab4..b3ec18a7913 100644 --- a/cpp/include/cudf/lists/lists_column_device_view.cuh +++ b/cpp/include/cudf/lists/lists_column_device_view.cuh @@ -21,9 +21,7 @@ #include -namespace cudf { - -namespace detail { +namespace cudf::detail { /** * @brief Given a column_device_view, an instance of this class provides a @@ -116,6 +114,4 @@ class lists_column_device_view : private column_device_view { } }; -} // namespace detail - -} // namespace cudf +} // namespace cudf::detail diff --git a/cpp/include/cudf/lists/lists_column_view.hpp b/cpp/include/cudf/lists/lists_column_view.hpp index 3397cb0ca1d..b117a871b64 100644 --- a/cpp/include/cudf/lists/lists_column_view.hpp +++ b/cpp/include/cudf/lists/lists_column_view.hpp @@ -17,6 +17,7 @@ #include #include +#include #include @@ -25,7 +26,7 @@ * @brief Class definition for cudf::lists_column_view */ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup lists_classes @@ -137,4 +138,4 @@ class lists_column_view : private column_view { } }; /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/reverse.hpp b/cpp/include/cudf/lists/reverse.hpp index 34c40c5a3ba..f00e6e5117a 100644 --- a/cpp/include/cudf/lists/reverse.hpp +++ b/cpp/include/cudf/lists/reverse.hpp @@ -17,13 +17,15 @@ #include #include +#include #include #include #include -namespace cudf::lists { +namespace CUDF_EXPORT cudf { +namespace lists { /** * @addtogroup lists_modify * @{ @@ -54,4 +56,5 @@ std::unique_ptr reverse( /** @} */ // end of doxygen group -} // namespace cudf::lists +} // namespace lists +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/set_operations.hpp b/cpp/include/cudf/lists/set_operations.hpp index b8abfd62461..55b1591fc44 100644 --- a/cpp/include/cudf/lists/set_operations.hpp +++ b/cpp/include/cudf/lists/set_operations.hpp @@ -23,7 +23,8 @@ #include #include -namespace cudf::lists { +namespace CUDF_EXPORT cudf { +namespace lists { /** * @addtogroup set_operations * @{ @@ -53,8 +54,8 @@ namespace cudf::lists { * @param nulls_equal Flag to specify whether null elements should be considered as equal, default * to be `UNEQUAL` which means only non-null elements are checked for overlapping * @param nans_equal Flag to specify whether floating-point NaNs should be considered as equal - * @param mr Device memory resource used to allocate the returned object * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned object * @return A column of type BOOL containing the check results */ std::unique_ptr have_overlap( @@ -177,4 +178,5 @@ std::unique_ptr difference_distinct( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf::lists +} // namespace lists +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/sorting.hpp b/cpp/include/cudf/lists/sorting.hpp index 78cea191bc5..39c71f6e9fa 100644 --- a/cpp/include/cudf/lists/sorting.hpp +++ b/cpp/include/cudf/lists/sorting.hpp @@ -18,11 +18,12 @@ #include #include #include +#include #include #include -namespace 
cudf { +namespace CUDF_EXPORT cudf { namespace lists { /** * @addtogroup lists_sort @@ -74,4 +75,4 @@ std::unique_ptr<column> stable_sort_lists( /** @} */ // end of group } // namespace lists -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/stream_compaction.hpp b/cpp/include/cudf/lists/stream_compaction.hpp index 31f09d37560..28ef13cd870 100644 --- a/cpp/include/cudf/lists/stream_compaction.hpp +++ b/cpp/include/cudf/lists/stream_compaction.hpp @@ -17,12 +17,14 @@ #include #include +#include #include #include #include -namespace cudf::lists { +namespace CUDF_EXPORT cudf { +namespace lists { /** * @addtogroup lists_filtering @@ -94,4 +96,5 @@ std::unique_ptr<column> distinct( /** @} */ // end of group -} // namespace cudf::lists +} // namespace lists +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/merge.hpp b/cpp/include/cudf/merge.hpp index 301e56c19b8..83c6ff04500 100644 --- a/cpp/include/cudf/merge.hpp +++ b/cpp/include/cudf/merge.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -24,7 +25,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup column_merge * @{ @@ -110,4 +111,4 @@ std::unique_ptr merge( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 9e375df140b..70ca6aa29c5 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -25,7 +26,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup column_nullmask @@ -208,4 +209,4 @@ cudf::size_type null_count(bitmask_type const* bitmask, size_type stop, rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp index 9ed56297908..6a53553063e 100644 --- a/cpp/include/cudf/partitioning.hpp +++ b/cpp/include/cudf/partitioning.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -26,7 +27,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup reorder_partition * @{ @@ -254,4 +255,4 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> round_robin_partition( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/quantiles.hpp b/cpp/include/cudf/quantiles.hpp index a1c98ee4e9d..47eac2e72f9 100644 --- a/cpp/include/cudf/quantiles.hpp +++ b/cpp/include/cudf/quantiles.hpp @@ -20,11 +20,12 @@ #include #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup column_quantiles * @{ @@ -129,4 +130,4 @@ std::unique_ptr<column> percentile_approx( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index 52f39925a2d..e42ff5df15d 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -18,13 +18,14 @@ #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup aggregation_reduction *
@{ @@ -232,4 +233,4 @@ std::pair<std::unique_ptr<scalar>, std::unique_ptr<scalar>> minmax( /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/reduction/detail/histogram.hpp b/cpp/include/cudf/reduction/detail/histogram.hpp index f23c5a14e33..5b17df47ec7 100644 --- a/cpp/include/cudf/reduction/detail/histogram.hpp +++ b/cpp/include/cudf/reduction/detail/histogram.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,8 @@ #include #include -namespace cudf::reduction::detail { +namespace CUDF_EXPORT cudf { +namespace reduction::detail { /** * @brief Compute the frequency for each distinct row in the input table. @@ -55,4 +57,5 @@ compute_row_frequencies(table_view const& input, */ [[nodiscard]] std::unique_ptr<column> make_empty_histogram_like(column_view const& values); -} // namespace cudf::reduction::detail +} // namespace reduction::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/reduction/detail/reduction.hpp b/cpp/include/cudf/reduction/detail/reduction.hpp index 78f90a1e2c9..a15783fb460 100644 --- a/cpp/include/cudf/reduction/detail/reduction.hpp +++ b/cpp/include/cudf/reduction/detail/reduction.hpp @@ -19,12 +19,14 @@ #include #include #include +#include #include #include -namespace cudf::reduction::detail { +namespace CUDF_EXPORT cudf { +namespace reduction::detail { /** * @copydoc cudf::reduce(column_view const&, reduce_aggregation const&, data_type, @@ -39,4 +41,5 @@ std::unique_ptr<scalar> reduce(column_view const& col, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace cudf::reduction::detail +} // namespace reduction::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp index 31d465619b9..fa21dc87e64 100644 --- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp @@ -20,15 +20,15 @@ #include #include #include +#include #include #include #include -namespace cudf { -namespace reduction { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace reduction::detail { /** * @brief Computes sum of elements in input column * @@ -352,6 +352,5 @@ std::unique_ptr merge_sets(lists_column_view const& col, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace reduction -} // namespace cudf +} // namespace reduction::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp b/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp index 770ac6580ef..1c55b387454 100644 --- a/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp @@ -20,15 +20,15 @@ #include #include #include +#include #include #include #include -namespace cudf { -namespace reduction { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace reduction::detail { /** * @brief Compute sum of each segment in the input column @@ -354,6 +354,5 @@ std::unique_ptr<column> segmented_nunique(column_view const& col, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace reduction -} // namespace cudf +} // namespace reduction::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/replace.hpp b/cpp/include/cudf/replace.hpp index ae20e72f023..43aabd6c6c6 100644 ---
a/cpp/include/cudf/replace.hpp +++ b/cpp/include/cudf/replace.hpp @@ -18,13 +18,14 @@ #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup transformation_replace * @{ @@ -308,4 +309,4 @@ void normalize_nans_and_zeros(mutable_column_view& in_out, rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index 26316be7fd4..07aaf6488ad 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -19,13 +19,14 @@ #include #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup column_reshape * @{ @@ -46,13 +47,14 @@ namespace cudf { * @throws cudf::logic_error if input contains no columns. * @throws cudf::logic_error if input columns dtypes are not identical. * - * @param[in] input Table containing columns to interleave - * @param[in] mr Device memory resource used to allocate the returned column's device memory - * + * @param input Table containing columns to interleave + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory * @return The interleaved columns as a single column */ std::unique_ptr interleave_columns( table_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -67,15 +69,17 @@ std::unique_ptr interleave_columns( * return = [[8, 4, 7, 8, 4, 7], [5, 2, 3, 5, 2, 3]] * ``` * - * @param[in] input Table containing rows to be repeated - * @param[in] count Number of times to tile "rows". Must be non-negative - * @param[in] mr Device memory resource used to allocate the returned table's device memory + * @param input Table containing rows to be repeated + * @param count Number of times to tile "rows". Must be non-negative + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned table's device memory * * @return The table containing the tiled "rows" */ std::unique_ptr
tile( table_view const& input, size_type count, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -94,6 +98,7 @@ enum class flip_endianness : bool { NO, YES }; * * @param input_column Column to be converted to lists of bytes * @param endian_configuration Whether to retain or flip the endianness of the elements + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * * @return The column containing the lists of bytes @@ -101,8 +106,9 @@ enum class flip_endianness : bool { NO, YES }; std::unique_ptr byte_cast( column_view const& input_column, flip_endianness endian_configuration, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/rolling.hpp b/cpp/include/cudf/rolling.hpp index d55322dd3e8..5a8c454d8fc 100644 --- a/cpp/include/cudf/rolling.hpp +++ b/cpp/include/cudf/rolling.hpp @@ -18,13 +18,14 @@ #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup aggregation_rolling * @{ @@ -615,4 +616,4 @@ std::unique_ptr rolling_window( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/rolling/range_window_bounds.hpp b/cpp/include/cudf/rolling/range_window_bounds.hpp index a9ee12cea27..21be609cbe6 100644 --- a/cpp/include/cudf/rolling/range_window_bounds.hpp +++ b/cpp/include/cudf/rolling/range_window_bounds.hpp @@ -17,8 +17,9 @@ #pragma once #include +#include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup aggregation_rolling * @{ @@ -119,4 +120,4 @@ struct range_window_bounds { }; /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/round.hpp b/cpp/include/cudf/round.hpp index 85935f8f05c..ef144b328f7 100644 --- a/cpp/include/cudf/round.hpp +++ b/cpp/include/cudf/round.hpp @@ -17,11 +17,12 @@ #pragma once #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup transformation_unaryops @@ -78,4 +79,4 @@ std::unique_ptr round( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index d78907b473a..2c5cc60fc70 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -32,7 +32,7 @@ * @brief Class definitions for cudf::scalar */ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup scalar_classes * @{ @@ -894,4 +894,4 @@ class struct_scalar : public scalar { }; /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/scalar/scalar_device_view.cuh b/cpp/include/cudf/scalar/scalar_device_view.cuh index 846da0bbe10..cbd3e9175ac 100644 --- a/cpp/include/cudf/scalar/scalar_device_view.cuh +++ b/cpp/include/cudf/scalar/scalar_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ * @brief Scalar device view class definitions */ -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** * @brief A non-owning view of scalar from device that is trivially copyable @@ -440,4 +440,4 @@ auto get_scalar_device_view(fixed_point_scalar& s) return fixed_point_scalar_device_view(s.type(), s.data(), s.validity_data()); } -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index 7dd4674a2fd..a422c3bfbe9 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup scalar_factories * @{ @@ -227,4 +227,4 @@ std::unique_ptr make_struct_scalar( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/search.hpp b/cpp/include/cudf/search.hpp index 2e50ba2d687..ad170ec726b 100644 --- a/cpp/include/cudf/search.hpp +++ b/cpp/include/cudf/search.hpp @@ -20,13 +20,14 @@ #include #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup column_search * @{ @@ -168,4 +169,4 @@ std::unique_ptr contains( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 79a00cbce42..4cb265a2a0b 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -26,7 +27,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup column_sort @@ -346,4 +347,4 @@ std::unique_ptr
stable_segmented_sort_by_key( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp index 181af11adb8..cfe404ff6ab 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/stream_compaction.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -25,7 +26,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup reorder_compact * @{ @@ -401,4 +402,4 @@ cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal = null_equality::EQUAL); /** @} */ -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/attributes.hpp b/cpp/include/cudf/strings/attributes.hpp index 26f906b3102..323290e907c 100644 --- a/cpp/include/cudf/strings/attributes.hpp +++ b/cpp/include/cudf/strings/attributes.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { //! Strings column APIs namespace strings { @@ -91,4 +91,4 @@ std::unique_ptr code_points( /** @} */ // end of strings_apis group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/capitalize.hpp b/cpp/include/cudf/strings/capitalize.hpp index f8cbdc09748..420b46a05b2 100644 --- a/cpp/include/cudf/strings/capitalize.hpp +++ b/cpp/include/cudf/strings/capitalize.hpp @@ -23,7 +23,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_case @@ -129,4 +129,4 @@ std::unique_ptr is_title( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/case.hpp b/cpp/include/cudf/strings/case.hpp index 5403fa8db7e..45f56a681a6 100644 --- a/cpp/include/cudf/strings/case.hpp +++ b/cpp/include/cudf/strings/case.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_case @@ -89,4 +89,4 @@ std::unique_ptr swapcase( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/char_types/char_cases.hpp b/cpp/include/cudf/strings/char_types/char_cases.hpp index 9eb63f71a2f..e5e619b8a50 100644 --- a/cpp/include/cudf/strings/char_types/char_cases.hpp +++ b/cpp/include/cudf/strings/char_types/char_cases.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
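A note on the pattern repeated throughout these hunks: C++ permits an attribute only on a non-nested namespace-definition, which is why every header that used the C++17 shorthand (`namespace cudf::lists { ... }`) is split into an attributed outer namespace plus an ordinary inner one. The sketch below illustrates the idea; the real `CUDF_EXPORT` definition lives in the new `cudf/utilities/export.hpp` header that each file now includes, and the exact expansion shown here is an assumption, not quoted from this patch.

// Plausible shape of the macro (assumed; see cudf/utilities/export.hpp):
#if defined(__GNUC__) || defined(__clang__)
#define CUDF_EXPORT __attribute__((visibility("default")))
#else
#define CUDF_EXPORT
#endif

// The attribute may only appear on a non-nested namespace-definition, so the
// nested shorthand is split at the exported namespace:
namespace CUDF_EXPORT cudf {
namespace lists {
// Declarations here keep default visibility even when the library is built
// with -fvisibility=hidden.
}  // namespace lists
}  // namespace CUDF_EXPORT cudf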
@@ -15,7 +15,9 @@ */ #pragma once -namespace cudf { +#include + +namespace CUDF_EXPORT cudf { namespace strings { namespace detail { /** @@ -31,4 +33,4 @@ void generate_special_mapping_hash_table(); } // namespace detail } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp index da7a238a400..a6af681eec6 100644 --- a/cpp/include/cudf/strings/char_types/char_types.hpp +++ b/cpp/include/cudf/strings/char_types/char_types.hpp @@ -23,7 +23,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_types @@ -119,4 +119,4 @@ std::unique_ptr filter_characters_of_type( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/char_types/char_types_enum.hpp b/cpp/include/cudf/strings/char_types/char_types_enum.hpp index 8d248cb2ebf..a9142fdbda6 100644 --- a/cpp/include/cudf/strings/char_types/char_types_enum.hpp +++ b/cpp/include/cudf/strings/char_types/char_types_enum.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_types @@ -80,4 +80,4 @@ constexpr string_character_types& operator|=(string_character_types& lhs, /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp index 8cc735831b8..2cade813d78 100644 --- a/cpp/include/cudf/strings/combine.hpp +++ b/cpp/include/cudf/strings/combine.hpp @@ -24,7 +24,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_combine @@ -334,4 +334,4 @@ std::unique_ptr join_list_elements( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/contains.hpp b/cpp/include/cudf/strings/contains.hpp index f79a0f19e9c..59c9b2dea40 100644 --- a/cpp/include/cudf/strings/contains.hpp +++ b/cpp/include/cudf/strings/contains.hpp @@ -23,7 +23,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { struct regex_program; @@ -209,4 +209,4 @@ std::unique_ptr like( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/convert/convert_booleans.hpp b/cpp/include/cudf/strings/convert/convert_booleans.hpp index 9c922361914..d79dd4a80ea 100644 --- a/cpp/include/cudf/strings/convert/convert_booleans.hpp +++ b/cpp/include/cudf/strings/convert/convert_booleans.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_convert @@ -72,4 +72,4 @@ std::unique_ptr from_booleans( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/convert/convert_datetime.hpp b/cpp/include/cudf/strings/convert/convert_datetime.hpp index b89384d718b..c3b3c91ab35 100644 --- 
a/cpp/include/cudf/strings/convert/convert_datetime.hpp +++ b/cpp/include/cudf/strings/convert/convert_datetime.hpp @@ -24,7 +24,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_convert @@ -255,4 +255,4 @@ std::unique_ptr from_timestamps( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/convert/convert_durations.hpp b/cpp/include/cudf/strings/convert/convert_durations.hpp index 2db719a4f1f..8b69968a609 100644 --- a/cpp/include/cudf/strings/convert/convert_durations.hpp +++ b/cpp/include/cudf/strings/convert/convert_durations.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_convert @@ -133,4 +133,4 @@ std::unique_ptr from_durations( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/convert/convert_fixed_point.hpp b/cpp/include/cudf/strings/convert/convert_fixed_point.hpp index 9911bea1948..a9c5aea6343 100644 --- a/cpp/include/cudf/strings/convert/convert_fixed_point.hpp +++ b/cpp/include/cudf/strings/convert/convert_fixed_point.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_convert @@ -130,4 +130,4 @@ std::unique_ptr is_fixed_point( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/convert/convert_floats.hpp b/cpp/include/cudf/strings/convert/convert_floats.hpp index feb5b528686..64e9bb776f4 100644 --- a/cpp/include/cudf/strings/convert/convert_floats.hpp +++ b/cpp/include/cudf/strings/convert/convert_floats.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_convert @@ -103,4 +103,4 @@ std::unique_ptr is_float( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp index 82696811fdc..62eb1fdda4d 100644 --- a/cpp/include/cudf/strings/convert/convert_integers.hpp +++ b/cpp/include/cudf/strings/convert/convert_integers.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_convert @@ -235,4 +235,4 @@ std::unique_ptr integers_to_hex( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/convert/convert_ipv4.hpp b/cpp/include/cudf/strings/convert/convert_ipv4.hpp index 64f8a412ce9..04a04907c12 100644 --- a/cpp/include/cudf/strings/convert/convert_ipv4.hpp +++ b/cpp/include/cudf/strings/convert/convert_ipv4.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_convert @@ -113,4 +113,4 @@ std::unique_ptr is_ipv4( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/convert/convert_lists.hpp b/cpp/include/cudf/strings/convert/convert_lists.hpp index a88bbe99492..85b67907228 100644 --- a/cpp/include/cudf/strings/convert/convert_lists.hpp +++ 
b/cpp/include/cudf/strings/convert/convert_lists.hpp @@ -23,7 +23,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_convert @@ -68,4 +68,4 @@ std::unique_ptr format_list_column( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/convert/convert_urls.hpp b/cpp/include/cudf/strings/convert/convert_urls.hpp index 30988d2ff0a..a42a5cd2407 100644 --- a/cpp/include/cudf/strings/convert/convert_urls.hpp +++ b/cpp/include/cudf/strings/convert/convert_urls.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_convert @@ -75,4 +75,4 @@ std::unique_ptr url_decode( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/char_tables.hpp b/cpp/include/cudf/strings/detail/char_tables.hpp index 0901076c835..5d6aff28826 100644 --- a/cpp/include/cudf/strings/detail/char_tables.hpp +++ b/cpp/include/cudf/strings/detail/char_tables.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,11 +15,12 @@ */ #pragma once +#include + #include -namespace cudf { -namespace strings { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { // Type for the character flags table. using character_flags_table_type = std::uint8_t; @@ -101,6 +102,5 @@ constexpr uint16_t get_special_case_hash_index(uint32_t code_point) return static_cast(code_point % special_case_prime); } -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/combine.hpp b/cpp/include/cudf/strings/detail/combine.hpp index 25214055787..962191eae6a 100644 --- a/cpp/include/cudf/strings/detail/combine.hpp +++ b/cpp/include/cudf/strings/detail/combine.hpp @@ -21,13 +21,13 @@ #include #include #include +#include #include #include -namespace cudf { -namespace strings { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { /** * @copydoc concatenate(table_view const&,string_scalar const&,string_scalar @@ -68,6 +68,5 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp index b5dd5b9516a..e038102ab1f 100644 --- a/cpp/include/cudf/strings/detail/concatenate.hpp +++ b/cpp/include/cudf/strings/detail/concatenate.hpp @@ -19,14 +19,14 @@ #include #include #include +#include #include #include #include -namespace cudf { -namespace strings { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { /** * @brief Returns a single column by vertically concatenating the given vector of * strings columns. 
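For orientation, the `strings::detail` overloads above are internal: they take an explicit stream and memory resource with no defaults. User code reaches the same functionality through the public API, which defaults both. A hedged usage sketch (the public `cudf::concatenate` from `cudf/concatenate.hpp` is assumed here; that header is not part of this diff):

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/concatenate.hpp>

#include <memory>
#include <vector>

// Vertically concatenate two columns; stream and mr use the public defaults.
std::unique_ptr<cudf::column> concat_two(cudf::column_view const& a,
                                         cudf::column_view const& b)
{
  std::vector<cudf::column_view> views{a, b};
  return cudf::concatenate(views);
}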
@@ -47,6 +47,5 @@ std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/converters.hpp b/cpp/include/cudf/strings/detail/converters.hpp index d212239264b..73a97499293 100644 --- a/cpp/include/cudf/strings/detail/converters.hpp +++ b/cpp/include/cudf/strings/detail/converters.hpp @@ -18,13 +18,13 @@ #include #include #include +#include #include #include -namespace cudf { -namespace strings { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { /** * @copydoc to_integers(strings_column_view const&,data_type,rmm::device_async_resource_ref) @@ -153,6 +153,5 @@ std::unique_ptr from_fixed_point(column_view const& integers, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/copy_range.hpp b/cpp/include/cudf/strings/detail/copy_range.hpp index 192c5b833c6..71dcf9edaf3 100644 --- a/cpp/include/cudf/strings/detail/copy_range.hpp +++ b/cpp/include/cudf/strings/detail/copy_range.hpp @@ -21,9 +21,8 @@ #include #include -namespace cudf { -namespace strings { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { /** * @brief Internal API to copy a range of string elements out-of-place from @@ -56,6 +55,5 @@ std::unique_ptr copy_range(strings_column_view const& source, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/copying.hpp b/cpp/include/cudf/strings/detail/copying.hpp index 240cac17188..b4d3362359d 100644 --- a/cpp/include/cudf/strings/detail/copying.hpp +++ b/cpp/include/cudf/strings/detail/copying.hpp @@ -19,13 +19,13 @@ #include #include #include +#include #include #include -namespace cudf { -namespace strings { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { /** * @brief Returns a new strings column created from a subset * of the strings column. @@ -83,6 +83,5 @@ std::unique_ptr shift(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/fill.hpp b/cpp/include/cudf/strings/detail/fill.hpp index c5d005fbf75..1a3ff2c9166 100644 --- a/cpp/include/cudf/strings/detail/fill.hpp +++ b/cpp/include/cudf/strings/detail/fill.hpp @@ -19,13 +19,13 @@ #include #include #include +#include #include #include -namespace cudf { -namespace strings { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { /** * @brief Returns a strings column replacing a range of rows * with the specified string.
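The `fill` declared in this header backs the public `cudf::fill` when the input is a strings column. A hedged sketch of that public entry point (assumed from `cudf/filling.hpp`, which this patch does not touch): replace rows [2, 5) with one scalar value.

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/filling.hpp>
#include <cudf/scalar/scalar.hpp>

#include <memory>

std::unique_ptr<cudf::column> blank_out(cudf::column_view const& input)
{
  cudf::string_scalar repl{"n/a"};       // replacement value for the range
  return cudf::fill(input, 2, 5, repl);  // end index is exclusive
}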
@@ -50,6 +50,5 @@ std::unique_ptr fill(strings_column_view const& strings, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index fcd74bebfe8..4369de317b3 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -18,11 +18,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -230,7 +232,8 @@ rmm::device_uvector gather_chars(StringIterator strings_begin, if (output_count == 0) return rmm::device_uvector(0, stream, mr); auto chars_data = rmm::device_uvector(chars_bytes, stream, mr); - auto d_chars = chars_data.data(); + cudf::experimental::prefetch::detail::prefetch("gather", chars_data, stream); + auto d_chars = chars_data.data(); constexpr int warps_per_threadblock = 4; // String parallel strategy will be used if average string length is above this threshold. @@ -312,6 +315,8 @@ std::unique_ptr gather(strings_column_view const& strings, // build chars column auto const offsets_view = cudf::detail::offsetalator_factory::make_input_iterator(out_offsets_column->view()); + cudf::experimental::prefetch::detail::prefetch( + "gather", strings.chars_begin(stream), strings.chars_size(stream), stream); auto out_chars_data = gather_chars( d_strings->begin(), begin, end, offsets_view, total_bytes, stream, mr); diff --git a/cpp/include/cudf/strings/detail/merge.hpp b/cpp/include/cudf/strings/detail/merge.hpp index 35fd9c0593d..0aa5c0c2899 100644 --- a/cpp/include/cudf/strings/detail/merge.hpp +++ b/cpp/include/cudf/strings/detail/merge.hpp @@ -18,10 +18,12 @@ #include #include #include +#include #include -namespace cudf ::strings ::detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { /** * @brief Merges two strings columns * @@ -38,4 +40,5 @@ std::unique_ptr merge(strings_column_view const& lhs, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace cudf::strings::detail +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/replace.hpp b/cpp/include/cudf/strings/detail/replace.hpp index 481d00f1bce..ab092555c48 100644 --- a/cpp/include/cudf/strings/detail/replace.hpp +++ b/cpp/include/cudf/strings/detail/replace.hpp @@ -19,13 +19,13 @@ #include #include #include +#include #include #include -namespace cudf { -namespace strings { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { /** * @copydoc cudf::strings::replace(strings_column_view const&, string_scalar const&, @@ -100,6 +100,5 @@ std::unique_ptr find_and_replace_all( rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/scan.hpp b/cpp/include/cudf/strings/detail/scan.hpp index f32afa64a72..4991fd633d5 100644 --- a/cpp/include/cudf/strings/detail/scan.hpp +++ b/cpp/include/cudf/strings/detail/scan.hpp @@ -21,9 +21,8 @@ #include #include -namespace cudf { -namespace strings { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { /** * @brief Scan function for strings * @@ -43,6 +42,5 @@ std::unique_ptr scan_inclusive(column_view const& input, 
rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index f5f3982a5d6..55b59dd4ff2 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -186,6 +187,7 @@ auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn, // Now build the chars column rmm::device_uvector chars(bytes, stream, mr); + cudf::experimental::prefetch::detail::prefetch("gather", chars, stream); size_and_exec_fn.d_chars = chars.data(); // Execute the function fn again to fill in the chars data. diff --git a/cpp/include/cudf/strings/detail/utf8.hpp b/cpp/include/cudf/strings/detail/utf8.hpp index 5587597cb51..85349a421b1 100644 --- a/cpp/include/cudf/strings/detail/utf8.hpp +++ b/cpp/include/cudf/strings/detail/utf8.hpp @@ -22,9 +22,8 @@ * @brief Standalone string functions. */ -namespace cudf { -namespace strings { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { /** * @brief This will return true if passed a continuation byte of a UTF-8 character. @@ -206,6 +205,5 @@ constexpr cudf::char_utf8 codepoint_to_utf8(uint32_t unchr) return utf8; } -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp index 4467a9d0023..1fa505501d8 100644 --- a/cpp/include/cudf/strings/detail/utilities.hpp +++ b/cpp/include/cudf/strings/detail/utilities.hpp @@ -18,15 +18,15 @@ #include #include #include +#include #include #include #include #include -namespace cudf { -namespace strings { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace strings::detail { /** * @brief Create an offsets column to be a child of a strings column @@ -96,6 +96,5 @@ int64_t get_offset_value(cudf::column_view const& offsets, size_type index, rmm::cuda_stream_view stream); -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace strings::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/extract.hpp b/cpp/include/cudf/strings/extract.hpp index 4138e1e59d5..2ef7308b802 100644 --- a/cpp/include/cudf/strings/extract.hpp +++ b/cpp/include/cudf/strings/extract.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { struct regex_program; @@ -104,4 +104,4 @@ std::unique_ptr extract_all_record( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/find.hpp b/cpp/include/cudf/strings/find.hpp index c116dbc2fe1..efba6da9454 100644 --- a/cpp/include/cudf/strings/find.hpp +++ b/cpp/include/cudf/strings/find.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_find @@ -262,4 +262,4 @@ std::unique_ptr ends_with( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/find_multiple.hpp 
b/cpp/include/cudf/strings/find_multiple.hpp index c2e82aa6f1a..dea08308ff0 100644 --- a/cpp/include/cudf/strings/find_multiple.hpp +++ b/cpp/include/cudf/strings/find_multiple.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_find @@ -63,4 +63,4 @@ std::unique_ptr find_multiple( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp index abc1d28ee4c..26249b6842c 100644 --- a/cpp/include/cudf/strings/findall.hpp +++ b/cpp/include/cudf/strings/findall.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { struct regex_program; @@ -70,4 +70,4 @@ std::unique_ptr findall( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/padding.hpp b/cpp/include/cudf/strings/padding.hpp index f1382d6ea29..11e35f717ae 100644 --- a/cpp/include/cudf/strings/padding.hpp +++ b/cpp/include/cudf/strings/padding.hpp @@ -23,7 +23,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_modify @@ -96,4 +96,4 @@ std::unique_ptr zfill( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/regex/flags.hpp b/cpp/include/cudf/strings/regex/flags.hpp index 44ca68439e7..f7108129dee 100644 --- a/cpp/include/cudf/strings/regex/flags.hpp +++ b/cpp/include/cudf/strings/regex/flags.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
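Several headers in this run (`contains.hpp`, `extract.hpp`, `findall.hpp`) operate on a precompiled `regex_program`, and the flags declared below feed into its creation. A usage sketch, assuming the public API shapes (`regex_program::create` and `contains_re`) rather than quoting this diff:

#include <cudf/column/column.hpp>
#include <cudf/strings/contains.hpp>
#include <cudf/strings/regex/regex_program.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include <memory>

// Compile the pattern once, then reuse it across calls.
std::unique_ptr<cudf::column> ends_in_ing(cudf::strings_column_view const& input)
{
  auto prog = cudf::strings::regex_program::create("\\w+ing");
  return cudf::strings::contains_re(input, *prog);  // BOOL8 column of matches
}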
@@ -15,9 +15,11 @@ */ #pragma once +#include + #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** @@ -86,4 +88,4 @@ enum class capture_groups : uint32_t { /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/regex/regex_program.hpp b/cpp/include/cudf/strings/regex/regex_program.hpp index 95c86ae0f8a..9da859d9c87 100644 --- a/cpp/include/cudf/strings/regex/regex_program.hpp +++ b/cpp/include/cudf/strings/regex/regex_program.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** @@ -135,4 +135,4 @@ struct regex_program { /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp index cbf1edc8331..e160f75390b 100644 --- a/cpp/include/cudf/strings/repeat_strings.hpp +++ b/cpp/include/cudf/strings/repeat_strings.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_copy @@ -133,4 +133,4 @@ std::unique_ptr repeat_strings( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/replace.hpp b/cpp/include/cudf/strings/replace.hpp index a714f762a19..5b4ffb98f99 100644 --- a/cpp/include/cudf/strings/replace.hpp +++ b/cpp/include/cudf/strings/replace.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_replace @@ -174,4 +174,4 @@ std::unique_ptr replace_multiple( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp index f61f9585144..6b487072cb2 100644 --- a/cpp/include/cudf/strings/replace_re.hpp +++ b/cpp/include/cudf/strings/replace_re.hpp @@ -25,7 +25,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { struct regex_program; @@ -112,4 +112,4 @@ std::unique_ptr replace_with_backrefs( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/reverse.hpp b/cpp/include/cudf/strings/reverse.hpp index 86656693c8b..fbda2e5fe7c 100644 --- a/cpp/include/cudf/strings/reverse.hpp +++ b/cpp/include/cudf/strings/reverse.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_modify @@ -53,4 +53,4 @@ std::unique_ptr reverse( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/side_type.hpp b/cpp/include/cudf/strings/side_type.hpp index 5905e087deb..5b794261ad9 100644 --- a/cpp/include/cudf/strings/side_type.hpp +++ b/cpp/include/cudf/strings/side_type.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
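`side_type`, declared just below, selects which end of each string APIs such as `strip` act on. A minimal sketch, assuming the public `cudf::strings::strip` signature:

#include <cudf/column/column.hpp>
#include <cudf/strings/side_type.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/strings/strip.hpp>

#include <memory>

// Trim whitespace from the left side of every string only.
std::unique_ptr<cudf::column> trim_left(cudf::strings_column_view const& input)
{
  return cudf::strings::strip(input, cudf::strings::side_type::LEFT);
}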
@@ -15,7 +15,9 @@ */ #pragma once -namespace cudf { +#include + +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_modify @@ -34,4 +36,4 @@ enum class side_type { /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/slice.hpp b/cpp/include/cudf/strings/slice.hpp index e2be6abd344..b0da6976207 100644 --- a/cpp/include/cudf/strings/slice.hpp +++ b/cpp/include/cudf/strings/slice.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_slice @@ -114,4 +114,4 @@ std::unique_ptr slice_strings( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/split/partition.hpp b/cpp/include/cudf/strings/split/partition.hpp index 0a837034ba1..8f5ae752417 100644 --- a/cpp/include/cudf/strings/split/partition.hpp +++ b/cpp/include/cudf/strings/split/partition.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_split @@ -101,4 +101,4 @@ std::unique_ptr
rpartition( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/split/split.hpp b/cpp/include/cudf/strings/split/split.hpp index d5c44406ca7..ca371d7abd1 100644 --- a/cpp/include/cudf/strings/split/split.hpp +++ b/cpp/include/cudf/strings/split/split.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_split @@ -245,4 +245,4 @@ std::unique_ptr rsplit_record( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/split/split_re.hpp b/cpp/include/cudf/strings/split/split_re.hpp index 81595fa7ed4..96ef0b6e830 100644 --- a/cpp/include/cudf/strings/split/split_re.hpp +++ b/cpp/include/cudf/strings/split/split_re.hpp @@ -22,7 +22,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { struct regex_program; @@ -263,4 +263,4 @@ std::unique_ptr rsplit_record_re( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index 93cc787683b..abb26d7ccb4 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -18,6 +18,7 @@ #include #include +#include #ifndef __CUDA_ARCH__ #include @@ -35,7 +36,7 @@ // This file should only include device code logic. // Host-only or host/device code should be defined in the string_view.hpp header file. -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { namespace detail { @@ -448,4 +449,4 @@ __device__ inline size_type string_view::character_offset(size_type bytepos) con return strings::detail::characters_in_string(data(), bytepos); } -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/string_view.hpp b/cpp/include/cudf/strings/string_view.hpp index afc7e027a4b..504c31057ae 100644 --- a/cpp/include/cudf/strings/string_view.hpp +++ b/cpp/include/cudf/strings/string_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ * @brief Class definition for cudf::string_view. 
*/ -namespace cudf { +namespace CUDF_EXPORT cudf { using char_utf8 = uint32_t; ///< UTF-8 characters are 1-4 bytes @@ -406,4 +406,4 @@ class string_view { size_type count) const; }; -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/strings_column_view.hpp b/cpp/include/cudf/strings/strings_column_view.hpp index 1e9e73cef4c..4a2512eb7c5 100644 --- a/cpp/include/cudf/strings/strings_column_view.hpp +++ b/cpp/include/cudf/strings/strings_column_view.hpp @@ -17,13 +17,14 @@ #include #include +#include /** * @file * @brief Class definition for cudf::strings_column_view */ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup strings_classes @@ -126,4 +127,4 @@ namespace strings { } // namespace strings /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/strip.hpp b/cpp/include/cudf/strings/strip.hpp index 6fb9bbc45e6..4cfba59c72c 100644 --- a/cpp/include/cudf/strings/strip.hpp +++ b/cpp/include/cudf/strings/strip.hpp @@ -23,7 +23,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_modify @@ -71,4 +71,4 @@ std::unique_ptr strip( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/translate.hpp b/cpp/include/cudf/strings/translate.hpp index 9cd6b7d5974..531753f4a8c 100644 --- a/cpp/include/cudf/strings/translate.hpp +++ b/cpp/include/cudf/strings/translate.hpp @@ -25,7 +25,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_modify @@ -109,4 +109,4 @@ std::unique_ptr filter_characters( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/wrap.hpp b/cpp/include/cudf/strings/wrap.hpp index c05c33fbac8..465a9d15d00 100644 --- a/cpp/include/cudf/strings/wrap.hpp +++ b/cpp/include/cudf/strings/wrap.hpp @@ -21,7 +21,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace strings { /** * @addtogroup strings_modify @@ -72,4 +72,4 @@ std::unique_ptr wrap( /** @} */ // end of doxygen group } // namespace strings -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/structs/detail/concatenate.hpp b/cpp/include/cudf/structs/detail/concatenate.hpp index 5dc3169c0c4..16be868af52 100644 --- a/cpp/include/cudf/structs/detail/concatenate.hpp +++ b/cpp/include/cudf/structs/detail/concatenate.hpp @@ -18,13 +18,13 @@ #include #include #include +#include #include #include -namespace cudf { -namespace structs { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace structs::detail { /** * @brief Returns a single column by concatenating the given vector of structs columns. 
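The `strings_column_view` hunk above pairs with the prefetch changes in `gather.cuh` earlier in this patch: `chars_begin` and `chars_size` are stream-ordered accessors, which is why the prefetch call receives a stream. A small sketch of those accessors in use (signatures assumed to match the header, which this diff only re-namespaces):

#include <cudf/column/column_view.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

// Report the total size of a strings column's character data.
int64_t chars_bytes(cudf::column_view const& col)
{
  auto stream = cudf::get_default_stream();
  cudf::strings_column_view sv{col};
  return sv.chars_size(stream);  // total bytes in the chars buffer
}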
@@ -54,6 +54,5 @@ std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace structs -} // namespace cudf +} // namespace structs::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/structs/detail/scan.hpp b/cpp/include/cudf/structs/detail/scan.hpp index c97a8452ecd..6121f63d42f 100644 --- a/cpp/include/cudf/structs/detail/scan.hpp +++ b/cpp/include/cudf/structs/detail/scan.hpp @@ -17,13 +17,13 @@ #include #include +#include #include #include -namespace cudf { -namespace structs { -namespace detail { +namespace CUDF_EXPORT cudf { +namespace structs::detail { /** * @brief Scan function for struct column type * @@ -41,6 +41,5 @@ std::unique_ptr scan_inclusive(column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace structs -} // namespace cudf +} // namespace structs::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/structs/struct_view.hpp b/cpp/include/cudf/structs/struct_view.hpp index 75483709867..65fd3f78d1a 100644 --- a/cpp/include/cudf/structs/struct_view.hpp +++ b/cpp/include/cudf/structs/struct_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ * @brief Class definition for cudf::struct_view. */ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief A non-owning, immutable view of device data that represents @@ -29,4 +29,4 @@ namespace cudf { */ class struct_view {}; -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/structs/structs_column_device_view.cuh b/cpp/include/cudf/structs/structs_column_device_view.cuh index 7580582631f..cf71ba87a20 100644 --- a/cpp/include/cudf/structs/structs_column_device_view.cuh +++ b/cpp/include/cudf/structs/structs_column_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { @@ -84,4 +84,4 @@ class structs_column_device_view : private column_device_view { } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/structs/structs_column_view.hpp b/cpp/include/cudf/structs/structs_column_view.hpp index 4a50488ef00..19798f51656 100644 --- a/cpp/include/cudf/structs/structs_column_view.hpp +++ b/cpp/include/cudf/structs/structs_column_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ * @brief Class definition for cudf::structs_column_view. 
*/ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup structs_classes @@ -98,4 +98,4 @@ class structs_column_view : public column_view { int index, rmm::cuda_stream_view stream = cudf::get_default_stream()) const; }; // class structs_column_view; /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index c181ac7d402..f05e5f4ca5c 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -54,7 +54,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace experimental { @@ -2026,4 +2026,4 @@ class row_hasher { } // namespace row } // namespace experimental -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 0e57d24f4b3..e3b65d77b4a 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -30,7 +30,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief Result type of the `element_relational_comparator` function object. @@ -635,4 +635,4 @@ class row_hasher { uint32_t _seed{DEFAULT_HASH_SEED}; }; -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp index c4f14af53fb..be2af7ac653 100644 --- a/cpp/include/cudf/table/table.hpp +++ b/cpp/include/cudf/table/table.hpp @@ -31,7 +31,7 @@ * @brief Class definition for cudf::table */ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief A set of cudf::column's of the same size. @@ -194,4 +194,4 @@ class table { size_type _num_rows{}; }; -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/table/table_device_view.cuh b/cpp/include/cudf/table/table_device_view.cuh index 511013b585d..16d532ea2b8 100644 --- a/cpp/include/cudf/table/table_device_view.cuh +++ b/cpp/include/cudf/table/table_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ * @brief Table device view class definitions */ -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { /** @@ -271,4 +271,4 @@ auto contiguous_copy_column_device_views(HostTableView source_view, rmm::cuda_st return std::make_tuple(std::move(descendant_storage), d_columns); } -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/tdigest/tdigest_column_view.hpp b/cpp/include/cudf/tdigest/tdigest_column_view.hpp index b2eb341df86..2f19efa5630 100644 --- a/cpp/include/cudf/tdigest/tdigest_column_view.hpp +++ b/cpp/include/cudf/tdigest/tdigest_column_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { //! 
Tdigest interfaces namespace tdigest { /** @@ -132,4 +132,4 @@ class tdigest_column_view : private column_view { /** @} */ // end of group } // namespace tdigest -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/timezone.hpp b/cpp/include/cudf/timezone.hpp index 7f65128526e..8329c64e24f 100644 --- a/cpp/include/cudf/timezone.hpp +++ b/cpp/include/cudf/timezone.hpp @@ -15,6 +15,8 @@ */ #pragma once +#include + #include #include @@ -22,7 +24,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { class table; // Cycle in which the time offsets repeat in Gregorian calendar @@ -52,4 +54,4 @@ std::unique_ptr
make_timezone_transition_table( std::string_view timezone_name, rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 7bb9fb7a42e..adc5bdb2af8 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -18,13 +18,14 @@ #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup transformation_transform * @{ @@ -248,4 +249,4 @@ std::unique_ptr segmented_row_bit_count( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/transpose.hpp b/cpp/include/cudf/transpose.hpp index c01a04afe87..f4433c46a06 100644 --- a/cpp/include/cudf/transpose.hpp +++ b/cpp/include/cudf/transpose.hpp @@ -17,11 +17,12 @@ #include #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup reshape_transpose * @{ @@ -48,4 +49,4 @@ std::pair, table_view> transpose( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index baf07fa3db6..409b8c825bb 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -36,6 +36,8 @@ #define CUDF_KERNEL static #endif +#include + #include #include #include @@ -54,7 +56,7 @@ class device_buffer; } // namespace rmm -namespace cudf { +namespace CUDF_EXPORT cudf { // Forward declaration class column; class column_view; @@ -344,4 +346,4 @@ inline bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lh std::size_t size_of(data_type t); /** @} */ -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp index 1609c72f175..55f4c1f5a23 100644 --- a/cpp/include/cudf/unary.hpp +++ b/cpp/include/cudf/unary.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup transformation_unaryops * @{ @@ -259,4 +260,4 @@ std::unique_ptr is_not_nan( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/bit.hpp b/cpp/include/cudf/utilities/bit.hpp index 9bdc372419f..736796e610a 100644 --- a/cpp/include/cudf/utilities/bit.hpp +++ b/cpp/include/cudf/utilities/bit.hpp @@ -27,7 +27,7 @@ * @brief Utilities for bit and bitmask operations. */ -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { // @cond // Work around a bug in NVRTC that fails to compile assert() in constexpr @@ -217,4 +217,4 @@ __device__ inline void clear_bit(bitmask_type* bitmask, size_type bit_index) } #endif /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/default_stream.hpp b/cpp/include/cudf/utilities/default_stream.hpp index aacab996e8a..97a42243250 100644 --- a/cpp/include/cudf/utilities/default_stream.hpp +++ b/cpp/include/cudf/utilities/default_stream.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,11 +17,12 @@ #pragma once #include +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup default_stream * @{ @@ -43,4 +44,4 @@ rmm::cuda_stream_view const get_default_stream(); bool is_ptds_enabled(); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp index f019f516b84..f847ce0f66a 100644 --- a/cpp/include/cudf/utilities/error.hpp +++ b/cpp/include/cudf/utilities/error.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -25,7 +26,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup utility_error * @{ @@ -140,7 +141,7 @@ struct data_type_error : public std::invalid_argument, public stacktrace_recorde }; /** @} */ -} // namespace cudf +} // namespace CUDF_EXPORT cudf #define STRINGIFY_DETAIL(x) #x ///< Stringify a macro argument #define CUDF_STRINGIFY(x) STRINGIFY_DETAIL(x) ///< Stringify a macro argument @@ -229,7 +230,7 @@ struct data_type_error : public std::invalid_argument, public stacktrace_recorde /// @endcond -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { // @cond inline void throw_cuda_error(cudaError_t error, char const* file, unsigned int line) @@ -251,7 +252,7 @@ inline void throw_cuda_error(cudaError_t error, char const* file, unsigned int l } // @endcond } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf /** * @brief Error checking macro for CUDA runtime API functions. diff --git a/cpp/include/cudf/utilities/pinned_memory.hpp b/cpp/include/cudf/utilities/pinned_memory.hpp index 3e2fa43cb50..623a033698f 100644 --- a/cpp/include/cudf/utilities/pinned_memory.hpp +++ b/cpp/include/cudf/utilities/pinned_memory.hpp @@ -16,11 +16,13 @@ #pragma once +#include + #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief Set the rmm resource to be used for pinned memory allocations. @@ -71,4 +73,20 @@ void set_kernel_pinned_copy_threshold(size_t threshold); */ size_t get_kernel_pinned_copy_threshold(); -} // namespace cudf +/** + * @brief Set the threshold size for allocating host memory as pinned memory. + * + * @param threshold The threshold size in bytes. If the size of the allocation is less or equal to + * this threshold, the memory will be allocated as pinned memory. If the size is greater than this + * threshold, the memory will be allocated as pageable memory. + */ +void set_allocate_host_as_pinned_threshold(size_t threshold); + +/** + * @brief Get the threshold size for allocating host memory as pinned memory. + * + * @return The threshold size in bytes. + */ +size_t get_allocate_host_as_pinned_threshold(); + +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/prefetch.hpp b/cpp/include/cudf/utilities/prefetch.hpp new file mode 100644 index 00000000000..3384181fc37 --- /dev/null +++ b/cpp/include/cudf/utilities/prefetch.hpp @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +#include +#include +#include +#include + +namespace CUDF_EXPORT cudf { +namespace experimental::prefetch { + +namespace detail { + +/** + * @brief A singleton class that manages the prefetching configuration. + */ +class prefetch_config { + public: + prefetch_config& operator=(const prefetch_config&) = delete; + prefetch_config(const prefetch_config&) = delete; + + /** + * @brief Get the singleton instance of the prefetching configuration. + * + * @return The singleton instance of the prefetching configuration. + */ + static prefetch_config& instance(); + + /** + * @brief Get the value of a configuration key. + * + * If the key does not exist, a `false` value will be returned. + * + * @param key The configuration key. + * @return The value of the configuration key. + */ + bool get(std::string_view key); + /** + * @brief Set the value of a configuration key. + * + * This is a thread-safe operation. + * + * @param key The configuration key. + * @param value The value to set. + */ + void set(std::string_view key, bool value); + /** + * @brief Enable or disable debug mode. + * + * In debug mode, the pointers being prefetched are printed to stderr. + */ + bool debug{false}; + + private: + prefetch_config() = default; ///< Private constructor to enforce singleton pattern + std::map config_values; ///< Map of configuration keys to values + std::shared_mutex config_mtx; ///< Mutex for thread-safe config access +}; + +/** + * @brief Enable prefetching for a particular structure or algorithm. + * + * @param key The key to enable prefetching for. + * @param ptr The pointer to prefetch. + * @param size The size of the memory region to prefetch. + * @param stream The stream to prefetch on. + * @param device_id The device to prefetch on. + */ +void prefetch(std::string_view key, + void const* ptr, + std::size_t size, + rmm::cuda_stream_view stream, + rmm::cuda_device_id device_id = rmm::get_current_cuda_device()); + +/** + * @brief Enable prefetching for a particular structure or algorithm. + * + * @note This function will not throw exceptions, so it is safe to call in + * noexcept contexts. If an error occurs, the error code is returned. This + * function primarily exists for [mutable_]column_view::get_data and should be + * removed once a method for stream-ordered data pointer access is added to + * those data structures. + * + * @param key The key to enable prefetching for. + * @param ptr The pointer to prefetch. + * @param size The size of the memory region to prefetch. + * @param stream The stream to prefetch on. + * @param device_id The device to prefetch on. + */ +cudaError_t prefetch_noexcept( + std::string_view key, + void const* ptr, + std::size_t size, + rmm::cuda_stream_view stream, + rmm::cuda_device_id device_id = rmm::get_current_cuda_device()) noexcept; + +/** + * @brief Prefetch the data in a device_uvector. + * + * @note At present this function does not support stream-ordered execution. Prefetching always + * occurs on the default stream. + * + * @param key The key to enable prefetching for.
+ * @param v The device_uvector to prefetch. + * @param stream The stream to prefetch on. + * @param device_id The device to prefetch on. + */ +template +void prefetch(std::string_view key, + rmm::device_uvector const& v, + rmm::cuda_stream_view stream, + rmm::cuda_device_id device_id = rmm::get_current_cuda_device()) +{ + if (v.is_empty()) { return; } + prefetch(key, v.data(), v.size(), stream, device_id); +} + +} // namespace detail + +/** + * @brief Enable prefetching for a particular structure or algorithm. + * + * @param key The key to enable prefetching for. + */ +void enable_prefetching(std::string_view key); + +/** + * @brief Disable prefetching for a particular structure or algorithm. + * + * @param key The key to disable prefetching for. + */ +void disable_prefetching(std::string_view key); + +/** + * @brief Enable or disable debug mode. + * + * In debug mode, the pointers being prefetched are printed to stderr. + * + * @param enable Whether to enable or disable debug mode. + */ +void prefetch_debugging(bool enable); + +} // namespace experimental::prefetch +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 3b35e60e034..0daebc0dd8d 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -16,6 +16,9 @@ #pragma once +#include +#include + #include #include #include @@ -30,7 +33,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup utility_span * @{ @@ -257,6 +260,26 @@ struct host_span : public cudf::detail::span_base>* = nullptr> + constexpr host_span(cudf::detail::host_vector& in) + : base(in.data(), in.size()), _is_device_accessible{in.get_allocator().is_device_accessible()} + { + } + + /// Constructor from a const host_vector + /// @param in The host_vector to construct the span from + template >* = nullptr> + constexpr host_span(cudf::detail::host_vector const& in) + : base(in.data(), in.size()), _is_device_accessible{in.get_allocator().is_device_accessible()} + { + } + // Copy construction to support const conversion /// @param other The span to copy template using device_2dspan = base_2dspan; } // namespace detail -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/thread_pool.hpp b/cpp/include/cudf/utilities/thread_pool.hpp deleted file mode 100644 index c8c3eb097c4..00000000000 --- a/cpp/include/cudf/utilities/thread_pool.hpp +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -/** - * Modified from https://github.com/bshoshany/thread-pool - * @copyright Copyright (c) 2021 Barak Shoshany. Licensed under the MIT license. 
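The pinned-memory thresholds and the `cudf::experimental::prefetch` interface declared above are all plain free functions, so wiring them up takes only a few calls. A brief usage sketch (the 256 KiB threshold and the "gather" key are illustrative choices, not values taken from this patch):

    // Hedged usage sketch for the APIs declared above; the key name and the
    // threshold value are examples, not defaults from the patch.
    #include <cudf/utilities/pinned_memory.hpp>
    #include <cudf/utilities/prefetch.hpp>

    void configure_host_memory_and_prefetching()
    {
      // Host allocations no larger than 256 KiB are served as pinned memory;
      // anything bigger falls back to pageable memory.
      cudf::set_allocate_host_as_pinned_threshold(256 * 1024);

      // Opt one algorithm key into prefetching, and print the prefetched
      // pointers to stderr while debugging.
      cudf::experimental::prefetch::enable_prefetching("gather");
      cudf::experimental::prefetch::prefetch_debugging(true);
    }

Internally, the `detail::prefetch*` overloads consult `prefetch_config::instance()` for the key before issuing any prefetch, which is why `set` is documented as thread-safe.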
- * See file LICENSE for detail or copy at https://opensource.org/licenses/MIT - */ - -#include // std::atomic -#include // std::chrono -#include // std::int_fast64_t, std::uint_fast32_t -#include // std::function -#include // std::future, std::promise -#include // std::shared_ptr, std::unique_ptr -#include // std::mutex, std::scoped_lock -#include // std::queue -#include // std::this_thread, std::thread -#include // std::decay_t, std::enable_if_t, std::is_void_v, std::invoke_result_t -#include // std::move, std::swap - -namespace cudf { -namespace detail { - -/** - * @brief A C++17 thread pool class. The user submits tasks to be executed into a queue. Whenever a - * thread becomes available, it pops a task from the queue and executes it. Each task is - * automatically assigned a future, which can be used to wait for the task to finish executing - * and/or obtain its eventual return value. - */ -class thread_pool { - using ui32 = int; - - public: - /** - * @brief Construct a new thread pool. - * - * @param _thread_count The number of threads to use. The default value is the total number of - * hardware threads available, as reported by the implementation. With a hyperthreaded CPU, this - * will be twice the number of CPU cores. If the argument is zero, the default value will be used - * instead. - */ - thread_pool(ui32 const& _thread_count = std::thread::hardware_concurrency()) - : thread_count(_thread_count ? _thread_count : std::thread::hardware_concurrency()), - threads(new std::thread[_thread_count ? _thread_count : std::thread::hardware_concurrency()]) - { - create_threads(); - } - - /** - * @brief Destruct the thread pool. Waits for all tasks to complete, then destroys all threads. - * Note that if the variable paused is set to true, then any tasks still in the queue will never - * be executed. - */ - ~thread_pool() - { - wait_for_tasks(); - running = false; - destroy_threads(); - } - - /** - * @brief Get the number of tasks currently waiting in the queue to be executed by the threads. - * - * @return The number of queued tasks. - */ - [[nodiscard]] size_t get_tasks_queued() const - { - std::scoped_lock const lock(queue_mutex); - return tasks.size(); - } - - /** - * @brief Get the number of tasks currently being executed by the threads. - * - * @return The number of running tasks. - */ - [[nodiscard]] ui32 get_tasks_running() const { return tasks_total - (ui32)get_tasks_queued(); } - - /** - * @brief Get the total number of unfinished tasks - either still in the queue, or running in a - * thread. - * - * @return The total number of tasks. - */ - [[nodiscard]] ui32 get_tasks_total() const { return tasks_total; } - - /** - * @brief Get the number of threads in the pool. - * - * @return The number of threads. - */ - [[nodiscard]] ui32 get_thread_count() const { return thread_count; } - - /** - * @brief Parallelize a loop by splitting it into blocks, submitting each block separately to the - * thread pool, and waiting for all blocks to finish executing. The loop will be equivalent to: - * for (T i = first_index; i <= last_index; i++) loop(i); - * - * @tparam T The type of the loop index. Should be a signed or unsigned integer. - * @tparam F The type of the function to loop through. - * @param first_index The first index in the loop (inclusive). - * @param last_index The last index in the loop (inclusive). - * @param loop The function to loop through. Should take exactly one argument, the loop index. - * @param num_tasks The maximum number of tasks to split the loop into. 
The default is to use the - * number of threads in the pool. - */ - template - void parallelize_loop(T first_index, T last_index, F const& loop, ui32 num_tasks = 0) - { - if (num_tasks == 0) num_tasks = thread_count; - if (last_index < first_index) std::swap(last_index, first_index); - size_t total_size = last_index - first_index + 1; - size_t block_size = total_size / num_tasks; - if (block_size == 0) { - block_size = 1; - num_tasks = (ui32)total_size > 1 ? (ui32)total_size : 1; - } - std::atomic blocks_running = 0; - for (ui32 t = 0; t < num_tasks; t++) { - T start = (T)(t * block_size + first_index); - T end = (t == num_tasks - 1) ? last_index : (T)((t + 1) * block_size + first_index - 1); - blocks_running++; - push_task([start, end, &loop, &blocks_running] { - for (T i = start; i <= end; i++) - loop(i); - blocks_running--; - }); - } - while (blocks_running != 0) { - sleep_or_yield(); - } - } - - /** - * @brief Push a function with no arguments or return value into the task queue. - * - * @tparam F The type of the function. - * @param task The function to push. - */ - template - void push_task(F const& task) - { - tasks_total++; - { - std::scoped_lock const lock(queue_mutex); - tasks.push(std::function(task)); - } - } - - /** - * @brief Push a function with arguments, but no return value, into the task queue. - * @details The function is wrapped inside a lambda in order to hide the arguments, as the tasks - * in the queue must be of type std::function, so they cannot have any arguments or return - * value. If no arguments are provided, the other overload will be used, in order to avoid the - * (slight) overhead of using a lambda. - * - * @tparam F The type of the function. - * @tparam A The types of the arguments. - * @param task The function to push. - * @param args The arguments to pass to the function. - */ - template - void push_task(F const& task, A const&... args) - { - push_task([task, args...] { task(args...); }); - } - - /** - * @brief Reset the number of threads in the pool. Waits for all currently running tasks to be - * completed, then destroys all threads in the pool and creates a new thread pool with the new - * number of threads. Any tasks that were waiting in the queue before the pool was reset will then - * be executed by the new threads. If the pool was paused before resetting it, the new pool will - * be paused as well. - * - * @param _thread_count The number of threads to use. The default value is the total number of - * hardware threads available, as reported by the implementation. With a hyperthreaded CPU, this - * will be twice the number of CPU cores. If the argument is zero, the default value will be used - * instead. - */ - void reset(ui32 const& _thread_count = std::thread::hardware_concurrency()) - { - bool was_paused = paused; - paused = true; - wait_for_tasks(); - running = false; - destroy_threads(); - thread_count = _thread_count ? _thread_count : std::thread::hardware_concurrency(); - threads = std::make_unique(thread_count); - paused = was_paused; - create_threads(); - running = true; - } - - /** - * @brief Submit a function with zero or more arguments and a return value into the task queue, - * and get a future for its eventual returned value. - * - * @tparam F The type of the function. - * @tparam A The types of the zero or more arguments to pass to the function. - * @tparam R The return type of the function. - * @param task The function to submit. - * @param args The zero or more arguments to pass to the function. 
- * @return A future to be used later to obtain the function's returned value, waiting for it to - * finish its execution if needed. - */ - template , std::decay_t...>> - std::future submit(F const& task, A const&... args) - { - std::shared_ptr> promise(new std::promise); - std::future future = promise->get_future(); - push_task([task, args..., promise] { - try { - if constexpr (std::is_void_v) { - task(args...); - promise->set_value(); - } else { - promise->set_value(task(args...)); - } - } catch (...) { - promise->set_exception(std::current_exception()); - }; - }); - return future; - } - - /** - * @brief Wait for tasks to be completed. Normally, this function waits for all tasks, both those - * that are currently running in the threads and those that are still waiting in the queue. - * However, if the variable paused is set to true, this function only waits for the currently - * running tasks (otherwise it would wait forever). To wait for a specific task, use submit() - * instead, and call the wait() member function of the generated future. - */ - void wait_for_tasks() - { - while (true) { - if (!paused) { - if (tasks_total == 0) break; - } else { - if (get_tasks_running() == 0) break; - } - sleep_or_yield(); - } - } - - /** - * @brief An atomic variable indicating to the workers to pause. When set to true, the workers - * temporarily stop popping new tasks out of the queue, although any tasks already executed will - * keep running until they are done. Set to false again to resume popping tasks. - */ - std::atomic paused = false; - - /** - * @brief The duration, in microseconds, that the worker function should sleep for when it cannot - * find any tasks in the queue. If set to 0, then instead of sleeping, the worker function will - * execute std::this_thread::yield() if there are no tasks in the queue. The default value is - * 1000. - */ - ui32 sleep_duration = 1000; - - private: - /** - * @brief Create the threads in the pool and assign a worker to each thread. - */ - void create_threads() - { - for (ui32 i = 0; i < thread_count; i++) { - threads[i] = std::thread(&thread_pool::worker, this); - } - } - - /** - * @brief Destroy the threads in the pool by joining them. - */ - void destroy_threads() - { - for (ui32 i = 0; i < thread_count; i++) { - threads[i].join(); - } - } - - /** - * @brief Try to pop a new task out of the queue. - * - * @param task A reference to the task. Will be populated with a function if the queue is not - * empty. - * @return true if a task was found, false if the queue is empty. - */ - bool pop_task(std::function& task) - { - std::scoped_lock const lock(queue_mutex); - if (tasks.empty()) - return false; - else { - task = std::move(tasks.front()); - tasks.pop(); - return true; - } - } - - /** - * @brief Sleep for sleep_duration microseconds. If that variable is set to zero, yield instead. - * - */ - void sleep_or_yield() - { - if (sleep_duration) - std::this_thread::sleep_for(std::chrono::microseconds(sleep_duration)); - else - std::this_thread::yield(); - } - - /** - * @brief A worker function to be assigned to each thread in the pool. Continuously pops tasks out - * of the queue and executes them, as long as the atomic variable running is set to true. - */ - void worker() - { - while (running) { - std::function task; - if (!paused && pop_task(task)) { - task(); - tasks_total--; - } else { - sleep_or_yield(); - } - } - } - - /** - * @brief A mutex to synchronize access to the task queue by different threads. 
- */ - mutable std::mutex queue_mutex; - - /** - * @brief An atomic variable indicating to the workers to keep running. When set to false, the - * workers permanently stop working. - */ - std::atomic running = true; - - /** - * @brief A queue of tasks to be executed by the threads. - */ - std::queue> tasks; - - /** - * @brief The number of threads in the pool. - */ - ui32 thread_count; - - /** - * @brief A smart pointer to manage the memory allocated for the threads. - */ - std::unique_ptr threads; - - /** - * @brief An atomic variable to keep track of the total number of unfinished tasks - either still - * in the queue, or running in a thread. - */ - std::atomic tasks_total = 0; -}; - -} // namespace detail -} // namespace cudf diff --git a/cpp/include/cudf/utilities/traits.cuh b/cpp/include/cudf/utilities/traits.cuh index 43587ffa583..5e52e9a9cd9 100644 --- a/cpp/include/cudf/utilities/traits.cuh +++ b/cpp/include/cudf/utilities/traits.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup utility_types @@ -64,4 +64,4 @@ constexpr inline bool has_atomic_support(data_type type) /** @} */ -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index d191e44228a..3f37ae02151 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -24,7 +24,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup utility_types @@ -622,4 +622,4 @@ struct is_convertible, cudf::detail::timestam /** @} */ -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index fd3b0581c11..4fcbca09d17 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -20,7 +20,7 @@ #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief Compare the types of two `column_view`s @@ -147,4 +147,4 @@ inline bool all_have_same_types(ForwardIt first, ForwardIt last) }); } -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index 1aad197b1e3..15b5f921c1b 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,7 +31,7 @@ * @brief Defines the mapping between `cudf::type_id` runtime type information * and concrete C++ types. 
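As the header comment says, `type_dispatcher` is the bridge from a runtime `cudf::type_id` to a concrete C++ type: it instantiates a functor's templated call operator for the matching type. A minimal sketch of a dispatch (the functor name is illustrative; cudf's own `size_of(data_type)`, declared in types.hpp above, provides this exact mapping):

    // Illustrative dispatch target: map a runtime data_type to sizeof(T).
    struct size_of_functor {
      template <typename T>
      constexpr std::size_t operator()() const
      {
        return sizeof(T);
      }
    };

    // Dispatches to size_of_functor::operator()<int32_t>() and returns 4.
    std::size_t bytes =
      cudf::type_dispatcher(cudf::data_type{cudf::type_id::INT32}, size_of_functor{});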
*/ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup utility_dispatcher * @{ @@ -626,4 +626,4 @@ CUDF_HOST_DEVICE __forceinline__ constexpr decltype(auto) double_type_dispatcher std::string type_to_name(data_type type); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/wrappers/dictionary.hpp b/cpp/include/cudf/wrappers/dictionary.hpp index 95f4ac00a53..3b1958e7d4f 100644 --- a/cpp/include/cudf/wrappers/dictionary.hpp +++ b/cpp/include/cudf/wrappers/dictionary.hpp @@ -27,7 +27,7 @@ * @brief Concrete type definition for dictionary columns. */ -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup dictionary_classes * @{ @@ -217,4 +217,4 @@ CUDF_HOST_DEVICE inline bool operator>(dictionary_wrapper const& lhs, using dictionary32 = dictionary_wrapper; ///< 32-bit integer indexed dictionary wrapper /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/wrappers/durations.hpp b/cpp/include/cudf/wrappers/durations.hpp index 840dba4f4ba..8c321cba34a 100644 --- a/cpp/include/cudf/wrappers/durations.hpp +++ b/cpp/include/cudf/wrappers/durations.hpp @@ -16,9 +16,11 @@ #pragma once +#include + #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @addtogroup timestamp_classes Timestamp @@ -65,4 +67,4 @@ static_assert(sizeof(duration_us) == sizeof(typename duration_us::rep)); static_assert(sizeof(duration_ns) == sizeof(typename duration_ns::rep)); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/wrappers/timestamps.hpp b/cpp/include/cudf/wrappers/timestamps.hpp index 5194a3e8f96..1f5d54c6119 100644 --- a/cpp/include/cudf/wrappers/timestamps.hpp +++ b/cpp/include/cudf/wrappers/timestamps.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include /** @@ -23,7 +24,7 @@ * @brief Concrete type definitions for int32_t and int64_t timestamps in * varying resolutions as durations since the UNIX epoch. */ -namespace cudf { +namespace CUDF_EXPORT cudf { namespace detail { // TODO: Use chrono::utc_clock when available in libcu++? template @@ -82,4 +83,4 @@ static_assert(sizeof(timestamp_us) == sizeof(typename timestamp_us::rep)); static_assert(sizeof(timestamp_ns) == sizeof(typename timestamp_ns::rep)); /** @} */ // end of group -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index 0e35ff64af4..04bd51e9aa3 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -19,13 +19,14 @@ #include #include +#include #include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace test { /** @@ -99,4 +100,4 @@ class TempDirTestEnvironment : public ::testing::Environment { }; } // namespace test -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index c83599a8072..944c6195afb 100644 --- a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -24,11 +24,13 @@ #include #include #include +#include #include #include -namespace cudf::test { +namespace CUDF_EXPORT cudf { +namespace test { /** * @brief Verbosity level of output from column and table comparison functions. @@ -194,7 +196,7 @@ std::pair, std::vector> to_host(column_view * `column_view`'s data, and second is the column's bitmask. 
*/ template ()>* = nullptr> -std::pair, std::vector> to_host(column_view c); +CUDF_EXPORT std::pair, std::vector> to_host(column_view c); /** * @brief Copies the data and bitmask of a `column_view` of strings @@ -207,7 +209,8 @@ std::pair, std::vector> to_host(column_view * and second is the column's bitmask. */ template <> -std::pair, std::vector> to_host(column_view c); +CUDF_EXPORT std::pair, std::vector> to_host( + column_view c); //! @endcond /** @@ -233,7 +236,8 @@ struct large_strings_enabler { void disable(); }; -} // namespace cudf::test +} // namespace test +} // namespace CUDF_EXPORT cudf // Macros for showing line of failure. #define CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL(lhs, rhs) \ diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 2abd6f0abac..4e504ec1d30 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -33,6 +32,7 @@ #include #include #include +#include #include #include @@ -51,7 +51,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace test { namespace detail { /** @@ -1755,7 +1755,7 @@ class lists_column_wrapper : public detail::column_wrapper { normalize_column(lists_column_view(col).child(), lists_column_view(expected_hierarchy).child()), col.null_count(), - cudf::detail::copy_bitmask( + cudf::copy_bitmask( col, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()), cudf::test::get_default_stream()); } @@ -1970,4 +1970,4 @@ class structs_column_wrapper : public detail::column_wrapper { }; } // namespace test -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/debug_utilities.hpp b/cpp/include/cudf_test/debug_utilities.hpp index a0881490b82..049b4579316 100644 --- a/cpp/include/cudf_test/debug_utilities.hpp +++ b/cpp/include/cudf_test/debug_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,10 @@ #include #include +#include -namespace cudf::test { +namespace CUDF_EXPORT cudf { +namespace test { /** * @brief Formats a column view as a string @@ -44,4 +46,5 @@ std::vector to_strings(cudf::column_view const& col); */ void print(cudf::column_view const& col, std::ostream& os = std::cout); -} // namespace cudf::test +} // namespace test +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/default_stream.hpp b/cpp/include/cudf_test/default_stream.hpp index 1da97d71f44..4f63add3071 100644 --- a/cpp/include/cudf_test/default_stream.hpp +++ b/cpp/include/cudf_test/default_stream.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,9 +16,11 @@ #pragma once +#include + #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace test { /** @@ -38,4 +40,4 @@ namespace test { rmm::cuda_stream_view const get_default_stream(); } // namespace test -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/file_utilities.hpp b/cpp/include/cudf_test/file_utilities.hpp index defc6f95823..37347e563cd 100644 --- a/cpp/include/cudf_test/file_utilities.hpp +++ b/cpp/include/cudf_test/file_utilities.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include @@ -29,7 +30,7 @@ * @brief RAII class for creating a temporary directory. * */ -class temp_directory { +class CUDF_EXPORT temp_directory { std::string _path; public: diff --git a/cpp/include/cudf_test/io_metadata_utilities.hpp b/cpp/include/cudf_test/io_metadata_utilities.hpp index 6fd1a52239c..c18d427d905 100644 --- a/cpp/include/cudf_test/io_metadata_utilities.hpp +++ b/cpp/include/cudf_test/io_metadata_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,10 @@ #pragma once #include +#include -namespace cudf::test { +namespace CUDF_EXPORT cudf { +namespace test { void expect_metadata_equal(cudf::io::table_input_metadata in_meta, cudf::io::table_metadata out_meta); @@ -28,4 +30,5 @@ void expect_metadata_equal(cudf::io::table_input_metadata in_meta, */ void expect_metadata_equal(cudf::io::table_metadata lhs_meta, cudf::io::table_metadata rhs_meta); -} // namespace cudf::test +} // namespace test +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/iterator_utilities.hpp b/cpp/include/cudf_test/iterator_utilities.hpp index 10f6e77d889..8db0275d2f4 100644 --- a/cpp/include/cudf_test/iterator_utilities.hpp +++ b/cpp/include/cudf_test/iterator_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,13 +18,14 @@ #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace test { namespace iterators { /** @@ -136,4 +137,4 @@ template } // namespace iterators } // namespace test -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/print_utilities.cuh b/cpp/include/cudf_test/print_utilities.cuh index ae6c8cef029..828188e65c3 100644 --- a/cpp/include/cudf_test/print_utilities.cuh +++ b/cpp/include/cudf_test/print_utilities.cuh @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -25,7 +26,8 @@ #include -namespace cudf::test::print { +namespace CUDF_EXPORT cudf { +namespace test::print { constexpr int32_t hex_tag = 0; @@ -137,4 +139,5 @@ void print_array(std::size_t count, rmm::cuda_stream_view stream, Ts... 
args) } } -} // namespace cudf::test::print +} // namespace test::print +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/random.hpp b/cpp/include/cudf_test/random.hpp index f4d539ecffe..fe1fb0a14bf 100644 --- a/cpp/include/cudf_test/random.hpp +++ b/cpp/include/cudf_test/random.hpp @@ -16,11 +16,12 @@ #pragma once +#include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace test { template @@ -170,4 +171,4 @@ class UniformRandomGenerator { }; } // namespace test -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp b/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp index 5a077e86a0f..417bbb3d9ab 100644 --- a/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp +++ b/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp @@ -24,13 +24,11 @@ #include +namespace cudf::test { + /** * @brief Resource that verifies that the default stream is not used in any allocation. - * - * @tparam Upstream Type of the upstream resource used for - * allocation/deallocation. */ -template class stream_checking_resource_adaptor final : public rmm::mr::device_memory_resource { public: /** @@ -40,14 +38,13 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res * * @param upstream The resource used for allocating/deallocating device memory */ - stream_checking_resource_adaptor(Upstream* upstream, + stream_checking_resource_adaptor(rmm::device_async_resource_ref upstream, bool error_on_invalid_stream, bool check_default_stream) : upstream_{upstream}, error_on_invalid_stream_{error_on_invalid_stream}, check_default_stream_{check_default_stream} { - CUDF_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } stream_checking_resource_adaptor() = delete; @@ -86,7 +83,7 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { verify_stream(stream); - return upstream_->allocate(bytes, stream); + return upstream_.allocate_async(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream); } /** @@ -101,7 +98,7 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) override { verify_stream(stream); - upstream_->deallocate(ptr, bytes, stream); + upstream_.deallocate_async(ptr, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream); } /** @@ -113,8 +110,8 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { if (this == &other) { return true; } - auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + auto cast = dynamic_cast(&other); + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } @@ -150,7 +147,8 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res } } - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + rmm::device_async_resource_ref + upstream_; // the upstream resource used for satisfying allocation requests bool error_on_invalid_stream_; // If true, throw an exception when the wrong stream is detected. // If false, simply print to stdout. bool check_default_stream_; // If true, throw an exception when the default stream is observed. 
@@ -158,17 +156,4 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res // cudf::test::get_default_stream() is observed. }; -/** - * @brief Convenience factory to return a `stream_checking_resource_adaptor` around the - * upstream resource `upstream`. - * - * @tparam Upstream Type of the upstream `device_memory_resource`. - * @param upstream Pointer to the upstream resource - */ -template -stream_checking_resource_adaptor make_stream_checking_resource_adaptor( - Upstream* upstream, bool error_on_invalid_stream, bool check_default_stream) -{ - return stream_checking_resource_adaptor{ - upstream, error_on_invalid_stream, check_default_stream}; -} +} // namespace cudf::test diff --git a/cpp/include/cudf_test/table_utilities.hpp b/cpp/include/cudf_test/table_utilities.hpp index 79229df4cd9..5e60419d679 100644 --- a/cpp/include/cudf_test/table_utilities.hpp +++ b/cpp/include/cudf_test/table_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,10 @@ #include #include +#include -namespace cudf::test::detail { +namespace CUDF_EXPORT cudf { +namespace test::detail { /** * @brief Verifies the property equality of two tables. * @@ -57,7 +59,8 @@ void expect_tables_equal(cudf::table_view lhs, cudf::table_view rhs); */ void expect_tables_equivalent(cudf::table_view lhs, cudf::table_view rhs); -} // namespace cudf::test::detail +} // namespace test::detail +} // namespace CUDF_EXPORT cudf // Macros for showing line of failure. #define CUDF_TEST_EXPECT_TABLE_PROPERTIES_EQUAL(lhs, rhs) \ diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh index 742cd764a1f..5fd2403b0f2 100644 --- a/cpp/include/cudf_test/tdigest_utilities.cuh +++ b/cpp/include/cudf_test/tdigest_utilities.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -37,7 +38,7 @@ // for use with groupby and reduction aggregation tests. -namespace cudf { +namespace CUDF_EXPORT cudf { namespace test { using expected_value = thrust::tuple; @@ -583,4 +584,4 @@ void tdigest_merge_empty(MergeFunc merge_op) } } // namespace test -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/testing_main.hpp b/cpp/include/cudf_test/testing_main.hpp index 66b831b917f..ed83ddabb00 100644 --- a/cpp/include/cudf_test/testing_main.hpp +++ b/cpp/include/cudf_test/testing_main.hpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -32,7 +33,7 @@ #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace test { /// MR factory functions @@ -92,7 +93,7 @@ inline std::shared_ptr create_memory_resource( } } // namespace test -} // namespace cudf +} // namespace CUDF_EXPORT cudf /** * @brief Parses the cuDF test command line options. 
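The removal of `make_stream_checking_resource_adaptor` above goes together with the adaptor's switch from a `template <typename Upstream>` pointer to a type-erased `rmm::device_async_resource_ref`: the class is no longer a template, so the factory has nothing left to deduce and callers construct the adaptor directly, as the `make_stream_mode_adaptor` hunk just below shows. A standalone sketch of the same pattern (the `cuda_mr` upstream is an illustrative choice, not from the patch):

    // Hedged sketch: construct the refactored adaptor directly from an
    // upstream resource; cuda_mr is an example upstream, not from the patch.
    rmm::mr::cuda_memory_resource cuda_mr;
    auto adaptor = cudf::test::stream_checking_resource_adaptor(
      cuda_mr, /*error_on_invalid_stream=*/true, /*check_default_stream=*/true);
    rmm::mr::set_current_device_resource(&adaptor);

Since `device_async_resource_ref` is a non-owning reference, the caller keeps responsibility for the upstream resource's lifetime, just as the old raw-pointer version required.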
@@ -182,8 +183,8 @@ inline auto make_stream_mode_adaptor(cxxopts::ParseResult const& cmd_opts) auto const stream_error_mode = cmd_opts["stream_error_mode"].as(); auto const error_on_invalid_stream = (stream_error_mode == "error"); auto const check_default_stream = (stream_mode == "new_cudf_default"); - auto adaptor = - make_stream_checking_resource_adaptor(resource, error_on_invalid_stream, check_default_stream); + auto adaptor = cudf::test::stream_checking_resource_adaptor( + resource, error_on_invalid_stream, check_default_stream); if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) { rmm::mr::set_current_device_resource(&adaptor); } diff --git a/cpp/include/cudf_test/timestamp_utilities.cuh b/cpp/include/cudf_test/timestamp_utilities.cuh index ebd93862151..e0789210bf9 100644 --- a/cpp/include/cudf_test/timestamp_utilities.cuh +++ b/cpp/include/cudf_test/timestamp_utilities.cuh @@ -19,12 +19,13 @@ #include #include +#include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace test { using time_point_ms = cuda::std::chrono::time_point; @@ -75,4 +76,4 @@ inline cudf::test::fixed_width_column_wrapper generate_timestamps(in } } // namespace test -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/type_list_utilities.hpp b/cpp/include/cudf_test/type_list_utilities.hpp index b069a34afb8..1793a8ecce0 100644 --- a/cpp/include/cudf_test/type_list_utilities.hpp +++ b/cpp/include/cudf_test/type_list_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,8 @@ #include "cudf_gtest.hpp" +#include + /** * @file type_list_utilities.hpp * @brief Utilities for creating type lists for typed tests in Google Test @@ -68,7 +70,7 @@ * increased compile-times. Use responsibly. */ -namespace cudf { +namespace CUDF_EXPORT cudf { namespace test { // Utilities for creating parameters for typed tests on GoogleTest // @@ -627,4 +629,4 @@ using Unique = typename UniqueImpl::type; } // namespace test -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp index bbff45e2102..4cd01a09187 100644 --- a/cpp/include/cudf_test/type_lists.hpp +++ b/cpp/include/cudf_test/type_lists.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -40,7 +41,7 @@ * These lists should be used for consistency across tests as well as * future-proofing against the addition of any new types in the future. 
*/ -namespace cudf { +namespace CUDF_EXPORT cudf { namespace test { namespace detail { template @@ -433,4 +434,4 @@ static constexpr std::array non_fixed_width_type_ids{cudf::typ cudf::type_id::STRING}; } // namespace test -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/nvtext/byte_pair_encoding.hpp b/cpp/include/nvtext/byte_pair_encoding.hpp index 375d44e367a..6559933f696 100644 --- a/cpp/include/nvtext/byte_pair_encoding.hpp +++ b/cpp/include/nvtext/byte_pair_encoding.hpp @@ -20,10 +20,11 @@ #include #include #include +#include #include -namespace nvtext { +namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_tokenize @@ -132,4 +133,4 @@ std::unique_ptr byte_pair_encoding( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/detail/generate_ngrams.hpp b/cpp/include/nvtext/detail/generate_ngrams.hpp index c4b89b6d495..7c49421560d 100644 --- a/cpp/include/nvtext/detail/generate_ngrams.hpp +++ b/cpp/include/nvtext/detail/generate_ngrams.hpp @@ -20,7 +20,7 @@ #include #include -namespace nvtext { +namespace CUDF_EXPORT nvtext { namespace detail { /** @@ -35,4 +35,4 @@ std::unique_ptr hash_character_ngrams(cudf::strings_column_view co rmm::device_async_resource_ref mr); } // namespace detail -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/detail/load_hash_file.hpp b/cpp/include/nvtext/detail/load_hash_file.hpp index 0c27981f80b..438a4a9afdd 100644 --- a/cpp/include/nvtext/detail/load_hash_file.hpp +++ b/cpp/include/nvtext/detail/load_hash_file.hpp @@ -25,7 +25,7 @@ #include #include -namespace nvtext { +namespace CUDF_EXPORT nvtext { namespace detail { /** @@ -47,4 +47,4 @@ std::unique_ptr load_vocabulary_file( rmm::device_async_resource_ref mr); } // namespace detail -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp index d48027e4631..57ad008f1a9 100644 --- a/cpp/include/nvtext/detail/tokenize.hpp +++ b/cpp/include/nvtext/detail/tokenize.hpp @@ -23,7 +23,7 @@ #include #include -namespace nvtext { +namespace CUDF_EXPORT nvtext { namespace detail { /** * @copydoc nvtext::tokenize(strings_column_view const&,string_scalar @@ -70,4 +70,4 @@ std::unique_ptr count_tokens(cudf::strings_column_view const& stri rmm::device_async_resource_ref mr); } // namespace detail -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/edit_distance.hpp b/cpp/include/nvtext/edit_distance.hpp index bfdfb4d1a1c..102f2cffa18 100644 --- a/cpp/include/nvtext/edit_distance.hpp +++ b/cpp/include/nvtext/edit_distance.hpp @@ -18,11 +18,12 @@ #include #include #include +#include #include //! 
NVText APIs -namespace nvtext { +namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_edit_distance * @{ @@ -104,4 +105,4 @@ std::unique_ptr edit_distance_matrix( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/generate_ngrams.hpp b/cpp/include/nvtext/generate_ngrams.hpp index bebe2e46023..ce79d985a49 100644 --- a/cpp/include/nvtext/generate_ngrams.hpp +++ b/cpp/include/nvtext/generate_ngrams.hpp @@ -18,10 +18,11 @@ #include #include #include +#include #include -namespace nvtext { +namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_ngrams * @{ @@ -128,4 +129,4 @@ std::unique_ptr hash_character_ngrams( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/jaccard.hpp b/cpp/include/nvtext/jaccard.hpp index 649c17f0b1c..3c3486c079e 100644 --- a/cpp/include/nvtext/jaccard.hpp +++ b/cpp/include/nvtext/jaccard.hpp @@ -17,10 +17,11 @@ #include #include +#include #include -namespace nvtext { +namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_jaccard * @{ @@ -78,4 +79,4 @@ std::unique_ptr jaccard_index( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/minhash.hpp b/cpp/include/nvtext/minhash.hpp index 7d3f6059454..fc28ecfb199 100644 --- a/cpp/include/nvtext/minhash.hpp +++ b/cpp/include/nvtext/minhash.hpp @@ -19,11 +19,12 @@ #include #include #include +#include #include #include -namespace nvtext { +namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_minhash * @{ @@ -151,4 +152,4 @@ std::unique_ptr minhash64( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/ngrams_tokenize.hpp b/cpp/include/nvtext/ngrams_tokenize.hpp index 09ce323a7ae..1048cd4abad 100644 --- a/cpp/include/nvtext/ngrams_tokenize.hpp +++ b/cpp/include/nvtext/ngrams_tokenize.hpp @@ -18,10 +18,11 @@ #include #include #include +#include #include -namespace nvtext { +namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_ngrams * @{ @@ -86,4 +87,4 @@ std::unique_ptr ngrams_tokenize( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/normalize.hpp b/cpp/include/nvtext/normalize.hpp index e5967e78318..ec0b8981f8f 100644 --- a/cpp/include/nvtext/normalize.hpp +++ b/cpp/include/nvtext/normalize.hpp @@ -17,11 +17,12 @@ #include #include +#include #include //! NVText APIs -namespace nvtext { +namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_normalize * @{ @@ -108,4 +109,4 @@ std::unique_ptr normalize_characters( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/replace.hpp b/cpp/include/nvtext/replace.hpp index aac21346c72..eedcd3976ca 100644 --- a/cpp/include/nvtext/replace.hpp +++ b/cpp/include/nvtext/replace.hpp @@ -18,11 +18,12 @@ #include #include #include +#include #include //! 
NVText APIs -namespace nvtext { +namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_replace * @{ @@ -142,4 +143,4 @@ std::unique_ptr filter_tokens( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/stemmer.hpp b/cpp/include/nvtext/stemmer.hpp index 20b81aba661..4607c42ceed 100644 --- a/cpp/include/nvtext/stemmer.hpp +++ b/cpp/include/nvtext/stemmer.hpp @@ -18,10 +18,11 @@ #include #include #include +#include #include -namespace nvtext { +namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_stemmer * @{ @@ -172,4 +173,4 @@ std::unique_ptr porter_stemmer_measure( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/subword_tokenize.hpp b/cpp/include/nvtext/subword_tokenize.hpp index a4e06495a1d..b5636c8401b 100644 --- a/cpp/include/nvtext/subword_tokenize.hpp +++ b/cpp/include/nvtext/subword_tokenize.hpp @@ -18,10 +18,11 @@ #include #include #include +#include #include -namespace nvtext { +namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_tokenize @@ -160,4 +161,4 @@ tokenizer_result subword_tokenize( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/tokenize.hpp b/cpp/include/nvtext/tokenize.hpp index 29fed0759c7..833b53efcde 100644 --- a/cpp/include/nvtext/tokenize.hpp +++ b/cpp/include/nvtext/tokenize.hpp @@ -18,10 +18,11 @@ #include #include #include +#include #include -namespace nvtext { +namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_tokenize * @{ @@ -309,4 +310,4 @@ std::unique_ptr tokenize_with_vocabulary( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of tokenize group -} // namespace nvtext +} // namespace CUDF_EXPORT nvtext diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 5422304c5cb..a60a7f63882 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -423,13 +424,16 @@ std::unique_ptr make_sum_aggregation() { return std::make_unique(); } -template std::unique_ptr make_sum_aggregation(); -template std::unique_ptr make_sum_aggregation(); -template std::unique_ptr make_sum_aggregation(); -template std::unique_ptr make_sum_aggregation(); -template std::unique_ptr make_sum_aggregation(); -template std::unique_ptr make_sum_aggregation(); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr make_sum_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_sum_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_sum_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_sum_aggregation(); +template CUDF_EXPORT std::unique_ptr make_sum_aggregation(); +template CUDF_EXPORT std::unique_ptr make_sum_aggregation(); +template CUDF_EXPORT std::unique_ptr make_sum_aggregation(); /// Factory to create a PRODUCT aggregation @@ -438,13 +442,15 @@ std::unique_ptr make_product_aggregation() { return std::make_unique(); } -template std::unique_ptr make_product_aggregation(); -template std::unique_ptr make_product_aggregation(); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr make_product_aggregation(); +template CUDF_EXPORT 
std::unique_ptr +make_product_aggregation(); +template CUDF_EXPORT std::unique_ptr make_product_aggregation(); -template std::unique_ptr make_product_aggregation(); -template std::unique_ptr make_product_aggregation(); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr +make_product_aggregation(); +template CUDF_EXPORT std::unique_ptr make_product_aggregation(); +template CUDF_EXPORT std::unique_ptr make_product_aggregation(); /// Factory to create a MIN aggregation @@ -453,13 +459,16 @@ std::unique_ptr make_min_aggregation() { return std::make_unique(); } -template std::unique_ptr make_min_aggregation(); -template std::unique_ptr make_min_aggregation(); -template std::unique_ptr make_min_aggregation(); -template std::unique_ptr make_min_aggregation(); -template std::unique_ptr make_min_aggregation(); -template std::unique_ptr make_min_aggregation(); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr make_min_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_min_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_min_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_min_aggregation(); +template CUDF_EXPORT std::unique_ptr make_min_aggregation(); +template CUDF_EXPORT std::unique_ptr make_min_aggregation(); +template CUDF_EXPORT std::unique_ptr make_min_aggregation(); /// Factory to create a MAX aggregation @@ -468,13 +477,16 @@ std::unique_ptr make_max_aggregation() { return std::make_unique(); } -template std::unique_ptr make_max_aggregation(); -template std::unique_ptr make_max_aggregation(); -template std::unique_ptr make_max_aggregation(); -template std::unique_ptr make_max_aggregation(); -template std::unique_ptr make_max_aggregation(); -template std::unique_ptr make_max_aggregation(); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr make_max_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_max_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_max_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_max_aggregation(); +template CUDF_EXPORT std::unique_ptr make_max_aggregation(); +template CUDF_EXPORT std::unique_ptr make_max_aggregation(); +template CUDF_EXPORT std::unique_ptr make_max_aggregation(); /// Factory to create a COUNT aggregation @@ -485,14 +497,14 @@ std::unique_ptr make_count_aggregation(null_policy null_handling) (null_handling == null_policy::INCLUDE) ? 
aggregation::COUNT_ALL : aggregation::COUNT_VALID; return std::make_unique(kind); } -template std::unique_ptr make_count_aggregation( - null_policy null_handling); -template std::unique_ptr make_count_aggregation( - null_policy null_handling); -template std::unique_ptr make_count_aggregation( - null_policy null_handling); -template std::unique_ptr make_count_aggregation( +template CUDF_EXPORT std::unique_ptr make_count_aggregation( null_policy null_handling); +template CUDF_EXPORT std::unique_ptr +make_count_aggregation(null_policy null_handling); +template CUDF_EXPORT std::unique_ptr +make_count_aggregation(null_policy null_handling); +template CUDF_EXPORT std::unique_ptr +make_count_aggregation(null_policy null_handling); /// Factory to create a HISTOGRAM aggregation template @@ -500,9 +512,11 @@ std::unique_ptr make_histogram_aggregation() { return std::make_unique(); } -template std::unique_ptr make_histogram_aggregation(); -template std::unique_ptr make_histogram_aggregation(); -template std::unique_ptr make_histogram_aggregation(); +template CUDF_EXPORT std::unique_ptr make_histogram_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_histogram_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_histogram_aggregation(); /// Factory to create a ANY aggregation template @@ -510,9 +524,9 @@ std::unique_ptr make_any_aggregation() { return std::make_unique(); } -template std::unique_ptr make_any_aggregation(); -template std::unique_ptr make_any_aggregation(); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr make_any_aggregation(); +template CUDF_EXPORT std::unique_ptr make_any_aggregation(); +template CUDF_EXPORT std::unique_ptr make_any_aggregation(); /// Factory to create a ALL aggregation @@ -521,9 +535,9 @@ std::unique_ptr make_all_aggregation() { return std::make_unique(); } -template std::unique_ptr make_all_aggregation(); -template std::unique_ptr make_all_aggregation(); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr make_all_aggregation(); +template CUDF_EXPORT std::unique_ptr make_all_aggregation(); +template CUDF_EXPORT std::unique_ptr make_all_aggregation(); /// Factory to create a SUM_OF_SQUARES aggregation @@ -532,11 +546,12 @@ std::unique_ptr make_sum_of_squares_aggregation() { return std::make_unique(); } -template std::unique_ptr make_sum_of_squares_aggregation(); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr make_sum_of_squares_aggregation(); +template CUDF_EXPORT std::unique_ptr make_sum_of_squares_aggregation(); -template std::unique_ptr make_sum_of_squares_aggregation(); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr +make_sum_of_squares_aggregation(); +template CUDF_EXPORT std::unique_ptr make_sum_of_squares_aggregation(); /// Factory to create a MEAN aggregation @@ -545,11 +560,14 @@ std::unique_ptr make_mean_aggregation() { return std::make_unique(); } -template std::unique_ptr make_mean_aggregation(); -template std::unique_ptr make_mean_aggregation(); -template std::unique_ptr make_mean_aggregation(); -template std::unique_ptr make_mean_aggregation(); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr make_mean_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_mean_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_mean_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_mean_aggregation(); +template CUDF_EXPORT std::unique_ptr make_mean_aggregation(); /// Factory to create a M2 aggregation @@ -558,8 +576,9 @@ std::unique_ptr make_m2_aggregation() { 
return std::make_unique(); } -template std::unique_ptr make_m2_aggregation(); -template std::unique_ptr make_m2_aggregation(); +template CUDF_EXPORT std::unique_ptr make_m2_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_m2_aggregation(); /// Factory to create a VARIANCE aggregation template @@ -567,14 +586,15 @@ std::unique_ptr make_variance_aggregation(size_type ddof) { return std::make_unique(ddof); } -template std::unique_ptr make_variance_aggregation(size_type ddof); -template std::unique_ptr make_variance_aggregation( - size_type ddof); -template std::unique_ptr make_variance_aggregation( +template CUDF_EXPORT std::unique_ptr make_variance_aggregation( size_type ddof); -template std::unique_ptr make_variance_aggregation( - size_type ddof); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr +make_variance_aggregation(size_type ddof); +template CUDF_EXPORT std::unique_ptr +make_variance_aggregation(size_type ddof); +template CUDF_EXPORT std::unique_ptr +make_variance_aggregation(size_type ddof); +template CUDF_EXPORT std::unique_ptr make_variance_aggregation(size_type ddof); /// Factory to create a STD aggregation @@ -583,14 +603,14 @@ std::unique_ptr make_std_aggregation(size_type ddof) { return std::make_unique(ddof); } -template std::unique_ptr make_std_aggregation(size_type ddof); -template std::unique_ptr make_std_aggregation( +template CUDF_EXPORT std::unique_ptr make_std_aggregation(size_type ddof); +template CUDF_EXPORT std::unique_ptr make_std_aggregation( size_type ddof); -template std::unique_ptr make_std_aggregation( +template CUDF_EXPORT std::unique_ptr make_std_aggregation( size_type ddof); -template std::unique_ptr make_std_aggregation( +template CUDF_EXPORT std::unique_ptr make_std_aggregation( size_type ddof); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr make_std_aggregation(size_type ddof); /// Factory to create a MEDIAN aggregation @@ -599,9 +619,11 @@ std::unique_ptr make_median_aggregation() { return std::make_unique(); } -template std::unique_ptr make_median_aggregation(); -template std::unique_ptr make_median_aggregation(); -template std::unique_ptr make_median_aggregation(); +template CUDF_EXPORT std::unique_ptr make_median_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_median_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_median_aggregation(); /// Factory to create a QUANTILE aggregation template @@ -610,12 +632,14 @@ std::unique_ptr make_quantile_aggregation(std::vector const& quant { return std::make_unique(quantiles, interp); } -template std::unique_ptr make_quantile_aggregation( - std::vector const& quantiles, interpolation interp); -template std::unique_ptr make_quantile_aggregation( - std::vector const& quantiles, interpolation interp); -template std::unique_ptr make_quantile_aggregation( +template CUDF_EXPORT std::unique_ptr make_quantile_aggregation( std::vector const& quantiles, interpolation interp); +template CUDF_EXPORT std::unique_ptr +make_quantile_aggregation(std::vector const& quantiles, + interpolation interp); +template CUDF_EXPORT std::unique_ptr +make_quantile_aggregation(std::vector const& quantiles, + interpolation interp); /// Factory to create an ARGMAX aggregation template @@ -623,9 +647,11 @@ std::unique_ptr make_argmax_aggregation() { return std::make_unique(); } -template std::unique_ptr make_argmax_aggregation(); -template std::unique_ptr make_argmax_aggregation(); -template std::unique_ptr make_argmax_aggregation(); +template CUDF_EXPORT std::unique_ptr 
make_argmax_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_argmax_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_argmax_aggregation(); /// Factory to create an ARGMIN aggregation template @@ -633,9 +659,11 @@ std::unique_ptr make_argmin_aggregation() { return std::make_unique(); } -template std::unique_ptr make_argmin_aggregation(); -template std::unique_ptr make_argmin_aggregation(); -template std::unique_ptr make_argmin_aggregation(); +template CUDF_EXPORT std::unique_ptr make_argmin_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_argmin_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_argmin_aggregation(); /// Factory to create an NUNIQUE aggregation template @@ -643,13 +671,13 @@ std::unique_ptr make_nunique_aggregation(null_policy null_handling) { return std::make_unique(null_handling); } -template std::unique_ptr make_nunique_aggregation( - null_policy null_handling); -template std::unique_ptr make_nunique_aggregation( +template CUDF_EXPORT std::unique_ptr make_nunique_aggregation( null_policy null_handling); -template std::unique_ptr make_nunique_aggregation( - null_policy null_handling); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr +make_nunique_aggregation(null_policy null_handling); +template CUDF_EXPORT std::unique_ptr +make_nunique_aggregation(null_policy null_handling); +template CUDF_EXPORT std::unique_ptr make_nunique_aggregation(null_policy null_handling); /// Factory to create an NTH_ELEMENT aggregation @@ -658,14 +686,14 @@ std::unique_ptr make_nth_element_aggregation(size_type n, null_policy null { return std::make_unique(n, null_handling); } -template std::unique_ptr make_nth_element_aggregation( - size_type n, null_policy null_handling); -template std::unique_ptr make_nth_element_aggregation( - size_type n, null_policy null_handling); -template std::unique_ptr make_nth_element_aggregation( - size_type n, null_policy null_handling); -template std::unique_ptr make_nth_element_aggregation( +template CUDF_EXPORT std::unique_ptr make_nth_element_aggregation( size_type n, null_policy null_handling); +template CUDF_EXPORT std::unique_ptr +make_nth_element_aggregation(size_type n, null_policy null_handling); +template CUDF_EXPORT std::unique_ptr +make_nth_element_aggregation(size_type n, null_policy null_handling); +template CUDF_EXPORT std::unique_ptr +make_nth_element_aggregation(size_type n, null_policy null_handling); /// Factory to create a ROW_NUMBER aggregation template @@ -673,8 +701,9 @@ std::unique_ptr make_row_number_aggregation() { return std::make_unique(); } -template std::unique_ptr make_row_number_aggregation(); -template std::unique_ptr make_row_number_aggregation(); +template CUDF_EXPORT std::unique_ptr make_row_number_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_row_number_aggregation(); /// Factory to create an EWMA aggregation template @@ -682,9 +711,9 @@ std::unique_ptr make_ewma_aggregation(double const com, cudf::ewm_history { return std::make_unique(com, history); } -template std::unique_ptr make_ewma_aggregation(double const com, - cudf::ewm_history history); -template std::unique_ptr make_ewma_aggregation( +template CUDF_EXPORT std::unique_ptr make_ewma_aggregation( + double const com, cudf::ewm_history history); +template CUDF_EXPORT std::unique_ptr make_ewma_aggregation( double const com, cudf::ewm_history history); /// Factory to create a RANK aggregation @@ -698,19 +727,19 @@ std::unique_ptr make_rank_aggregation(rank_method method, return std::make_unique( method, 
column_order, null_handling, null_precedence, percentage); } -template std::unique_ptr make_rank_aggregation( +template CUDF_EXPORT std::unique_ptr make_rank_aggregation( rank_method method, order column_order, null_policy null_handling, null_order null_precedence, rank_percentage percentage); -template std::unique_ptr make_rank_aggregation( - rank_method method, - order column_order, - null_policy null_handling, - null_order null_precedence, - rank_percentage percentage); -template std::unique_ptr make_rank_aggregation( +template CUDF_EXPORT std::unique_ptr +make_rank_aggregation(rank_method method, + order column_order, + null_policy null_handling, + null_order null_precedence, + rank_percentage percentage); +template CUDF_EXPORT std::unique_ptr make_rank_aggregation( rank_method method, order column_order, null_policy null_handling, @@ -723,14 +752,14 @@ std::unique_ptr make_collect_list_aggregation(null_policy null_handling) { return std::make_unique(null_handling); } -template std::unique_ptr make_collect_list_aggregation( - null_policy null_handling); -template std::unique_ptr make_collect_list_aggregation( - null_policy null_handling); -template std::unique_ptr make_collect_list_aggregation( - null_policy null_handling); -template std::unique_ptr make_collect_list_aggregation( +template CUDF_EXPORT std::unique_ptr make_collect_list_aggregation( null_policy null_handling); +template CUDF_EXPORT std::unique_ptr +make_collect_list_aggregation(null_policy null_handling); +template CUDF_EXPORT std::unique_ptr +make_collect_list_aggregation(null_policy null_handling); +template CUDF_EXPORT std::unique_ptr +make_collect_list_aggregation(null_policy null_handling); /// Factory to create a COLLECT_SET aggregation template @@ -740,14 +769,20 @@ std::unique_ptr make_collect_set_aggregation(null_policy null_handling, { return std::make_unique(null_handling, nulls_equal, nans_equal); } -template std::unique_ptr make_collect_set_aggregation( - null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal); -template std::unique_ptr make_collect_set_aggregation( - null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal); -template std::unique_ptr make_collect_set_aggregation( - null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal); -template std::unique_ptr make_collect_set_aggregation( +template CUDF_EXPORT std::unique_ptr make_collect_set_aggregation( null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal); +template CUDF_EXPORT std::unique_ptr +make_collect_set_aggregation(null_policy null_handling, + null_equality nulls_equal, + nan_equality nans_equal); +template CUDF_EXPORT std::unique_ptr +make_collect_set_aggregation(null_policy null_handling, + null_equality nulls_equal, + nan_equality nans_equal); +template CUDF_EXPORT std::unique_ptr +make_collect_set_aggregation(null_policy null_handling, + null_equality nulls_equal, + nan_equality nans_equal); /// Factory to create a LAG aggregation template @@ -755,8 +790,9 @@ std::unique_ptr make_lag_aggregation(size_type offset) { return std::make_unique(aggregation::LAG, offset); } -template std::unique_ptr make_lag_aggregation(size_type offset); -template std::unique_ptr make_lag_aggregation( +template CUDF_EXPORT std::unique_ptr make_lag_aggregation( + size_type offset); +template CUDF_EXPORT std::unique_ptr make_lag_aggregation( size_type offset); /// Factory to create a LEAD aggregation @@ -765,9 +801,10 @@ std::unique_ptr make_lead_aggregation(size_type 
offset) { return std::make_unique(aggregation::LEAD, offset); } -template std::unique_ptr make_lead_aggregation(size_type offset); -template std::unique_ptr make_lead_aggregation( +template CUDF_EXPORT std::unique_ptr make_lead_aggregation( size_type offset); +template CUDF_EXPORT std::unique_ptr +make_lead_aggregation(size_type offset); /// Factory to create a UDF aggregation template @@ -781,9 +818,9 @@ std::unique_ptr make_udf_aggregation(udf_type type, output_type}; return std::unique_ptr(a); } -template std::unique_ptr make_udf_aggregation( +template CUDF_EXPORT std::unique_ptr make_udf_aggregation( udf_type type, std::string const& user_defined_aggregator, data_type output_type); -template std::unique_ptr make_udf_aggregation( +template CUDF_EXPORT std::unique_ptr make_udf_aggregation( udf_type type, std::string const& user_defined_aggregator, data_type output_type); /// Factory to create a MERGE_LISTS aggregation @@ -792,9 +829,11 @@ std::unique_ptr make_merge_lists_aggregation() { return std::make_unique(); } -template std::unique_ptr make_merge_lists_aggregation(); -template std::unique_ptr make_merge_lists_aggregation(); -template std::unique_ptr make_merge_lists_aggregation(); +template CUDF_EXPORT std::unique_ptr make_merge_lists_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_merge_lists_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_merge_lists_aggregation(); /// Factory to create a MERGE_SETS aggregation template @@ -803,12 +842,12 @@ std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal, { return std::make_unique(nulls_equal, nans_equal); } -template std::unique_ptr make_merge_sets_aggregation(null_equality, - nan_equality); -template std::unique_ptr make_merge_sets_aggregation( - null_equality, nan_equality); -template std::unique_ptr make_merge_sets_aggregation( +template CUDF_EXPORT std::unique_ptr make_merge_sets_aggregation( null_equality, nan_equality); +template CUDF_EXPORT std::unique_ptr + make_merge_sets_aggregation(null_equality, nan_equality); +template CUDF_EXPORT std::unique_ptr + make_merge_sets_aggregation(null_equality, nan_equality); /// Factory to create a MERGE_M2 aggregation template @@ -816,8 +855,9 @@ std::unique_ptr make_merge_m2_aggregation() { return std::make_unique(); } -template std::unique_ptr make_merge_m2_aggregation(); -template std::unique_ptr make_merge_m2_aggregation(); +template CUDF_EXPORT std::unique_ptr make_merge_m2_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_merge_m2_aggregation(); /// Factory to create a MERGE_HISTOGRAM aggregation template @@ -825,10 +865,11 @@ std::unique_ptr make_merge_histogram_aggregation() { return std::make_unique(); } -template std::unique_ptr make_merge_histogram_aggregation(); -template std::unique_ptr +template CUDF_EXPORT std::unique_ptr make_merge_histogram_aggregation(); +template CUDF_EXPORT std::unique_ptr make_merge_histogram_aggregation(); -template std::unique_ptr make_merge_histogram_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_merge_histogram_aggregation(); /// Factory to create a COVARIANCE aggregation template @@ -836,10 +877,10 @@ std::unique_ptr make_covariance_aggregation(size_type min_periods, size_ty { return std::make_unique(min_periods, ddof); } -template std::unique_ptr make_covariance_aggregation( - size_type min_periods, size_type ddof); -template std::unique_ptr make_covariance_aggregation( +template CUDF_EXPORT std::unique_ptr make_covariance_aggregation( size_type min_periods, size_type ddof); +template 
CUDF_EXPORT std::unique_ptr +make_covariance_aggregation(size_type min_periods, size_type ddof); /// Factory to create a CORRELATION aggregation template @@ -847,33 +888,34 @@ std::unique_ptr make_correlation_aggregation(correlation_type type, size_t { return std::make_unique(type, min_periods); } -template std::unique_ptr make_correlation_aggregation( - correlation_type type, size_type min_periods); -template std::unique_ptr make_correlation_aggregation( +template CUDF_EXPORT std::unique_ptr make_correlation_aggregation( correlation_type type, size_type min_periods); +template CUDF_EXPORT std::unique_ptr +make_correlation_aggregation(correlation_type type, size_type min_periods); template std::unique_ptr make_tdigest_aggregation(int max_centroids) { return std::make_unique(max_centroids); } -template std::unique_ptr make_tdigest_aggregation(int max_centroids); -template std::unique_ptr make_tdigest_aggregation( - int max_centroids); -template std::unique_ptr make_tdigest_aggregation( +template CUDF_EXPORT std::unique_ptr make_tdigest_aggregation( int max_centroids); +template CUDF_EXPORT std::unique_ptr +make_tdigest_aggregation(int max_centroids); +template CUDF_EXPORT std::unique_ptr +make_tdigest_aggregation(int max_centroids); template std::unique_ptr make_merge_tdigest_aggregation(int max_centroids) { return std::make_unique(max_centroids); } -template std::unique_ptr make_merge_tdigest_aggregation( - int max_centroids); -template std::unique_ptr make_merge_tdigest_aggregation( - int max_centroids); -template std::unique_ptr make_merge_tdigest_aggregation( +template CUDF_EXPORT std::unique_ptr make_merge_tdigest_aggregation( int max_centroids); +template CUDF_EXPORT std::unique_ptr +make_merge_tdigest_aggregation(int max_centroids); +template CUDF_EXPORT std::unique_ptr +make_merge_tdigest_aggregation(int max_centroids); namespace detail { namespace { diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 8ac1491547d..3ac8547baad 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -50,6 +50,11 @@ namespace cudf { namespace binops { +bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op) +{ + return cudf::binops::compiled::is_supported_operation(out, lhs, rhs, op); +} + /** * @brief Computes output valid mask for op between a column and a scalar */ @@ -194,7 +199,7 @@ std::unique_ptr binary_operation(LhsType const& lhs, rmm::device_async_resource_ref mr) { if constexpr (std::is_same_v and std::is_same_v) - CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match"); + CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match", std::invalid_argument); if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING and output_type.id() == type_id::STRING and diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu index ba0253ec853..7a0bc312434 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cu +++ b/cpp/src/binaryop/compiled/binary_ops.cu @@ -18,6 +18,7 @@ #include "operation.cuh" #include "struct_binary_ops.cuh" +#include #include #include #include diff --git a/cpp/src/bitmask/is_element_valid.cpp b/cpp/src/bitmask/is_element_valid.cpp index e0f0ccdc861..4806c7a94e8 100644 --- a/cpp/src/bitmask/is_element_valid.cpp +++ b/cpp/src/bitmask/is_element_valid.cpp @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. 
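// The binaryop hunk above passes a third argument to CUDF_EXPECTS so a size
// mismatch throws std::invalid_argument (a caller error) rather than the
// default logic_error. A rough sketch of how such a macro can take an explicit
// exception type; this is an illustration, not cudf's actual macro definition:
#include <stdexcept>
#include <string>

#define EXAMPLE_EXPECTS(cond, msg, extype)                                \
  do {                                                                    \
    if (!(cond)) { throw extype(std::string{"cuDF failure: "} + (msg)); } \
  } while (0)

void check_sizes(int lhs_size, int rhs_size)
{
  // Mirrors the patched call site: the mismatch is the caller's fault, so the
  // more precise std::invalid_argument is raised.
  EXAMPLE_EXPECTS(lhs_size == rhs_size, "Column sizes don't match", std::invalid_argument);
}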
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +14,7 @@ * limitations under the License. */ -#include +#include #include #include diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index 4d16298c605..b0f9e9f0e74 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -15,8 +15,10 @@ */ #include +#include #include #include +#include #include #include #include @@ -27,10 +29,37 @@ #include #include #include +#include #include namespace cudf { namespace detail { +namespace { + +template +void prefetch_col_data(ColumnView& col, void const* data_ptr, std::string_view key) noexcept +{ + if (cudf::experimental::prefetch::detail::prefetch_config::instance().get(key)) { + if (cudf::is_fixed_width(col.type())) { + cudf::experimental::prefetch::detail::prefetch_noexcept( + key, data_ptr, col.size() * size_of(col.type()), cudf::get_default_stream()); + } else if (col.type().id() == type_id::STRING) { + strings_column_view scv{col}; + + cudf::experimental::prefetch::detail::prefetch_noexcept( + key, + data_ptr, + scv.chars_size(cudf::get_default_stream()) * sizeof(char), + cudf::get_default_stream()); + } else { + std::cout << key << ": Unsupported type: " << static_cast(col.type().id()) + << std::endl; + } + } +} + +} // namespace + column_view_base::column_view_base(data_type type, size_type size, void const* data, @@ -126,6 +155,7 @@ bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs) { return shallow_equivalent_impl(lhs, rhs); } + } // namespace detail // Immutable view constructor @@ -175,6 +205,18 @@ mutable_column_view::operator column_view() const return column_view{_type, _size, _data, _null_mask, _null_count, _offset, std::move(child_views)}; } +void const* column_view::get_data() const noexcept +{ + detail::prefetch_col_data(*this, _data, "column_view::get_data"); + return _data; +} + +void const* mutable_column_view::get_data() const noexcept +{ + detail::prefetch_col_data(*this, _data, "mutable_column_view::get_data"); + return _data; +} + size_type count_descendants(column_view parent) { auto descendants = [](auto const& child) { return count_descendants(child); }; diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 6acbafd24fb..ac9931335ff 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -73,8 +74,8 @@ auto create_device_views(host_span views, rmm::cuda_stream_vi }); // Assemble contiguous array of device views - auto device_views = thrust::host_vector(); - device_views.reserve(views.size()); + auto device_views = + cudf::detail::make_empty_host_vector(views.size(), stream); std::transform(device_view_owners.cbegin(), device_view_owners.cend(), std::back_inserter(device_views), @@ -84,7 +85,7 @@ auto create_device_views(host_span views, rmm::cuda_stream_vi make_device_uvector_async(device_views, stream, rmm::mr::get_current_device_resource()); // Compute the partition offsets - auto offsets = thrust::host_vector(views.size() + 1); + auto offsets = cudf::detail::make_host_vector(views.size() + 1, stream); thrust::transform_inclusive_scan( thrust::host, device_views.cbegin(), diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 37db2c74790..95544742fb7 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ 
-1539,7 +1539,8 @@ std::unique_ptr chunk_iteration_state::create( std::vector num_batches_per_iteration; std::vector size_of_batches_per_iteration; - std::vector accum_size_per_iteration; + auto accum_size_per_iteration = + cudf::detail::make_empty_host_vector(h_offsets.size(), stream); std::size_t accum_size = 0; { auto current_offset_it = h_offsets.begin(); diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu index d69d214a881..581d0a00924 100644 --- a/cpp/src/copying/purge_nonempty_nulls.cu +++ b/cpp/src/copying/purge_nonempty_nulls.cu @@ -14,6 +14,7 @@ * limitations under the License. */ #include +#include #include #include #include diff --git a/cpp/src/datetime/timezone.cpp b/cpp/src/datetime/timezone.cpp index 1b0d201501b..7ca1b51df98 100644 --- a/cpp/src/datetime/timezone.cpp +++ b/cpp/src/datetime/timezone.cpp @@ -485,14 +485,12 @@ std::unique_ptr
make_timezone_transition_table(std::optional ttimes_typed; - ttimes_typed.reserve(transition_times.size()); + auto ttimes_typed = make_empty_host_vector(transition_times.size(), stream); std::transform(transition_times.cbegin(), transition_times.cend(), std::back_inserter(ttimes_typed), [](auto ts) { return timestamp_s{duration_s{ts}}; }); - std::vector offsets_typed; - offsets_typed.reserve(offsets.size()); + auto offsets_typed = make_empty_host_vector(offsets.size(), stream); std::transform(offsets.cbegin(), offsets.cend(), std::back_inserter(offsets_typed), [](auto ts) { return duration_s{ts}; }); diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index fdc3d9d0ecf..72828309425 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -105,7 +105,7 @@ struct compute_children_offsets_fn { */ rmm::device_uvector create_children_offsets(rmm::cuda_stream_view stream) { - std::vector offsets(columns_ptrs.size()); + auto offsets = cudf::detail::make_host_vector(columns_ptrs.size(), stream); thrust::transform_exclusive_scan( thrust::host, columns_ptrs.begin(), diff --git a/cpp/src/dictionary/dictionary_factories.cu b/cpp/src/dictionary/dictionary_factories.cu index 37f8fa7a05b..0617d71fa51 100644 --- a/cpp/src/dictionary/dictionary_factories.cu +++ b/cpp/src/dictionary/dictionary_factories.cu @@ -77,7 +77,9 @@ std::unique_ptr make_dictionary_column(column_view const& keys_column, std::unique_ptr make_dictionary_column(std::unique_ptr keys_column, std::unique_ptr indices_column, rmm::device_buffer&& null_mask, - size_type null_count) + size_type null_count, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(!keys_column->has_nulls(), "keys column must not have nulls"); CUDF_EXPECTS(!indices_column->has_nulls(), "indices column must not have nulls"); @@ -89,7 +91,7 @@ std::unique_ptr make_dictionary_column(std::unique_ptr keys_colu children.emplace_back(std::move(keys_column)); return std::make_unique(data_type{type_id::DICTIONARY32}, count, - rmm::device_buffer{}, + rmm::device_buffer{0, stream, mr}, std::move(null_mask), null_count, std::move(children)); @@ -134,8 +136,11 @@ std::unique_ptr make_dictionary_column(std::unique_ptr keys, auto indices_column = [&] { // If the types match, then just commandeer the column's data buffer. if (new_type.id() == indices_type) { - return std::make_unique( - new_type, indices_size, std::move(*(contents.data.release())), rmm::device_buffer{}, 0); + return std::make_unique(new_type, + indices_size, + std::move(*(contents.data.release())), + rmm::device_buffer{0, stream, mr}, + 0); } // If the new type does not match, then convert the data. 
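// Several hunks above replace std::vector / thrust::host_vector staging
// buffers with cudf's stream-aware host vectors (make_host_vector /
// make_empty_host_vector), which may be backed by pinned memory for faster
// host-to-device copies. The shape of the pattern, sketched with plain types
// (make_empty_staging_vector is a hypothetical stand-in, not the cudf API):
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <vector>

template <typename T>
std::vector<T> make_empty_staging_vector(std::size_t capacity)
{
  std::vector<T> v;
  v.reserve(capacity);  // size stays 0 so std::back_inserter appends from scratch;
                        // the real helper draws from a pinned-memory pool instead
  return v;
}

int main()
{
  std::vector<long> transition_times{100, 200, 300};
  auto typed = make_empty_staging_vector<long>(transition_times.size());
  std::transform(transition_times.cbegin(),
                 transition_times.cend(),
                 std::back_inserter(typed),
                 [](auto ts) { return ts * 2; });  // placeholder for the timestamp conversion
  return typed.size() == 3 ? 0 : 1;
}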
cudf::column_view cast_view{ diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 08a33d40abe..cf40fda5971 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/filling/calendrical_month_sequence.cu b/cpp/src/filling/calendrical_month_sequence.cu index 3e6d693dde5..f984f307ddd 100644 --- a/cpp/src/filling/calendrical_month_sequence.cu +++ b/cpp/src/filling/calendrical_month_sequence.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/interop/arrow_utilities.cpp b/cpp/src/interop/arrow_utilities.cpp index 605d813ed1e..4292552a800 100644 --- a/cpp/src/interop/arrow_utilities.cpp +++ b/cpp/src/interop/arrow_utilities.cpp @@ -16,9 +16,16 @@ #include "arrow_utilities.hpp" +#include #include #include +#include +#include + +#include +#include + #include namespace cudf { @@ -83,9 +90,33 @@ ArrowType id_to_arrow_type(cudf::type_id id) case cudf::type_id::FLOAT32: return NANOARROW_TYPE_FLOAT; case cudf::type_id::FLOAT64: return NANOARROW_TYPE_DOUBLE; case cudf::type_id::TIMESTAMP_DAYS: return NANOARROW_TYPE_DATE32; + case cudf::type_id::DECIMAL128: return NANOARROW_TYPE_DECIMAL128; default: CUDF_FAIL("Unsupported type_id conversion to arrow type", cudf::data_type_error); } } +ArrowType id_to_arrow_storage_type(cudf::type_id id) +{ + switch (id) { + case cudf::type_id::TIMESTAMP_SECONDS: + case cudf::type_id::TIMESTAMP_MILLISECONDS: + case cudf::type_id::TIMESTAMP_MICROSECONDS: + case cudf::type_id::TIMESTAMP_NANOSECONDS: return NANOARROW_TYPE_INT64; + case cudf::type_id::DURATION_SECONDS: + case cudf::type_id::DURATION_MILLISECONDS: + case cudf::type_id::DURATION_MICROSECONDS: + case cudf::type_id::DURATION_NANOSECONDS: return NANOARROW_TYPE_INT64; + default: return id_to_arrow_type(id); + } +} + +int initialize_array(ArrowArray* arr, ArrowType storage_type, cudf::column_view column) +{ + NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromType(arr, storage_type)); + arr->length = column.size(); + arr->null_count = column.null_count(); + return NANOARROW_OK; +} + } // namespace detail } // namespace cudf diff --git a/cpp/src/interop/arrow_utilities.hpp b/cpp/src/interop/arrow_utilities.hpp index 4e2628ab689..1cee3071fcb 100644 --- a/cpp/src/interop/arrow_utilities.hpp +++ b/cpp/src/interop/arrow_utilities.hpp @@ -18,8 +18,12 @@ #include +#include +#include +#include +#include + #include -#include namespace cudf { namespace detail { @@ -47,5 +51,42 @@ data_type arrow_to_cudf_type(ArrowSchemaView const* arrow_view); */ ArrowType id_to_arrow_type(cudf::type_id id); +/** + * @brief Map cudf column type id to the storage type for Arrow + * + * Specifically this is for handling the underlying storage type of + * timestamps and durations. 
+ * + * @param id column type id + * @return ArrowType storage type + */ +ArrowType id_to_arrow_storage_type(cudf::type_id id); + +/** + * @brief Helper to initialize ArrowArray struct + * + * @param arr Pointer to ArrowArray to initialize + * @param storage_type The type to initialize with + * @param column view for column to get the length and null count from + * @return nanoarrow status code, should be NANOARROW_OK if there are no errors + */ +int initialize_array(ArrowArray* arr, ArrowType storage_type, cudf::column_view column); + +/** + * @brief Helper to convert decimal values to 128-bit versions for Arrow compatibility + * + * The template parameter should be the underlying type of the data (e.g. int32_t for + * 32-bit decimal and int64_t for 64-bit decimal). + * + * @param input column_view of the data + * @param stream cuda stream to perform the operations on + * @param mr memory resource to allocate the returned device_uvector with + * @return unique_ptr to a device_buffer containing the upcasted data + */ +template +std::unique_ptr decimals_to_arrow(cudf::column_view input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + } // namespace detail } // namespace cudf diff --git a/cpp/src/interop/decimal_conversion_utilities.cu b/cpp/src/interop/decimal_conversion_utilities.cu new file mode 100644 index 00000000000..2f81c754a30 --- /dev/null +++ b/cpp/src/interop/decimal_conversion_utilities.cu @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "decimal_conversion_utilities.cuh" + +#include +#include +#include + +#include + +#include + +#include + +namespace cudf { +namespace detail { + +template +std::unique_ptr convert_decimals_to_decimal128( + cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) +{ + static_assert(std::is_same_v or std::is_same_v, + "Only int32 and int64 decimal types can be converted to decimal128."); + + constexpr size_type BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(DecimalType); + auto buf = std::make_unique(column.size() * sizeof(__int128_t), stream, mr); + + thrust::for_each(rmm::exec_policy_nosync(stream, mr), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(column.size()), + [in = column.begin(), + out = reinterpret_cast(buf->data()), + BIT_WIDTH_RATIO] __device__(auto in_idx) { + auto const out_idx = in_idx * BIT_WIDTH_RATIO; + // the lowest order bits are the value, the remainder + // simply matches the sign bit to satisfy the two's + // complement integer representation of negative numbers. + out[out_idx] = in[in_idx]; +#pragma unroll BIT_WIDTH_RATIO - 1 + for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) { + out[out_idx + i] = in[in_idx] < 0 ? 
-1 : 0; + } + }); + + return buf; +} + +// Instantiate templates for int32_t and int64_t decimal types +template std::unique_ptr convert_decimals_to_decimal128( + cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); + +template std::unique_ptr convert_decimals_to_decimal128( + cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/interop/decimal_conversion_utilities.cuh b/cpp/src/interop/decimal_conversion_utilities.cuh new file mode 100644 index 00000000000..41263147404 --- /dev/null +++ b/cpp/src/interop/decimal_conversion_utilities.cuh @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +#include + +namespace cudf::detail { + +/** + * @brief Convert decimal32 and decimal64 numeric data to decimal128 and return the device vector + * + * @tparam DecimalType to convert from + * + * @param column A view of the input columns + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + * + * @return A device vector containing the converted decimal128 data + */ +template +std::unique_ptr convert_decimals_to_decimal128( + cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); + +} // namespace cudf::detail diff --git a/cpp/src/interop/from_arrow_device.cu b/cpp/src/interop/from_arrow_device.cu index e1d289e67a3..440df571de0 100644 --- a/cpp/src/interop/from_arrow_device.cu +++ b/cpp/src/interop/from_arrow_device.cu @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -39,6 +38,7 @@ #include #include +#include namespace cudf { @@ -144,9 +144,6 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()( rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_EXPECTS(schema->type != NANOARROW_TYPE_LARGE_STRING, - "Large strings are not yet supported in from_arrow_device", - cudf::data_type_error); if (input->length == 0) { return std::make_tuple( {type, @@ -158,12 +155,15 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()( {}); } - auto offsets_view = column_view{data_type(type_id::INT32), + data_type offsets_type(type_id::INT32); + if (schema->type == NANOARROW_TYPE_LARGE_STRING) { offsets_type = data_type(type_id::INT64); } + auto offsets_view = column_view{offsets_type, static_cast(input->offset + input->length) + 1, input->buffers[fixed_width_data_buffer_idx], nullptr, 0, 0}; + return std::make_tuple( {type, static_cast(input->length), diff --git a/cpp/src/interop/from_arrow_host.cu b/cpp/src/interop/from_arrow_host.cu index b3087dedf98..efde8f2a463 100644 --- a/cpp/src/interop/from_arrow_host.cu +++ b/cpp/src/interop/from_arrow_host.cu @@ -28,7 +28,6 @@ #include #include #include -#include #include #include 
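// Worked example of the sign-extension loop in convert_decimals_to_decimal128
// above: with 64-bit input, BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(int64_t)
// = 2, so each value becomes two little-endian words where every word past the
// first repeats the sign, preserving two's complement. A host-side sketch of the
// same arithmetic (illustration only; the real loop runs per element on the GPU):
#include <cstddef>
#include <cstdint>
#include <vector>

int main()
{
  std::vector<int64_t> in{5, -5};
  constexpr int ratio = sizeof(__int128_t) / sizeof(int64_t);  // == 2
  std::vector<int64_t> out(in.size() * ratio);
  for (std::size_t i = 0; i < in.size(); ++i) {
    auto const o = i * ratio;
    out[o] = in[i];  // low word carries the value
    for (int j = 1; j < ratio; ++j) {
      out[o + j] = in[i] < 0 ? -1 : 0;  // high word repeats the sign bit
    }
  }
  // out is {5, 0, -5, -1}: bit-identical to __int128_t{5} and __int128_t{-5}
  // on a little-endian machine.
  return 0;
}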
#include @@ -42,6 +41,7 @@ #include #include +#include namespace cudf { namespace detail { diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 622a3aba4bb..3d41f856f4f 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "arrow_utilities.hpp" +#include "decimal_conversion_utilities.cuh" #include "detail/arrow_allocator.hpp" #include @@ -157,33 +159,20 @@ std::shared_ptr unsupported_decimals_to_arrow(column_view input, arrow::MemoryPool* ar_mr, rmm::cuda_stream_view stream) { - constexpr size_type BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(DeviceType); - - rmm::device_uvector buf(input.size() * BIT_WIDTH_RATIO, stream); - - auto count = thrust::make_counting_iterator(0); - - thrust::for_each( - rmm::exec_policy(cudf::get_default_stream()), - count, - count + input.size(), - [in = input.begin(), out = buf.data(), BIT_WIDTH_RATIO] __device__(auto in_idx) { - auto const out_idx = in_idx * BIT_WIDTH_RATIO; - // The lowest order bits are the value, the remainder - // simply matches the sign bit to satisfy the two's - // complement integer representation of negative numbers. - out[out_idx] = in[in_idx]; -#pragma unroll BIT_WIDTH_RATIO - 1 - for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) { - out[out_idx + i] = in[in_idx] < 0 ? -1 : 0; - } - }); + auto buf = detail::convert_decimals_to_decimal128( + input, stream, rmm::mr::get_current_device_resource()); - auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType); + // Synchronize stream here to ensure the decimal128 buffer is ready. + stream.synchronize(); + + auto const buf_size_in_bytes = buf->size(); auto data_buffer = allocate_arrow_buffer(buf_size_in_bytes, ar_mr); - CUDF_CUDA_TRY(cudaMemcpyAsync( - data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), + buf->data(), + buf_size_in_bytes, + cudaMemcpyDefault, + stream.value())); auto type = arrow::decimal(precision, -input.type().scale()); auto mask = fetch_mask_buffer(input, ar_mr, stream); @@ -473,7 +462,7 @@ std::shared_ptr to_arrow(cudf::scalar const& input, { auto const column = cudf::make_column_from_scalar(input, 1, stream); cudf::table_view const tv{{column->view()}}; - auto const arrow_table = cudf::to_arrow(tv, {metadata}, stream); + auto const arrow_table = detail::to_arrow(tv, {metadata}, stream, ar_mr); auto const ac = arrow_table->column(0); auto const maybe_scalar = ac->GetScalar(0); if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); } diff --git a/cpp/src/interop/to_arrow_device.cu b/cpp/src/interop/to_arrow_device.cu index b9d3a59e647..cea7cdebcba 100644 --- a/cpp/src/interop/to_arrow_device.cu +++ b/cpp/src/interop/to_arrow_device.cu @@ -15,6 +15,7 @@ */ #include "arrow_utilities.hpp" +#include "decimal_conversion_utilities.cuh" #include #include @@ -24,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -44,6 +44,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -56,14 +57,6 @@ void device_buffer_finalize(ArrowBufferAllocator* allocator, uint8_t*, int64_t) delete unique_buffer; } -int initialize_array(ArrowArray* arr, ArrowType storage_type, cudf::column_view column) -{ - NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromType(arr, storage_type)); - arr->length = column.size(); - arr->null_count = column.null_count(); - return NANOARROW_OK; -} - template struct is_device_scalar : public std::false_type {}; 
@@ -99,21 +92,6 @@ int set_buffer(std::unique_ptr device_buf, int64_t i, ArrowArray* out) return NANOARROW_OK; } -ArrowType id_to_arrow_storage_type(cudf::type_id id) -{ - switch (id) { - case cudf::type_id::TIMESTAMP_SECONDS: - case cudf::type_id::TIMESTAMP_MILLISECONDS: - case cudf::type_id::TIMESTAMP_MICROSECONDS: - case cudf::type_id::TIMESTAMP_NANOSECONDS: return NANOARROW_TYPE_INT64; - case cudf::type_id::DURATION_SECONDS: - case cudf::type_id::DURATION_MILLISECONDS: - case cudf::type_id::DURATION_MICROSECONDS: - case cudf::type_id::DURATION_NANOSECONDS: return NANOARROW_TYPE_INT64; - default: return id_to_arrow_type(id); - } -} - struct dispatch_to_arrow_device { template ())> int operator()(cudf::column&&, rmm::cuda_stream_view, rmm::device_async_resource_ref, ArrowArray*) @@ -156,35 +134,17 @@ struct dispatch_to_arrow_device { }; template -int decimals_to_arrow(cudf::column_view input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr, - ArrowArray* out) +int construct_decimals(cudf::column_view input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr, + ArrowArray* out) { nanoarrow::UniqueArray tmp; NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, input)); - constexpr size_type BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(DeviceType); - auto buf = - std::make_unique>(input.size() * BIT_WIDTH_RATIO, stream, mr); - - auto count = thrust::counting_iterator(0); - - thrust::for_each( - rmm::exec_policy(stream, mr), - count, - count + input.size(), - [in = input.begin(), out = buf->data(), BIT_WIDTH_RATIO] __device__(auto in_idx) { - auto const out_idx = in_idx * BIT_WIDTH_RATIO; - // the lowest order bits are the value, the remainder - // simply matches the sign bit to satisfy the two's - // complement integer representation of negative numbers. - out[out_idx] = in[in_idx]; -#pragma unroll BIT_WIDTH_RATIO - 1 - for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) { - out[out_idx + i] = in[in_idx] < 0 ? -1 : 0; - } - }); + auto buf = detail::convert_decimals_to_decimal128(input, stream, mr); + // Synchronize stream here to ensure the decimal128 buffer is ready. 
+ stream.synchronize(); NANOARROW_RETURN_NOT_OK(set_buffer(std::move(buf), fixed_width_data_buffer_idx, tmp.get())); ArrowArrayMove(tmp.get(), out); @@ -198,7 +158,7 @@ int dispatch_to_arrow_device::operator()(cudf::column&& colu ArrowArray* out) { using DeviceType = int32_t; - NANOARROW_RETURN_NOT_OK(decimals_to_arrow(column.view(), stream, mr, out)); + NANOARROW_RETURN_NOT_OK(construct_decimals(column.view(), stream, mr, out)); auto contents = column.release(); NANOARROW_RETURN_NOT_OK(set_null_mask(contents, out)); return NANOARROW_OK; @@ -211,7 +171,7 @@ int dispatch_to_arrow_device::operator()(cudf::column&& colu ArrowArray* out) { using DeviceType = int64_t; - NANOARROW_RETURN_NOT_OK(decimals_to_arrow(column.view(), stream, mr, out)); + NANOARROW_RETURN_NOT_OK(construct_decimals(column.view(), stream, mr, out)); auto contents = column.release(); NANOARROW_RETURN_NOT_OK(set_null_mask(contents, out)); return NANOARROW_OK; @@ -256,8 +216,15 @@ int dispatch_to_arrow_device::operator()(cudf::column&& colum rmm::device_async_resource_ref mr, ArrowArray* out) { + ArrowType nanoarrow_type = NANOARROW_TYPE_STRING; + if (column.num_children() > 0 && + column.child(cudf::strings_column_view::offsets_column_index).type().id() == + cudf::type_id::INT64) { + nanoarrow_type = NANOARROW_TYPE_LARGE_STRING; + } + nanoarrow::UniqueArray tmp; - NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_STRING, column)); + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), nanoarrow_type, column)); if (column.size() == 0) { // the scalar zero here is necessary because the spec for string arrays states @@ -265,8 +232,14 @@ int dispatch_to_arrow_device::operator()(cudf::column&& colum // the case of a 0 length string array, there should be exactly 1 value, zero, // in the offsets buffer. While some arrow implementations may accept a zero-sized // offsets buffer, best practices would be to allocate the buffer with the single value. 
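// Layout reminder behind the zero-length special case here (general Arrow
// convention, not cudf-specific): a STRING array stores int32 offsets and a
// LARGE_STRING array stores int64 offsets; N rows need N + 1 offsets, so even
// an empty array carries the single offset 0. For example:
#include <cstdint>
#include <string>
#include <vector>

int main()
{
  // ["ab", "c"] in Arrow string layout: offsets plus a flat character buffer.
  std::vector<int32_t> offsets{0, 2, 3};
  std::string chars{"abc"};
  // Row i spans chars[offsets[i], offsets[i + 1]).
  auto const row1 = chars.substr(offsets[1], offsets[2] - offsets[1]);  // "c"
  // A zero-row array still carries one offset: the single zero allocated below,
  // sized as int64 when the large-string branch is taken.
  std::vector<int32_t> empty_offsets{0};
  return (row1 == "c" && empty_offsets.size() == 1) ? 0 : 1;
}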
- auto zero = std::make_unique>(0, stream, mr); - NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); + if (nanoarrow_type == NANOARROW_TYPE_STRING) { + auto zero = std::make_unique>(0, stream, mr); + NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); + } else { + auto zero = std::make_unique>(0, stream, mr); + NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); + } + ArrowArrayMove(tmp.get(), out); return NANOARROW_OK; } @@ -436,7 +409,7 @@ template <> int dispatch_to_arrow_device_view::operator()(ArrowArray* out) const { using DeviceType = int32_t; - NANOARROW_RETURN_NOT_OK(decimals_to_arrow(column, stream, mr, out)); + NANOARROW_RETURN_NOT_OK(construct_decimals(column, stream, mr, out)); NANOARROW_RETURN_NOT_OK(set_null_mask(column, out)); return NANOARROW_OK; } @@ -445,7 +418,7 @@ template <> int dispatch_to_arrow_device_view::operator()(ArrowArray* out) const { using DeviceType = int64_t; - NANOARROW_RETURN_NOT_OK(decimals_to_arrow(column, stream, mr, out)); + NANOARROW_RETURN_NOT_OK(construct_decimals(column, stream, mr, out)); NANOARROW_RETURN_NOT_OK(set_null_mask(column, out)); return NANOARROW_OK; } @@ -481,13 +454,26 @@ int dispatch_to_arrow_device_view::operator()(ArrowArray* out) const template <> int dispatch_to_arrow_device_view::operator()(ArrowArray* out) const { + ArrowType nanoarrow_type = NANOARROW_TYPE_STRING; + if (column.num_children() > 0 && + column.child(cudf::strings_column_view::offsets_column_index).type().id() == + cudf::type_id::INT64) { + nanoarrow_type = NANOARROW_TYPE_LARGE_STRING; + } + nanoarrow::UniqueArray tmp; - NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_STRING, column)); + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), nanoarrow_type, column)); if (column.size() == 0) { // https://github.com/rapidsai/cudf/pull/15047#discussion_r1546528552 - auto zero = std::make_unique>(0, stream, mr); - NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); + if (nanoarrow_type == NANOARROW_TYPE_LARGE_STRING) { + auto zero = std::make_unique>(0, stream, mr); + NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); + } else { + auto zero = std::make_unique>(0, stream, mr); + NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); + } + ArrowArrayMove(tmp.get(), out); return NANOARROW_OK; } diff --git a/cpp/src/interop/to_arrow_host.cu b/cpp/src/interop/to_arrow_host.cu new file mode 100644 index 00000000000..193b3a3b5a2 --- /dev/null +++ b/cpp/src/interop/to_arrow_host.cu @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "arrow_utilities.hpp" +#include "decimal_conversion_utilities.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include + +namespace cudf { +namespace detail { + +namespace { + +struct dispatch_to_arrow_host { + cudf::column_view column; + rmm::cuda_stream_view stream; + rmm::device_async_resource_ref mr; + + int populate_validity_bitmap(ArrowBitmap* bitmap) const + { + if (!column.has_nulls()) { return NANOARROW_OK; } + + NANOARROW_RETURN_NOT_OK(ArrowBitmapResize(bitmap, static_cast(column.size()), 0)); + CUDF_CUDA_TRY(cudaMemcpyAsync(bitmap->buffer.data, + (column.offset() > 0) + ? cudf::detail::copy_bitmask(column, stream, mr).data() + : column.null_mask(), + bitmap->buffer.size_bytes, + cudaMemcpyDefault, + stream.value())); + return NANOARROW_OK; + } + + template + int populate_data_buffer(device_span input, ArrowBuffer* buffer) const + { + NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, input.size_bytes(), 1)); + CUDF_CUDA_TRY(cudaMemcpyAsync( + buffer->data, input.data(), input.size_bytes(), cudaMemcpyDefault, stream.value())); + return NANOARROW_OK; + } + + template () && !cudf::is_fixed_point())> + int operator()(ArrowArray*) const + { + CUDF_FAIL("Unsupported type for to_arrow_host", cudf::data_type_error); + } + + template () || std::is_same_v)> + int operator()(ArrowArray* out) const + { + nanoarrow::UniqueArray tmp; + + auto const storage_type = id_to_arrow_storage_type(column.type().id()); + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), storage_type, column)); + + NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); + using DataType = std::conditional_t, __int128_t, T>; + NANOARROW_RETURN_NOT_OK( + populate_data_buffer(device_span(column.data(), column.size()), + ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; + } + + // convert decimal types from libcudf to arrow where those types are not directly + // supported by Arrow. These types must be fit into 128 bits, the smallest + // decimal resolution supported by Arrow + template () && + (std::is_same_v || + std::is_same_v))> + int operator()(ArrowArray* out) const + { + using DeviceType = std::conditional_t, int32_t, int64_t>; + nanoarrow::UniqueArray tmp; + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, column)); + + NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); + auto buf = detail::convert_decimals_to_decimal128(column, stream, mr); + // No need to synchronize stream here as populate_data_buffer uses the same stream to copy data + // to host. 
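// The synchronization comments in these interop paths follow CUDA stream
// ordering: work enqueued on one stream executes in order, so a copy issued on
// the same stream as the producing kernel needs no explicit sync, and the host
// may only read the copied bytes once the stream has drained. A schematic
// sketch (copy_then_read is illustrative, not a cudf helper):
#include <cstddef>
#include <cuda_runtime_api.h>

void copy_then_read(void* host_buf, void const* dev_buf, std::size_t n, cudaStream_t stream)
{
  // Ordered after any kernel previously enqueued on `stream`, which is why the
  // decimal path above can skip an explicit synchronize before this copy.
  cudaMemcpyAsync(host_buf, dev_buf, n, cudaMemcpyDefault, stream);
  // Required before the host dereferences host_buf; to_arrow_host performs this
  // once at the end, after all columns have been enqueued.
  cudaStreamSynchronize(stream);
}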
+ NANOARROW_RETURN_NOT_OK( + populate_data_buffer(device_span<__int128_t const>( + reinterpret_cast(buf->data()), column.size()), + ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; + } +}; + +int get_column(cudf::column_view column, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr, + ArrowArray* out); + +template <> +int dispatch_to_arrow_host::operator()(ArrowArray* out) const +{ + nanoarrow::UniqueArray tmp; + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_BOOL, column)); + + NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); + auto bitmask = bools_to_mask(column, stream, mr); + NANOARROW_RETURN_NOT_OK(populate_data_buffer( + device_span(reinterpret_cast(bitmask.first->data()), + bitmask.first->size()), + ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; +} + +template <> +int dispatch_to_arrow_host::operator()(ArrowArray* out) const +{ + ArrowType nanoarrow_type = NANOARROW_TYPE_STRING; + if (column.num_children() > 0 && + column.child(cudf::strings_column_view::offsets_column_index).type().id() == + cudf::type_id::INT64) { + nanoarrow_type = NANOARROW_TYPE_LARGE_STRING; + } + + nanoarrow::UniqueArray tmp; + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), nanoarrow_type, column)); + + if (column.size() == 0) { + // initialize the offset buffer with a single zero by convention + if (nanoarrow_type == NANOARROW_TYPE_LARGE_STRING) { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt64(ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx), 0)); + } else { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt32(ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx), 0)); + } + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; + } + + NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); + + auto const scv = cudf::strings_column_view(column); + auto const offsets = scv.offsets(); + if (offsets.type().id() == cudf::type_id::INT64) { + NANOARROW_RETURN_NOT_OK(populate_data_buffer( + device_span(offsets.data() + scv.offset(), scv.size() + 1), + ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); + } else { + NANOARROW_RETURN_NOT_OK(populate_data_buffer( + device_span(offsets.data() + scv.offset(), scv.size() + 1), + ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); + } + + NANOARROW_RETURN_NOT_OK( + populate_data_buffer(device_span(scv.chars_begin(stream), scv.chars_size(stream)), + ArrowArrayBuffer(tmp.get(), 2))); + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; +} + +template <> +int dispatch_to_arrow_host::operator()(ArrowArray* out) const +{ + nanoarrow::UniqueArray tmp; + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_LIST, column)); + NANOARROW_RETURN_NOT_OK(ArrowArrayAllocateChildren(tmp.get(), 1)); + + NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); + auto const lcv = cudf::lists_column_view(column); + + if (column.size() == 0) { + // initialize the offsets buffer with a single zero by convention for 0 length + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt32(ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx), 0)); + } else { + NANOARROW_RETURN_NOT_OK( + populate_data_buffer(device_span(lcv.offsets_begin(), (column.size() + 1)), + ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); + } + + 
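// The list path mirrors the string path: an int32 offsets buffer over a
// recursively converted child array. A worked example of the layout (plain
// host code, illustrative):
#include <cstdint>
#include <vector>

int main()
{
  // [[1, 2], [], [3]] in Arrow list layout:
  std::vector<int32_t> offsets{0, 2, 2, 3};  // N + 1 entries; the empty list spans [2, 2)
  std::vector<int32_t> child{1, 2, 3};       // flattened values, converted recursively below
  // List i spans child[offsets[i], offsets[i + 1]); an empty parent column gets
  // the single offset 0, the same convention strings use.
  return (offsets.size() == 4 && child.size() == 3) ? 0 : 1;
}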
NANOARROW_RETURN_NOT_OK(get_column(lcv.child(), stream, mr, tmp->children[0])); + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; +} + +template <> +int dispatch_to_arrow_host::operator()(ArrowArray* out) const +{ + nanoarrow::UniqueArray tmp; + NANOARROW_RETURN_NOT_OK(initialize_array( + tmp.get(), + id_to_arrow_type(column.child(cudf::dictionary_column_view::indices_column_index).type().id()), + column)); + NANOARROW_RETURN_NOT_OK(ArrowArrayAllocateDictionary(tmp.get())); + + NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); + auto dcv = cudf::dictionary_column_view(column); + auto dict_indices = dcv.get_indices_annotated(); + switch (dict_indices.type().id()) { + case type_id::INT8: + case type_id::UINT8: + NANOARROW_RETURN_NOT_OK(populate_data_buffer( + device_span(dict_indices.data(), dict_indices.size()), + ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); + break; + case type_id::INT16: + case type_id::UINT16: + NANOARROW_RETURN_NOT_OK(populate_data_buffer( + device_span(dict_indices.data(), dict_indices.size()), + ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); + break; + case type_id::INT32: + case type_id::UINT32: + NANOARROW_RETURN_NOT_OK(populate_data_buffer( + device_span(dict_indices.data(), dict_indices.size()), + ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); + break; + case type_id::INT64: + case type_id::UINT64: + NANOARROW_RETURN_NOT_OK(populate_data_buffer( + device_span(dict_indices.data(), dict_indices.size()), + ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); + break; + default: CUDF_FAIL("unsupported type for dictionary indices"); + } + + NANOARROW_RETURN_NOT_OK(get_column(dcv.keys(), stream, mr, tmp->dictionary)); + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; +} + +template <> +int dispatch_to_arrow_host::operator()(ArrowArray* out) const +{ + nanoarrow::UniqueArray tmp; + + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_STRUCT, column)); + NANOARROW_RETURN_NOT_OK(ArrowArrayAllocateChildren(tmp.get(), column.num_children())); + NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); + + auto const scv = cudf::structs_column_view(column); + + for (size_t i = 0; i < size_t(tmp->n_children); ++i) { + ArrowArray* child_ptr = tmp->children[i]; + auto const child = scv.get_sliced_child(i, stream); + NANOARROW_RETURN_NOT_OK(get_column(child, stream, mr, child_ptr)); + } + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; +} + +int get_column(cudf::column_view column, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr, + ArrowArray* out) +{ + return column.type().id() != type_id::EMPTY + ? 
type_dispatcher(column.type(), dispatch_to_arrow_host{column, stream, mr}, out) + : initialize_array(out, NANOARROW_TYPE_NA, column); +} + +unique_device_array_t create_device_array(nanoarrow::UniqueArray&& out) +{ + ArrowError err; + if (ArrowArrayFinishBuildingDefault(out.get(), &err) != NANOARROW_OK) { + std::cerr << err.message << std::endl; + CUDF_FAIL("failed to build"); + } + + unique_device_array_t result(new ArrowDeviceArray, [](ArrowDeviceArray* arr) { + if (arr->array.release != nullptr) { ArrowArrayRelease(&arr->array); } + delete arr; + }); + + result->device_id = -1; + result->device_type = ARROW_DEVICE_CPU; + result->sync_event = nullptr; + ArrowArrayMove(out.get(), &result->array); + return result; +} + +} // namespace + +unique_device_array_t to_arrow_host(cudf::table_view const& table, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + nanoarrow::UniqueArray tmp; + NANOARROW_THROW_NOT_OK(ArrowArrayInitFromType(tmp.get(), NANOARROW_TYPE_STRUCT)); + + NANOARROW_THROW_NOT_OK(ArrowArrayAllocateChildren(tmp.get(), table.num_columns())); + tmp->length = table.num_rows(); + tmp->null_count = 0; + + for (cudf::size_type i = 0; i < table.num_columns(); ++i) { + auto child = tmp->children[i]; + auto col = table.column(i); + NANOARROW_THROW_NOT_OK( + cudf::type_dispatcher(col.type(), detail::dispatch_to_arrow_host{col, stream, mr}, child)); + } + + // wait for all the stream operations to complete before we return. + // this ensures that the host memory that we're returning will be populated + // before we return from this function. + stream.synchronize(); + + return create_device_array(std::move(tmp)); +} + +unique_device_array_t to_arrow_host(cudf::column_view const& col, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + nanoarrow::UniqueArray tmp; + + NANOARROW_THROW_NOT_OK( + cudf::type_dispatcher(col.type(), detail::dispatch_to_arrow_host{col, stream, mr}, tmp.get())); + + // wait for all the stream operations to complete before we return. + // this ensures that the host memory that we're returning will be populated + // before we return from this function. + stream.synchronize(); + + return create_device_array(std::move(tmp)); +} + +} // namespace detail + +unique_device_array_t to_arrow_host(cudf::column_view const& col, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return detail::to_arrow_host(col, stream, mr); +} + +unique_device_array_t to_arrow_host(cudf::table_view const& table, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return detail::to_arrow_host(table, stream, mr); +} + +} // namespace cudf diff --git a/cpp/src/interop/to_arrow_schema.cpp b/cpp/src/interop/to_arrow_schema.cpp index 19915464236..b98ca8a7bed 100644 --- a/cpp/src/interop/to_arrow_schema.cpp +++ b/cpp/src/interop/to_arrow_schema.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -120,7 +119,11 @@ int dispatch_to_arrow_type::operator()(column_view input, column_metadata const&, ArrowSchema* out) { - return ArrowSchemaSetType(out, NANOARROW_TYPE_STRING); + return ((input.num_children() == 0 || + input.child(cudf::strings_column_view::offsets_column_index).type().id() == + type_id::INT32)) + ? 
ArrowSchemaSetType(out, NANOARROW_TYPE_STRING) + : ArrowSchemaSetType(out, NANOARROW_TYPE_LARGE_STRING); } // these forward declarations are needed due to the recursive calls to them diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index 814efe2b5a1..69a0e982a5b 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -554,9 +554,11 @@ table_with_metadata read_avro(std::unique_ptr&& source, auto d_global_dict_data = rmm::device_uvector(0, stream); if (total_dictionary_entries > 0) { - auto h_global_dict = std::vector(total_dictionary_entries); - auto h_global_dict_data = std::vector(dictionary_data_size); - size_t dict_pos = 0; + auto h_global_dict = + cudf::detail::make_host_vector(total_dictionary_entries, stream); + auto h_global_dict_data = + cudf::detail::make_host_vector(dictionary_data_size, stream); + size_t dict_pos = 0; for (size_t i = 0; i < column_types.size(); ++i) { auto const col_idx = selected_columns[i].first; diff --git a/cpp/src/io/comp/gpuinflate.hpp b/cpp/src/io/comp/gpuinflate.hpp index 5908b77c98b..8bfca2b30df 100644 --- a/cpp/src/io/comp/gpuinflate.hpp +++ b/cpp/src/io/comp/gpuinflate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -73,6 +74,7 @@ constexpr std::size_t BUFFER_PADDING_MULTIPLE{8}; * @param[in] parse_hdr Whether or not to parse GZIP header * @param[in] stream CUDA stream to use */ +CUDF_EXPORT void gpuinflate(device_span const> inputs, device_span const> outputs, device_span results, @@ -101,6 +103,7 @@ void gpu_copy_uncompressed_blocks(device_span const> * @param[out] results List of output status structures * @param[in] stream CUDA stream to use */ +CUDF_EXPORT void gpu_unsnap(device_span const> inputs, device_span const> outputs, device_span results, @@ -113,6 +116,7 @@ void gpu_unsnap(device_span const> inputs, * * @return The size in bytes of required temporary memory */ +CUDF_EXPORT size_t get_gpu_debrotli_scratch_size(int max_num_inputs = 0); /** @@ -128,6 +132,7 @@ size_t get_gpu_debrotli_scratch_size(int max_num_inputs = 0); * @param[in] scratch_size Size in bytes of the temporary memory * @param[in] stream CUDA stream to use */ +CUDF_EXPORT void gpu_debrotli(device_span const> inputs, device_span const> outputs, device_span results, diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 05faded651d..40d4372ae9d 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -567,7 +567,7 @@ void infer_column_types(parse_options const& parse_opts, } std::vector decode_data(parse_options const& parse_opts, - std::vector const& column_flags, + host_span column_flags, std::vector const& column_names, device_span data, device_span row_offsets, @@ -592,8 +592,8 @@ std::vector decode_data(parse_options const& parse_opts, } } - thrust::host_vector h_data(num_active_columns); - thrust::host_vector h_valid(num_active_columns); + auto h_data = cudf::detail::make_host_vector(num_active_columns, stream); + auto h_valid = cudf::detail::make_host_vector(num_active_columns, stream); for (int i = 0; i < num_active_columns; ++i) { h_data[i] = out_buffers[i].data(); @@ -622,14 +622,16 @@ std::vector decode_data(parse_options const& parse_opts, return out_buffers; } -std::vector 
determine_column_types(csv_reader_options const& reader_opts, - parse_options const& parse_opts, - host_span column_names, - device_span data, - device_span row_offsets, - int32_t num_records, - host_span column_flags, - rmm::cuda_stream_view stream) +cudf::detail::host_vector determine_column_types( + csv_reader_options const& reader_opts, + parse_options const& parse_opts, + host_span column_names, + device_span data, + device_span row_offsets, + int32_t num_records, + host_span column_flags, + cudf::size_type num_active_columns, + rmm::cuda_stream_view stream) { std::vector column_types(column_flags.size()); @@ -653,7 +655,8 @@ std::vector determine_column_types(csv_reader_options const& reader_o stream); // compact column_types to only include active columns - std::vector active_col_types; + auto active_col_types = + cudf::detail::make_empty_host_vector(num_active_columns, stream); std::copy_if(column_types.cbegin(), column_types.cend(), std::back_inserter(active_col_types), @@ -697,8 +700,10 @@ table_with_metadata read_csv(cudf::io::datasource* source, auto const num_actual_columns = static_cast(column_names.size()); auto num_active_columns = num_actual_columns; - auto column_flags = std::vector( - num_actual_columns, column_parse::enabled | column_parse::inferred); + auto column_flags = + cudf::detail::make_host_vector(num_actual_columns, stream); + std::fill( + column_flags.begin(), column_flags.end(), column_parse::enabled | column_parse::inferred); // User did not pass column names to override names in the file // Process names from the file to remove empty and duplicated strings @@ -842,8 +847,15 @@ table_with_metadata read_csv(cudf::io::datasource* source, // Exclude the end-of-data row from number of rows with actual data auto const num_records = std::max(row_offsets.size(), 1ul) - 1; - auto const column_types = determine_column_types( - reader_opts, parse_opts, column_names, data, row_offsets, num_records, column_flags, stream); + auto const column_types = determine_column_types(reader_opts, + parse_opts, + column_names, + data, + row_offsets, + num_records, + column_flags, + num_active_columns, + stream); auto metadata = table_metadata{}; auto out_columns = std::vector>(); diff --git a/cpp/src/io/fst/agent_dfa.cuh b/cpp/src/io/fst/agent_dfa.cuh index 2171764decd..0e70984b39c 100644 --- a/cpp/src/io/fst/agent_dfa.cuh +++ b/cpp/src/io/fst/agent_dfa.cuh @@ -18,7 +18,9 @@ #include "in_reg_array.cuh" #include +#include #include +#include #include namespace cudf::io::fst::detail { @@ -44,9 +46,10 @@ using StateIndexT = uint32_t; template struct VectorCompositeOp { template - __host__ __device__ __forceinline__ VectorT operator()(VectorT const& lhs, VectorT const& rhs) + __device__ __forceinline__ VectorT operator()(VectorT const& lhs, VectorT const& rhs) { VectorT res{}; +#pragma unroll for (int32_t i = 0; i < NUM_ITEMS; ++i) { res.Set(i, rhs.Get(lhs.Get(i))); } @@ -57,61 +60,275 @@ struct VectorCompositeOp { /** * @brief A class whose ReadSymbol member function is invoked for each symbol being read from the * input tape. The wrapper class looks up whether a state transition caused by a symbol is supposed - * to emit any output symbol (the "transduced" output) and, if so, keeps track of how many symbols - * it intends to write out and writing out such symbols to the given output iterators. + * to emit any output symbol (the "transduced" output) and, if so, keeps track of *how many* symbols + * it intends to write out. 
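// Illustrative sketch (not part of this patch): the count-only ReadSymbol callback described
// above supports a two-pass transduction strategy: pass 1 only counts output symbols, an
// exclusive scan turns the counts into write offsets, and pass 2 performs the actual writes.
// A standalone host-side analogue with a hypothetical toy transducer `emit_count`, assuming C++17:
#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

std::vector<int> transduce_two_pass(std::vector<int> const& in)
{
  auto emit_count = [](int sym) { return sym % 2 == 0 ? 1 : 0; };  // toy transducer
  // Pass 1: count the outputs produced per input symbol
  std::vector<int> counts(in.size());
  std::transform(in.begin(), in.end(), counts.begin(), emit_count);
  // Exclusive scan converts counts into write offsets
  std::vector<int> offsets(in.size());
  std::exclusive_scan(counts.begin(), counts.end(), offsets.begin(), 0);
  // Pass 2: every producer writes at its precomputed offset, leaving no gaps
  std::vector<int> out(in.empty() ? 0 : offsets.back() + counts.back());
  for (std::size_t i = 0; i < in.size(); ++i) {
    if (counts[i] != 0) { out[offsets[i]] = in[i]; }
  }
  return out;
}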
+ */ +template +class DFACountCallbackWrapper { + public: + __device__ __forceinline__ DFACountCallbackWrapper(TransducerTableT transducer_table) + : transducer_table(transducer_table) + { + } + + template + __device__ __forceinline__ void Init(OffsetT const&) + { + out_count = 0; + } + + template + __device__ __forceinline__ void ReadSymbol(CharIndexT const character_index, + StateIndexT const old_state, + StateIndexT const new_state, + SymbolIndexT const symbol_id, + SymbolT const read_symbol) + { + uint32_t const count = transducer_table(old_state, symbol_id, read_symbol); + out_count += count; + } + + __device__ __forceinline__ void TearDown() {} + TransducerTableT const transducer_table; + uint32_t out_count{}; +}; + +/** + * @brief A class whose ReadSymbol member function is invoked for each symbol being read from the + * input tape. The wrapper class looks up whether a state transition caused by a symbol is supposed + * to emit any output symbol (the "transduced" output) and, if so, writes out such symbols to the + * given output iterators. + * + * @tparam MaxTranslatedOutChars The maximum number of symbols that are written on any given state + * transition + * @tparam TransducerTableT The type implementing a transducer table that can be used for looking up + * the symbols that are supposed to be emitted on a given state transition. + * @tparam TransducedOutItT A random-access output iterator type to which symbols returned by the + * transducer table are assignable. + * @tparam TransducedIndexOutItT A random-access output iterator type to which indexes are written.
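// Illustrative sketch (not part of this patch): the MaxTranslatedOutChars dispatch in the class
// below selects, at compile time, a fully unrolled fixed-trip loop when at most two symbols can
// be emitted per transition. A simplified analogue using `if constexpr` instead of cub::Int2Type
// tag dispatch; the function and parameter names are hypothetical:
template <int MaxOut>
__device__ void emit_translated(char* dst, int pos, char const* symbols, int count)
{
  if constexpr (MaxOut <= 2) {
    // Fixed trip count lets the compiler unroll; the bound check merely
    // predicates the store instead of terminating the loop.
#pragma unroll
    for (int i = 0; i < MaxOut; ++i) {
      if (i < count) { dst[pos + i] = symbols[i]; }
    }
  } else {
    // General path: dynamic trip count, no unrolling guarantee
    for (int i = 0; i < count; ++i) { dst[pos + i] = symbols[i]; }
  }
}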
*/ -template -class DFASimulationCallbackWrapper { +template +class DFAWriteCallbackWrapper { public: - __host__ __device__ __forceinline__ DFASimulationCallbackWrapper( - TransducerTableT transducer_table, TransducedOutItT out_it, TransducedIndexOutItT out_idx_it) - : transducer_table(transducer_table), out_it(out_it), out_idx_it(out_idx_it), write(false) + __device__ __forceinline__ DFAWriteCallbackWrapper(TransducerTableT transducer_table, + TransducedOutItT out_it, + TransducedIndexOutItT out_idx_it, + uint32_t out_offset, + uint32_t /*tile_out_offset*/, + uint32_t /*tile_in_offset*/, + uint32_t /*tile_out_count*/) + : transducer_table(transducer_table), + out_it(out_it), + out_idx_it(out_idx_it), + out_offset(out_offset) { } template - __host__ __device__ __forceinline__ void Init(OffsetT const& offset) + __device__ __forceinline__ void Init(OffsetT const& in_offset) + { + this->in_offset = in_offset; + } + + template + __device__ __forceinline__ + typename ::cuda::std::enable_if<(MaxTranslatedOutChars_ <= 2), void>::type + ReadSymbol(CharIndexT const character_index, + StateIndexT const old_state, + StateIndexT const new_state, + SymbolIndexT const symbol_id, + SymbolT const read_symbol, + cub::Int2Type /*MaxTranslatedOutChars*/) + { + uint32_t const count = transducer_table(old_state, symbol_id, read_symbol); + +#pragma unroll + for (uint32_t out_char = 0; out_char < MaxTranslatedOutChars_; out_char++) { + if (out_char < count) { + out_it[out_offset + out_char] = + transducer_table(old_state, symbol_id, out_char, read_symbol); + out_idx_it[out_offset + out_char] = in_offset + character_index; + } + } + out_offset += count; + } + + template + __device__ __forceinline__ + typename ::cuda::std::enable_if<(MaxTranslatedOutChars_ > 2), void>::type + ReadSymbol(CharIndexT const character_index, + StateIndexT const old_state, + StateIndexT const new_state, + SymbolIndexT const symbol_id, + SymbolT const read_symbol, + cub::Int2Type) { - this->offset = offset; - if (!write) out_count = 0; + uint32_t const count = transducer_table(old_state, symbol_id, read_symbol); + + for (uint32_t out_char = 0; out_char < count; out_char++) { + out_it[out_offset + out_char] = transducer_table(old_state, symbol_id, out_char, read_symbol); + out_idx_it[out_offset + out_char] = in_offset + character_index; + } + out_offset += count; } template - __host__ __device__ __forceinline__ void ReadSymbol(CharIndexT const character_index, - StateIndexT const old_state, - StateIndexT const new_state, - SymbolIndexT const symbol_id, - SymbolT const read_symbol) + __device__ __forceinline__ void ReadSymbol(CharIndexT const character_index, + StateIndexT const old_state, + StateIndexT const new_state, + SymbolIndexT const symbol_id, + SymbolT const read_symbol) + { + ReadSymbol(character_index, + old_state, + new_state, + symbol_id, + read_symbol, + cub::Int2Type{}); + } + + __device__ __forceinline__ void TearDown() {} + + public: + TransducerTableT const transducer_table; + TransducedOutItT out_it; + TransducedIndexOutItT out_idx_it; + uint32_t out_offset; + uint32_t in_offset; +}; + +/** + * @brief A class whose ReadSymbol member function is invoked for each symbol being read from the + * input tape. The wrapper class looks up whether a state transition caused by a symbol is supposed + * to emit any output symbol (the "transduced" output) and, if so, writes out such symbols to the + * given output iterators. This class uses a shared memory-backed write buffer to coalesce writes to + * global memory. 
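// Illustrative sketch (not part of this patch): the write-coalescing idea described above,
// reduced to a stripped-down kernel. Each thread stages its scattered outputs at tile-relative
// offsets in shared memory; after a barrier the whole block drains the buffer so consecutive
// threads hit consecutive global addresses. Assumes one tile per block, per-thread offset/count
// arrays sized to the block, and a tile output no larger than the staging buffer; all names here
// are hypothetical:
__global__ void coalesced_write(char const* per_thread_payload, int const* thread_offset,
                                int const* thread_count, int tile_out_count, char* out)
{
  __shared__ char tile_buf[4096];  // per-tile staging buffer (size illustrative)
  int const t = threadIdx.x;
  // Phase 1: scattered writes land in fast shared memory
  for (int i = 0; i < thread_count[t]; ++i) {
    tile_buf[thread_offset[t] + i] = per_thread_payload[t];
  }
  __syncthreads();
  // Phase 2: coalesced flush to global memory
  for (int i = t; i < tile_out_count; i += blockDim.x) {
    out[blockIdx.x * 4096 + i] = tile_buf[i];
  }
}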
+ * + * @tparam DiscardIndexOutput Whether to discard the indexes instead of writing them to the given + * output iterator + * @tparam DiscardTranslatedOutput Whether to discard the translated output symbols instead of + * writing them to the given output iterator + * @tparam NumWriteBufferItems The number of items to allocate in shared memory for the write + * buffer. + * @tparam OutputT The type of the translated items + * @tparam TransducerTableT The type implementing a transducer table that can be used for looking up + * the symbols that are supposed to be emitted on a given state transition. + * @tparam TransducedOutItT A random-access output iterator type to which symbols returned by the + * transducer table are assignable. + * @tparam TransducedIndexOutItT A random-access output iterator type to which indexes are written. + */ +template +class WriteCoalescingCallbackWrapper { + struct TempStorage_Offsets { + uint16_t compacted_offset[NumWriteBufferItems]; + }; + struct TempStorage_Symbols { + OutputT compacted_symbols[NumWriteBufferItems]; + }; + using offset_cache_t = + ::cuda::std::conditional_t; + using symbol_cache_t = ::cuda::std:: + conditional_t, TempStorage_Symbols>; + struct TempStorage_ : offset_cache_t, symbol_cache_t {}; + + __device__ __forceinline__ TempStorage_& PrivateStorage() + { + __shared__ TempStorage private_storage; + return private_storage.Alias(); + } + TempStorage_& temp_storage; + + public: + struct TempStorage : cub::Uninitialized {}; + + __device__ __forceinline__ WriteCoalescingCallbackWrapper(TransducerTableT transducer_table, + TransducedOutItT out_it, + TransducedIndexOutItT out_idx_it, + uint32_t thread_out_offset, + uint32_t tile_out_offset, + uint32_t tile_in_offset, + uint32_t tile_out_count) + : temp_storage(PrivateStorage()), + transducer_table(transducer_table), + out_it(out_it), + out_idx_it(out_idx_it), + thread_out_offset(thread_out_offset), + tile_out_offset(tile_out_offset), + tile_in_offset(tile_in_offset), + tile_out_count(tile_out_count) + { + } + + template + __device__ __forceinline__ void Init(OffsetT const& offset) + { + this->in_offset = offset; + } + + template + __device__ __forceinline__ void ReadSymbol(CharIndexT const character_index, + StateIndexT const old_state, + StateIndexT const new_state, + SymbolIndexT const symbol_id, + SymbolT const read_symbol) { uint32_t const count = transducer_table(old_state, symbol_id, read_symbol); - if (write) { -#if defined(__CUDA_ARCH__) -#pragma unroll 1 -#endif - for (uint32_t out_char = 0; out_char < count; out_char++) { - out_it[out_count + out_char] = + for (uint32_t out_char = 0; out_char < count; out_char++) { + if constexpr (!DiscardIndexOutput) { + temp_storage.compacted_offset[thread_out_offset + out_char - tile_out_offset] = + in_offset + character_index - tile_in_offset; + } + if constexpr (!DiscardTranslatedOutput) { + temp_storage.compacted_symbols[thread_out_offset + out_char - tile_out_offset] = transducer_table(old_state, symbol_id, out_char, read_symbol); - out_idx_it[out_count + out_char] = offset + character_index; } } - out_count += count; + thread_out_offset += count; } - __host__ __device__ __forceinline__ void TearDown() {} + __device__ __forceinline__ void TearDown() + { + __syncthreads(); + if constexpr (!DiscardTranslatedOutput) { + for (uint32_t out_char = threadIdx.x; out_char < tile_out_count; out_char += blockDim.x) { + out_it[tile_out_offset + out_char] = temp_storage.compacted_symbols[out_char]; + } + } + if constexpr (!DiscardIndexOutput) { + for (uint32_t 
out_char = threadIdx.x; out_char < tile_out_count; out_char += blockDim.x) { + out_idx_it[tile_out_offset + out_char] = + temp_storage.compacted_offset[out_char] + tile_in_offset; + } + } + __syncthreads(); + } public: TransducerTableT const transducer_table; TransducedOutItT out_it; TransducedIndexOutItT out_idx_it; - uint32_t out_count; - uint32_t offset; - bool write; + uint32_t thread_out_offset; + uint32_t tile_out_offset; + uint32_t tile_in_offset; + uint32_t in_offset; + uint32_t tile_out_count; }; /** @@ -125,17 +342,18 @@ class DFASimulationCallbackWrapper { template class StateVectorTransitionOp { public: - __host__ __device__ __forceinline__ StateVectorTransitionOp( + __device__ __forceinline__ StateVectorTransitionOp( TransitionTableT const& transition_table, std::array& state_vector) : transition_table(transition_table), state_vector(state_vector) { } template - __host__ __device__ __forceinline__ void ReadSymbol(CharIndexT const& character_index, - SymbolIndexT const& read_symbol_id, - SymbolT const& read_symbol) const + __device__ __forceinline__ void ReadSymbol(CharIndexT const& character_index, + SymbolIndexT const& read_symbol_id, + SymbolT const& read_symbol) const { +#pragma unroll for (int32_t i = 0; i < NUM_INSTANCES; ++i) { state_vector[i] = transition_table(state_vector[i], read_symbol_id); } @@ -152,17 +370,17 @@ struct StateTransitionOp { TransitionTableT const& transition_table; CallbackOpT& callback_op; - __host__ __device__ __forceinline__ StateTransitionOp(TransitionTableT const& transition_table, - StateIndexT state, - CallbackOpT& callback_op) + __device__ __forceinline__ StateTransitionOp(TransitionTableT const& transition_table, + StateIndexT state, + CallbackOpT& callback_op) : transition_table(transition_table), state(state), callback_op(callback_op) { } template - __host__ __device__ __forceinline__ void ReadSymbol(CharIndexT const& character_index, - SymbolIndexT const& read_symbol_id, - SymbolT const& read_symbol) + __device__ __forceinline__ void ReadSymbol(CharIndexT const& character_index, + SymbolIndexT const& read_symbol_id, + SymbolT const& read_symbol) { // Remember what state we were in before we made the transition StateIndexT previous_state = state; @@ -420,7 +638,7 @@ struct AgentDFA { __syncthreads(); // Thread's symbols - CharT* t_chars = &temp_storage.chars[threadIdx.x * SYMBOLS_PER_THREAD]; + CharT const* t_chars = &temp_storage.chars[threadIdx.x * SYMBOLS_PER_THREAD]; // Parse thread's symbols and transition the state-vector if (is_full_block) { @@ -538,6 +756,43 @@ __launch_bounds__(int32_t(AgentDFAPolicy::BLOCK_THREADS)) CUDF_KERNEL // The state transition vector passed on to the second stage of the algorithm StateVectorT out_state_vector; + using OutSymbolT = typename DfaT::OutSymbolT; + // static constexpr int32_t MIN_TRANSLATED_OUT = DfaT::MIN_TRANSLATED_OUT; + static constexpr int32_t num_max_translated_out = DfaT::MAX_TRANSLATED_OUT; + static constexpr bool discard_out_index = + ::cuda::std::is_same>::value; + static constexpr bool discard_out_it = + ::cuda::std::is_same>::value; + using NonWriteCoalescingT = + DFAWriteCallbackWrapper; + + using WriteCoalescingT = + WriteCoalescingCallbackWrapper; + + static constexpr bool is_translation_pass = (!IS_TRANS_VECTOR_PASS) || IS_SINGLE_PASS; + + // Use write-coalescing only if the worst-case output size per tile fits into shared memory + static constexpr bool can_use_smem_cache = + (sizeof(typename WriteCoalescingT::TempStorage) + sizeof(typename AgentDfaSimT::TempStorage) + + 
sizeof(typename DfaT::SymbolGroupStorageT) + sizeof(typename DfaT::TransitionTableStorageT) + + sizeof(typename DfaT::TranslationTableStorageT)) < (48 * 1024); + static constexpr bool use_smem_cache = + is_translation_pass and + (sizeof(typename WriteCoalescingT::TempStorage) <= AgentDFAPolicy::SMEM_THRESHOLD) and + can_use_smem_cache; + + using DFASimulationCallbackWrapperT = + cuda::std::conditional_t; + // Stage 1: Compute the state-transition vector if (IS_TRANS_VECTOR_PASS || IS_SINGLE_PASS) { // Keeping track of the state for each of the state machines @@ -576,7 +831,7 @@ __launch_bounds__(int32_t(AgentDFAPolicy::BLOCK_THREADS)) CUDF_KERNEL // -> first block/tile: write out block aggregate as the "tile's" inclusive (i.e., the one that // incorporates all preceding blocks/tiles results) //------------------------------------------------------------------------------ - if (IS_SINGLE_PASS) { + if constexpr (IS_SINGLE_PASS) { uint32_t tile_idx = blockIdx.x; using StateVectorCompositeOpT = VectorCompositeOp; @@ -623,10 +878,7 @@ __launch_bounds__(int32_t(AgentDFAPolicy::BLOCK_THREADS)) CUDF_KERNEL } // Perform finite-state machine simulation, computing size of transduced output - DFASimulationCallbackWrapper - callback_wrapper(transducer_table, transduced_out_it, transduced_out_idx_it); + DFACountCallbackWrapper count_chars_callback_op{transducer_table}; StateIndexT t_start_state = state; agent_dfa.GetThreadStateTransitions(symbol_matcher, @@ -635,7 +887,7 @@ __launch_bounds__(int32_t(AgentDFAPolicy::BLOCK_THREADS)) CUDF_KERNEL blockIdx.x * SYMBOLS_PER_BLOCK, num_chars, state, - callback_wrapper, + count_chars_callback_op, cub::Int2Type()); __syncthreads(); @@ -650,15 +902,18 @@ __launch_bounds__(int32_t(AgentDFAPolicy::BLOCK_THREADS)) CUDF_KERNEL __shared__ typename OffsetPrefixScanCallbackOpT_::TempStorage prefix_callback_temp_storage; uint32_t tile_idx = blockIdx.x; + uint32_t tile_out_offset{}; + uint32_t tile_out_count{}; + uint32_t thread_out_offset{}; if (tile_idx == 0) { OffsetT block_aggregate = 0; OutOffsetBlockScan(scan_temp_storage) - .ExclusiveScan(callback_wrapper.out_count, - callback_wrapper.out_count, + .ExclusiveScan(count_chars_callback_op.out_count, + thread_out_offset, static_cast(0), cub::Sum{}, block_aggregate); - + tile_out_count = block_aggregate; if (threadIdx.x == 0 /*and not IS_LAST_TILE*/) { offset_tile_state.SetInclusive(0, block_aggregate); } @@ -671,22 +926,28 @@ __launch_bounds__(int32_t(AgentDFAPolicy::BLOCK_THREADS)) CUDF_KERNEL offset_tile_state, prefix_callback_temp_storage, cub::Sum{}, tile_idx); OutOffsetBlockScan(scan_temp_storage) - .ExclusiveScan( - callback_wrapper.out_count, callback_wrapper.out_count, cub::Sum{}, prefix_op); - + .ExclusiveScan(count_chars_callback_op.out_count, thread_out_offset, cub::Sum{}, prefix_op); + tile_out_offset = prefix_op.GetExclusivePrefix(); + tile_out_count = prefix_op.GetBlockAggregate(); if (tile_idx == gridDim.x - 1 && threadIdx.x == 0) { *d_num_transduced_out_it = prefix_op.GetInclusivePrefix(); } } - callback_wrapper.write = true; + DFASimulationCallbackWrapperT write_translated_callback_op{transducer_table, + transduced_out_it, + transduced_out_idx_it, + thread_out_offset, + tile_out_offset, + blockIdx.x * SYMBOLS_PER_BLOCK, + tile_out_count}; agent_dfa.GetThreadStateTransitions(symbol_matcher, transition_table, d_chars, blockIdx.x * SYMBOLS_PER_BLOCK, num_chars, t_start_state, - callback_wrapper, + write_translated_callback_op, cub::Int2Type()); } } diff --git a/cpp/src/io/fst/dispatch_dfa.cuh 
b/cpp/src/io/fst/dispatch_dfa.cuh index be63ec6539f..ef5e9c8a78f 100644 --- a/cpp/src/io/fst/dispatch_dfa.cuh +++ b/cpp/src/io/fst/dispatch_dfa.cuh @@ -37,6 +37,11 @@ struct AgentDFAPolicy { // The number of symbols processed by each thread static constexpr int32_t ITEMS_PER_THREAD = _ITEMS_PER_THREAD; + + // If the shared memory-backed write buffer exceeds this threshold, the FST will skip buffering + // the output in a write buffer and instead immediately write out to global memory, potentially + // resulting in non-coalesced writes + static constexpr std::size_t SMEM_THRESHOLD = 24 * 1024; }; /** @@ -49,7 +54,7 @@ struct DeviceFSMPolicy { struct Policy900 : cub::ChainedPolicy<900, Policy900, Policy900> { enum { BLOCK_THREADS = 128, - ITEMS_PER_THREAD = 32, + ITEMS_PER_THREAD = 16, }; using AgentDFAPolicy = AgentDFAPolicy; diff --git a/cpp/src/io/fst/lookup_tables.cuh b/cpp/src/io/fst/lookup_tables.cuh index 5532a7f994b..ae1f81fd541 100644 --- a/cpp/src/io/fst/lookup_tables.cuh +++ b/cpp/src/io/fst/lookup_tables.cuh @@ -367,18 +367,18 @@ class TransitionTable { template static KernelParameter InitDeviceTransitionTable( - std::array, MAX_NUM_STATES> const& translation_table) + std::array, MAX_NUM_STATES> const& transition_table) { KernelParameter init_data{}; - // translation_table[state][symbol] -> new state - for (std::size_t state = 0; state < translation_table.size(); ++state) { - for (std::size_t symbol = 0; symbol < translation_table[state].size(); ++symbol) { + // transition_table[state][symbol] -> new state + for (std::size_t state = 0; state < transition_table.size(); ++state) { + for (std::size_t symbol = 0; symbol < transition_table[state].size(); ++symbol) { CUDF_EXPECTS( - static_cast(translation_table[state][symbol]) <= + static_cast(transition_table[state][symbol]) <= std::numeric_limits::max(), "Target state index value exceeds value representable by the transition table's type"); init_data.transitions[symbol * MAX_NUM_STATES + state] = - static_cast(translation_table[state][symbol]); + static_cast(transition_table[state][symbol]); } } @@ -494,6 +494,10 @@ class dfa_device_view { // This is a value queried by the DFA simulation algorithm static constexpr int32_t MAX_NUM_STATES = NUM_STATES; + using OutSymbolT = typename TranslationTableT::OutSymbolT; + static constexpr int32_t MIN_TRANSLATED_OUT = TranslationTableT::MIN_TRANSLATED_OUT; + static constexpr int32_t MAX_TRANSLATED_OUT = TranslationTableT::MAX_TRANSLATED_OUT; + using SymbolGroupStorageT = std::conditional_t::value, typename SymbolGroupIdLookupT::TempStorage, typename cub::NullType>; @@ -542,24 +546,33 @@ class dfa_device_view { * @tparam OutSymbolT The symbol type being output * @tparam OutSymbolOffsetT Type sufficiently large to index into the lookup table of output * symbols - * @tparam MAX_NUM_SYMBOLS The maximum number of symbols being output by a single state transition + * @tparam MAX_NUM_SYMBOLS The maximum number of symbol groups supported by this lookup table * @tparam MAX_NUM_STATES The maximum number of states that this lookup table shall support + * @tparam MIN_TRANSLATED_OUT_ The minimum number of symbols being output by a single state + * transition + * @tparam MAX_TRANSLATED_OUT_ The maximum number of symbols being output by a single state + * transition * @tparam MAX_TABLE_SIZE The maximum number of items in the lookup table of output symbols - * be used. 
*/ -template class TransducerLookupTable { private: struct _TempStorage { OutSymbolOffsetT out_offset[MAX_NUM_STATES * MAX_NUM_SYMBOLS + 1]; - OutSymbolT out_symbols[MAX_TABLE_SIZE]; + OutSymbolT_ out_symbols[MAX_TABLE_SIZE]; }; public: + using OutSymbolT = OutSymbolT_; + static constexpr int32_t MIN_TRANSLATED_OUT = MIN_TRANSLATED_OUT_; + static constexpr int32_t MAX_TRANSLATED_OUT = MAX_TRANSLATED_OUT_; + using TempStorage = cub::Uninitialized<_TempStorage>; struct KernelParameter { @@ -567,6 +580,8 @@ class TransducerLookupTable { OutSymbolOffsetT, MAX_NUM_SYMBOLS, MAX_NUM_STATES, + MIN_TRANSLATED_OUT, + MAX_TRANSLATED_OUT, MAX_TABLE_SIZE>; OutSymbolOffsetT d_out_offsets[MAX_NUM_STATES * MAX_NUM_SYMBOLS + 1]; @@ -686,14 +701,19 @@ class TransducerLookupTable { * sequence of symbols that the finite-state transducer is supposed to output for each transition. * * @tparam MAX_TABLE_SIZE The maximum number of items in the lookup table of output symbols - * be used + * @tparam MIN_TRANSLATED_OUT The minimum number of symbols being output by a single state + * transition + * @tparam MAX_TRANSLATED_OUT The maximum number of symbols being output by a single state + * transition * @tparam OutSymbolT The symbol type being output - * @tparam MAX_NUM_SYMBOLS The maximum number of symbols being output by a single state transition + * @tparam MAX_NUM_SYMBOLS The maximum number of symbol groups supported by this lookup table * @tparam MAX_NUM_STATES The maximum number of states that this lookup table shall support * @param translation_table The translation table * @return A translation table of type `TransducerLookupTable`. */ template @@ -705,20 +725,30 @@ auto make_translation_table(std::array, MAX_N OutSymbolOffsetT, MAX_NUM_SYMBOLS, MAX_NUM_STATES, + MIN_TRANSLATED_OUT, + MAX_TRANSLATED_OUT, MAX_TABLE_SIZE>; return translation_table_t::InitDeviceTranslationTable(translation_table); } -template +template class TranslationOp { private: struct _TempStorage {}; public: + using OutSymbolT = OutSymbolT_; + static constexpr int32_t MIN_TRANSLATED_OUT = MIN_TRANSLATED_OUT_; + static constexpr int32_t MAX_TRANSLATED_OUT = MAX_TRANSLATED_OUT_; + using TempStorage = cub::Uninitialized<_TempStorage>; struct KernelParameter { - using LookupTableT = TranslationOp; + using LookupTableT = + TranslationOp; TranslationOpT translation_op; }; @@ -772,6 +802,10 @@ class TranslationOp { * * @tparam FunctorT A function object type that must implement two signatures: (1) with `(state_id, * match_id, read_symbol)` and (2) with `(state_id, match_id, relative_offset, read_symbol)` + * @tparam MIN_TRANSLATED_SYMBOLS The minimum number of translated output symbols for any given + * input symbol + * @tparam MAX_TRANSLATED_SYMBOLS The maximum number of translated output symbols for any given + * input symbol * @param map_op A function object that must implement two signatures: (1) with `(state_id, * match_id, read_symbol)` and (2) with `(state_id, match_id, relative_offset, read_symbol)`. 
* Invocations of the first signature, (1), must return the number of symbols that are emitted for @@ -779,10 +813,14 @@ class TranslationOp { * that transition, where `i` corresponds to `relative_offset` * @return A translation table of type `TranslationOp` */ -template +template auto make_translation_functor(FunctorT map_op) { - return TranslationOp::InitDeviceTranslationTable(map_op); + return TranslationOp:: + InitDeviceTranslationTable(map_op); } /** diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 6d2834206d4..62c3c5cd245 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -41,6 +41,7 @@ #include namespace cudf::io { + // Returns builder for csv_reader_options csv_reader_options_builder csv_reader_options::builder(source_info src) { @@ -472,6 +473,8 @@ chunked_orc_reader::chunked_orc_reader(std::size_t chunk_read_limit, { } +chunked_orc_reader::chunked_orc_reader() = default; + // This destructor destroys the internal reader instance. // Since the declaration of the internal `reader` object does not exist in the header, this // destructor needs to be defined in a separate source file which can access that object's @@ -492,6 +495,10 @@ table_with_metadata chunked_orc_reader::read_chunk() const return reader->read_chunk(); } +orc_chunked_writer::orc_chunked_writer() = default; + +orc_chunked_writer::~orc_chunked_writer() = default; + /** * @copydoc cudf::io::orc_chunked_writer::orc_chunked_writer */ @@ -618,6 +625,8 @@ std::unique_ptr> write_parquet(parquet_writer_options const return writer->close(options.get_column_chunks_file_paths()); } +chunked_parquet_reader::chunked_parquet_reader() = default; + /** * @copydoc cudf::io::chunked_parquet_reader::chunked_parquet_reader */ @@ -672,6 +681,8 @@ table_with_metadata chunked_parquet_reader::read_chunk() const return reader->read_chunk(); } +parquet_chunked_writer::parquet_chunked_writer() = default; + /** * @copydoc cudf::io::parquet_chunked_writer::parquet_chunked_writer */ @@ -686,6 +697,8 @@ parquet_chunked_writer::parquet_chunked_writer(chunked_parquet_writer_options co std::move(sinks), options, io_detail::single_write_mode::NO, stream); } +parquet_chunked_writer::~parquet_chunked_writer() = default; + /** * @copydoc cudf::io::parquet_chunked_writer::write */ diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 3e587768b11..17fa7abdffe 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -622,7 +622,7 @@ void make_device_json_column(device_span input, // map{parent_col_id, child_col_name}> = child_col_id, used for null value column tracking std::map, NodeIndexT> mapped_columns; // find column_ids which are values, but should be ignored in validity - std::vector ignore_vals(num_columns, 0); + auto ignore_vals = cudf::detail::make_host_vector(num_columns, stream); std::vector is_mixed_type_column(num_columns, 0); std::vector is_pruned(num_columns, 0); columns.try_emplace(parent_node_sentinel, std::ref(root)); @@ -812,7 +812,7 @@ void make_device_json_column(device_span input, return thrust::get<1>(a) < thrust::get<1>(b); }); // move columns data to device.
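// Illustrative sketch (not part of this patch): the std::vector -> cudf::detail::make_host_vector
// swaps in this and the surrounding hunks route host staging buffers through a host memory
// resource that can hand out page-locked (pinned) allocations. The payoff, shown here with plain
// CUDA runtime calls and hypothetical names, is that copies from pinned memory can run
// asynchronously, while pageable memory forces an extra internal staging copy:
#include <cstddef>
#include <cstring>
#include <cuda_runtime.h>

void stage_and_copy(char const* src, std::size_t n, char* d_dst, cudaStream_t stream)
{
  char* h_staging = nullptr;
  cudaMallocHost(&h_staging, n);  // page-locked host allocation
  std::memcpy(h_staging, src, n);
  // Truly asynchronous only because h_staging is pinned
  cudaMemcpyAsync(d_dst, h_staging, n, cudaMemcpyHostToDevice, stream);
  cudaStreamSynchronize(stream);  // the copy must finish before freeing
  cudaFreeHost(h_staging);
}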
- std::vector columns_data(num_columns); + auto columns_data = cudf::detail::make_host_vector(num_columns, stream); for (auto& [col_id, col_ref] : columns) { if (col_id == parent_node_sentinel) continue; auto& col = col_ref.get(); diff --git a/cpp/src/io/json/json_normalization.cu b/cpp/src/io/json/json_normalization.cu index ca56a12eb36..760b2214365 100644 --- a/cpp/src/io/json/json_normalization.cu +++ b/cpp/src/io/json/json_normalization.cu @@ -302,11 +302,14 @@ void normalize_single_quotes(datasource::owning_buffer( + normalize_quotes::TransduceToNormalizedQuotes{}), + stream); rmm::device_uvector outbuf(indata.size() * 2, stream, mr); rmm::device_scalar outbuf_size(stream, mr); @@ -327,11 +330,14 @@ void normalize_whitespace(datasource::owning_buffer rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto parser = fst::detail::make_fst( - fst::detail::make_symbol_group_lut(normalize_whitespace::wna_sgs), - fst::detail::make_transition_table(normalize_whitespace::wna_state_tt), - fst::detail::make_translation_functor(normalize_whitespace::TransduceToNormalizedWS{}), - stream); + static constexpr std::int32_t min_out = 0; + static constexpr std::int32_t max_out = 2; + auto parser = + fst::detail::make_fst(fst::detail::make_symbol_group_lut(normalize_whitespace::wna_sgs), + fst::detail::make_transition_table(normalize_whitespace::wna_state_tt), + fst::detail::make_translation_functor( + normalize_whitespace::TransduceToNormalizedWS{}), + stream); rmm::device_uvector outbuf(indata.size(), stream, mr); rmm::device_scalar outbuf_size(stream, mr); diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index e12892a2d50..20c143f66c7 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -28,10 +29,12 @@ #include // Forward declaration of parse_options from parsing_utils.cuh -namespace cudf::io { +namespace cudf { +namespace io { + struct parse_options; -} -namespace cudf::io::json { + +namespace json { /** * @brief Struct that encapsulates all information of a columnar tree representation.
@@ -201,6 +204,7 @@ namespace detail { * @param[in] delimiter Specifies the delimiter to use as separator for JSON lines input * @param[in] stream The cuda stream to dispatch GPU kernels to */ +CUDF_EXPORT void get_stack_context(device_span json_in, SymbolT* d_top_of_stack, stack_behavior_t stack_behavior, @@ -216,6 +220,7 @@ void get_stack_context(device_span json_in, * @param stream The cuda stream to dispatch GPU kernels to * @return Returns the post-processed token stream */ +CUDF_EXPORT std::pair, rmm::device_uvector> process_token_stream( device_span tokens, device_span token_indices, @@ -232,6 +237,7 @@ std::pair, rmm::device_uvector> pr * @return A tree representation of the input JSON string as vectors of node type, parent index, * level, begin index, and end index in the input JSON string */ +CUDF_EXPORT tree_meta_t get_tree_representation(device_span tokens, device_span token_indices, bool is_strict_nested_boundaries, @@ -251,6 +257,7 @@ tree_meta_t get_tree_representation(device_span tokens, * @param mr Optional, resource with which to allocate * @return A tuple of the output column indices and the row offsets within each column for each node */ +CUDF_EXPORT std::tuple, rmm::device_uvector> records_orient_tree_traversal(device_span d_input, tree_meta_t const& d_tree, @@ -315,6 +322,7 @@ cudf::io::parse_options parsing_options(cudf::io::json_reader_options const& opt * @param mr Optional, resource with which to allocate * @return The data parsed from the given JSON input */ +CUDF_EXPORT table_with_metadata device_parse_nested_json(device_span input, cudf::io::json_reader_options const& options, rmm::cuda_stream_view stream, @@ -348,4 +356,6 @@ struct path_from_tree { } // namespace detail -} // namespace cudf::io::json +} // namespace json +} // namespace io +} // namespace cudf diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index a007754ef4f..1e484d74679 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -1455,11 +1455,14 @@ void get_stack_context(device_span json_in, constexpr auto max_translation_table_size = to_stack_op::NUM_SYMBOL_GROUPS * to_stack_op::TT_NUM_STATES; - auto json_to_stack_ops_fst = fst::detail::make_fst( + static constexpr auto min_translated_out = 0; + static constexpr auto max_translated_out = 1; + auto json_to_stack_ops_fst = fst::detail::make_fst( fst::detail::make_symbol_group_lut(to_stack_op::get_sgid_lut(delimiter)), fst::detail::make_transition_table(to_stack_op::get_transition_table(stack_behavior)), - fst::detail::make_translation_table( - to_stack_op::get_translation_table(stack_behavior)), + fst::detail:: + make_translation_table( + to_stack_op::get_translation_table(stack_behavior)), stream); // "Search" for relevant occurrence of brackets and braces that indicate the beginning/end @@ -1507,11 +1510,12 @@ std::pair, rmm::device_uvector> pr // Instantiate FST for post-processing the token stream to remove all tokens that belong to an // invalid JSON line token_filter::UnwrapTokenFromSymbolOp sgid_op{}; - auto filter_fst = - fst::detail::make_fst(fst::detail::make_symbol_group_lut(token_filter::symbol_groups, sgid_op), - fst::detail::make_transition_table(token_filter::transition_table), - fst::detail::make_translation_functor(token_filter::TransduceToken{}), - stream); + using symbol_t = thrust::tuple; + auto filter_fst = fst::detail::make_fst( + fst::detail::make_symbol_group_lut(token_filter::symbol_groups, sgid_op), + 
fst::detail::make_transition_table(token_filter::transition_table), + fst::detail::make_translation_functor(token_filter::TransduceToken{}), + stream); auto const mr = rmm::mr::get_current_device_resource(); rmm::device_scalar d_num_selected_tokens(stream, mr); @@ -1598,7 +1602,8 @@ std::pair, rmm::device_uvector> ge fst::detail::make_symbol_group_lookup_op( fix_stack_of_excess_chars::SymbolPairToSymbolGroupId{delimiter}), fst::detail::make_transition_table(fix_stack_of_excess_chars::transition_table), - fst::detail::make_translation_functor(fix_stack_of_excess_chars::TransduceInputOp{}), + fst::detail::make_translation_functor( + fix_stack_of_excess_chars::TransduceInputOp{}), stream); fix_stack_of_excess_chars.Transduce(zip_in, static_cast(json_in.size()), @@ -1619,7 +1624,7 @@ std::pair, rmm::device_uvector> ge auto json_to_tokens_fst = fst::detail::make_fst( fst::detail::make_symbol_group_lookup_op(tokenizer_pda::PdaSymbolToSymbolGroupId{delimiter}), fst::detail::make_transition_table(tokenizer_pda::get_transition_table(format)), - fst::detail::make_translation_table( + fst::detail::make_translation_table( tokenizer_pda::get_translation_table(recover_from_error)), stream); @@ -1698,10 +1703,8 @@ void make_json_column(json_column& root_column, auto const [d_tokens_gpu, d_token_indices_gpu] = get_token_stream(d_input, options, stream, mr); // Copy the JSON tokens to the host - thrust::host_vector tokens = - cudf::detail::make_host_vector_async(d_tokens_gpu, stream); - thrust::host_vector token_indices_gpu = - cudf::detail::make_host_vector_async(d_token_indices_gpu, stream); + auto tokens = cudf::detail::make_host_vector_async(d_tokens_gpu, stream); + auto token_indices_gpu = cudf::detail::make_host_vector_async(d_token_indices_gpu, stream); // Make sure tokens have been copied to the host stream.synchronize(); diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu index 9cd39038348..590f70864b1 100644 --- a/cpp/src/io/json/read_json.cu +++ b/cpp/src/io/json/read_json.cu @@ -78,10 +78,9 @@ device_span ingest_raw_input(device_span buffer, auto constexpr num_delimiter_chars = 1; if (compression == compression_type::NONE) { - std::vector delimiter_map{}; + auto delimiter_map = cudf::detail::make_empty_host_vector(sources.size(), stream); std::vector prefsum_source_sizes(sources.size()); std::vector> h_buffers; - delimiter_map.reserve(sources.size()); size_t bytes_read = 0; std::transform_inclusive_scan(sources.begin(), sources.end(), @@ -148,20 +147,12 @@ device_span ingest_raw_input(device_span buffer, return buffer.first(uncomp_data.size()); } -size_type find_first_delimiter_in_chunk(host_span> sources, - json_reader_options const& reader_opts, - char const delimiter, - rmm::cuda_stream_view stream) +size_t estimate_size_per_subchunk(size_t chunk_size) { - auto total_source_size = sources_size(sources, 0, 0) + (sources.size() - 1); - rmm::device_uvector buffer(total_source_size, stream); - auto readbufspan = ingest_raw_input(buffer, - sources, - reader_opts.get_compression(), - reader_opts.get_byte_range_offset(), - reader_opts.get_byte_range_size(), - stream); - return find_first_delimiter(readbufspan, '\n', stream); + auto geometric_mean = [](double a, double b) { return std::sqrt(a * b); }; + // NOTE: heuristic for choosing subchunk size: geometric mean of minimum subchunk size (set to + // 10kb) and the byte range size + return geometric_mean(std::ceil((double)chunk_size / num_subchunks), min_subchunk_size); } /** @@ -183,7 +174,6 @@ datasource::owning_buffer> 
get_record_range_raw_input( rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - auto geometric_mean = [](double a, double b) { return std::sqrt(a * b); }; size_t const total_source_size = sources_size(sources, 0, 0); auto constexpr num_delimiter_chars = 1; @@ -198,17 +188,8 @@ datasource::owning_buffer> get_record_range_raw_input( auto should_load_all_sources = !chunk_size || chunk_size >= total_source_size - chunk_offset; chunk_size = should_load_all_sources ? total_source_size - chunk_offset : chunk_size; - // Some magic numbers - constexpr int num_subchunks = 10; // per chunk_size - constexpr size_t min_subchunk_size = 10000; - int const num_subchunks_prealloced = should_load_all_sources ? 0 : 3; - constexpr int estimated_compression_ratio = 4; - - // NOTE: heuristic for choosing subchunk size: geometric mean of minimum subchunk size (set to - // 10kb) and the byte range size - - size_t const size_per_subchunk = - geometric_mean(std::ceil((double)chunk_size / num_subchunks), min_subchunk_size); + int const num_subchunks_prealloced = should_load_all_sources ? 0 : max_subchunks_prealloced; + size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size); // The allocation for single source compressed input is estimated by assuming a ~4:1 // compression ratio. For uncompressed inputs, we can get a better estimate using the idea @@ -308,67 +289,78 @@ table_with_metadata read_json(host_span> sources, "Multiple inputs are supported only for JSON Lines format"); } - std::for_each(sources.begin(), sources.end(), [](auto const& source) { - CUDF_EXPECTS(source->size() < std::numeric_limits::max(), - "The size of each source file must be less than INT_MAX bytes"); - }); - - constexpr size_t batch_size_ub = std::numeric_limits::max(); - size_t const chunk_offset = reader_opts.get_byte_range_offset(); + /* + * The batched JSON reader enforces that the size of each batch is at most INT_MAX + * bytes (~2.14GB). Batches are defined to be byte range chunks - characterized by + * chunk offset and chunk size - that may span across multiple source files. + * Note that the batched reader does not work for compressed inputs or for regular + * JSON inputs. + */ size_t const total_source_size = sources_size(sources, 0, 0); size_t chunk_offset = reader_opts.get_byte_range_offset(); size_t chunk_size = reader_opts.get_byte_range_size(); - chunk_size = !chunk_size ? sources_size(sources, 0, 0) : chunk_size; - - // Identify the position of starting source file from which to begin batching based on - // byte range offset. If the offset is larger than the sum of all source - // sizes, then start_source is total number of source files i.e. no file is read - size_t const start_source = [&]() { - size_t sum = 0; + chunk_size = !chunk_size ? total_source_size - chunk_offset + : std::min(chunk_size, total_source_size - chunk_offset); + + size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size); + size_t const batch_size_ub = + std::numeric_limits::max() - (max_subchunks_prealloced * size_per_subchunk); + + /* + * Identify the position (zero-indexed) of starting source file from which to begin + * batching based on byte range offset. If the offset is larger than the sum of all + * source sizes, then start_source is total number of source files i.e. 
no file is + * read + */ + + // Prefix sum of source file sizes + size_t pref_source_size = 0; + // Starting source file from which to begin batching, evaluated using byte range offset + size_t const start_source = [chunk_offset, &sources, &pref_source_size]() { for (size_t src_idx = 0; src_idx < sources.size(); ++src_idx) { - if (sum + sources[src_idx]->size() > chunk_offset) return src_idx; - sum += sources[src_idx]->size(); + if (pref_source_size + sources[src_idx]->size() > chunk_offset) { return src_idx; } + pref_source_size += sources[src_idx]->size(); } return sources.size(); }(); - - // Construct batches of source files, with starting position of batches indicated by - // batch_positions. The size of each batch i.e. the sum of sizes of the source files in the batch - // is capped at INT_MAX bytes. - size_t cur_size = 0; - std::vector batch_positions; - std::vector batch_sizes; - batch_positions.push_back(0); - for (size_t i = start_source; i < sources.size(); i++) { - cur_size += sources[i]->size(); - if (cur_size >= batch_size_ub) { - batch_positions.push_back(i); - batch_sizes.push_back(cur_size - sources[i]->size()); - cur_size = sources[i]->size(); + /* + * Construct batches of byte ranges spanning source files, with the starting position of batches + * indicated by `batch_offsets`. `pref_bytes_size` gives the bytes position from which the current + * batch begins, and `end_bytes_size` gives the terminal bytes position after which reading + * stops. + */ + size_t pref_bytes_size = chunk_offset; + size_t end_bytes_size = chunk_offset + chunk_size; + std::vector batch_offsets{pref_bytes_size}; + for (size_t i = start_source; i < sources.size() && pref_bytes_size < end_bytes_size;) { + pref_source_size += sources[i]->size(); + // If the current source file can subsume multiple batches, we split the file until the + // boundary of the last batch exceeds the end of the file (indexed by `pref_source_size`) + while (pref_bytes_size < end_bytes_size && + pref_source_size >= std::min(pref_bytes_size + batch_size_ub, end_bytes_size)) { + auto next_batch_size = std::min(batch_size_ub, end_bytes_size - pref_bytes_size); + batch_offsets.push_back(batch_offsets.back() + next_batch_size); + pref_bytes_size += next_batch_size; } + i++; } - batch_positions.push_back(sources.size()); - batch_sizes.push_back(cur_size); - - // If there is a single batch, then we can directly return the table without the - // unnecessary concatenate - if (batch_sizes.size() == 1) return read_batch(sources, reader_opts, stream, mr); + /* + * If there is a single batch, then we can directly return the table without the + * unnecessary concatenation. The size of batch_offsets is 1 if all sources are empty, + * or if end_bytes_size is larger than total_source_size. + */ + if (batch_offsets.size() <= 2) return read_batch(sources, reader_opts, stream, mr); std::vector partial_tables; json_reader_options batched_reader_opts{reader_opts}; - // Dispatch individual batches to read_batch and push the resulting table into // partial_tables array. Note that the reader options need to be updated for each // batch to adjust byte range offset and byte range size. 
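// Illustrative sketch (not part of this patch): the batch construction above, reduced to its
// core: split the byte range [offset, offset + size) into consecutive batches of at most
// `batch_ub` bytes, recorded as boundary offsets (N batches yield N + 1 offsets). Names are
// hypothetical:
#include <algorithm>
#include <cstddef>
#include <vector>

std::vector<std::size_t> make_batch_offsets(std::size_t offset, std::size_t size,
                                            std::size_t batch_ub)
{
  std::vector<std::size_t> offsets{offset};
  std::size_t const end = offset + size;
  while (offsets.back() < end) {
    offsets.push_back(offsets.back() + std::min(batch_ub, end - offsets.back()));
  }
  return offsets;  // batch i spans [offsets[i], offsets[i + 1])
}
// e.g. splitting 5 GB with a ~2.14 GB (INT_MAX) cap yields three batches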
- for (size_t i = 0; i < batch_sizes.size(); i++) { - batched_reader_opts.set_byte_range_size(std::min(batch_sizes[i], chunk_size)); - partial_tables.emplace_back(read_batch( - host_span>(sources.begin() + batch_positions[i], - batch_positions[i + 1] - batch_positions[i]), - batched_reader_opts, - stream, - rmm::mr::get_current_device_resource())); - if (chunk_size <= batch_sizes[i]) break; - chunk_size -= batch_sizes[i]; - batched_reader_opts.set_byte_range_offset(0); + for (size_t i = 0; i < batch_offsets.size() - 1; i++) { + batched_reader_opts.set_byte_range_offset(batch_offsets[i]); + batched_reader_opts.set_byte_range_size(batch_offsets[i + 1] - batch_offsets[i]); + partial_tables.emplace_back( + read_batch(sources, batched_reader_opts, stream, rmm::mr::get_current_device_resource())); } auto expects_schema_equality = diff --git a/cpp/src/io/json/read_json.hpp b/cpp/src/io/json/read_json.hpp index 0c30b4cad46..32de4ebabfa 100644 --- a/cpp/src/io/json/read_json.hpp +++ b/cpp/src/io/json/read_json.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,21 @@ #include -namespace cudf::io::json::detail { +namespace CUDF_EXPORT cudf { +namespace io::json::detail { + +// Some magic numbers +constexpr int num_subchunks = 10; // per chunk_size +constexpr size_t min_subchunk_size = 10000; +constexpr int estimated_compression_ratio = 4; +constexpr int max_subchunks_prealloced = 3; + +device_span ingest_raw_input(device_span buffer, + host_span> sources, + compression_type compression, + size_t range_offset, + size_t range_size, + rmm::cuda_stream_view stream); table_with_metadata read_json(host_span> sources, json_reader_options const& reader_opts, @@ -38,9 +53,5 @@ size_type find_first_delimiter(device_span d_data, char const delimiter, rmm::cuda_stream_view stream); -size_type find_first_delimiter_in_chunk(host_span> sources, - json_reader_options const& reader_opts, - char const delimiter, - rmm::cuda_stream_view stream); - -} // namespace cudf::io::json::detail +} // namespace io::json::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 8e20505d3ff..e3b9a048be8 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -492,11 +492,17 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const& if (num_stripes == 0) return; auto const num_columns = chunks.size().second; - std::vector> prefix_sums_to_update; + auto const num_struct_cols = + std::count_if(chunks[0].begin(), chunks[0].end(), [](auto const& chunk) { + return chunk.type_kind == STRUCT; + }); + auto prefix_sums_to_update = + cudf::detail::make_empty_host_vector>(num_struct_cols, + stream); for (auto col_idx = 0ul; col_idx < num_columns; ++col_idx) { // Null counts sums are only needed for children of struct columns if (chunks[0][col_idx].type_kind == STRUCT) { - prefix_sums_to_update.emplace_back(col_idx, d_prefix_sums + num_stripes * col_idx); + prefix_sums_to_update.push_back({col_idx, d_prefix_sums + num_stripes * col_idx}); } } auto const d_prefix_sums_to_update = cudf::detail::make_device_uvector_async( diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 805959327ac..80f32512b98 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -1417,8 +1417,8 @@ void decimal_sizes_to_offsets(device_2dspan rg_bounds, if (rg_bounds.count() == 0) return; // Convert map to a vector of views of the `elem_sizes` device buffers - 
std::vector h_sizes; - h_sizes.reserve(elem_sizes.size()); + auto h_sizes = + cudf::detail::make_empty_host_vector(elem_sizes.size(), stream); std::transform(elem_sizes.begin(), elem_sizes.end(), std::back_inserter(h_sizes), [](auto& p) { return decimal_column_element_sizes{p.first, p.second}; }); diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 4cb20bb7518..f3b8cfbc836 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -444,14 +444,17 @@ namespace { */ file_segmentation calculate_segmentation(host_span columns, hostdevice_2dvector&& rowgroup_bounds, - stripe_size_limits max_stripe_size) + stripe_size_limits max_stripe_size, + rmm::cuda_stream_view stream) { - std::vector infos; - auto const num_rowgroups = rowgroup_bounds.size().first; - size_t stripe_start = 0; - size_t stripe_bytes = 0; - size_type stripe_rows = 0; - for (size_t rg_idx = 0; rg_idx < num_rowgroups; ++rg_idx) { + // Number of stripes is not known in advance. Only reserve a single element to use pinned memory + // resource if at all enabled. + auto infos = cudf::detail::make_empty_host_vector(1, stream); + size_type const num_rowgroups = rowgroup_bounds.size().first; + size_type stripe_start = 0; + size_t stripe_bytes = 0; + size_type stripe_rows = 0; + for (size_type rg_idx = 0; rg_idx < num_rowgroups; ++rg_idx) { auto const rowgroup_total_bytes = std::accumulate(columns.begin(), columns.end(), 0ul, [&](size_t total_size, auto const& col) { auto const rows = rowgroup_bounds[rg_idx][col.index()].size(); @@ -470,7 +473,9 @@ file_segmentation calculate_segmentation(host_span column // Check if adding the current rowgroup to the stripe will make the stripe too large or long if ((rg_idx > stripe_start) && (stripe_bytes + rowgroup_total_bytes > max_stripe_size.bytes || stripe_rows + rowgroup_rows_max > max_stripe_size.rows)) { - infos.emplace_back(infos.size(), stripe_start, rg_idx - stripe_start); + infos.push_back(stripe_rowgroups{static_cast(infos.size()), + stripe_start, + static_cast(rg_idx - stripe_start)}); stripe_start = rg_idx; stripe_bytes = 0; stripe_rows = 0; @@ -479,7 +484,9 @@ file_segmentation calculate_segmentation(host_span column stripe_bytes += rowgroup_total_bytes; stripe_rows += rowgroup_rows_max; if (rg_idx + 1 == num_rowgroups) { - infos.emplace_back(infos.size(), stripe_start, num_rowgroups - stripe_start); + infos.push_back(stripe_rowgroups{static_cast(infos.size()), + stripe_start, + static_cast(num_rowgroups - stripe_start)}); } } @@ -1336,7 +1343,7 @@ encoded_footer_statistics finish_statistic_blobs(Footer const& footer, if (num_file_blobs == 0) { return {}; } // Create empty file stats and merge groups - std::vector h_stat_chunks(num_file_blobs); + auto h_stat_chunks = cudf::detail::make_host_vector(num_file_blobs, stream); cudf::detail::hostdevice_vector stats_merge(num_file_blobs, stream); // Fill in stats_merge and stat_chunks on the host for (auto i = 0u; i < num_file_blobs; ++i) { @@ -1677,39 +1684,39 @@ struct pushdown_null_masks { // Owning vector for masks in device memory std::vector> data; // Pointers to pushdown masks in device memory. Can be same for multiple columns. 
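// Illustrative sketch (not part of this patch): the pushdown-mask selection logic below reuses an
// existing mask when only one of {column null mask, parent pushdown mask} exists, and
// materializes their bitwise AND when both do (in the patch this runs on device via
// thrust::transform). A host-side sketch of the combining step, assuming equal-length word
// arrays; names are hypothetical:
#include <cstdint>
#include <vector>

std::vector<std::uint32_t> and_masks(std::vector<std::uint32_t> const& null_mask,
                                     std::vector<std::uint32_t> const& parent_mask)
{
  std::vector<std::uint32_t> combined(null_mask.size());
  for (std::size_t w = 0; w < combined.size(); ++w) {
    combined[w] = null_mask[w] & parent_mask[w];  // a row is valid only if both agree
  }
  return combined;
}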
- std::vector masks; + cudf::detail::host_vector masks; }; pushdown_null_masks init_pushdown_null_masks(orc_table_view& orc_table, rmm::cuda_stream_view stream) { - std::vector mask_ptrs; - mask_ptrs.reserve(orc_table.num_columns()); + auto mask_ptrs = + cudf::detail::make_empty_host_vector(orc_table.num_columns(), stream); std::vector> pd_masks; for (auto const& col : orc_table.columns) { // Leaf columns don't need pushdown masks if (col.num_children() == 0) { - mask_ptrs.emplace_back(nullptr); + mask_ptrs.push_back({nullptr}); continue; } auto const parent_pd_mask = col.is_child() ? mask_ptrs[col.parent_index()] : nullptr; auto const null_mask = col.null_mask(); if (null_mask == nullptr and parent_pd_mask == nullptr) { - mask_ptrs.emplace_back(nullptr); + mask_ptrs.push_back({nullptr}); continue; } if (col.orc_kind() == STRUCT) { if (null_mask != nullptr and parent_pd_mask == nullptr) { // Reuse own null mask - mask_ptrs.emplace_back(null_mask); + mask_ptrs.push_back(null_mask); } else if (null_mask == nullptr and parent_pd_mask != nullptr) { // Reuse parent's pushdown mask - mask_ptrs.emplace_back(parent_pd_mask); + mask_ptrs.push_back(parent_pd_mask); } else { // Both are nullable, allocate new pushdown mask pd_masks.emplace_back(num_bitmask_words(col.size()), stream); - mask_ptrs.emplace_back(pd_masks.back().data()); + mask_ptrs.push_back({pd_masks.back().data()}); thrust::transform(rmm::exec_policy(stream), null_mask, @@ -1724,7 +1731,7 @@ pushdown_null_masks init_pushdown_null_masks(orc_table_view& orc_table, auto const child_col = orc_table.column(col.child_begin()[0]); // pushdown mask applies to child column(s); use the child column size pd_masks.emplace_back(num_bitmask_words(child_col.size()), stream); - mask_ptrs.emplace_back(pd_masks.back().data()); + mask_ptrs.push_back({pd_masks.back().data()}); pushdown_lists_null_mask(col, orc_table.d_columns, parent_pd_mask, pd_masks.back(), stream); } } @@ -1815,8 +1822,7 @@ orc_table_view make_orc_table_view(table_view const& table, append_orc_column(table.column(col_idx), nullptr, table_meta.column_metadata[col_idx]); } - std::vector type_kinds; - type_kinds.reserve(orc_columns.size()); + auto type_kinds = cudf::detail::make_empty_host_vector(orc_columns.size(), stream); std::transform( orc_columns.cbegin(), orc_columns.cend(), std::back_inserter(type_kinds), [](auto& orc_column) { return orc_column.orc_kind(); @@ -2299,7 +2305,7 @@ auto convert_table_to_orc_data(table_view const& input, // Decide stripe boundaries based on rowgroups and char counts auto segmentation = - calculate_segmentation(orc_table.columns, std::move(rowgroup_bounds), max_stripe_size); + calculate_segmentation(orc_table.columns, std::move(rowgroup_bounds), max_stripe_size, stream); auto stripe_dicts = build_dictionaries(orc_table, segmentation, sort_dictionaries, stream); auto dec_chunk_sizes = decimal_chunk_sizes(orc_table, segmentation, stream); diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index bd082befe0c..f5f8b3cfed9 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -78,10 +78,9 @@ struct orc_table_view { * Provides a container-like interface to iterate over rowgroup indices. 
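// Illustrative sketch (not part of this patch): the container-like interface mentioned above is a
// pair of counting iterators over [first, first + size), so iterating a stripe's rowgroup indices
// allocates nothing. A standalone version of the same idiom with a hypothetical struct name:
#include <thrust/iterator/counting_iterator.h>

struct index_range {
  int first, size;
  [[nodiscard]] auto cbegin() const { return thrust::make_counting_iterator(first); }
  [[nodiscard]] auto cend() const { return thrust::make_counting_iterator(first + size); }
};
// for (auto it = r.cbegin(); it != r.cend(); ++it) visits first .. first + size - 1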
*/ struct stripe_rowgroups { - uint32_t id; // stripe id - uint32_t first; // first rowgroup in the stripe - uint32_t size; // number of rowgroups in the stripe - stripe_rowgroups(uint32_t id, uint32_t first, uint32_t size) : id{id}, first{first}, size{size} {} + size_type id; // stripe id + size_type first; // first rowgroup in the stripe + size_type size; // number of rowgroups in the stripe [[nodiscard]] auto cbegin() const { return thrust::make_counting_iterator(first); } [[nodiscard]] auto cend() const { return thrust::make_counting_iterator(first + size); } }; @@ -125,7 +124,7 @@ class orc_streams { */ struct file_segmentation { hostdevice_2dvector rowgroups; - std::vector stripes; + cudf::detail::host_vector stripes; auto num_rowgroups() const noexcept { return rowgroups.size().first; } auto num_stripes() const noexcept { return stripes.size(); } diff --git a/cpp/src/io/parquet/compact_protocol_reader.hpp b/cpp/src/io/parquet/compact_protocol_reader.hpp index bcc9adfc8c0..12c24e2b848 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.hpp +++ b/cpp/src/io/parquet/compact_protocol_reader.hpp @@ -18,6 +18,8 @@ #include "parquet.hpp" +#include + #include #include #include @@ -25,7 +27,8 @@ #include #include -namespace cudf::io::parquet::detail { +namespace CUDF_EXPORT cudf { +namespace io::parquet::detail { /** * @brief Class for parsing Parquet's Thrift Compact Protocol encoded metadata @@ -149,4 +152,5 @@ class CompactProtocolReader { friend class parquet_field_struct_blob; }; -} // namespace cudf::io::parquet::detail +} // namespace io::parquet::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp index 11f4a00ee8b..481c1e9fcdd 100644 --- a/cpp/src/io/parquet/predicate_pushdown.cpp +++ b/cpp/src/io/parquet/predicate_pushdown.cpp @@ -141,11 +141,11 @@ struct stats_caster { // Local struct to hold host columns struct host_column { // using thrust::host_vector because std::vector uses bitmap instead of byte per bool. - thrust::host_vector val; + cudf::detail::host_vector val; std::vector null_mask; cudf::size_type null_count = 0; - host_column(size_type total_row_groups) - : val(total_row_groups), + host_column(size_type total_row_groups, rmm::cuda_stream_view stream) + : val{cudf::detail::make_host_vector(total_row_groups, stream)}, null_mask( cudf::util::div_rounding_up_safe( cudf::bitmask_allocation_size_bytes(total_row_groups), sizeof(bitmask_type)), @@ -170,8 +170,14 @@ struct stats_caster { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - std::vector chars{}; - std::vector offsets(1, 0); + auto const total_char_count = std::accumulate( + host_strings.begin(), host_strings.end(), 0, [](auto sum, auto const& str) { + return sum + str.size_bytes(); + }); + auto chars = cudf::detail::make_empty_host_vector(total_char_count, stream); + auto offsets = + cudf::detail::make_empty_host_vector(host_strings.size() + 1, stream); + offsets.push_back(0); for (auto const& str : host_strings) { auto tmp = str.empty() ? 
std::string_view{} : std::string_view(str.data(), str.size_bytes()); @@ -206,8 +212,8 @@ struct stats_caster { null_count); } }; // local struct host_column - host_column min(total_row_groups); - host_column max(total_row_groups); + host_column min(total_row_groups, stream); + host_column max(total_row_groups, stream); size_type stats_idx = 0; for (size_t src_idx = 0; src_idx < row_group_indices.size(); ++src_idx) { for (auto const rg_idx : row_group_indices[src_idx]) { diff --git a/cpp/src/io/parquet/reader.cpp b/cpp/src/io/parquet/reader.cpp index 8dfd68cd9b8..65dafb568c0 100644 --- a/cpp/src/io/parquet/reader.cpp +++ b/cpp/src/io/parquet/reader.cpp @@ -41,6 +41,11 @@ chunked_reader::chunked_reader(std::size_t chunk_read_limit, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { + // TODO: skip_rows not currently supported in chunked parquet reader until + // https://github.com/rapidsai/cudf/issues/16186 is closed + CUDF_EXPECTS(options.get_skip_rows() == 0, + "skip_rows > 0 is not currently supported in the Chunked Parquet reader."); + _impl = std::make_unique( chunk_read_limit, pass_read_limit, std::move(sources), options, stream, mr); } diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index f705f6626e7..68ec61ead0a 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -26,6 +26,7 @@ #include +#include #include #include @@ -549,7 +550,17 @@ table_with_metadata reader::impl::read_chunk_internal(read_mode mode) out_columns.reserve(_output_buffers.size()); // no work to do (this can happen on the first pass if we have no rows to read) - if (!has_more_work()) { return finalize_output(out_metadata, out_columns); } + if (!has_more_work()) { + // Check if number of rows per source should be included in output metadata. + if (include_output_num_rows_per_source()) { + // Empty dataframe case: Simply initialize to a list of zeros + out_metadata.num_rows_per_source = + std::vector(_file_itm_data.num_rows_per_source.size(), 0); + } + + // Finalize output + return finalize_output(mode, out_metadata, out_columns); + } auto& pass = *_pass_itm_data; auto& subpass = *pass.subpass; @@ -585,11 +596,80 @@ table_with_metadata reader::impl::read_chunk_internal(read_mode mode) } } + // Check if number of rows per source should be included in output metadata. + if (include_output_num_rows_per_source()) { + // For chunked reading, compute the output number of rows per source + if (mode == read_mode::CHUNKED_READ) { + out_metadata.num_rows_per_source = + calculate_output_num_rows_per_source(read_info.skip_rows, read_info.num_rows); + } + // Simply move the number of rows per file if reading all at once + else { + // Move is okay here as we are reading in one go. + out_metadata.num_rows_per_source = std::move(_file_itm_data.num_rows_per_source); + } + } + // Add empty columns if needed. Filter output columns based on filter. - return finalize_output(out_metadata, out_columns); + return finalize_output(mode, out_metadata, out_columns); +} + +std::vector reader::impl::calculate_output_num_rows_per_source(size_t const chunk_start_row, + size_t const chunk_num_rows) +{ + // Handle base cases. 
+  if (_file_itm_data.num_rows_per_source.size() == 0) {
+    return {};
+  } else if (_file_itm_data.num_rows_per_source.size() == 1) {
+    return {chunk_num_rows};
+  }
+
+  std::vector<size_t> num_rows_per_source(_file_itm_data.num_rows_per_source.size(), 0);
+
+  // Subtract global skip rows from the start_row as we took care of that when computing
+  // _file_itm_data.num_rows_per_source
+  auto const start_row = chunk_start_row - _file_itm_data.global_skip_rows;
+  auto const end_row   = start_row + chunk_num_rows;
+  CUDF_EXPECTS(start_row <= end_row and end_row <= _file_itm_data.global_num_rows,
+               "Encountered invalid output chunk row bounds.");
+
+  // Copy reference to a const local variable for better readability
+  auto const& partial_sum_nrows_source = _file_itm_data.exclusive_sum_num_rows_per_source;
+
+  // Binary search start_row and end_row in exclusive_sum_num_rows_per_source vector
+  auto const start_iter =
+    std::upper_bound(partial_sum_nrows_source.cbegin(), partial_sum_nrows_source.cend(), start_row);
+  auto const end_iter =
+    (end_row == _file_itm_data.global_skip_rows + _file_itm_data.global_num_rows)
+      ? partial_sum_nrows_source.cend() - 1
+      : std::upper_bound(start_iter, partial_sum_nrows_source.cend(), end_row);
+
+  // Compute the array offset index for both iterators
+  auto const start_idx = std::distance(partial_sum_nrows_source.cbegin(), start_iter);
+  auto const end_idx   = std::distance(partial_sum_nrows_source.cbegin(), end_iter);
+
+  CUDF_EXPECTS(start_idx <= end_idx,
+               "Encountered invalid source file indices for output chunk row bounds");
+
+  // If the entire chunk is from the same source file, then the count is simply num_rows
+  if (start_idx == end_idx) {
+    num_rows_per_source[start_idx] = chunk_num_rows;
+  } else {
+    // Compute the number of rows from the first source file
+    num_rows_per_source[start_idx] = partial_sum_nrows_source[start_idx] - start_row;
+    // Compute the number of rows from the last source file
+    num_rows_per_source[end_idx] = end_row - partial_sum_nrows_source[end_idx - 1];
+    // Simply copy the number of rows for each source in range: (start_idx, end_idx)
+    std::copy(_file_itm_data.num_rows_per_source.cbegin() + start_idx + 1,
+              _file_itm_data.num_rows_per_source.cbegin() + end_idx,
+              num_rows_per_source.begin() + start_idx + 1);
+  }
+
+  return num_rows_per_source;
 }
 
-table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata,
+table_with_metadata reader::impl::finalize_output(read_mode mode,
+                                                  table_metadata& out_metadata,
                                                   std::vector<std::unique_ptr<column>>& out_columns)
 {
   // Create empty columns as needed (this can happen if we've ended up with no actual data to read)
diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp
index 3b8e80a29e6..5e3cc4301f9 100644
--- a/cpp/src/io/parquet/reader_impl.hpp
+++ b/cpp/src/io/parquet/reader_impl.hpp
@@ -262,11 +262,13 @@ class reader::impl {
    * @brief Finalize the output table by adding empty columns for the non-selected columns in
    * schema.
    *
+   * @param read_mode Value indicating if the data sources are read all at once or chunk by chunk
+   * @param out_metadata The output table metadata
    * @param out_columns The columns for building the output table
    * @return The output table along with columns' metadata
    */
-  table_with_metadata finalize_output(table_metadata& out_metadata,
+  table_with_metadata finalize_output(read_mode mode,
+                                      table_metadata& out_metadata,
                                       std::vector<std::unique_ptr<column>>& out_columns);
 
   /**
@@ -336,11 +338,36 @@ class reader::impl {
       : true;
   }
 
+  /**
+   * @brief Check if this is the first output chunk
+   *
+   * @return True if this is the first output chunk
+   */
   [[nodiscard]] bool is_first_output_chunk() const
   {
     return _file_itm_data._output_chunk_count == 0;
   }
 
+  /**
+   * @brief Check if number of rows per source should be included in output metadata.
+   *
+   * @return True if AST filter is not present
+   */
+  [[nodiscard]] bool include_output_num_rows_per_source() const
+  {
+    return not _expr_conv.get_converted_expr().has_value();
+  }
+
+  /**
+   * @brief Calculate the number of rows read from each source in the output chunk
+   *
+   * @param chunk_start_row The offset of the first row in the output chunk
+   * @param chunk_num_rows The number of rows in the output chunk
+   * @return Vector of number of rows from each respective data source in the output chunk
+   */
+  [[nodiscard]] std::vector<size_t> calculate_output_num_rows_per_source(size_t chunk_start_row,
+                                                                         size_t chunk_num_rows);
+
   rmm::cuda_stream_view _stream;
   rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()};
 
@@ -387,7 +414,7 @@ class reader::impl {
   // chunked reading happens in 2 parts:
   //
-  // At the top level, the entire file is divided up into "passes" omn which we try and limit the
+  // At the top level, the entire file is divided up into "passes" on which we try and limit the
   // total amount of temporary memory (compressed data, decompressed data) in use
   // via _input_pass_read_limit.
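
// For illustration: a minimal, standalone sketch of the row-range-to-source
// mapping that calculate_output_num_rows_per_source() above performs. The
// names split_rows_by_source and rows_per_source are hypothetical, the sketch
// assumes num_rows > 0 and in-bounds inputs, and cudf's version additionally
// works on precomputed prefix sums and accounts for skip_rows.

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

std::vector<std::size_t> split_rows_by_source(std::vector<std::size_t> const& rows_per_source,
                                              std::size_t start_row,
                                              std::size_t num_rows)
{
  // Inclusive prefix sums of the per-source row counts, e.g. {10, 20, 5} -> {10, 30, 35}
  std::vector<std::size_t> sums(rows_per_source.size());
  std::inclusive_scan(rows_per_source.begin(), rows_per_source.end(), sums.begin());

  auto const end_row = start_row + num_rows;
  std::vector<std::size_t> out(rows_per_source.size(), 0);

  // Indices of the first and last sources overlapped by [start_row, end_row)
  auto const first = std::upper_bound(sums.begin(), sums.end(), start_row) - sums.begin();
  auto const last  = std::upper_bound(sums.begin(), sums.end(), end_row - 1) - sums.begin();

  if (first == last) {  // the whole chunk comes from a single source
    out[first] = num_rows;
  } else {
    out[first] = sums[first] - start_row;   // tail of the first overlapped source
    out[last]  = end_row - sums[last - 1];  // head of the last overlapped source
    for (auto i = first + 1; i < last; ++i) {
      out[i] = rows_per_source[i];  // sources fully covered by the chunk
    }
  }
  return out;
}

// Example: split_rows_by_source({10, 20, 5}, 8, 12) returns {2, 10, 0}.
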
 //
diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu
index 3da303e6928..794750ab6d2 100644
--- a/cpp/src/io/parquet/reader_impl_chunking.cu
+++ b/cpp/src/io/parquet/reader_impl_chunking.cu
@@ -804,16 +804,16 @@ std::vector compute_page_splits_by_row(device_span
-  std::vector<device_span<uint8_t const>> comp_in;
-  comp_in.reserve(num_comp_pages);
-  std::vector<device_span<uint8_t>> comp_out;
-  comp_out.reserve(num_comp_pages);
+  auto comp_in =
+    cudf::detail::make_empty_host_vector<device_span<uint8_t const>>(num_comp_pages, stream);
+  auto comp_out =
+    cudf::detail::make_empty_host_vector<device_span<uint8_t>>(num_comp_pages, stream);
 
   // vectors to save v2 def and rep level data, if any
-  std::vector<device_span<uint8_t const>> copy_in;
-  copy_in.reserve(num_comp_pages);
-  std::vector<device_span<uint8_t>> copy_out;
-  copy_out.reserve(num_comp_pages);
+  auto copy_in =
+    cudf::detail::make_empty_host_vector<device_span<uint8_t const>>(num_comp_pages, stream);
+  auto copy_out =
+    cudf::detail::make_empty_host_vector<device_span<uint8_t>>(num_comp_pages, stream);
 
   rmm::device_uvector<compression_result> comp_res(num_comp_pages, stream);
   thrust::fill(rmm::exec_policy_nosync(stream),
@@ -822,7 +822,6 @@ std::vector compute_page_splits_by_row(device_span compute_page_splits_by_row(device_span(offset)});
+          copy_out.push_back({dst_base, static_cast<size_t>(offset)});
         }
-        comp_in.emplace_back(page.page_data + offset,
-                             static_cast<size_t>(page.compressed_page_size - offset));
-        comp_out.emplace_back(dst_base + offset,
-                              static_cast<size_t>(page.uncompressed_page_size - offset));
+        comp_in.push_back(
+          {page.page_data + offset, static_cast<size_t>(page.compressed_page_size - offset)});
+        comp_out.push_back(
+          {dst_base + offset, static_cast<size_t>(page.uncompressed_page_size - offset)});
         page.page_data = dst_base;
         decomp_offset += page.uncompressed_page_size;
       });
+  }
 
+  auto d_comp_in = cudf::detail::make_device_uvector_async(
+    comp_in, stream, rmm::mr::get_current_device_resource());
+  auto d_comp_out = cudf::detail::make_device_uvector_async(
+    comp_out, stream, rmm::mr::get_current_device_resource());
+
+  int32_t start_pos = 0;
+  for (auto const& codec : codecs) {
+    if (codec.num_pages == 0) { continue; }
+
+    device_span<device_span<uint8_t const> const> d_comp_in_view{d_comp_in.data() + start_pos,
+                                                                 codec.num_pages};
+
+    device_span<device_span<uint8_t> const> d_comp_out_view(d_comp_out.data() + start_pos,
+                                                            codec.num_pages);
-    host_span<device_span<uint8_t const> const> comp_in_view{comp_in.data() + start_pos,
-                                                             codec.num_pages};
-    auto const d_comp_in = cudf::detail::make_device_uvector_async(
-      comp_in_view, stream, rmm::mr::get_current_device_resource());
-    host_span<device_span<uint8_t> const> comp_out_view(comp_out.data() + start_pos,
-                                                        codec.num_pages);
-    auto const d_comp_out = cudf::detail::make_device_uvector_async(
-      comp_out_view, stream, rmm::mr::get_current_device_resource());
     device_span<compression_result> d_comp_res_view(comp_res.data() + start_pos, codec.num_pages);
 
     switch (codec.compression_type) {
       case GZIP:
-        gpuinflate(d_comp_in, d_comp_out, d_comp_res_view, gzip_header_included::YES, stream);
+        gpuinflate(
+          d_comp_in_view, d_comp_out_view, d_comp_res_view, gzip_header_included::YES, stream);
         break;
       case SNAPPY:
         if (cudf::io::nvcomp_integration::is_stable_enabled()) {
           nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY,
-                                     d_comp_in,
-                                     d_comp_out,
+                                     d_comp_in_view,
+                                     d_comp_out_view,
                                      d_comp_res_view,
                                      codec.max_decompressed_size,
                                      codec.total_decomp_size,
                                      stream);
         } else {
-          gpu_unsnap(d_comp_in, d_comp_out, d_comp_res_view, stream);
+          gpu_unsnap(d_comp_in_view, d_comp_out_view, d_comp_res_view, stream);
         }
         break;
       case ZSTD:
         nvcomp::batched_decompress(nvcomp::compression_type::ZSTD,
-                                   d_comp_in,
-                                   d_comp_out,
+                                   d_comp_in_view,
+                                   d_comp_out_view,
                                    d_comp_res_view,
                                    codec.max_decompressed_size,
codec.total_decomp_size, stream); break; case BROTLI: - gpu_debrotli(d_comp_in, - d_comp_out, + gpu_debrotli(d_comp_in_view, + d_comp_out_view, d_comp_res_view, debrotli_scratch.data(), debrotli_scratch.size(), @@ -893,8 +900,8 @@ std::vector compute_page_splits_by_row(device_span chunk decomp_sum{}); // retrieve to host so we can call nvcomp to get compression scratch sizes - std::vector h_decomp_info = - cudf::detail::make_std_vector_sync(decomp_info, stream); - std::vector temp_cost(pages.size()); + auto h_decomp_info = cudf::detail::make_host_vector_sync(decomp_info, stream); + auto temp_cost = cudf::detail::make_host_vector(pages.size(), stream); thrust::transform(thrust::host, h_decomp_info.begin(), h_decomp_info.end(), @@ -1232,22 +1238,22 @@ void reader::impl::setup_next_pass(read_mode mode) pass.skip_rows = _file_itm_data.global_skip_rows; pass.num_rows = _file_itm_data.global_num_rows; } else { - auto const global_start_row = _file_itm_data.global_skip_rows; - auto const global_end_row = global_start_row + _file_itm_data.global_num_rows; - auto const start_row = - std::max(_file_itm_data.input_pass_start_row_count[_file_itm_data._current_input_pass], - global_start_row); - auto const end_row = - std::min(_file_itm_data.input_pass_start_row_count[_file_itm_data._current_input_pass + 1], - global_end_row); - - // skip_rows is always global in the sense that it is relative to the first row of - // everything we will be reading, regardless of what pass we are on. - // num_rows is how many rows we are reading this pass. - pass.skip_rows = - global_start_row + + // pass_start_row and pass_end_row are computed from the selected row groups relative to the + // global_skip_rows. + auto const pass_start_row = _file_itm_data.input_pass_start_row_count[_file_itm_data._current_input_pass]; - pass.num_rows = end_row - start_row; + auto const pass_end_row = + std::min(_file_itm_data.input_pass_start_row_count[_file_itm_data._current_input_pass + 1], + _file_itm_data.global_num_rows); + + // pass.skip_rows is always global in the sense that it is relative to the first row of + // the data source (global row number 0), regardless of what pass we are on. Therefore, + // we must re-add global_skip_rows to the pass_start_row which is relative to the + // global_skip_rows. + pass.skip_rows = _file_itm_data.global_skip_rows + pass_start_row; + // num_rows is how many rows we are reading this pass. Since this is a difference, adding + // global_skip_rows to both variables is redundant. + pass.num_rows = pass_end_row - pass_start_row; } // load page information for the chunk. this retrieves the compressed bytes for all the @@ -1509,6 +1515,7 @@ void reader::impl::create_global_chunk_info() // Initialize column chunk information auto remaining_rows = num_rows; + auto skip_rows = _file_itm_data.global_skip_rows; for (auto const& rg : row_groups_info) { auto const& row_group = _metadata->get_row_group(rg.index, rg.source_index); auto const row_group_start = rg.start_row; @@ -1561,7 +1568,12 @@ void reader::impl::create_global_chunk_info() schema.type == BYTE_ARRAY and _strings_to_categorical)); } - remaining_rows -= row_group_rows; + // Adjust for skip_rows when updating the remaining rows after the first group + remaining_rows -= + (skip_rows) ? 
std::min(rg.start_row + row_group.num_rows - skip_rows, remaining_rows) + : row_group_rows; + // Set skip_rows = 0 as it is no longer needed for subsequent row_groups + skip_rows = 0; } } @@ -1598,6 +1610,9 @@ void reader::impl::compute_input_passes() _file_itm_data.input_pass_row_group_offsets.push_back(0); _file_itm_data.input_pass_start_row_count.push_back(0); + // To handle global_skip_rows when computing input passes + int skip_rows = _file_itm_data.global_skip_rows; + for (size_t cur_rg_index = 0; cur_rg_index < row_groups_info.size(); cur_rg_index++) { auto const& rgi = row_groups_info[cur_rg_index]; auto const& row_group = _metadata->get_row_group(rgi.index, rgi.source_index); @@ -1606,6 +1621,14 @@ void reader::impl::compute_input_passes() auto const [compressed_rg_size, _ /*compressed + uncompressed*/] = get_row_group_size(row_group); + // We must use the effective size of the first row group we are reading to accurately calculate + // the first non-zero input_pass_start_row_count. + auto const row_group_rows = + (skip_rows) ? rgi.start_row + row_group.num_rows - skip_rows : row_group.num_rows; + + // Set skip_rows = 0 as it is no longer needed for subsequent row_groups + skip_rows = 0; + // can we add this row group if (cur_pass_byte_size + compressed_rg_size >= comp_read_limit) { // A single row group (the current one) is larger than the read limit: @@ -1613,7 +1636,7 @@ void reader::impl::compute_input_passes() // row group if (cur_rg_start == cur_rg_index) { _file_itm_data.input_pass_row_group_offsets.push_back(cur_rg_index + 1); - _file_itm_data.input_pass_start_row_count.push_back(cur_row_count + row_group.num_rows); + _file_itm_data.input_pass_start_row_count.push_back(cur_row_count + row_group_rows); cur_rg_start = cur_rg_index + 1; cur_pass_byte_size = 0; } @@ -1627,7 +1650,7 @@ void reader::impl::compute_input_passes() } else { cur_pass_byte_size += compressed_rg_size; } - cur_row_count += row_group.num_rows; + cur_row_count += row_group_rows; } // add the last pass if necessary diff --git a/cpp/src/io/parquet/reader_impl_chunking.hpp b/cpp/src/io/parquet/reader_impl_chunking.hpp index b959c793011..3a3cdd34a58 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.hpp +++ b/cpp/src/io/parquet/reader_impl_chunking.hpp @@ -41,6 +41,12 @@ struct file_intermediate_data { // is not capped by global_skip_rows and global_num_rows. 
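
// A worked example of the skip_rows adjustment used in create_global_chunk_info()
// and compute_input_passes() above, with illustrative numbers that are not from
// the patch: two row groups of 100 rows each and global_skip_rows = 30, where
// the skip falls inside the first selected row group.

#include <cstddef>

std::size_t effective_row_group_rows(std::size_t group_start_row,  // global start row of the group
                                     std::size_t group_num_rows,
                                     std::size_t& skip_rows)       // consumed by the first group
{
  // Mirrors the pattern above: (skip_rows) ? start_row + num_rows - skip_rows : num_rows
  auto const rows = skip_rows ? group_start_row + group_num_rows - skip_rows : group_num_rows;
  skip_rows       = 0;  // subsequent groups contribute all of their rows
  return rows;
}

// With the numbers above: group 0 contributes 0 + 100 - 30 = 70 rows, group 1 the full 100.
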
std::vector input_pass_start_row_count{}; + // number of rows to be read from each data source + std::vector num_rows_per_source{}; + + // partial sum of the number of rows per data source + std::vector exclusive_sum_num_rows_per_source{}; + size_t _current_input_pass{0}; // current input pass index size_t _output_chunk_count{0}; // how many output chunks we have produced diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index d1e9a823d3b..581c44d024b 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -945,7 +945,7 @@ std::vector aggregate_reader_metadata::get_pandas_index_names() con return names; } -std::tuple> +std::tuple, std::vector> aggregate_reader_metadata::select_row_groups( host_span const> row_group_indices, int64_t skip_rows_opt, @@ -976,6 +976,9 @@ aggregate_reader_metadata::select_row_groups( static_cast(from_opts.second)}; }(); + // Get number of rows in each data source + std::vector num_rows_per_source(per_file_metadata.size(), 0); + if (!row_group_indices.empty()) { CUDF_EXPECTS(row_group_indices.size() == per_file_metadata.size(), "Must specify row groups for each source"); @@ -989,28 +992,45 @@ aggregate_reader_metadata::select_row_groups( selection.emplace_back(rowgroup_idx, rows_to_read, src_idx); // if page-level indexes are present, then collect extra chunk and page info. column_info_for_row_group(selection.back(), 0); - rows_to_read += get_row_group(rowgroup_idx, src_idx).num_rows; + auto const rows_this_rg = get_row_group(rowgroup_idx, src_idx).num_rows; + rows_to_read += rows_this_rg; + num_rows_per_source[src_idx] += rows_this_rg; } } } else { size_type count = 0; for (size_t src_idx = 0; src_idx < per_file_metadata.size(); ++src_idx) { auto const& fmd = per_file_metadata[src_idx]; - for (size_t rg_idx = 0; rg_idx < fmd.row_groups.size(); ++rg_idx) { + for (size_t rg_idx = 0; + rg_idx < fmd.row_groups.size() and count < rows_to_skip + rows_to_read; + ++rg_idx) { auto const& rg = fmd.row_groups[rg_idx]; auto const chunk_start_row = count; count += rg.num_rows; if (count > rows_to_skip || count == 0) { + // start row of this row group adjusted with rows_to_skip + num_rows_per_source[src_idx] += count; + num_rows_per_source[src_idx] -= + (chunk_start_row <= rows_to_skip) ? rows_to_skip : chunk_start_row; + + // We need the unadjusted start index of this row group to correctly initialize + // ColumnChunkDesc for this row group in create_global_chunk_info() and calculate + // the row offset for the first pass in compute_input_passes(). selection.emplace_back(rg_idx, chunk_start_row, src_idx); - // if page-level indexes are present, then collect extra chunk and page info. + + // If page-level indexes are present, then collect extra chunk and page info. + // The page indexes rely on absolute row numbers, not adjusted for skip_rows. column_info_for_row_group(selection.back(), chunk_start_row); } - if (count >= rows_to_skip + rows_to_read) { break; } + // Adjust the number of rows for the last source file. 
+ if (count >= rows_to_skip + rows_to_read) { + num_rows_per_source[src_idx] -= count - rows_to_skip - rows_to_read; + } } } } - return {rows_to_skip, rows_to_read, std::move(selection)}; + return {rows_to_skip, rows_to_read, std::move(selection), std::move(num_rows_per_source)}; } std::tuple, diff --git a/cpp/src/io/parquet/reader_impl_helpers.hpp b/cpp/src/io/parquet/reader_impl_helpers.hpp index 6bfa8519c76..309132a5347 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.hpp +++ b/cpp/src/io/parquet/reader_impl_helpers.hpp @@ -282,17 +282,17 @@ class aggregate_reader_metadata { * @param output_column_schemas schema indices of output columns * @param filter Optional AST expression to filter row groups based on Column chunk statistics * @param stream CUDA stream used for device memory operations and kernel launches - * @return A tuple of corrected row_start, row_count and list of row group indexes and its - * starting row + * @return A tuple of corrected row_start, row_count, list of row group indexes and its + * starting row, and list of number of rows per source. */ - [[nodiscard]] std::tuple> select_row_groups( - host_span const> row_group_indices, - int64_t row_start, - std::optional const& row_count, - host_span output_dtypes, - host_span output_column_schemas, - std::optional> filter, - rmm::cuda_stream_view stream) const; + [[nodiscard]] std::tuple, std::vector> + select_row_groups(host_span const> row_group_indices, + int64_t row_start, + std::optional const& row_count, + host_span output_dtypes, + host_span output_column_schemas, + std::optional> filter, + rmm::cuda_stream_view stream) const; /** * @brief Filters and reduces down to a selection of columns diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index f28a7311ccb..e006cc7d714 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -370,7 +370,7 @@ void fill_in_page_info(host_span chunks, rmm::cuda_stream_view stream) { auto const num_pages = pages.size(); - std::vector page_indexes(num_pages); + auto page_indexes = cudf::detail::make_host_vector(num_pages, stream); for (size_t c = 0, page_count = 0; c < chunks.size(); c++) { auto const& chunk = chunks[c]; @@ -1031,8 +1031,8 @@ struct get_page_num_rows { }; struct input_col_info { - int const schema_idx; - size_type const nesting_depth; + int schema_idx; + size_type nesting_depth; }; /** @@ -1235,8 +1235,10 @@ void reader::impl::preprocess_file(read_mode mode) [](auto const& col) { return col.type; }); } - std::tie( - _file_itm_data.global_skip_rows, _file_itm_data.global_num_rows, _file_itm_data.row_groups) = + std::tie(_file_itm_data.global_skip_rows, + _file_itm_data.global_num_rows, + _file_itm_data.row_groups, + _file_itm_data.num_rows_per_source) = _metadata->select_row_groups(_options.row_group_indices, _options.skip_rows, _options.num_rows, @@ -1245,9 +1247,18 @@ void reader::impl::preprocess_file(read_mode mode) _expr_conv.get_converted_expr(), _stream); + // Inclusive scan the number of rows per source + if (not _expr_conv.get_converted_expr().has_value() and mode == read_mode::CHUNKED_READ) { + _file_itm_data.exclusive_sum_num_rows_per_source.resize( + _file_itm_data.num_rows_per_source.size()); + thrust::inclusive_scan(_file_itm_data.num_rows_per_source.cbegin(), + _file_itm_data.num_rows_per_source.cend(), + _file_itm_data.exclusive_sum_num_rows_per_source.begin()); + } + // check for page indexes - _has_page_index = 
std::all_of(_file_itm_data.row_groups.begin(), - _file_itm_data.row_groups.end(), + _has_page_index = std::all_of(_file_itm_data.row_groups.cbegin(), + _file_itm_data.row_groups.cend(), [](auto const& row_group) { return row_group.has_page_index(); }); if (_file_itm_data.global_num_rows > 0 && not _file_itm_data.row_groups.empty() && @@ -1512,8 +1523,8 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num // compute output column sizes by examining the pages of the -input- columns if (has_lists) { - std::vector h_cols_info; - h_cols_info.reserve(_input_columns.size()); + auto h_cols_info = + cudf::detail::make_empty_host_vector(_input_columns.size(), _stream); std::transform(_input_columns.cbegin(), _input_columns.cend(), std::back_inserter(h_cols_info), diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 8413e716224..36a1d8377bf 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -22,6 +22,7 @@ #include "arrow_schema_writer.hpp" #include "compact_protocol_reader.hpp" #include "compact_protocol_writer.hpp" +#include "interop/decimal_conversion_utilities.cuh" #include "io/comp/nvcomp_adapter.hpp" #include "io/parquet/parquet.hpp" #include "io/parquet/parquet_gpu.hpp" @@ -1601,50 +1602,12 @@ size_t column_index_buffer_size(EncColumnChunk* ck, return ck->ck_stat_size * num_pages + column_index_truncate_length + padding + size_struct_size; } -/** - * @brief Convert decimal32 and decimal64 data to decimal128 and return the device vector - * - * @tparam DecimalType to convert from - * - * @param column A view of the input columns - * @param stream CUDA stream used for device memory operations and kernel launches - * - * @return A device vector containing the converted decimal128 data - */ -template -rmm::device_uvector<__int128_t> convert_data_to_decimal128(column_view const& column, - rmm::cuda_stream_view stream) -{ - size_type constexpr BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(DecimalType); - - rmm::device_uvector<__int128_t> d128_buffer(column.size(), stream); - - thrust::for_each(rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(column.size()), - [in = column.begin(), - out = reinterpret_cast(d128_buffer.data()), - BIT_WIDTH_RATIO] __device__(auto in_idx) { - auto const out_idx = in_idx * BIT_WIDTH_RATIO; - // The lowest order bits are the value, the remainder - // simply matches the sign bit to satisfy the two's - // complement integer representation of negative numbers. - out[out_idx] = in[in_idx]; -#pragma unroll BIT_WIDTH_RATIO - 1 - for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) { - out[out_idx + i] = in[in_idx] < 0 ? -1 : 0; - } - }); - - return d128_buffer; -} - /** * @brief Function to convert decimal32 and decimal64 columns to decimal128 data, * update the input table metadata, and return a new vector of column views. * * @param[in,out] table_meta The table metadata - * @param[in,out] d128_vectors Vector containing the computed decimal128 data buffers. + * @param[in,out] d128_buffers Buffers containing the converted decimal128 data. 
* @param input The input table * @param stream CUDA stream used for device memory operations and kernel launches * @@ -1652,7 +1615,7 @@ rmm::device_uvector<__int128_t> convert_data_to_decimal128(column_view const& co */ std::vector convert_decimal_columns_and_metadata( table_input_metadata& table_meta, - std::vector>& d128_vectors, + std::vector>& d128_buffers, table_view const& table, rmm::cuda_stream_view stream) { @@ -1673,28 +1636,30 @@ std::vector convert_decimal_columns_and_metadata( switch (column.type().id()) { case type_id::DECIMAL32: // Convert data to decimal128 type - d128_vectors.emplace_back(convert_data_to_decimal128(column, stream)); + d128_buffers.emplace_back(cudf::detail::convert_decimals_to_decimal128( + column, stream, rmm::mr::get_current_device_resource())); // Update metadata metadata.set_decimal_precision(MAX_DECIMAL32_PRECISION); metadata.set_type_length(size_of(data_type{type_id::DECIMAL128, column.type().scale()})); // Create a new column view from the d128 data vector return {data_type{type_id::DECIMAL128, column.type().scale()}, column.size(), - d128_vectors.back().data(), + d128_buffers.back()->data(), column.null_mask(), column.null_count(), column.offset(), converted_children}; case type_id::DECIMAL64: // Convert data to decimal128 type - d128_vectors.emplace_back(convert_data_to_decimal128(column, stream)); + d128_buffers.emplace_back(cudf::detail::convert_decimals_to_decimal128( + column, stream, rmm::mr::get_current_device_resource())); // Update metadata metadata.set_decimal_precision(MAX_DECIMAL64_PRECISION); metadata.set_type_length(size_of(data_type{type_id::DECIMAL128, column.type().scale()})); // Create a new column view from the d128 data vector return {data_type{type_id::DECIMAL128, column.type().scale()}, column.size(), - d128_vectors.back().data(), + d128_buffers.back()->data(), column.null_mask(), column.null_count(), column.offset(), @@ -1722,6 +1687,9 @@ std::vector convert_decimal_columns_and_metadata( std::back_inserter(converted_column_views), [&](auto elem) { return convert_column(thrust::get<0>(elem), thrust::get<1>(elem)); }); + // Synchronize stream here to ensure all decimal128 buffers are ready. + stream.synchronize(); + return converted_column_views; } @@ -1780,13 +1748,13 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, rmm::cuda_stream_view stream) { // Container to store decimal128 converted data if needed - std::vector> d128_vectors; + std::vector> d128_buffers; // Convert decimal32/decimal64 data to decimal128 if writing arrow schema // and initialize LinkedColVector auto vec = table_to_linked_columns( (write_arrow_schema) - ? table_view({convert_decimal_columns_and_metadata(table_meta, d128_vectors, input, stream)}) + ? 
table_view({convert_decimal_columns_and_metadata(table_meta, d128_buffers, input, stream)}) : input); auto schema_tree = construct_parquet_schema_tree( @@ -1824,7 +1792,8 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, size_type max_page_fragment_size = max_page_fragment_size_opt.value_or(default_max_page_fragment_size); - std::vector column_frag_size(num_columns, max_page_fragment_size); + auto column_frag_size = cudf::detail::make_host_vector(num_columns, stream); + std::fill(column_frag_size.begin(), column_frag_size.end(), max_page_fragment_size); if (input.num_rows() > 0 && not max_page_fragment_size_opt.has_value()) { std::vector column_sizes; @@ -1880,7 +1849,9 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, size_type num_fragments = std::reduce(num_frag_in_part.begin(), num_frag_in_part.end()); - std::vector part_frag_offset; // Store the idx of the first fragment in each partition + auto part_frag_offset = + cudf::detail::make_empty_host_vector(num_frag_in_part.size() + 1, stream); + // Store the idx of the first fragment in each partition std::exclusive_scan( num_frag_in_part.begin(), num_frag_in_part.end(), std::back_inserter(part_frag_offset), 0); part_frag_offset.push_back(part_frag_offset.back() + num_frag_in_part.back()); diff --git a/cpp/src/io/utilities/base64_utilities.hpp b/cpp/src/io/utilities/base64_utilities.hpp index 537d9c96d6b..b1eb120c47f 100644 --- a/cpp/src/io/utilities/base64_utilities.hpp +++ b/cpp/src/io/utilities/base64_utilities.hpp @@ -61,10 +61,13 @@ // altered: applying clang-format for libcudf on this file. // altered: include required headers +#include + #include // altered: use cudf namespaces -namespace cudf::io::detail { +namespace CUDF_EXPORT cudf { +namespace io::detail { /** * @brief Encodes input string to base64 and returns it @@ -84,4 +87,5 @@ std::string base64_encode(std::string_view string_to_encode); */ std::string base64_decode(std::string_view encoded_string); -} // namespace cudf::io::detail +} // namespace io::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/io/utilities/data_casting.cu b/cpp/src/io/utilities/data_casting.cu index aa1b29a101f..73362334e26 100644 --- a/cpp/src/io/utilities/data_casting.cu +++ b/cpp/src/io/utilities/data_casting.cu @@ -20,11 +20,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -933,7 +933,7 @@ std::unique_ptr parse_data( auto d_null_count = rmm::device_scalar(null_count, stream); auto null_count_data = d_null_count.data(); if (null_mask.is_empty()) { - null_mask = cudf::detail::create_null_mask(col_size, mask_state::ALL_VALID, stream, mr); + null_mask = cudf::create_null_mask(col_size, mask_state::ALL_VALID, stream, mr); } // Prepare iterator that returns (string_ptr, string_length)-pairs needed by type conversion diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index 9fe5959436d..d7b54399f8d 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -223,7 +223,6 @@ cufile_input_impl::cufile_input_impl(std::string const& filepath) // The benefit from multithreaded read plateaus around 16 threads pool(getenv_or("LIBCUDF_CUFILE_THREAD_COUNT", 16)) { - pool.sleep_duration = 10; } namespace { @@ -232,14 +231,15 @@ template > std::vector> make_sliced_tasks( - F function, DataT* ptr, size_t offset, size_t size, cudf::detail::thread_pool& pool) + F function, DataT* ptr, size_t 
offset, size_t size, BS::thread_pool& pool) { constexpr size_t default_max_slice_size = 4 * 1024 * 1024; static auto const max_slice_size = getenv_or("LIBCUDF_CUFILE_SLICE_SIZE", default_max_slice_size); auto const slices = make_file_io_slices(size, max_slice_size); std::vector> slice_tasks; std::transform(slices.cbegin(), slices.cend(), std::back_inserter(slice_tasks), [&](auto& slice) { - return pool.submit(function, ptr + slice.offset, slice.size, offset + slice.offset); + return pool.submit_task( + [&] { return function(ptr + slice.offset, slice.size, offset + slice.offset); }); }); return slice_tasks; } diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp index 91ef41fba6e..7e47b5b3d10 100644 --- a/cpp/src/io/utilities/file_io_utilities.hpp +++ b/cpp/src/io/utilities/file_io_utilities.hpp @@ -19,13 +19,13 @@ #ifdef CUFILE_FOUND #include -#include - +#include #include #endif #include #include +#include #include @@ -150,7 +150,7 @@ class cufile_input_impl final : public cufile_input { private: cufile_shim const* shim = nullptr; cufile_registered_file const cf_file; - cudf::detail::thread_pool pool; + BS::thread_pool pool; }; /** @@ -167,7 +167,7 @@ class cufile_output_impl final : public cufile_output { private: cufile_shim const* shim = nullptr; cufile_registered_file const cf_file; - cudf::detail::thread_pool pool; + BS::thread_pool pool; }; #else @@ -212,7 +212,7 @@ std::unique_ptr make_cufile_output(std::string const& filepa /** * @brief Byte range to be read/written in a single operation. */ -struct file_io_slice { +CUDF_EXPORT struct file_io_slice { size_t offset; size_t size; }; @@ -222,7 +222,7 @@ struct file_io_slice { * * If `max_slice_size` is below 1024, 1024 will be used instead to prevent potential misuse. 
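
// For context on the slicing described above: each read or write is split into
// fixed-size slices (4 MiB by default, configurable via LIBCUDF_CUFILE_SLICE_SIZE)
// and each slice becomes one BS::thread_pool task. A rough standalone sketch of
// the slicing math only; io_slice and slice_reads are illustrative names, not
// the cudf internals.

#include <algorithm>
#include <cstddef>
#include <vector>

struct io_slice {
  std::size_t offset;
  std::size_t size;
};

std::vector<io_slice> slice_reads(std::size_t total_size, std::size_t max_slice_size)
{
  if (max_slice_size < 1024) { max_slice_size = 1024; }  // floor to prevent misuse
  std::vector<io_slice> slices;
  for (std::size_t off = 0; off < total_size; off += max_slice_size) {
    slices.push_back({off, std::min(max_slice_size, total_size - off)});
  }
  return slices;
}

// e.g. slice_reads(10u << 20, 4u << 20) yields slices of 4 MiB, 4 MiB, and 2 MiB.
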
*/ -std::vector make_file_io_slices(size_t size, size_t max_slice_size); +CUDF_EXPORT std::vector make_file_io_slices(size_t size, size_t max_slice_size); } // namespace detail } // namespace io diff --git a/cpp/src/io/utilities/row_selection.hpp b/cpp/src/io/utilities/row_selection.hpp index 7fdcc65d77b..7c607099cdc 100644 --- a/cpp/src/io/utilities/row_selection.hpp +++ b/cpp/src/io/utilities/row_selection.hpp @@ -21,7 +21,8 @@ #include #include -namespace cudf::io::detail { +namespace CUDF_EXPORT cudf { +namespace io::detail { /** * @brief Adjusts the input skip_rows and num_rows options to the actual number of rows to @@ -38,4 +39,5 @@ std::pair skip_rows_num_rows_from_options(int64_t skip_rows, std::optional const& num_rows, int64_t num_source_rows); -} // namespace cudf::io::detail +} // namespace io::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/io/utilities/string_parsing.hpp b/cpp/src/io/utilities/string_parsing.hpp index 3e6f57f2896..0d9e7e40e4e 100644 --- a/cpp/src/io/utilities/string_parsing.hpp +++ b/cpp/src/io/utilities/string_parsing.hpp @@ -18,6 +18,7 @@ #include "io/utilities/parsing_utils.cuh" #include +#include #include #include @@ -43,7 +44,7 @@ namespace detail { * @param stream CUDA stream used for device memory operations and kernel launches * @return The inferred data type */ -cudf::data_type infer_data_type( +CUDF_EXPORT cudf::data_type infer_data_type( cudf::io::json_inference_options_view const& options, device_span data, thrust::zip_iterator> offset_length_begin, @@ -66,7 +67,7 @@ namespace json::detail { * @param mr The resource to be used for device memory allocation * @return The column that contains the parsed data */ -std::unique_ptr parse_data( +CUDF_EXPORT std::unique_ptr parse_data( char const* data, thrust::zip_iterator> offset_length_begin, size_type col_size, diff --git a/cpp/src/io/utilities/trie.cuh b/cpp/src/io/utilities/trie.cuh index 677743d77d0..caea8dabb88 100644 --- a/cpp/src/io/utilities/trie.cuh +++ b/cpp/src/io/utilities/trie.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #pragma once +#include #include #include @@ -67,7 +68,8 @@ inline trie_view make_trie_view(optional_trie const& t) * * @return A host vector of nodes representing the serialized trie */ -trie create_serialized_trie(std::vector const& keys, rmm::cuda_stream_view stream); +CUDF_EXPORT trie create_serialized_trie(std::vector const& keys, + rmm::cuda_stream_view stream); /* * @brief Searches for a string in a serialized trie. 
diff --git a/cpp/src/jit/parser.hpp b/cpp/src/jit/parser.hpp index 55528bed6cf..85c8d63192f 100644 --- a/cpp/src/jit/parser.hpp +++ b/cpp/src/jit/parser.hpp @@ -16,12 +16,14 @@ #pragma once +#include + #include #include #include #include -namespace cudf { +namespace CUDF_EXPORT cudf { namespace jit { /** * @brief Parse and transform a piece of PTX code that contains the implementation @@ -239,4 +241,4 @@ inline std::string parse_single_function_ptx(std::string const& src, std::string parse_single_function_cuda(std::string const& src, std::string const& function_name); } // namespace jit -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index b0184ff6a86..eb9b687630b 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -185,6 +185,8 @@ probe_join_hash_table( auto left_indices = std::make_unique>(join_size, stream, mr); auto right_indices = std::make_unique>(join_size, stream, mr); + cudf::experimental::prefetch::detail::prefetch("hash_join", *left_indices, stream); + cudf::experimental::prefetch::detail::prefetch("hash_join", *right_indices, stream); auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index f03d394d6d7..30c03a8cd68 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/lists/copying/concatenate.cu b/cpp/src/lists/copying/concatenate.cu index 3d609a262b9..8cd58e7eff2 100644 --- a/cpp/src/lists/copying/concatenate.cu +++ b/cpp/src/lists/copying/concatenate.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index 779eca438db..90f7994b21d 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu index 5625e1bf05c..50f40924478 100644 --- a/cpp/src/lists/dremel.cu +++ b/cpp/src/lists/dremel.cu @@ -257,10 +257,8 @@ dremel_data get_encoding(column_view h_col, }, stream); - thrust::host_vector column_offsets = - cudf::detail::make_host_vector_async(d_column_offsets, stream); - thrust::host_vector column_ends = - cudf::detail::make_host_vector_async(d_column_ends, stream); + auto column_offsets = cudf::detail::make_host_vector_async(d_column_offsets, stream); + auto column_ends = cudf::detail::make_host_vector_async(d_column_ends, stream); stream.synchronize(); size_t max_vals_size = 0; diff --git a/cpp/src/lists/explode.cu b/cpp/src/lists/explode.cu index 370d7480578..46c4fc78a6f 100644 --- a/cpp/src/lists/explode.cu +++ b/cpp/src/lists/explode.cu @@ -229,8 +229,8 @@ std::unique_ptr
explode_outer(table_view const& input_table, if (null_or_empty_count == 0) { // performance penalty to run the below loop if there are no nulls or empty lists. // run simple explode instead - return include_position ? explode_position(input_table, explode_column_idx, stream, mr) - : explode(input_table, explode_column_idx, stream, mr); + return include_position ? detail::explode_position(input_table, explode_column_idx, stream, mr) + : detail::explode(input_table, explode_column_idx, stream, mr); } auto gather_map_size = sliced_child.size() + null_or_empty_count; @@ -300,58 +300,63 @@ std::unique_ptr
explode_outer(table_view const& input_table, } // namespace detail /** - * @copydoc cudf::explode(table_view const&, size_type, rmm::device_async_resource_ref) + * @copydoc cudf::explode(table_view const&, size_type, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) */ std::unique_ptr
explode(table_view const& input_table, size_type explode_column_idx, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, "Unsupported non-list column"); - return detail::explode(input_table, explode_column_idx, cudf::get_default_stream(), mr); + return detail::explode(input_table, explode_column_idx, stream, mr); } /** - * @copydoc cudf::explode_position(table_view const&, size_type, rmm::device_async_resource_ref) + * @copydoc cudf::explode_position(table_view const&, size_type, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) */ std::unique_ptr
explode_position(table_view const& input_table, size_type explode_column_idx, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, "Unsupported non-list column"); - return detail::explode_position(input_table, explode_column_idx, cudf::get_default_stream(), mr); + return detail::explode_position(input_table, explode_column_idx, stream, mr); } /** - * @copydoc cudf::explode_outer(table_view const&, size_type, rmm::device_async_resource_ref) + * @copydoc cudf::explode_outer(table_view const&, size_type, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) */ std::unique_ptr
explode_outer(table_view const& input_table, size_type explode_column_idx, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, "Unsupported non-list column"); - return detail::explode_outer( - input_table, explode_column_idx, false, cudf::get_default_stream(), mr); + return detail::explode_outer(input_table, explode_column_idx, false, stream, mr); } /** * @copydoc cudf::explode_outer_position(table_view const&, size_type, - * rmm::device_async_resource_ref) + * rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr
explode_outer_position(table_view const& input_table, size_type explode_column_idx, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, "Unsupported non-list column"); - return detail::explode_outer( - input_table, explode_column_idx, true, cudf::get_default_stream(), mr); + return detail::explode_outer(input_table, explode_column_idx, true, stream, mr); } } // namespace cudf diff --git a/cpp/src/lists/set_operations.cu b/cpp/src/lists/set_operations.cu index 1d18b8c677c..5c7ab68d64b 100644 --- a/cpp/src/lists/set_operations.cu +++ b/cpp/src/lists/set_operations.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/lists/stream_compaction/distinct.cu b/cpp/src/lists/stream_compaction/distinct.cu index 40dee010bd5..cdcb4aa957f 100644 --- a/cpp/src/lists/stream_compaction/distinct.cu +++ b/cpp/src/lists/stream_compaction/distinct.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 7ecaa0fba56..e2c8d49a4ab 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu index 82b169c78ed..9810373b751 100644 --- a/cpp/src/partitioning/round_robin.cu +++ b/cpp/src/partitioning/round_robin.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -271,8 +272,8 @@ std::pair, std::vector> round_robin_part std::pair, std::vector> round_robin_partition( table_view const& input, cudf::size_type num_partitions, - cudf::size_type start_partition = 0, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + cudf::size_type start_partition, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::round_robin_partition( diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index b25254cfe49..5d748de0019 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/quantiles/quantiles.cu b/cpp/src/quantiles/quantiles.cu index af3bda2e62e..0b0e6701304 100644 --- a/cpp/src/quantiles/quantiles.cu +++ b/cpp/src/quantiles/quantiles.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/quantiles/tdigest/tdigest.cu b/cpp/src/quantiles/tdigest/tdigest.cu index da36b7ab1da..421ed26e26d 100644 --- a/cpp/src/quantiles/tdigest/tdigest.cu +++ b/cpp/src/quantiles/tdigest/tdigest.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu index 2c1181972c5..6cb58786971 100644 --- a/cpp/src/reductions/minmax.cu +++ b/cpp/src/reductions/minmax.cu @@ -107,8 +107,7 @@ rmm::device_scalar reduce_device(InputIterator d_in, * respectively of the minimums and maximums of the input pairs. 
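
// Background for the minmax_binary_op change just below: thrust::binary_function
// only supplied member typedefs and is deprecated in recent Thrust releases, so
// reduction functors simply drop the base class. A minimal sketch of the pattern
// with illustrative types, not the cudf functor:

#include <algorithm>

struct min_max_pair {
  int min_val;
  int max_val;
};

struct combine_min_max {  // no thrust::binary_function base required
  min_max_pair operator()(min_max_pair const& lhs, min_max_pair const& rhs) const
  {
    return {std::min(lhs.min_val, rhs.min_val), std::max(lhs.max_val, rhs.max_val)};
  }
};
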
*/ template -struct minmax_binary_op - : public thrust::binary_function, minmax_pair, minmax_pair> { +struct minmax_binary_op { __device__ minmax_pair operator()(minmax_pair const& lhs, minmax_pair const& rhs) const { return minmax_pair{thrust::min(lhs.min_val, rhs.min_val), diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index 0befb6ac7d7..0dbfc271a25 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 7c02a8d1b99..ee35d716d6e 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 48ab5963a29..e6de065dabb 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include #include #include #include +#include #include #include #include diff --git a/cpp/src/reshape/byte_cast.cu b/cpp/src/reshape/byte_cast.cu index 3dfa0b65814..2a03a5504c1 100644 --- a/cpp/src/reshape/byte_cast.cu +++ b/cpp/src/reshape/byte_cast.cu @@ -167,11 +167,6 @@ struct byte_list_conversion_fn byte_cast(column_view const& input, flip_endianness endian_configuration, rmm::cuda_stream_view stream, @@ -183,15 +178,13 @@ std::unique_ptr byte_cast(column_view const& input, } // namespace detail -/** - * @copydoc cudf::byte_cast(column_view const&, flip_endianness, rmm::device_async_resource_ref) - */ std::unique_ptr byte_cast(column_view const& input, flip_endianness endian_configuration, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::byte_cast(input, endian_configuration, cudf::get_default_stream(), mr); + return detail::byte_cast(input, endian_configuration, stream, mr); } } // namespace cudf diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 580db0e24c5..7473b6045af 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -263,10 +264,11 @@ std::unique_ptr interleave_columns(table_view const& input, } // namespace detail std::unique_ptr interleave_columns(table_view const& input, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::interleave_columns(input, cudf::get_default_stream(), mr); + return detail::interleave_columns(input, stream, mr); } } // namespace cudf diff --git a/cpp/src/reshape/tile.cu b/cpp/src/reshape/tile.cu index 1c4019b2c73..3d4fb73c000 100644 --- a/cpp/src/reshape/tile.cu +++ b/cpp/src/reshape/tile.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -63,10 +64,11 @@ std::unique_ptr
tile(table_view const& in, std::unique_ptr
tile(table_view const& in, size_type count, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::tile(in, count, cudf::get_default_stream(), mr); + return detail::tile(in, count, stream, mr); } } // namespace cudf diff --git a/cpp/src/rolling/rolling.cu b/cpp/src/rolling/rolling.cu index e612bd01118..5dff40a3396 100644 --- a/cpp/src/rolling/rolling.cu +++ b/cpp/src/rolling/rolling.cu @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 07425a92413..83209c55c8a 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -216,7 +216,7 @@ template class fixed_point_scalar; template class fixed_point_scalar; template class fixed_point_scalar; -namespace detail { +namespace CUDF_HIDDEN detail { template fixed_width_scalar::fixed_width_scalar(T value, @@ -306,7 +306,7 @@ template class fixed_width_scalar; template class fixed_width_scalar; template class fixed_width_scalar; -} // namespace detail +} // namespace CUDF_HIDDEN detail template numeric_scalar::numeric_scalar(T value, diff --git a/cpp/src/search/contains_column.cu b/cpp/src/search/contains_column.cu index 8f05196a71c..57f2c59de40 100644 --- a/cpp/src/search/contains_column.cu +++ b/cpp/src/search/contains_column.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index e88acf68e28..2aa9e24174b 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -17,10 +17,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 4fb983dc5a6..81227cb9a2d 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/cpp/src/search/search_ordered.cu b/cpp/src/search/search_ordered.cu index 328d3f0cee4..80651a4ec44 100644 --- a/cpp/src/search/search_ordered.cu +++ b/cpp/src/search/search_ordered.cu @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/strings/combine/join.cu b/cpp/src/strings/combine/join.cu index c4cc0dbe09d..b534e9b2e5b 100644 --- a/cpp/src/strings/combine/join.cu +++ b/cpp/src/strings/combine/join.cu @@ -169,8 +169,10 @@ std::unique_ptr join_strings(strings_column_view const& input, // build the offsets: single string output has offsets [0,chars-size] auto offsets_column = [&] { - auto offsets = cudf::detail::make_device_uvector_async( - std::vector({0, static_cast(chars.size())}), stream, mr); + auto h_offsets = cudf::detail::make_host_vector(2, stream); + h_offsets[0] = 0; + h_offsets[1] = chars.size(); + auto offsets = cudf::detail::make_device_uvector_async(h_offsets, stream, mr); return std::make_unique(std::move(offsets), rmm::device_buffer{}, 0); }(); diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index 2f4ebf97264..64a2107e17a 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -123,7 +123,7 @@ struct format_compiler { : format(fmt), d_items(0, stream) { specifiers.insert(extra_specifiers.begin(), extra_specifiers.end()); - std::vector items; + auto items = cudf::detail::make_empty_host_vector(format.length(), stream); auto str = 
format.data(); auto length = format.length(); while (length > 0) { diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu index 2e4a776d3c0..514ab965fc5 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index 7622e39e735..352e0f9f41a 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -79,7 +79,7 @@ auto create_strings_device_views(host_span views, rmm::cuda_s // Compute the partition offsets and size of offset column // Note: Using 64-bit size_t so we can detect overflow of 32-bit size_type - auto input_offsets = std::vector(views.size() + 1); + auto input_offsets = cudf::detail::make_host_vector(views.size() + 1, stream); auto offset_it = std::next(input_offsets.begin()); thrust::transform( thrust::host, views.begin(), views.end(), offset_it, [](auto const& col) -> size_t { diff --git a/cpp/src/strings/filter_chars.cu b/cpp/src/strings/filter_chars.cu index a34828fa97e..48620af8cad 100644 --- a/cpp/src/strings/filter_chars.cu +++ b/cpp/src/strings/filter_chars.cu @@ -129,7 +129,7 @@ std::unique_ptr filter_characters( // convert input table for copy to device memory size_type table_size = static_cast(characters_to_filter.size()); - thrust::host_vector htable(table_size); + auto htable = cudf::detail::make_host_vector(table_size, stream); std::transform( characters_to_filter.begin(), characters_to_filter.end(), htable.begin(), [](auto entry) { return char_range{entry.first, entry.second}; diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index cd60a4296b9..31234ea42ec 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -171,7 +171,7 @@ std::unique_ptr replace_re(strings_column_view const& input, auto d_buffer = rmm::device_buffer(buffer_size, stream); // copy all the reprog_device instances to a device memory array - std::vector progs; + auto progs = cudf::detail::make_empty_host_vector(h_progs.size(), stream); std::transform(h_progs.begin(), h_progs.end(), std::back_inserter(progs), diff --git a/cpp/src/strings/strings_scalar_factories.cpp b/cpp/src/strings/strings_scalar_factories.cpp index 233fee14694..cf973638cc4 100644 --- a/cpp/src/strings/strings_scalar_factories.cpp +++ b/cpp/src/strings/strings_scalar_factories.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include diff --git a/cpp/src/strings/translate.cu b/cpp/src/strings/translate.cu index 16b22d0de4c..a242b008a54 100644 --- a/cpp/src/strings/translate.cu +++ b/cpp/src/strings/translate.cu @@ -97,7 +97,7 @@ std::unique_ptr translate(strings_column_view const& strings, size_type table_size = static_cast(chars_table.size()); // convert input table - thrust::host_vector htable(table_size); + auto htable = cudf::detail::make_host_vector(table_size, stream); std::transform(chars_table.begin(), chars_table.end(), htable.begin(), [](auto entry) { return translate_table{entry.first, entry.second}; }); diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index f70598f33be..068d89a52dc 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
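
// The pattern recurring throughout this patch: host-side staging buffers move
// from std::vector / thrust::host_vector to cudf::detail::host_vector, whose
// allocation can come from pinned memory, which makes the later host-to-device
// copy faster. A sketch of typical usage, assuming only the factory signatures
// as they appear in this patch (declared in cudf/detail/utilities/vector_factories.hpp):

#include <cudf/detail/utilities/vector_factories.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

void stage_and_copy(int n, rmm::cuda_stream_view stream)
{
  // Reserve capacity for n elements up front; the size starts at zero
  auto staged = cudf::detail::make_empty_host_vector<int>(n, stream);
  for (int i = 0; i < n; ++i) {
    staged.push_back(i * i);
  }
  // Asynchronous H2D copy into an rmm::device_uvector<int> ordered on `stream`
  auto d_staged = cudf::detail::make_device_uvector_async(
    staged, stream, rmm::mr::get_current_device_resource());
}
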
*/ + #include "strings/char_types/char_cases.h" #include "strings/char_types/char_flags.h" diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 13c31e8ae4c..2969557c78f 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -308,7 +308,10 @@ auto decompose_structs(table_view table, auto list_lex_preprocess(table_view const& table, rmm::cuda_stream_view stream) { std::vector dremel_data; - std::vector dremel_device_views; + auto const num_list_columns = std::count_if( + table.begin(), table.end(), [](auto const& col) { return col.type().id() == type_id::LIST; }); + auto dremel_device_views = + cudf::detail::make_empty_host_vector(num_list_columns, stream); for (auto const& col : table) { if (col.type().id() == type_id::LIST) { dremel_data.push_back(detail::get_comparator_data(col, {}, false, stream)); diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 723c306da1d..808f2d1b284 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index bfac7ab586e..12a15eb7e34 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/utilities/cuda_memcpy.cu b/cpp/src/utilities/cuda_memcpy.cu index 3d0822d8545..0efb881eb3e 100644 --- a/cpp/src/utilities/cuda_memcpy.cu +++ b/cpp/src/utilities/cuda_memcpy.cu @@ -14,6 +14,9 @@ * limitations under the License. */ +#include "cudf/detail/utilities/integer_utils.hpp" + +#include #include #include #include @@ -26,15 +29,24 @@ namespace cudf::detail { namespace { +// Simple kernel to copy between device buffers +CUDF_KERNEL void copy_kernel(char const* src, char* dst, size_t n) +{ + auto const idx = cudf::detail::grid_1d::global_thread_id(); + if (idx < n) { dst[idx] = src[idx]; } +} + void copy_pinned(void* dst, void const* src, std::size_t size, rmm::cuda_stream_view stream) { if (size == 0) return; if (size < get_kernel_pinned_copy_threshold()) { - thrust::copy_n(rmm::exec_policy_nosync(stream), - static_cast(src), - size, - static_cast(dst)); + const int block_size = 256; + auto const grid_size = cudf::util::div_rounding_up_safe(size, block_size); + // We are explicitly launching the kernel here instead of calling a thrust function because the + // thrust function can potentially call cudaMemcpyAsync instead of using a kernel + copy_kernel<<>>( + static_cast(src), static_cast(dst), size); } else { CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, size, cudaMemcpyDefault, stream)); } diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/host_memory.cpp similarity index 73% rename from cpp/src/utilities/pinned_memory.cpp rename to cpp/src/utilities/host_memory.cpp index 3ea4293fc60..7c3cea42023 100644 --- a/cpp/src/utilities/pinned_memory.cpp +++ b/cpp/src/utilities/host_memory.cpp @@ -83,7 +83,7 @@ class fixed_pinned_pool_memory_resource { void deallocate_async(void* ptr, std::size_t bytes, std::size_t alignment, - cuda::stream_ref stream) noexcept + cuda::stream_ref stream) { if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr < pool_end_) { pool_->deallocate_async(ptr, bytes, alignment, stream); @@ -92,14 +92,14 @@ class fixed_pinned_pool_memory_resource { } } - void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref 
stream) noexcept + void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) { return deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); } void deallocate(void* ptr, std::size_t bytes, - std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) { deallocate_async(ptr, bytes, alignment, stream_); stream_.wait(); @@ -186,6 +186,61 @@ CUDF_EXPORT rmm::host_device_async_resource_ref& host_mr() return mr_ref; } +class new_delete_memory_resource { + public: + void* allocate(std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) + { + try { + return rmm::detail::aligned_host_allocate( + bytes, alignment, [](std::size_t size) { return ::operator new(size); }); + } catch (std::bad_alloc const& e) { + CUDF_FAIL("Failed to allocate memory: " + std::string{e.what()}, rmm::out_of_memory); + } + } + + void* allocate_async(std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream) + { + return allocate(bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT); + } + + void* allocate_async(std::size_t bytes, + std::size_t alignment, + [[maybe_unused]] cuda::stream_ref stream) + { + return allocate(bytes, alignment); + } + + void deallocate(void* ptr, + std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) + { + rmm::detail::aligned_host_deallocate( + ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); + } + + void deallocate_async(void* ptr, + std::size_t bytes, + std::size_t alignment, + [[maybe_unused]] cuda::stream_ref stream) + { + deallocate(ptr, bytes, alignment); + } + + void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) + { + deallocate(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT); + } + + bool operator==(new_delete_memory_resource const& other) const { return true; } + + bool operator!=(new_delete_memory_resource const& other) const { return !operator==(other); } + + friend void get_property(new_delete_memory_resource const&, cuda::mr::host_accessible) noexcept {} +}; + +static_assert(cuda::mr::resource_with, + "Pageable pool mr must be accessible from the host"); + } // namespace rmm::host_device_async_resource_ref set_pinned_memory_resource( @@ -225,4 +280,29 @@ void set_kernel_pinned_copy_threshold(size_t threshold) size_t get_kernel_pinned_copy_threshold() { return kernel_pinned_copy_threshold(); } +CUDF_EXPORT auto& allocate_host_as_pinned_threshold() +{ + // use pageable memory for all host allocations + static std::atomic threshold = 0; + return threshold; +} + +void set_allocate_host_as_pinned_threshold(size_t threshold) +{ + allocate_host_as_pinned_threshold() = threshold; +} + +size_t get_allocate_host_as_pinned_threshold() { return allocate_host_as_pinned_threshold(); } + +namespace detail { + +CUDF_EXPORT rmm::host_async_resource_ref get_pageable_memory_resource() +{ + static new_delete_memory_resource mr{}; + static rmm::host_async_resource_ref mr_ref{mr}; + return mr_ref; +} + +} // namespace detail + } // namespace cudf diff --git a/cpp/src/utilities/prefetch.cpp b/cpp/src/utilities/prefetch.cpp new file mode 100644 index 00000000000..86d6cc00764 --- /dev/null +++ b/cpp/src/utilities/prefetch.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +#include + +namespace cudf::experimental::prefetch { + +namespace detail { + +prefetch_config& prefetch_config::instance() +{ + static prefetch_config instance; + return instance; +} + +bool prefetch_config::get(std::string_view key) +{ + std::shared_lock lock(config_mtx); + auto const it = config_values.find(key.data()); + return it == config_values.end() ? false : it->second; // default to not prefetching +} + +void prefetch_config::set(std::string_view key, bool value) +{ + std::lock_guard lock(config_mtx); + config_values[key.data()] = value; +} + +cudaError_t prefetch_noexcept(std::string_view key, + void const* ptr, + std::size_t size, + rmm::cuda_stream_view stream, + rmm::cuda_device_id device_id) noexcept +{ + if (prefetch_config::instance().get(key)) { + if (prefetch_config::instance().debug) { + std::cerr << "Prefetching " << size << " bytes for key " << key << " at location " << ptr + << std::endl; + } + auto result = cudaMemPrefetchAsync(ptr, size, device_id.value(), stream.value()); + // Need to flush the CUDA error so that the context is not corrupted. + if (result == cudaErrorInvalidValue) { cudaGetLastError(); } + return result; + } + return cudaSuccess; +} + +void prefetch(std::string_view key, + void const* ptr, + std::size_t size, + rmm::cuda_stream_view stream, + rmm::cuda_device_id device_id) +{ + auto result = prefetch_noexcept(key, ptr, size, stream, device_id); + // Ignore cudaErrorInvalidValue because that will be raised if prefetching is + // attempted on unmanaged memory. 
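+ // As a usage sketch (illustrative only, not part of this patch): callers opt
+ // in per key through the public API defined at the bottom of this file, e.g.
+ //
+ //   cudf::experimental::prefetch::enable_prefetching("column_view::get_data");
+ //   ... run cudf operations; pointers passed to detail::prefetch() under this
+ //   key are then prefetched with cudaMemPrefetchAsync ...
+ //   cudf::experimental::prefetch::disable_prefetching("column_view::get_data");
+ //
+ // The key string above is hypothetical; keys are whatever call sites pass in.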
+ if ((result != cudaErrorInvalidValue) && (result != cudaSuccess)) { + std::cerr << "Prefetch failed" << std::endl; + CUDF_CUDA_TRY(result); + } +} + +} // namespace detail + +void enable_prefetching(std::string_view key) +{ + detail::prefetch_config::instance().set(key, true); +} + +void disable_prefetching(std::string_view key) +{ + detail::prefetch_config::instance().set(key, false); +} + +void prefetch_debugging(bool enable) { detail::prefetch_config::instance().debug = enable; } +} // namespace cudf::experimental::prefetch diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 8e2017ccb97..4dffcb41ba2 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -270,6 +270,7 @@ ConfigureTest( INTEROP_TEST interop/to_arrow_device_test.cpp interop/to_arrow_test.cpp + interop/to_arrow_host_test.cpp interop/from_arrow_test.cpp interop/from_arrow_device_test.cpp interop/from_arrow_host_test.cpp @@ -313,17 +314,17 @@ ConfigureTest( PERCENT 30 ) ConfigureTest( - JSON_TEST io/json_test.cpp io/json_chunked_reader.cpp + JSON_TEST io/json/json_test.cpp io/json/json_chunked_reader.cu GPUS 1 PERCENT 30 ) -ConfigureTest(JSON_WRITER_TEST io/json_writer.cpp) -ConfigureTest(JSON_TYPE_CAST_TEST io/json_type_cast_test.cu) -ConfigureTest(NESTED_JSON_TEST io/nested_json_test.cpp io/json_tree.cpp) +ConfigureTest(JSON_WRITER_TEST io/json/json_writer.cpp) +ConfigureTest(JSON_TYPE_CAST_TEST io/json/json_type_cast_test.cu) +ConfigureTest(NESTED_JSON_TEST io/json/nested_json_test.cpp io/json/json_tree.cpp) ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) -ConfigureTest(JSON_QUOTE_NORMALIZATION io/json_quote_normalization_test.cpp) -ConfigureTest(JSON_WHITESPACE_NORMALIZATION io/json_whitespace_normalization_test.cu) +ConfigureTest(JSON_QUOTE_NORMALIZATION io/json/json_quote_normalization_test.cpp) +ConfigureTest(JSON_WHITESPACE_NORMALIZATION io/json/json_whitespace_normalization_test.cu) ConfigureTest( DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp GPUS 1 @@ -572,7 +573,7 @@ ConfigureTest( LARGE_STRINGS_TEST large_strings/concatenate_tests.cpp large_strings/case_tests.cpp - large_strings/json_tests.cpp + large_strings/json_tests.cu large_strings/large_strings_fixture.cpp large_strings/merge_tests.cpp large_strings/parquet_tests.cpp @@ -688,7 +689,10 @@ ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE tes ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) -ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) +# Deprecation from 16297 and fixes in 16379 caused this test to be empty. This will be reenabled once +# the deprecated APIs have been replaced in 24.10.
+# +# ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LABELING_BINS_TEST streams/labeling_bins_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) @@ -700,6 +704,7 @@ ConfigureTest(STREAM_PARQUETIO_TEST streams/io/parquet_test.cpp STREAM_MODE test ConfigureTest(STREAM_POOL_TEST streams/pool_test.cu STREAM_MODE testing) ConfigureTest(STREAM_REDUCTION_TEST streams/reduction_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_RESHAPE_TEST streams/reshape_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_ROLLING_TEST streams/rolling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/binaryop/binop-verify-input-test.cpp b/cpp/tests/binaryop/binop-verify-input-test.cpp index 1346dcd4666..def6e94452e 100644 --- a/cpp/tests/binaryop/binop-verify-input-test.cpp +++ b/cpp/tests/binaryop/binop-verify-input-test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. * Copyright 2018 Christian Noboa Mardini @@ -42,5 +42,5 @@ TEST_F(BinopVerifyInputTest, Vector_Vector_ErrorSecondOperandVectorZeroSize) EXPECT_THROW(cudf::binary_operation( lhs, rhs, cudf::binary_operator::ADD, cudf::data_type(cudf::type_id::INT64)), - cudf::logic_error); + std::invalid_argument); } diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 6eaa1a07e08..733e5814425 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -14,6 +14,13 @@ * limitations under the License. */ +// These interop functions are deprecated. We keep the code in this +// test and will migrate the tests to export via the arrow C data +// interface, which we consume with from_arrow_host. For now, the tests +// are commented out.
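+// A migration sketch (illustrative only, not part of this patch): the
+// replacement path hands cudf an ArrowDeviceArray whose device_type is
+// ARROW_DEVICE_CPU, roughly
+//
+//   ArrowSchema schema;          // produced by the exporting library
+//   ArrowDeviceArray host_data;  // .array filled in, .device_type = ARROW_DEVICE_CPU
+//   auto tbl = cudf::from_arrow_host(&schema, &host_data);
+//
+// Variable names here are ours; see from_arrow_host_test.cpp for real usage.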
+ +#if 0 + #include #include @@ -595,3 +602,5 @@ TEST_F(FromArrowStructScalarTest, Basic) CUDF_TEST_EXPECT_TABLES_EQUAL(lhs, cudf_struct_scalar->view()); } + +#endif diff --git a/cpp/tests/interop/nanoarrow_utils.hpp b/cpp/tests/interop/nanoarrow_utils.hpp index 4147728b2a6..a961f73d955 100644 --- a/cpp/tests/interop/nanoarrow_utils.hpp +++ b/cpp/tests/interop/nanoarrow_utils.hpp @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -29,6 +28,7 @@ #include #include +#include struct generated_test_data { generated_test_data(cudf::size_type length) @@ -211,6 +211,7 @@ DEFINE_NANOARROW_STORAGE(cudf::duration_us, INT64); DEFINE_NANOARROW_STORAGE(cudf::duration_ns, INT64); DEFINE_NANOARROW_STORAGE(uint8_t, UINT8); DEFINE_NANOARROW_STORAGE(int32_t, INT32); +DEFINE_NANOARROW_STORAGE(__int128_t, DECIMAL128); #undef DEFINE_NANOARROW_STORAGE @@ -255,8 +256,7 @@ std::enable_if_t, nanoarrow::UniqueArray> get_nanoarrow_ ArrowBitmap out; ArrowBitmapInit(&out); NANOARROW_THROW_NOT_OK(ArrowBitmapResize(&out, b.size(), 1)); - out.buffer.size_bytes = (b.size() >> 3) + ((b.size() & 7) != 0); - out.size_bits = b.size(); + std::memset(out.buffer.data, 0, out.buffer.size_bytes); for (size_t i = 0; i < b.size(); ++i) { ArrowBitSetTo(out.buffer.data, i, static_cast(b[i])); @@ -296,6 +296,7 @@ std::enable_if_t, nanoarrow::UniqueArray> g { nanoarrow::UniqueArray tmp; NANOARROW_THROW_NOT_OK(ArrowArrayInitFromType(tmp.get(), NANOARROW_TYPE_STRING)); + NANOARROW_THROW_NOT_OK(ArrowBitmapReserve(ArrowArrayValidityBitmap(tmp.get()), mask.size())); NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(tmp.get())); NANOARROW_THROW_NOT_OK(ArrowArrayReserve(tmp.get(), data.size())); @@ -378,3 +379,5 @@ get_nanoarrow_cudf_table(cudf::size_type length); std::tuple, nanoarrow::UniqueSchema, nanoarrow::UniqueArray> get_nanoarrow_host_tables(cudf::size_type length); + +void slice_host_nanoarrow(ArrowArray* arr, int64_t start, int64_t end); diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp index 8903f09b82b..51216a8512c 100644 --- a/cpp/tests/interop/to_arrow_device_test.cpp +++ b/cpp/tests/interop/to_arrow_device_test.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -711,6 +710,83 @@ TEST_F(ToArrowDeviceTest, StructColumn) template using fp_wrapper = cudf::test::fixed_point_column_wrapper; +TEST_F(ToArrowDeviceTest, FixedPoint32Table) +{ + using namespace numeric; + + for (auto const scale : {6, 4, 2, 0, -1, -3, -5}) { + auto const expect_data = + std::vector{-1000, -1, -1, -1, 2400, 0, 0, 0, -3456, -1, -1, -1, + 4650, 0, 0, 0, 5154, 0, 0, 0, 6800, 0, 0, 0}; + auto col = fp_wrapper({-1000, 2400, -3456, 4650, 5154, 6800}, scale_type{scale}); + std::vector> table_cols; + table_cols.emplace_back(col.release()); + auto input = cudf::table(std::move(table_cols)); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + ArrowSchemaInit(expected_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision(), + -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = 0; + + auto got_arrow_schema = + cudf::to_arrow_schema(input.view(), std::vector{{"a"}}); + compare_schemas(expected_schema.get(), got_arrow_schema.get()); + + auto result_dev_data = 
std::make_unique>( + expect_data.size(), cudf::get_default_stream()); + cudaMemcpy(result_dev_data->data(), + expect_data.data(), + sizeof(int32_t) * expect_data.size(), + cudaMemcpyHostToDevice); + + cudf::get_default_stream().synchronize(); + nanoarrow::UniqueArray expected_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); + expected_array->length = input.num_rows(); + + expected_array->children[0]->length = input.num_rows(); + NANOARROW_THROW_NOT_OK( + ArrowBufferSetAllocator(ArrowArrayBuffer(expected_array->children[0], 0), noop_alloc)); + ArrowArrayValidityBitmap(expected_array->children[0])->buffer.data = + const_cast(reinterpret_cast(input.view().column(0).null_mask())); + + auto data_ptr = reinterpret_cast(result_dev_data->data()); + NANOARROW_THROW_NOT_OK(ArrowBufferSetAllocator( + ArrowArrayBuffer(expected_array->children[0], 1), + ArrowBufferDeallocator( + [](ArrowBufferAllocator* alloc, uint8_t*, int64_t) { + auto buf = + reinterpret_cast>*>(alloc->private_data); + delete buf; + }, + new std::unique_ptr>(std::move(result_dev_data))))); + ArrowArrayBuffer(expected_array->children[0], 1)->data = data_ptr; + NANOARROW_THROW_NOT_OK( + ArrowArrayFinishBuilding(expected_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr)); + + auto got_arrow_array = cudf::to_arrow_device(input.view()); + ASSERT_EQ(rmm::get_current_cuda_device().value(), got_arrow_array->device_id); + ASSERT_EQ(ARROW_DEVICE_CUDA, got_arrow_array->device_type); + ASSERT_CUDA_SUCCEEDED( + cudaEventSynchronize(*reinterpret_cast(got_arrow_array->sync_event))); + compare_arrays(expected_schema.get(), expected_array.get(), &got_arrow_array->array); + + got_arrow_array = cudf::to_arrow_device(std::move(input)); + ASSERT_EQ(rmm::get_current_cuda_device().value(), got_arrow_array->device_id); + ASSERT_EQ(ARROW_DEVICE_CUDA, got_arrow_array->device_type); + ASSERT_CUDA_SUCCEEDED( + cudaEventSynchronize(*reinterpret_cast(got_arrow_array->sync_event))); + compare_arrays(expected_schema.get(), expected_array.get(), &got_arrow_array->array); + } +} + TEST_F(ToArrowDeviceTest, FixedPoint64Table) { using namespace numeric; diff --git a/cpp/tests/interop/to_arrow_host_test.cpp b/cpp/tests/interop/to_arrow_host_test.cpp new file mode 100644 index 00000000000..fc0ed6c9352 --- /dev/null +++ b/cpp/tests/interop/to_arrow_host_test.cpp @@ -0,0 +1,1117 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "nanoarrow_utils.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +using vector_of_columns = std::vector>; + +struct BaseToArrowHostFixture : public cudf::test::BaseFixture { + template + std::enable_if_t() and !std::is_same_v, void> compare_subset( + ArrowArrayView const* expected, + int64_t start_offset_expected, + ArrowArrayView const* actual, + int64_t start_offset_actual, + int64_t length) + { + for (int64_t i = 0; i < length; ++i) { + const bool is_null = ArrowArrayViewIsNull(expected, start_offset_expected + i); + EXPECT_EQ(is_null, ArrowArrayViewIsNull(actual, start_offset_actual + i)); + if (is_null) continue; + + const auto expected_val = ArrowArrayViewGetIntUnsafe(expected, start_offset_expected + i); + const auto actual_val = ArrowArrayViewGetIntUnsafe(actual, start_offset_actual + i); + + EXPECT_EQ(expected_val, actual_val); + } + } + + template + std::enable_if_t, void> compare_subset( + ArrowArrayView const* expected, + int64_t start_offset_expected, + ArrowArrayView const* actual, + int64_t start_offset_actual, + int64_t length) + { + for (int64_t i = 0; i < length; ++i) { + const bool is_null = ArrowArrayViewIsNull(expected, start_offset_expected + i); + EXPECT_EQ(is_null, ArrowArrayViewIsNull(actual, start_offset_actual + i)); + if (is_null) continue; + + const auto expected_view = ArrowArrayViewGetBytesUnsafe(expected, start_offset_expected + i); + const auto actual_view = ArrowArrayViewGetBytesUnsafe(actual, start_offset_actual + i); + + EXPECT_EQ(expected_view.size_bytes, actual_view.size_bytes); + EXPECT_TRUE( + 0 == std::memcmp(expected_view.data.data, actual_view.data.data, expected_view.size_bytes)); + } + } + + void compare_child_subset(ArrowArrayView const* expected, + int64_t exp_start_offset, + ArrowArrayView const* actual, + int64_t act_start_offset, + int64_t length) + { + EXPECT_EQ(expected->storage_type, actual->storage_type); + EXPECT_EQ(expected->n_children, actual->n_children); + + switch (expected->storage_type) { + case NANOARROW_TYPE_LIST: + for (int64_t i = 0; i < length; ++i) { + const auto expected_start = exp_start_offset + i; + const auto actual_start = act_start_offset + i; + + // ArrowArrayViewIsNull accounts for the array offset, so we can properly + // compare the validity of indexes + const bool is_null = ArrowArrayViewIsNull(expected, expected_start); + EXPECT_EQ(is_null, ArrowArrayViewIsNull(actual, actual_start)); + if (is_null) continue; + + // ArrowArrayViewListChildOffset does not account for array offset, so we need + // to add the offset to the index in order to get the correct offset into the list + const int64_t start_offset_expected = + ArrowArrayViewListChildOffset(expected, expected->offset + expected_start); + const int64_t start_offset_actual = + ArrowArrayViewListChildOffset(actual, actual->offset + actual_start); + + const int64_t end_offset_expected = + ArrowArrayViewListChildOffset(expected, expected->offset + expected_start + 1); + const int64_t end_offset_actual = + ArrowArrayViewListChildOffset(actual, actual->offset + actual_start + 1); + + // verify the list lengths are the same + EXPECT_EQ(end_offset_expected - start_offset_expected, + end_offset_actual - start_offset_actual); + // compare the list values + compare_child_subset(expected->children[0], + start_offset_expected, + actual->children[0], + start_offset_actual, + 
end_offset_expected - start_offset_expected); + } + break; + case NANOARROW_TYPE_STRUCT: + for (int64_t i = 0; i < length; ++i) { + SCOPED_TRACE("idx: " + std::to_string(i)); + const auto expected_start = exp_start_offset + i; + const auto actual_start = act_start_offset + i; + + const bool is_null = ArrowArrayViewIsNull(expected, expected_start); + EXPECT_EQ(is_null, ArrowArrayViewIsNull(actual, actual_start)); + if (is_null) continue; + + for (int64_t child = 0; child < expected->n_children; ++child) { + SCOPED_TRACE("child: " + std::to_string(child)); + compare_child_subset(expected->children[child], + expected_start + expected->offset, + actual->children[child], + actual_start + actual->offset, + 1); + } + } + break; + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_LARGE_BINARY: + compare_subset( + expected, exp_start_offset, actual, act_start_offset, length); + break; + default: + compare_subset(expected, exp_start_offset, actual, act_start_offset, length); + break; + } + } + + void compare_arrays(ArrowArrayView const* expected, ArrowArrayView const* actual) + { + EXPECT_EQ(expected->length, actual->length); + EXPECT_EQ(expected->null_count, actual->null_count); + EXPECT_EQ(expected->offset, actual->offset); + EXPECT_EQ(expected->n_children, actual->n_children); + EXPECT_EQ(expected->storage_type, actual->storage_type); + + // cudf automatically pushes down nulls and purges non-empty, non-zero nulls + // from the children columns. So while we can memcmp the buffers for top + // level arrays, we need to do an "equivalence" comparison for nested + // arrays (lists and structs) by checking each index for null and skipping + // comparisons for children if null. + switch (expected->storage_type) { + case NANOARROW_TYPE_STRUCT: + // if we're a struct with no children, then we just skip + // attempting to compare the children + if (expected->n_children == 0) { + EXPECT_EQ(nullptr, actual->children); + break; + } + // otherwise we can fallthrough and do the same thing we do for lists + case NANOARROW_TYPE_LIST: + compare_child_subset(expected, 0, actual, 0, expected->length); + break; + default: + for (int64_t i = 0; i < actual->array->n_buffers; ++i) { + SCOPED_TRACE("buffer " + std::to_string(i)); + auto expected_buf = expected->buffer_views[i]; + auto actual_buf = actual->buffer_views[i]; + + EXPECT_TRUE(0 == std::memcmp(expected_buf.data.data, + actual_buf.data.data, + expected_buf.size_bytes)); + } + } + + if (expected->dictionary != nullptr) { + EXPECT_NE(nullptr, actual->dictionary); + SCOPED_TRACE("dictionary"); + compare_arrays(expected->dictionary, actual->dictionary); + } else { + EXPECT_EQ(nullptr, actual->dictionary); + } + } +}; + +struct ToArrowHostDeviceTest : public BaseToArrowHostFixture {}; +template +struct ToArrowHostDeviceTestDurationsTest : public BaseToArrowHostFixture {}; + +TYPED_TEST_SUITE(ToArrowHostDeviceTestDurationsTest, cudf::test::DurationTypes); + +TEST_F(ToArrowHostDeviceTest, EmptyTable) +{ + auto [tbl, schema, arr] = get_nanoarrow_host_tables(0); + + auto got_arrow_host = cudf::to_arrow_host(tbl->view()); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, arr.get(), nullptr)); + + 
NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(&expected, &actual); + + ArrowArrayViewReset(&expected); + ArrowArrayViewReset(&actual); +} + +TEST_F(ToArrowHostDeviceTest, DateTimeTable) +{ + auto data = std::initializer_list{1, 2, 3, 4, 5, 6}; + auto col = + cudf::test::fixed_width_column_wrapper(data); + cudf::table_view input_view({col}); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + ArrowSchemaInit(expected_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDateTime( + expected_schema->children[0], NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MILLI, nullptr)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = 0; + + auto got_arrow_host = cudf::to_arrow_host(input_view); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr)); + expected.length = data.size(); + expected.children[0]->length = data.size(); + ArrowArrayViewSetLength(expected.children[0], data.size()); + expected.children[0]->buffer_views[0].data.data = nullptr; + expected.children[0]->buffer_views[0].size_bytes = 0; + expected.children[0]->buffer_views[1].data.data = data.begin(); + + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(&expected, &actual); + ArrowArrayViewReset(&actual); + + got_arrow_host = cudf::to_arrow_host(input_view.column(0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + BaseToArrowHostFixture::compare_arrays(expected.children[0], &actual); + ArrowArrayViewReset(&actual); + + ArrowArrayViewReset(&expected); + ArrowArrayViewReset(&actual); +} + +TYPED_TEST(ToArrowHostDeviceTestDurationsTest, DurationTable) +{ + using T = TypeParam; + + if (cudf::type_to_id() == cudf::type_id::DURATION_DAYS) { return; } + + auto data = {T{1}, T{2}, T{3}, T{4}, T{5}, T{6}}; + auto col = cudf::test::fixed_width_column_wrapper(data); + + cudf::table_view input_view({col}); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + + ArrowSchemaInit(expected_schema->children[0]); + const ArrowTimeUnit arrow_unit = [&] { + switch (cudf::type_to_id()) { + case cudf::type_id::DURATION_SECONDS: return NANOARROW_TIME_UNIT_SECOND; + case cudf::type_id::DURATION_MILLISECONDS: return NANOARROW_TIME_UNIT_MILLI; + case cudf::type_id::DURATION_MICROSECONDS: return NANOARROW_TIME_UNIT_MICRO; + case cudf::type_id::DURATION_NANOSECONDS: return NANOARROW_TIME_UNIT_NANO; + default: CUDF_FAIL("Unsupported duration unit in arrow"); + } + }(); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDateTime( + expected_schema->children[0], NANOARROW_TYPE_DURATION, arrow_unit, nullptr)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + 
expected_schema->children[0]->flags = 0; + + auto got_arrow_host = cudf::to_arrow_host(input_view); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr)); + + expected.length = data.size(); + expected.children[0]->length = data.size(); + ArrowArrayViewSetLength(expected.children[0], data.size()); + expected.children[0]->buffer_views[0].data.data = nullptr; + expected.children[0]->buffer_views[0].size_bytes = 0; + expected.children[0]->buffer_views[1].data.data = data.begin(); + + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + BaseToArrowHostFixture::compare_arrays(&expected, &actual); + ArrowArrayViewReset(&actual); + + got_arrow_host = cudf::to_arrow_host(input_view.column(0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + BaseToArrowHostFixture::compare_arrays(expected.children[0], &actual); + ArrowArrayViewReset(&actual); + + ArrowArrayViewReset(&expected); +} + +TEST_F(ToArrowHostDeviceTest, NestedList) +{ + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3 != 0; }); + auto col = cudf::test::lists_column_wrapper( + {{{{{1, 2}, valids}, {{3, 4}, valids}, {5}}, {{6}, {{7, 8, 9}, valids}}}, valids}); + cudf::table_view input_view({col}); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + + NANOARROW_THROW_NOT_OK( + ArrowSchemaInitFromType(expected_schema->children[0], NANOARROW_TYPE_LIST)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = ARROW_FLAG_NULLABLE; + + NANOARROW_THROW_NOT_OK( + ArrowSchemaInitFromType(expected_schema->children[0]->children[0], NANOARROW_TYPE_LIST)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0]->children[0], "element")); + expected_schema->children[0]->children[0]->flags = 0; + + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType( + expected_schema->children[0]->children[0]->children[0], NANOARROW_TYPE_INT64)); + NANOARROW_THROW_NOT_OK( + ArrowSchemaSetName(expected_schema->children[0]->children[0]->children[0], "element")); + expected_schema->children[0]->children[0]->children[0]->flags = ARROW_FLAG_NULLABLE; + + auto got_arrow_host = cudf::to_arrow_host(input_view); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + auto list_arr = get_nanoarrow_list_array({6, 7, 8, 9}, {0, 1, 4}, {1, 0, 1, 1}); + std::vector offset{0, 0, 2}; + + ArrowBitmap mask; + ArrowBitmapInit(&mask); + NANOARROW_THROW_NOT_OK(ArrowBitmapReserve(&mask, 2)); + NANOARROW_THROW_NOT_OK(ArrowBitmapAppend(&mask, 0, 1)); + NANOARROW_THROW_NOT_OK(ArrowBitmapAppend(&mask, 1, 1)); + + nanoarrow::UniqueArray expected_arr; + EXPECT_EQ(NANOARROW_OK, + ArrowArrayInitFromSchema(expected_arr.get(), expected_schema.get(), nullptr)); + expected_arr->length = input_view.num_rows(); + expected_arr->null_count = 0; + + 
ArrowArraySetValidityBitmap(expected_arr->children[0], &mask); + expected_arr->children[0]->length = input_view.num_rows(); + expected_arr->children[0]->null_count = 1; + auto offset_buf = ArrowArrayBuffer(expected_arr->children[0], 1); + EXPECT_EQ( + NANOARROW_OK, + ArrowBufferAppend( + offset_buf, reinterpret_cast(offset.data()), offset.size() * sizeof(int32_t))); + list_arr.move(expected_arr->children[0]->children[0]); + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_arr.get(), nullptr)); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_arr.get(), nullptr)); + + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(&expected, &actual); + ArrowArrayViewReset(&actual); + + got_arrow_host = cudf::to_arrow_host(input_view.column(0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(expected.children[0], &actual); + ArrowArrayViewReset(&actual); + + ArrowArrayViewReset(&expected); +} + +TEST_F(ToArrowHostDeviceTest, StructColumn) +{ + // Create cudf table + auto nested_type_field_names = + std::vector>{{"string", "integral", "bool", "nested_list", "struct"}}; + auto str_col = + cudf::test::strings_column_wrapper{ + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Überwald"} + .release(); + auto str_col2 = + cudf::test::strings_column_wrapper{{"CUDF", "ROCKS", "EVERYWHERE"}, {0, 1, 0}}.release(); + int num_rows{str_col->size()}; + auto int_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25}}.release(); + auto int_col2 = + cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); + auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); + auto list_col = + cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) + .release(); + vector_of_columns cols2; + cols2.push_back(std::move(str_col2)); + cols2.push_back(std::move(int_col2)); + auto [null_mask, null_count] = + cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper{{true, true, false}}); + auto sub_struct_col = + cudf::make_structs_column(num_rows, std::move(cols2), null_count, std::move(*null_mask)); + vector_of_columns cols; + cols.push_back(std::move(str_col)); + cols.push_back(std::move(int_col)); + cols.push_back(std::move(bool_col)); + cols.push_back(std::move(list_col)); + cols.push_back(std::move(sub_struct_col)); + + auto struct_col = cudf::make_structs_column(num_rows, std::move(cols), 0, {}); + cudf::table_view input_view({struct_col->view()}); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + + ArrowSchemaInit(expected_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema->children[0], 5)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = 0; + + auto child = expected_schema->children[0]; + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(child->children[0], NANOARROW_TYPE_STRING)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(child->children[0], 
"string")); + child->children[0]->flags = 0; + + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(child->children[1], NANOARROW_TYPE_INT32)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(child->children[1], "integral")); + child->children[1]->flags = 0; + + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(child->children[2], NANOARROW_TYPE_BOOL)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(child->children[2], "bool")); + child->children[2]->flags = 0; + + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(child->children[3], NANOARROW_TYPE_LIST)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(child->children[3], "nested_list")); + child->children[3]->flags = 0; + NANOARROW_THROW_NOT_OK( + ArrowSchemaInitFromType(child->children[3]->children[0], NANOARROW_TYPE_LIST)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(child->children[3]->children[0], "element")); + child->children[3]->children[0]->flags = 0; + NANOARROW_THROW_NOT_OK( + ArrowSchemaInitFromType(child->children[3]->children[0]->children[0], NANOARROW_TYPE_INT64)); + NANOARROW_THROW_NOT_OK( + ArrowSchemaSetName(child->children[3]->children[0]->children[0], "element")); + child->children[3]->children[0]->children[0]->flags = 0; + + ArrowSchemaInit(child->children[4]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(child->children[4], 2)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(child->children[4], "struct")); + + NANOARROW_THROW_NOT_OK( + ArrowSchemaInitFromType(child->children[4]->children[0], NANOARROW_TYPE_STRING)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(child->children[4]->children[0], "string2")); + NANOARROW_THROW_NOT_OK( + ArrowSchemaInitFromType(child->children[4]->children[1], NANOARROW_TYPE_INT32)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(child->children[4]->children[1], "integral2")); + + // create nanoarrow table + // first our underlying arrays + std::vector str{"Samuel Vimes", "Carrot Ironfoundersson", "Angua von Überwald"}; + std::vector str2{"CUDF", "ROCKS", "EVERYWHERE"}; + auto str_array = get_nanoarrow_array(str); + auto int_array = get_nanoarrow_array({48, 27, 25}); + auto str2_array = get_nanoarrow_array(str2, {0, 1, 0}); + // struct null will get pushed down and superimposed on this array + auto int2_array = get_nanoarrow_array({12, 24, 47}, {1, 0, 0}); + auto bool_array = get_nanoarrow_array({true, true, false}); + auto list_arr = + get_nanoarrow_list_array({1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 2, 4, 5, 6, 7, 9}); + std::vector offset{0, 3, 4, 6}; + + nanoarrow::UniqueArray expected_arr; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(expected_arr.get(), expected_schema.get(), nullptr)); + expected_arr->length = input_view.num_rows(); + + auto array_a = expected_arr->children[0]; + auto view_a = input_view.column(0); + array_a->length = view_a.size(); + array_a->null_count = view_a.null_count(); + + str_array.move(array_a->children[0]); + int_array.move(array_a->children[1]); + bool_array.move(array_a->children[2]); + + array_a->children[3]->length = input_view.num_rows(); + array_a->children[3]->null_count = 0; + + auto offset_buf = ArrowArrayBuffer(array_a->children[3], 1); + EXPECT_EQ( + NANOARROW_OK, + ArrowBufferAppend( + offset_buf, reinterpret_cast(offset.data()), offset.size() * sizeof(int32_t))); + list_arr.move(array_a->children[3]->children[0]); + + ArrowBitmap mask; + ArrowBitmapInit(&mask); + NANOARROW_THROW_NOT_OK(ArrowBitmapReserve(&mask, 3)); + NANOARROW_THROW_NOT_OK(ArrowBitmapAppend(&mask, 1, 2)); + NANOARROW_THROW_NOT_OK(ArrowBitmapAppend(&mask, 0, 1)); + + auto array_struct = 
array_a->children[4]; + auto view_struct = view_a.child(4); + ArrowArraySetValidityBitmap(array_struct, &mask); + array_struct->null_count = view_struct.null_count(); + array_struct->length = view_struct.size(); + + str2_array.move(array_struct->children[0]); + int2_array.move(array_struct->children[1]); + + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_arr.get(), nullptr)); + + auto got_arrow_host = cudf::to_arrow_host(input_view); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_arr.get(), nullptr)); + + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(&expected, &actual); + ArrowArrayViewReset(&actual); + + got_arrow_host = cudf::to_arrow_host(input_view.column(0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(expected.children[0], &actual); + ArrowArrayViewReset(&actual); + + ArrowArrayViewReset(&expected); +} + +template +using fp_wrapper = cudf::test::fixed_point_column_wrapper; + +TEST_F(ToArrowHostDeviceTest, FixedPoint32Table) +{ + using namespace numeric; + + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto const col = fp_wrapper({-1, 2, 3, 4, 5, 6}, scale_type{scale}); + auto const input = cudf::table_view({col}); + + auto const data = std::vector<__int128_t>{-1, 2, 3, 4, 5, 6}; + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + ArrowSchemaInit(expected_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision(), + -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = 0; + + nanoarrow::UniqueArray expected_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); + expected_array->length = input.num_rows(); + + get_nanoarrow_array<__int128_t>(data).move(expected_array->children[0]); + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); + + auto got_arrow_host = cudf::to_arrow_host(input); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_array.get(), nullptr)); + + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(&expected, &actual); + ArrowArrayViewReset(&actual); + + got_arrow_host = cudf::to_arrow_host(input.column(0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayViewInitFromSchema(&actual, 
expected_schema->children[0], nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(expected.children[0], &actual); + ArrowArrayViewReset(&actual); + + ArrowArrayViewReset(&expected); + } +} + +TEST_F(ToArrowHostDeviceTest, FixedPoint64Table) +{ + using namespace numeric; + + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto const col = fp_wrapper({-1, 2, 3, 4, 5, 6}, scale_type{scale}); + auto const input = cudf::table_view({col}); + + auto const data = std::vector<__int128_t>{-1, 2, 3, 4, 5, 6}; + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + ArrowSchemaInit(expected_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision(), + -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = 0; + + nanoarrow::UniqueArray expected_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); + expected_array->length = input.num_rows(); + + get_nanoarrow_array<__int128_t>(data).move(expected_array->children[0]); + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); + + auto got_arrow_host = cudf::to_arrow_host(input); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_array.get(), nullptr)); + + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(&expected, &actual); + ArrowArrayViewReset(&actual); + + got_arrow_host = cudf::to_arrow_host(input.column(0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(expected.children[0], &actual); + ArrowArrayViewReset(&actual); + + ArrowArrayViewReset(&expected); + } +} + +TEST_F(ToArrowHostDeviceTest, FixedPoint128Table) +{ + using namespace numeric; + + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto const col = fp_wrapper<__int128_t>({-1, 2, 3, 4, 5, 6}, scale_type{scale}); + auto const input = cudf::table_view({col}); + + auto const data = std::vector<__int128_t>{-1, 2, 3, 4, 5, 6}; + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + ArrowSchemaInit(expected_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision<__int128_t>(), + -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = 0; + + nanoarrow::UniqueArray expected_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); + expected_array->length = input.num_rows(); + + 
get_nanoarrow_array<__int128_t>(data).move(expected_array->children[0]); + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); + + auto got_arrow_host = cudf::to_arrow_host(input); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_array.get(), nullptr)); + + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(&expected, &actual); + ArrowArrayViewReset(&actual); + + got_arrow_host = cudf::to_arrow_host(input.column(0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(expected.children[0], &actual); + ArrowArrayViewReset(&actual); + + ArrowArrayViewReset(&expected); + } +} + +TEST_F(ToArrowHostDeviceTest, FixedPoint32TableLarge) +{ + using namespace numeric; + auto constexpr NUM_ELEMENTS = 1000; + + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto const iota = thrust::make_counting_iterator(1); + auto const col = fp_wrapper(iota, iota + NUM_ELEMENTS, scale_type{scale}); + auto const input = cudf::table_view({col}); + + auto expect_data = std::vector<__int128_t>(NUM_ELEMENTS); + std::iota(expect_data.begin(), expect_data.end(), 1); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + ArrowSchemaInit(expected_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision(), + -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = 0; + + nanoarrow::UniqueArray expected_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); + expected_array->length = input.num_rows(); + + get_nanoarrow_array<__int128_t>(expect_data).move(expected_array->children[0]); + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); + + auto got_arrow_host = cudf::to_arrow_host(input); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_array.get(), nullptr)); + + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(&expected, &actual); + ArrowArrayViewReset(&actual); + + got_arrow_host = cudf::to_arrow_host(input.column(0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + 
compare_arrays(expected.children[0], &actual); + ArrowArrayViewReset(&actual); + + ArrowArrayViewReset(&expected); + } +} + +TEST_F(ToArrowHostDeviceTest, FixedPoint64TableLarge) +{ + using namespace numeric; + auto constexpr NUM_ELEMENTS = 1000; + + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto const iota = thrust::make_counting_iterator(1); + auto const col = fp_wrapper(iota, iota + NUM_ELEMENTS, scale_type{scale}); + auto const input = cudf::table_view({col}); + + auto expect_data = std::vector<__int128_t>(NUM_ELEMENTS); + std::iota(expect_data.begin(), expect_data.end(), 1); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + ArrowSchemaInit(expected_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision(), + -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = 0; + + nanoarrow::UniqueArray expected_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); + expected_array->length = input.num_rows(); + + get_nanoarrow_array<__int128_t>(expect_data).move(expected_array->children[0]); + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); + + auto got_arrow_host = cudf::to_arrow_host(input); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_array.get(), nullptr)); + + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(&expected, &actual); + ArrowArrayViewReset(&actual); + + got_arrow_host = cudf::to_arrow_host(input.column(0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(expected.children[0], &actual); + ArrowArrayViewReset(&actual); + + ArrowArrayViewReset(&expected); + } +} + +TEST_F(ToArrowHostDeviceTest, FixedPoint128TableLarge) +{ + using namespace numeric; + auto constexpr NUM_ELEMENTS = 1000; + + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto const iota = thrust::make_counting_iterator(1); + auto const col = fp_wrapper<__int128_t>(iota, iota + NUM_ELEMENTS, scale_type{scale}); + auto const input = cudf::table_view({col}); + + auto expect_data = std::vector<__int128_t>(NUM_ELEMENTS); + std::iota(expect_data.begin(), expect_data.end(), 1); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + ArrowSchemaInit(expected_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision<__int128_t>(), + -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = 0; + + 
nanoarrow::UniqueArray expected_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); + expected_array->length = input.num_rows(); + + get_nanoarrow_array<__int128_t>(expect_data).move(expected_array->children[0]); + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); + + auto got_arrow_host = cudf::to_arrow_host(input); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_array.get(), nullptr)); + + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(&expected, &actual); + ArrowArrayViewReset(&actual); + + got_arrow_host = cudf::to_arrow_host(input.column(0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(expected.children[0], &actual); + ArrowArrayViewReset(&actual); + + ArrowArrayViewReset(&expected); + } +} + +TEST_F(ToArrowHostDeviceTest, FixedPoint32TableNullsSimple) +{ + using namespace numeric; + + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto const data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0}; + auto const validity = std::vector{1, 1, 1, 1, 1, 1, 0, 0}; + auto const col = + fp_wrapper({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale}); + auto const input = cudf::table_view({col}); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + ArrowSchemaInit(expected_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision(), + -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = 0; + + nanoarrow::UniqueArray expected_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); + expected_array->length = input.num_rows(); + + get_nanoarrow_array<__int128_t>(data, validity).move(expected_array->children[0]); + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); + + auto got_arrow_host = cudf::to_arrow_host(input); + EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type); + EXPECT_EQ(-1, got_arrow_host->device_id); + EXPECT_EQ(nullptr, got_arrow_host->sync_event); + + ArrowArrayView expected, actual; + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_array.get(), nullptr)); + + NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr)); + compare_arrays(&expected, &actual); + ArrowArrayViewReset(&actual); + + got_arrow_host = cudf::to_arrow_host(input.column(0)); + NANOARROW_THROW_NOT_OK( + 
+      ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr));
+    NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr));
+    compare_arrays(expected.children[0], &actual);
+    ArrowArrayViewReset(&actual);
+
+    ArrowArrayViewReset(&expected);
+  }
+}
+
+TEST_F(ToArrowHostDeviceTest, FixedPoint64TableNullsSimple)
+{
+  using namespace numeric;
+
+  for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
+    auto const data     = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0};
+    auto const validity = std::vector<uint8_t>{1, 1, 1, 1, 1, 1, 0, 0};
+    auto const col =
+      fp_wrapper<int64_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale});
+    auto const input = cudf::table_view({col});
+
+    nanoarrow::UniqueSchema expected_schema;
+    ArrowSchemaInit(expected_schema.get());
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1));
+    ArrowSchemaInit(expected_schema->children[0]);
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0],
+                                                     NANOARROW_TYPE_DECIMAL128,
+                                                     cudf::detail::max_precision<int64_t>(),
+                                                     -scale));
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a"));
+    expected_schema->children[0]->flags = 0;
+
+    nanoarrow::UniqueArray expected_array;
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr));
+    expected_array->length = input.num_rows();
+
+    get_nanoarrow_array<__int128_t>(data, validity).move(expected_array->children[0]);
+    NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr));
+
+    auto got_arrow_host = cudf::to_arrow_host(input);
+    EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type);
+    EXPECT_EQ(-1, got_arrow_host->device_id);
+    EXPECT_EQ(nullptr, got_arrow_host->sync_event);
+
+    ArrowArrayView expected, actual;
+    NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr));
+    NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_array.get(), nullptr));
+
+    NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr));
+    NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr));
+    compare_arrays(&expected, &actual);
+    ArrowArrayViewReset(&actual);
+
+    got_arrow_host = cudf::to_arrow_host(input.column(0));
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr));
+    NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr));
+    compare_arrays(expected.children[0], &actual);
+    ArrowArrayViewReset(&actual);
+
+    ArrowArrayViewReset(&expected);
+  }
+}
+
+TEST_F(ToArrowHostDeviceTest, FixedPoint128TableNullsSimple)
+{
+  using namespace numeric;
+
+  for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
+    auto const data     = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0};
+    auto const validity = std::vector<uint8_t>{1, 1, 1, 1, 1, 1, 0, 0};
+    auto const col =
+      fp_wrapper<__int128_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale});
+    auto const input = cudf::table_view({col});
+
+    nanoarrow::UniqueSchema expected_schema;
+    ArrowSchemaInit(expected_schema.get());
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1));
+    ArrowSchemaInit(expected_schema->children[0]);
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0],
+                                                     NANOARROW_TYPE_DECIMAL128,
+                                                     cudf::detail::max_precision<__int128_t>(),
+                                                     -scale));
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a"));
+    expected_schema->children[0]->flags = 0;
+
+    nanoarrow::UniqueArray expected_array;
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr));
+    expected_array->length = input.num_rows();
+
+    get_nanoarrow_array<__int128_t>(data, validity).move(expected_array->children[0]);
+    NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr));
+
+    auto got_arrow_host = cudf::to_arrow_host(input);
+    EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type);
+    EXPECT_EQ(-1, got_arrow_host->device_id);
+    EXPECT_EQ(nullptr, got_arrow_host->sync_event);
+
+    ArrowArrayView expected, actual;
+    NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr));
+    NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_array.get(), nullptr));
+
+    NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr));
+    NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr));
+    compare_arrays(&expected, &actual);
+    ArrowArrayViewReset(&actual);
+
+    got_arrow_host = cudf::to_arrow_host(input.column(0));
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayViewInitFromSchema(&actual, expected_schema->children[0], nullptr));
+    NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr));
+    compare_arrays(expected.children[0], &actual);
+    ArrowArrayViewReset(&actual);
+
+    ArrowArrayViewReset(&expected);
+  }
+}
+
+struct ToArrowHostDeviceTestSlice
+  : public ToArrowHostDeviceTest,
+    public ::testing::WithParamInterface<std::tuple<cudf::size_type, cudf::size_type>> {};
+
+TEST_P(ToArrowHostDeviceTestSlice, SliceTest)
+{
+  auto [table, expected_schema, expected_array] = get_nanoarrow_host_tables(10000);
+  auto cudf_table_view                          = table->view();
+  auto const [start, end]                       = GetParam();
+
+  slice_host_nanoarrow(expected_array.get(), start, end);
+  auto sliced_cudf_table = cudf::slice(cudf_table_view, {start, end})[0];
+  auto got_arrow_host    = cudf::to_arrow_host(sliced_cudf_table);
+  EXPECT_EQ(ARROW_DEVICE_CPU, got_arrow_host->device_type);
+  EXPECT_EQ(-1, got_arrow_host->device_id);
+  EXPECT_EQ(nullptr, got_arrow_host->sync_event);
+
+  ArrowArrayView expected, actual;
+  NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&expected, expected_schema.get(), nullptr));
+  NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&expected, expected_array.get(), nullptr));
+
+  NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, expected_schema.get(), nullptr));
+  NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got_arrow_host->array, nullptr));
+  compare_arrays(&expected, &actual);
+  ArrowArrayViewReset(&actual);
+
+  ArrowArrayViewReset(&expected);
+}
+
+INSTANTIATE_TEST_CASE_P(ToArrowHostDeviceTest,
+                        ToArrowHostDeviceTestSlice,
+                        ::testing::Values(std::make_tuple(0, 10000),
+                                          std::make_tuple(100, 3000),
+                                          std::make_tuple(0, 0),
+                                          std::make_tuple(0, 3000)));
diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp
index a1ece0ce0f1..328ba210a3f 100644
--- a/cpp/tests/interop/to_arrow_test.cpp
+++ b/cpp/tests/interop/to_arrow_test.cpp
@@ -14,6 +14,13 @@
  * limitations under the License.
  */
 
+// These interop functions are deprecated. We keep the code in this
+// test and will migrate the tests to export via the arrow C data
+// interface with to_arrow_host, which arrow can consume. For now, the
+// test is commented out.
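+//
+// A minimal sketch of the migrated shape, reusing only calls already
+// exercised in to_arrow_host_device_test.cpp above (illustrative, not the
+// final test code):
+//
+//   auto got = cudf::to_arrow_host(input);  // ArrowDeviceArray on the CPU
+//   ArrowArrayView actual;
+//   NANOARROW_THROW_NOT_OK(ArrowArrayViewInitFromSchema(&actual, schema.get(), nullptr));
+//   NANOARROW_THROW_NOT_OK(ArrowArrayViewSetArray(&actual, &got->array, nullptr));
+//   compare_arrays(&expected, &actual);  // expected view built with nanoarrow as above
+//   ArrowArrayViewReset(&actual);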
+ +#if 0 + #include #include @@ -196,6 +203,7 @@ TEST_F(ToArrowTest, DateTimeTable) std::vector> schema_vector({arrow::field("a", arr->type())}); auto schema = std::make_shared(schema_vector); + auto expected_arrow_table = arrow::Table::Make(schema, {arr}); auto got_arrow_table = cudf::to_arrow(input_view, {{"a"}}); @@ -685,3 +693,5 @@ TEST_F(ToArrowStructScalarTest, Basic) } CUDF_TEST_PROGRAM_MAIN() + +#endif diff --git a/cpp/tests/io/fst/common.hpp b/cpp/tests/io/fst/common.hpp index 382d21fabb8..0177300eda9 100644 --- a/cpp/tests/io/fst/common.hpp +++ b/cpp/tests/io/fst/common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,6 +69,8 @@ std::array, TT_NUM_STATES> const pda_s /* TT_ESC */ {{TT_STR, TT_STR, TT_STR, TT_STR, TT_STR, TT_STR, TT_STR}}}}; // Translation table (i.e., for each transition, what are the symbols that we output) +static constexpr auto min_translated_out = 1; +static constexpr auto max_translated_out = 1; std::array, NUM_SYMBOL_GROUPS>, TT_NUM_STATES> const pda_out_tt{ {/* IN_STATE { [ } ] " \ OTHER */ /* TT_OOS */ {{{'{'}, {'['}, {'}'}, {']'}, {'x'}, {'x'}, {'x'}}}, diff --git a/cpp/tests/io/fst/fst_test.cu b/cpp/tests/io/fst/fst_test.cu index 4df0d3ae04d..8a8d3d39e0f 100644 --- a/cpp/tests/io/fst/fst_test.cu +++ b/cpp/tests/io/fst/fst_test.cu @@ -169,7 +169,9 @@ TEST_F(FstTest, GroundTruth) auto parser = cudf::io::fst::detail::make_fst( cudf::io::fst::detail::make_symbol_group_lut(pda_sgs), cudf::io::fst::detail::make_transition_table(pda_state_tt), - cudf::io::fst::detail::make_translation_table(pda_out_tt), + cudf::io::fst::detail::make_translation_table(pda_out_tt), stream); // Allocate device-side temporary storage & run algorithm diff --git a/cpp/tests/io/json_chunked_reader.cpp b/cpp/tests/io/json/json_chunked_reader.cu similarity index 64% rename from cpp/tests/io/json_chunked_reader.cpp rename to cpp/tests/io/json/json_chunked_reader.cu index 23d54f7263c..b9dee54752c 100644 --- a/cpp/tests/io/json_chunked_reader.cpp +++ b/cpp/tests/io/json/json_chunked_reader.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "io/json/read_json.hpp" +#include "json_utils.cuh" #include #include @@ -37,65 +37,6 @@ cudf::test::TempDirTestEnvironment* const temp_env = static_cast( ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); -// function to extract first delimiter in the string in each chunk, -// collate together and form byte_range for each chunk, -// parse separately. -std::vector skeleton_for_parellel_chunk_reader( - cudf::host_span> sources, - cudf::io::json_reader_options const& reader_opts, - int32_t chunk_size, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - using namespace cudf::io::json::detail; - using cudf::size_type; - size_t total_source_size = 0; - for (auto const& source : sources) { - total_source_size += source->size(); - } - size_t num_chunks = (total_source_size + chunk_size - 1) / chunk_size; - constexpr size_type no_min_value = -1; - - // Get the first delimiter in each chunk. 
- std::vector first_delimiter_index(num_chunks); - auto reader_opts_chunk = reader_opts; - for (size_t i = 0; i < num_chunks; i++) { - auto const chunk_start = i * chunk_size; - reader_opts_chunk.set_byte_range_offset(chunk_start); - reader_opts_chunk.set_byte_range_size(chunk_size); - first_delimiter_index[i] = - find_first_delimiter_in_chunk(sources, reader_opts_chunk, '\n', stream); - if (first_delimiter_index[i] != no_min_value) { first_delimiter_index[i] += chunk_start; } - } - - // Process and allocate record start, end for each worker. - using record_range = std::pair; - std::vector record_ranges; - record_ranges.reserve(num_chunks); - first_delimiter_index[0] = 0; - auto prev = first_delimiter_index[0]; - for (size_t i = 1; i < num_chunks; i++) { - if (first_delimiter_index[i] == no_min_value) continue; - record_ranges.emplace_back(prev, first_delimiter_index[i]); - prev = first_delimiter_index[i]; - } - record_ranges.emplace_back(prev, total_source_size); - - std::vector tables; - // Process each chunk in parallel. - for (auto const& [chunk_start, chunk_end] : record_ranges) { - if (chunk_start == -1 or chunk_end == -1 or - static_cast(chunk_start) >= total_source_size) - continue; - reader_opts_chunk.set_byte_range_offset(chunk_start); - reader_opts_chunk.set_byte_range_size(chunk_end - chunk_start); - tables.push_back(read_json(sources, reader_opts_chunk, stream, mr)); - } - // assume all records have same number of columns, and inferred same type. (or schema is passed) - // TODO a step before to merge all columns, types and infer final schema. - return tables; -} - TEST_F(JsonReaderTest, ByteRange_SingleSource) { std::string const json_string = R"( @@ -118,11 +59,11 @@ TEST_F(JsonReaderTest, ByteRange_SingleSource) // Test for different chunk sizes for (auto chunk_size : {7, 10, 15, 20, 40, 50, 100, 200, 500}) { - auto const tables = skeleton_for_parellel_chunk_reader(datasources, - json_lines_options, - chunk_size, - cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + auto const tables = split_byte_range_reading(datasources, + json_lines_options, + chunk_size, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); auto table_views = std::vector(tables.size()); std::transform(tables.begin(), tables.end(), table_views.begin(), [](auto& table) { @@ -213,11 +154,11 @@ TEST_F(JsonReaderTest, ByteRange_MultiSource) // Test for different chunk sizes for (auto chunk_size : {7, 10, 15, 20, 40, 50, 100, 200, 500, 1000, 2000}) { - auto const tables = skeleton_for_parellel_chunk_reader(datasources, - json_lines_options, - chunk_size, - cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + auto const tables = split_byte_range_reading(datasources, + json_lines_options, + chunk_size, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); auto table_views = std::vector(tables.size()); std::transform(tables.begin(), tables.end(), table_views.begin(), [](auto& table) { diff --git a/cpp/tests/io/json_quote_normalization_test.cpp b/cpp/tests/io/json/json_quote_normalization_test.cpp similarity index 100% rename from cpp/tests/io/json_quote_normalization_test.cpp rename to cpp/tests/io/json/json_quote_normalization_test.cpp diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json/json_test.cpp similarity index 100% rename from cpp/tests/io/json_test.cpp rename to cpp/tests/io/json/json_test.cpp diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json/json_tree.cpp similarity index 99% rename from 
cpp/tests/io/json_tree.cpp
rename to cpp/tests/io/json/json_tree.cpp
index 7a72b77e1fb..8bcd5790e99 100644
--- a/cpp/tests/io/json_tree.cpp
+++ b/cpp/tests/io/json/json_tree.cpp
@@ -235,10 +235,8 @@ tree_meta_t2 get_tree_representation_cpu(
 {
   constexpr bool include_quote_char = true;
   // Copy the JSON tokens to the host
-  thrust::host_vector<PdaTokenT> tokens =
-    cudf::detail::make_host_vector_async(tokens_gpu, stream);
-  thrust::host_vector<SymbolOffsetT> token_indices =
-    cudf::detail::make_host_vector_async(token_indices_gpu1, stream);
+  auto tokens        = cudf::detail::make_host_vector_async(tokens_gpu, stream);
+  auto token_indices = cudf::detail::make_host_vector_async(token_indices_gpu1, stream);
 
   // Make sure tokens have been copied to the host
   stream.synchronize();
diff --git a/cpp/tests/io/json_type_cast_test.cu b/cpp/tests/io/json/json_type_cast_test.cu
similarity index 100%
rename from cpp/tests/io/json_type_cast_test.cu
rename to cpp/tests/io/json/json_type_cast_test.cu
diff --git a/cpp/tests/io/json/json_utils.cuh b/cpp/tests/io/json/json_utils.cuh
new file mode 100644
index 00000000000..9383797d91b
--- /dev/null
+++ b/cpp/tests/io/json/json_utils.cuh
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "io/json/read_json.hpp"
+
+#include <cudf/io/datasource.hpp>
+#include <cudf/io/json.hpp>
+#include <cudf/io/types.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <thrust/find.h>
+
+// Helper function to test correctness of JSON byte range reading.
+// We split the input source files into a set of byte range chunks, each of size
+// `chunk_size`, and return an array of partial tables constructed from each chunk.
+template <typename IndexType>
+std::vector<cudf::io::table_with_metadata> split_byte_range_reading(
+  cudf::host_span<std::unique_ptr<cudf::io::datasource>> sources,
+  cudf::io::json_reader_options const& reader_opts,
+  IndexType chunk_size,
+  rmm::cuda_stream_view stream,
+  rmm::device_async_resource_ref mr)
+{
+  auto total_source_size = [&sources]() {
+    return std::accumulate(sources.begin(), sources.end(), 0ul, [=](size_t sum, auto& source) {
+      auto const size = source->size();
+      return sum + size;
+    });
+  }();
+  auto find_first_delimiter_in_chunk =
+    [total_source_size, &sources, &stream](
+      cudf::io::json_reader_options const& reader_opts) -> IndexType {
+    rmm::device_uvector<char> buffer(total_source_size, stream);
+    auto readbufspan = cudf::io::json::detail::ingest_raw_input(buffer,
+                                                                sources,
+                                                                reader_opts.get_compression(),
+                                                                reader_opts.get_byte_range_offset(),
+                                                                reader_opts.get_byte_range_size(),
+                                                                stream);
+    // Note: we cannot reuse cudf::io::json::detail::find_first_delimiter since the
+    // return type of that function is size_type. However, when the chunk_size is
+    // larger than INT_MAX, the position of the delimiter can also be larger than
+    // INT_MAX. We do not encounter this overflow error in the detail function
+    // since the batched JSON reader splits the byte_range_size into chunk_sizes
+    // smaller than INT_MAX bytes.
+    auto const first_delimiter_position_it =
+      thrust::find(rmm::exec_policy(stream), readbufspan.begin(), readbufspan.end(), '\n');
+    return first_delimiter_position_it != readbufspan.end()
+             ? thrust::distance(readbufspan.begin(), first_delimiter_position_it)
+             : -1;
+  };
+  size_t num_chunks                = (total_source_size + chunk_size - 1) / chunk_size;
+  constexpr IndexType no_min_value = -1;
+
+  // Get the first delimiter in each chunk.
+  std::vector<IndexType> first_delimiter_index(num_chunks);
+  auto reader_opts_chunk = reader_opts;
+  for (size_t i = 0; i < num_chunks; i++) {
+    auto const chunk_start = i * chunk_size;
+    // We update reader_opts_chunk to store the offset and size of the current chunk
+    reader_opts_chunk.set_byte_range_offset(chunk_start);
+    reader_opts_chunk.set_byte_range_size(chunk_size);
+    first_delimiter_index[i] = find_first_delimiter_in_chunk(reader_opts_chunk);
+  }
+
+  // Process and allocate record start, end for each worker.
+  using record_range = std::pair<size_t, size_t>;
+  std::vector<record_range> record_ranges;
+  record_ranges.reserve(num_chunks);
+  size_t prev = 0;
+  for (size_t i = 1; i < num_chunks; i++) {
+    // In the case where chunk_size is smaller than the row size, the chunk needs to be skipped
+    if (first_delimiter_index[i] == no_min_value) continue;
+    size_t next = static_cast<size_t>(first_delimiter_index[i]) + (i * chunk_size);
+    record_ranges.emplace_back(prev, next);
+    prev = next;
+  }
+  record_ranges.emplace_back(prev, total_source_size);
+
+  std::vector<cudf::io::table_with_metadata> tables;
+  for (auto const& [chunk_start, chunk_end] : record_ranges) {
+    reader_opts_chunk.set_byte_range_offset(chunk_start);
+    reader_opts_chunk.set_byte_range_size(chunk_end - chunk_start);
+    tables.push_back(cudf::io::json::detail::read_json(sources, reader_opts_chunk, stream, mr));
+  }
+  // assume all records have the same number of columns and infer the same types (or a schema is passed)
+  // TODO a step before to merge all columns, types and infer final schema.
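+  // The caller is expected to stitch the partial tables back together, e.g.
+  // (as the byte-range tests in json_chunked_reader.cu do):
+  //   std::vector<cudf::table_view> views(tables.size());
+  //   std::transform(tables.begin(), tables.end(), views.begin(),
+  //                  [](auto& table) { return table.tbl->view(); });
+  //   auto result = cudf::concatenate(views);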
+ return tables; +} diff --git a/cpp/tests/io/json_whitespace_normalization_test.cu b/cpp/tests/io/json/json_whitespace_normalization_test.cu similarity index 100% rename from cpp/tests/io/json_whitespace_normalization_test.cu rename to cpp/tests/io/json/json_whitespace_normalization_test.cu diff --git a/cpp/tests/io/json_writer.cpp b/cpp/tests/io/json/json_writer.cpp similarity index 100% rename from cpp/tests/io/json_writer.cpp rename to cpp/tests/io/json/json_writer.cpp diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/json/nested_json_test.cpp similarity index 100% rename from cpp/tests/io/nested_json_test.cpp rename to cpp/tests/io/json/nested_json_test.cpp diff --git a/cpp/tests/io/parquet_chunked_reader_test.cu b/cpp/tests/io/parquet_chunked_reader_test.cu index cff85647725..66b36aeed63 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cu +++ b/cpp/tests/io/parquet_chunked_reader_test.cu @@ -149,6 +149,33 @@ auto chunked_read(std::string const& filepath, return chunked_read(vpath, output_limit, input_limit); } +auto const read_table_and_nrows_per_source(cudf::io::chunked_parquet_reader const& reader) +{ + auto out_tables = std::vector>{}; + int num_chunks = 0; + auto nrows_per_source = std::vector{}; + while (reader.has_next()) { + auto chunk = reader.read_chunk(); + out_tables.emplace_back(std::move(chunk.tbl)); + num_chunks++; + if (nrows_per_source.empty()) { + nrows_per_source = std::move(chunk.metadata.num_rows_per_source); + } else { + std::transform(chunk.metadata.num_rows_per_source.cbegin(), + chunk.metadata.num_rows_per_source.cend(), + nrows_per_source.begin(), + nrows_per_source.begin(), + std::plus()); + } + } + auto out_tviews = std::vector{}; + for (auto const& tbl : out_tables) { + out_tviews.emplace_back(tbl->view()); + } + + return std::tuple(cudf::concatenate(out_tviews), num_chunks, nrows_per_source); +} + } // namespace struct ParquetChunkedReaderTest : public cudf::test::BaseFixture {}; @@ -1477,3 +1504,370 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadOutOfBoundChunks) CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } } + +TEST_F(ParquetChunkedReaderTest, TestNumRowsPerSource) +{ + constexpr int num_rows = 10'723; // A prime number + constexpr int rows_in_row_group = 500; + + // Table with single col of random int64 values + auto const int64_data = random_values(num_rows); + auto int64_col = int64s_col(int64_data.begin(), int64_data.end()).release(); + + std::vector> input_columns; + input_columns.emplace_back(std::move(int64_col)); + + // Write to Parquet + auto const [expected, filepath] = write_file(input_columns, + "num_rows_per_source", + false, + false, + cudf::io::default_max_page_size_bytes, + rows_in_row_group); + + // Chunked-read single data source entirely + { + auto constexpr output_read_limit = 1'500; + auto constexpr pass_read_limit = 3'500; + + auto const options = + cudf::io::parquet_reader_options_builder(cudf::io::source_info{filepath}).build(); + auto const reader = cudf::io::chunked_parquet_reader( + output_read_limit, pass_read_limit, options, cudf::get_default_stream()); + + auto const [result, num_chunks, num_rows_per_source] = read_table_and_nrows_per_source(reader); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result->view()); + EXPECT_EQ(num_rows_per_source.size(), 1); + EXPECT_EQ(num_rows_per_source[0], num_rows); + } + + // Chunked-read rows_to_read rows skipping rows_to_skip from single data source + { + // TODO: rows_to_skip = 0 until https://github.com/rapidsai/cudf/issues/16186 is resolved 
+ auto const rows_to_skip = 0; // 1'237 + auto const rows_to_read = 7'232; + auto constexpr output_read_limit = 1'500; + auto constexpr pass_read_limit = 3'500; + + auto const options = cudf::io::parquet_reader_options_builder(cudf::io::source_info{filepath}) + .skip_rows(rows_to_skip) + .num_rows(rows_to_read) + .build(); + auto const reader = cudf::io::chunked_parquet_reader( + output_read_limit, pass_read_limit, options, cudf::get_default_stream()); + + auto const [result, num_chunks, num_rows_per_source] = read_table_and_nrows_per_source(reader); + + auto int64_col_selected = int64s_col(int64_data.begin() + rows_to_skip, + int64_data.begin() + rows_to_skip + rows_to_read) + .release(); + + cudf::table_view const expected_selected({int64_col_selected->view()}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_selected, result->view()); + EXPECT_EQ(num_rows_per_source.size(), 1); + EXPECT_EQ(num_rows_per_source[0], rows_to_read); + } + + // Chunked-read two data sources skipping the first entire file completely + { + // TODO: rows_to_skip = 0 until https://github.com/rapidsai/cudf/issues/16186 is resolved + auto constexpr rows_to_skip = 0; // 15'723; + auto constexpr output_read_limit = 1'024'000; + auto constexpr pass_read_limit = 1'024'000; + + auto constexpr nsources = 2; + std::vector const datasources(nsources, filepath); + + auto const options = + cudf::io::parquet_reader_options_builder(cudf::io::source_info{datasources}) + .skip_rows(rows_to_skip) + .build(); + + auto const reader = cudf::io::chunked_parquet_reader( + output_read_limit, pass_read_limit, options, cudf::get_default_stream()); + + auto const [result, num_chunks, num_rows_per_source] = read_table_and_nrows_per_source(reader); + + // TODO: Enable code inside /* */ when https://github.com/rapidsai/cudf/issues/16186 is resolved + auto int64_col_selected = + int64s_col(int64_data.begin() /* + rows_to_skip - num_rows */, int64_data.end()).release(); + + cudf::table_view const expected_selected({int64_col_selected->view()}); + + // TODO: Enable the following check when https://github.com/rapidsai/cudf/issues/16186 + // is resolved + // CUDF_TEST_EXPECT_TABLES_EQUAL(expected_selected, result->view()); + + EXPECT_EQ(num_rows_per_source.size(), 2); + EXPECT_EQ(num_rows_per_source[0], num_rows /* 0 */); + EXPECT_EQ(num_rows_per_source[1], num_rows /* nsources * num_rows - rows_to_skip */); + } + + // Chunked-read from single data source skipping rows_to_skip + { + // TODO: rows_to_skip = 0 until https://github.com/rapidsai/cudf/issues/16186 is resolved + auto const rows_to_skip = 0; // 1'237; + auto constexpr output_read_limit = 1'500; + auto constexpr pass_read_limit = 1'800; + + auto const options = cudf::io::parquet_reader_options_builder(cudf::io::source_info{filepath}) + .skip_rows(rows_to_skip) + .build(); + auto const reader = cudf::io::chunked_parquet_reader( + output_read_limit, pass_read_limit, options, cudf::get_default_stream()); + + auto const [result, num_chunks, num_rows_per_source] = read_table_and_nrows_per_source(reader); + + auto int64_col_selected = + int64s_col(int64_data.begin() + rows_to_skip, int64_data.end()).release(); + + cudf::table_view const expected_selected({int64_col_selected->view()}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_selected, result->view()); + EXPECT_EQ(num_rows_per_source.size(), 1); + EXPECT_EQ(num_rows_per_source[0], num_rows - rows_to_skip); + } + + // Filtered chunked-read from single data source + { + int64_t const max_value = 
int64_data[int64_data.size() / 2]; + auto constexpr output_read_limit = 1'500; + auto constexpr pass_read_limit = 3'500; + auto literal_value = cudf::numeric_scalar{max_value}; + auto literal = cudf::ast::literal{literal_value}; + auto col_ref = cudf::ast::column_reference(0); + auto filter_expression = + cudf::ast::operation(cudf::ast::ast_operator::LESS_EQUAL, col_ref, literal); + + auto const options = cudf::io::parquet_reader_options_builder(cudf::io::source_info{filepath}) + .filter(filter_expression) + .build(); + auto const reader = cudf::io::chunked_parquet_reader( + output_read_limit, pass_read_limit, options, cudf::get_default_stream()); + + auto const [result, num_chunks, num_rows_per_source] = read_table_and_nrows_per_source(reader); + + std::vector int64_data_filtered; + int64_data_filtered.reserve(num_rows); + std::copy_if( + int64_data.begin(), int64_data.end(), std::back_inserter(int64_data_filtered), [=](auto val) { + return val <= max_value; + }); + + auto int64_col_filtered = + int64s_col(int64_data_filtered.begin(), int64_data_filtered.end()).release(); + + cudf::table_view expected_filtered({int64_col_filtered->view()}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_filtered, result->view()); + EXPECT_TRUE(num_rows_per_source.empty()); + } +} + +TEST_F(ParquetChunkedReaderTest, TestNumRowsPerSourceMultipleSources) +{ + constexpr int num_rows = 10'723; // A prime number + constexpr int rows_in_row_group = 500; + + // Table with single col of random int64 values + auto const int64_data = random_values(num_rows); + auto int64_col = int64s_col(int64_data.begin(), int64_data.end()).release(); + + std::vector> input_columns; + input_columns.emplace_back(std::move(int64_col)); + + // Write to Parquet + auto const [expected, filepath] = write_file(input_columns, + "num_rows_per_source", + false, + false, + cudf::io::default_max_page_size_bytes, + rows_in_row_group); + + // Function to initialize a vector of expected counts per source + auto initialize_expected_counts = + [](int const nsources, int const num_rows, int const rows_to_skip, int const rows_to_read) { + // Initialize expected_counts + std::vector expected_counts(nsources, num_rows); + + // Adjust expected_counts for rows_to_skip + int64_t counter = 0; + for (auto& nrows : expected_counts) { + if (counter < rows_to_skip) { + counter += nrows; + nrows = (counter >= rows_to_skip) ? counter - rows_to_skip : 0; + } else { + break; + } + } + + // Reset the counter + counter = 0; + + // Adjust expected_counts for rows_to_read + for (auto& nrows : expected_counts) { + if (counter < rows_to_read) { + counter += nrows; + nrows = (counter >= rows_to_read) ? 
rows_to_read - counter + nrows : nrows; + } else if (counter > rows_to_read) { + nrows = 0; + } + } + + return expected_counts; + }; + + // Chunked-read six data sources entirely + { + auto const nsources = 6; + auto constexpr output_read_limit = 15'000; + auto constexpr pass_read_limit = 35'000; + std::vector const datasources(nsources, filepath); + + auto const options = + cudf::io::parquet_reader_options_builder(cudf::io::source_info{datasources}).build(); + auto const reader = cudf::io::chunked_parquet_reader( + output_read_limit, pass_read_limit, options, cudf::get_default_stream()); + + auto const [result, num_chunks, num_rows_per_source] = read_table_and_nrows_per_source(reader); + + // Initialize expected_counts + std::vector const expected_counts(nsources, num_rows); + + EXPECT_EQ(num_rows_per_source.size(), nsources); + EXPECT_TRUE( + std::equal(expected_counts.cbegin(), expected_counts.cend(), num_rows_per_source.cbegin())); + } + + // Chunked-read rows_to_read rows skipping rows_to_skip from eight data sources + { + // TODO: rows_to_skip = 0 until https://github.com/rapidsai/cudf/issues/16186 is resolved + auto const rows_to_skip = 0; // 25'571; + auto const rows_to_read = 41'232; + auto constexpr output_read_limit = 15'000; + auto constexpr pass_read_limit = 35'000; + auto const nsources = 8; + std::vector int64_selected_data{}; + int64_selected_data.reserve(nsources * num_rows); + + std::for_each( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(nsources), + [&](auto const i) { + std::copy(int64_data.begin(), int64_data.end(), std::back_inserter(int64_selected_data)); + }); + + std::vector const datasources(nsources, filepath); + + auto const options = + cudf::io::parquet_reader_options_builder(cudf::io::source_info{datasources}) + .skip_rows(rows_to_skip) + .num_rows(rows_to_read) + .build(); + auto const reader = cudf::io::chunked_parquet_reader( + output_read_limit, pass_read_limit, options, cudf::get_default_stream()); + + auto const [result, num_chunks, num_rows_per_source] = read_table_and_nrows_per_source(reader); + + // Initialize expected_counts + auto const expected_counts = + initialize_expected_counts(nsources, num_rows, rows_to_skip, rows_to_read); + + // Initialize expected table + auto int64_col_selected = int64s_col(int64_selected_data.begin() + rows_to_skip, + int64_selected_data.begin() + +rows_to_skip + rows_to_read) + .release(); + + cudf::table_view const expected_selected({int64_col_selected->view()}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_selected, result->view()); + EXPECT_EQ(num_rows_per_source.size(), nsources); + EXPECT_TRUE( + std::equal(expected_counts.cbegin(), expected_counts.cend(), num_rows_per_source.cbegin())); + } + + // Chunked-read four data sources skipping three files completely + { + auto const nsources = 4; + // TODO: rows_to_skip = 0 until https://github.com/rapidsai/cudf/issues/16186 is resolved + int constexpr rows_to_skip = 0; // num_rows * 3 + 1; + auto constexpr output_read_limit = 15'000; + auto constexpr pass_read_limit = 35'000; + std::vector int64_selected_data{}; + int64_selected_data.reserve(nsources * num_rows); + + std::for_each( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(nsources), + [&](auto const i) { + std::copy(int64_data.begin(), int64_data.end(), std::back_inserter(int64_selected_data)); + }); + + std::vector const datasources(nsources, filepath); + auto const options = + 
cudf::io::parquet_reader_options_builder(cudf::io::source_info{datasources}) + .skip_rows(rows_to_skip) + .build(); + auto const reader = cudf::io::chunked_parquet_reader( + output_read_limit, pass_read_limit, options, cudf::get_default_stream()); + + auto const [result, num_chunks, num_rows_per_source] = read_table_and_nrows_per_source(reader); + + // Initialize expected_counts + auto const expected_counts = + initialize_expected_counts(nsources, num_rows, rows_to_skip, num_rows * nsources); + + // Initialize expected table + auto int64_col_selected = + int64s_col(int64_selected_data.begin() + rows_to_skip, int64_selected_data.end()).release(); + + cudf::table_view const expected_selected({int64_col_selected->view()}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_selected, result->view()); + EXPECT_EQ(num_rows_per_source.size(), nsources); + EXPECT_TRUE( + std::equal(expected_counts.cbegin(), expected_counts.cend(), num_rows_per_source.cbegin())); + } +} + +TEST_F(ParquetChunkedReaderTest, TestNumRowsPerSourceEmptyTable) +{ + auto constexpr output_read_limit = 4'500; + auto constexpr pass_read_limit = 8'500; + auto const nsources = 10; + + // Table with single col of random int64 values + auto int64_empty_col = int64s_col{}.release(); + + std::vector> input_empty_columns; + input_empty_columns.emplace_back(std::move(int64_empty_col)); + + // Write to Parquet + auto const [expected_empty, filepath_empty] = write_file(input_empty_columns, + "num_rows_per_source_empty", + false, + false, + cudf::io::default_max_page_size_bytes, + 500); + + std::vector const datasources(nsources, filepath_empty); + + auto const options = + cudf::io::parquet_reader_options_builder(cudf::io::source_info{datasources}).build(); + auto const reader = cudf::io::chunked_parquet_reader( + output_read_limit, pass_read_limit, options, cudf::get_default_stream()); + + auto const [result, num_chunks, num_rows_per_source] = read_table_and_nrows_per_source(reader); + + // Initialize expected_counts + std::vector const expected_counts(nsources, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_empty->view(), result->view()); + + EXPECT_EQ(num_chunks, 1); + EXPECT_EQ(num_rows_per_source.size(), nsources); + EXPECT_TRUE( + std::equal(expected_counts.cbegin(), expected_counts.cend(), num_rows_per_source.cbegin())); +} diff --git a/cpp/tests/io/parquet_reader_test.cpp b/cpp/tests/io/parquet_reader_test.cpp index 2edf9e0aee6..6c61535359f 100644 --- a/cpp/tests/io/parquet_reader_test.cpp +++ b/cpp/tests/io/parquet_reader_test.cpp @@ -2243,6 +2243,209 @@ TEST_F(ParquetReaderTest, StringsWithPageStats) } } +TEST_F(ParquetReaderTest, NumRowsPerSource) +{ + int constexpr num_rows = 10'723; // A prime number + int constexpr rows_in_row_group = 500; + + // Table with single col of random int64 values + auto const int64_data = random_values(num_rows); + column_wrapper const int64_col{ + int64_data.begin(), int64_data.end(), cudf::test::iterators::no_nulls()}; + cudf::table_view const expected({int64_col}); + + // Write to Parquet + auto const filepath = temp_env->get_temp_filepath("NumRowsPerSource.parquet"); + auto const out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .row_group_size_rows(rows_in_row_group) + .build(); + cudf::io::write_parquet(out_opts); + + // Read single data source entirely + { + auto const in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).build(); + auto const result = cudf::io::read_parquet(in_opts); + + 
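+    // num_rows_per_source reports, per input source in order, how many of the
+    // returned rows came from that source; sources skipped entirely report 0,
+    // and filtered reads leave the vector empty (checked further below).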
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + EXPECT_EQ(result.metadata.num_rows_per_source.size(), 1); + EXPECT_EQ(result.metadata.num_rows_per_source[0], num_rows); + } + + // Read rows_to_read rows skipping rows_to_skip from single data source + { + auto constexpr rows_to_skip = 557; // a prime number != rows_in_row_group + auto constexpr rows_to_read = 7'232; + auto const in_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .skip_rows(rows_to_skip) + .num_rows(rows_to_read) + .build(); + auto const result = cudf::io::read_parquet(in_opts); + column_wrapper int64_col_selected{int64_data.begin() + rows_to_skip, + int64_data.begin() + rows_to_skip + rows_to_read, + cudf::test::iterators::no_nulls()}; + + cudf::table_view const expected_selected({int64_col_selected}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_selected, result.tbl->view()); + EXPECT_EQ(result.metadata.num_rows_per_source.size(), 1); + EXPECT_EQ(result.metadata.num_rows_per_source[0], rows_to_read); + } + + // Filtered read from single data source + { + auto constexpr max_value = 100; + auto literal_value = cudf::numeric_scalar{max_value}; + auto literal = cudf::ast::literal{literal_value}; + auto col_ref = cudf::ast::column_reference(0); + auto filter_expression = + cudf::ast::operation(cudf::ast::ast_operator::LESS_EQUAL, col_ref, literal); + + auto const in_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .filter(filter_expression) + .build(); + + std::vector int64_data_filtered; + int64_data_filtered.reserve(num_rows); + std::copy_if( + int64_data.begin(), int64_data.end(), std::back_inserter(int64_data_filtered), [=](auto val) { + return val <= max_value; + }); + column_wrapper int64_col_filtered{ + int64_data_filtered.begin(), int64_data_filtered.end(), cudf::test::iterators::no_nulls()}; + + cudf::table_view expected_filtered({int64_col_filtered}); + + auto const result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_filtered, result.tbl->view()); + EXPECT_EQ(result.metadata.num_rows_per_source.size(), 0); + } + + // Read two data sources skipping the first entire file completely + { + auto constexpr rows_to_skip = 15'723; + auto constexpr nsources = 2; + std::vector const datasources(nsources, filepath); + + auto const in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{datasources}) + .skip_rows(rows_to_skip) + .build(); + + auto const result = cudf::io::read_parquet(in_opts); + + column_wrapper int64_col_selected{int64_data.begin() + rows_to_skip - num_rows, + int64_data.end(), + cudf::test::iterators::no_nulls()}; + + cudf::table_view const expected_selected({int64_col_selected}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_selected, result.tbl->view()); + EXPECT_EQ(result.metadata.num_rows_per_source.size(), 2); + EXPECT_EQ(result.metadata.num_rows_per_source[0], 0); + EXPECT_EQ(result.metadata.num_rows_per_source[1], nsources * num_rows - rows_to_skip); + } + + // Read ten data sources entirely + { + auto constexpr nsources = 10; + std::vector const datasources(nsources, filepath); + + auto const in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{datasources}).build(); + auto const result = cudf::io::read_parquet(in_opts); + + // Initialize expected_counts + std::vector const expected_counts(nsources, num_rows); + + EXPECT_EQ(result.metadata.num_rows_per_source.size(), nsources); + EXPECT_TRUE(std::equal(expected_counts.cbegin(), + 
expected_counts.cend(), + result.metadata.num_rows_per_source.cbegin())); + } + + // Read rows_to_read rows skipping rows_to_skip (> two sources) from ten data sources + { + auto constexpr rows_to_skip = 25'999; + auto constexpr rows_to_read = 47'232; + + auto constexpr nsources = 10; + std::vector const datasources(nsources, filepath); + + auto const in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{datasources}) + .skip_rows(rows_to_skip) + .num_rows(rows_to_read) + .build(); + + auto const result = cudf::io::read_parquet(in_opts); + + // Initialize expected_counts + std::vector expected_counts(nsources, num_rows); + + // Adjust expected_counts for rows_to_skip + int64_t counter = 0; + for (auto& nrows : expected_counts) { + if (counter < rows_to_skip) { + counter += nrows; + nrows = (counter >= rows_to_skip) ? counter - rows_to_skip : 0; + } else { + break; + } + } + + // Reset the counter + counter = 0; + + // Adjust expected_counts for rows_to_read + for (auto& nrows : expected_counts) { + if (counter < rows_to_read) { + counter += nrows; + nrows = (counter >= rows_to_read) ? rows_to_read - counter + nrows : nrows; + } else if (counter > rows_to_read) { + nrows = 0; + } + } + + EXPECT_EQ(result.metadata.num_rows_per_source.size(), nsources); + EXPECT_TRUE(std::equal(expected_counts.cbegin(), + expected_counts.cend(), + result.metadata.num_rows_per_source.cbegin())); + } +} + +TEST_F(ParquetReaderTest, NumRowsPerSourceEmptyTable) +{ + auto const nsources = 10; + + column_wrapper const int64_empty_col{}; + cudf::table_view const expected_empty({int64_empty_col}); + + // Write to Parquet + auto const filepath_empty = temp_env->get_temp_filepath("NumRowsPerSourceEmpty.parquet"); + auto const out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath_empty}, expected_empty) + .build(); + cudf::io::write_parquet(out_opts); + + // Read from Parquet + std::vector const datasources(nsources, filepath_empty); + + auto const in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{datasources}).build(); + auto const result = cudf::io::read_parquet(in_opts); + + // Initialize expected_counts + std::vector const expected_counts(nsources, 0); + + EXPECT_EQ(result.metadata.num_rows_per_source.size(), nsources); + EXPECT_TRUE(std::equal(expected_counts.cbegin(), + expected_counts.cend(), + result.metadata.num_rows_per_source.cbegin())); +} + /////////////////// // metadata tests diff --git a/cpp/tests/large_strings/json_tests.cpp b/cpp/tests/large_strings/json_tests.cu similarity index 50% rename from cpp/tests/large_strings/json_tests.cpp rename to cpp/tests/large_strings/json_tests.cu index bf16d131ba7..49abf7b484d 100644 --- a/cpp/tests/large_strings/json_tests.cpp +++ b/cpp/tests/large_strings/json_tests.cu @@ -14,8 +14,13 @@ * limitations under the License. 
*/ +#include "../io/json/json_utils.cuh" #include "large_strings_fixture.hpp" +#include + +#include +#include #include #include @@ -28,31 +33,57 @@ TEST_F(JsonLargeReaderTest, MultiBatch) { "a": { "y" : 6}, "b" : [4, 5 ], "c": 12 } { "a": { "y" : 6}, "b" : [6 ], "c": 13 } { "a": { "y" : 6}, "b" : [7 ], "c": 14 })"; - constexpr size_t expected_file_size = std::numeric_limits::max() / 2; + constexpr size_t batch_size_ub = std::numeric_limits::max(); + constexpr size_t expected_file_size = 1.5 * static_cast(batch_size_ub); std::size_t const log_repetitions = static_cast(std::ceil(std::log2(expected_file_size / json_string.size()))); json_string.reserve(json_string.size() * (1UL << log_repetitions)); - std::size_t numrows = 4; for (std::size_t i = 0; i < log_repetitions; i++) { json_string += json_string; - numrows <<= 1; } constexpr int num_sources = 2; - std::vector> hostbufs( - num_sources, cudf::host_span(json_string.data(), json_string.size())); + std::vector> hostbufs( + num_sources, + cudf::host_span(reinterpret_cast(json_string.data()), + json_string.size())); // Initialize parsing options (reading json lines) cudf::io::json_reader_options json_lines_options = cudf::io::json_reader_options::builder( cudf::io::source_info{ - cudf::host_span>(hostbufs.data(), hostbufs.size())}) + cudf::host_span>(hostbufs.data(), hostbufs.size())}) .lines(true) .compression(cudf::io::compression_type::NONE) .recovery_mode(cudf::io::json_recovery_mode_t::FAIL); // Read full test data via existing, nested JSON lines reader cudf::io::table_with_metadata current_reader_table = cudf::io::read_json(json_lines_options); - ASSERT_EQ(current_reader_table.tbl->num_rows(), numrows * num_sources); + + std::vector> datasources; + for (auto& hb : hostbufs) { + datasources.emplace_back(cudf::io::datasource::create(hb)); + } + // Test for different chunk sizes + std::vector chunk_sizes{ + batch_size_ub / 4, batch_size_ub / 2, batch_size_ub, static_cast(batch_size_ub * 2)}; + for (auto chunk_size : chunk_sizes) { + auto const tables = + split_byte_range_reading(datasources, + json_lines_options, + chunk_size, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + + auto table_views = std::vector(tables.size()); + std::transform(tables.begin(), tables.end(), table_views.begin(), [](auto& table) { + return table.tbl->view(); + }); + auto result = cudf::concatenate(table_views); + + // Verify that the data read via chunked reader matches the data read via nested JSON reader + // cannot use EQUAL due to concatenate removing null mask + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(current_reader_table.tbl->view(), result->view()); + } } diff --git a/cpp/tests/streams/dictionary_test.cpp b/cpp/tests/streams/dictionary_test.cpp index 9e81c8574b8..03e4cf47470 100644 --- a/cpp/tests/streams/dictionary_test.cpp +++ b/cpp/tests/streams/dictionary_test.cpp @@ -26,6 +26,52 @@ class DictionaryTest : public cudf::test::BaseFixture {}; +TEST_F(DictionaryTest, FactoryColumnViews) +{ + cudf::test::strings_column_wrapper keys({"aaa", "ccc", "ddd", "www"}); + cudf::test::fixed_width_column_wrapper values{2, 0, 3, 1, 2, 2, 2, 3, 0}; + + auto dictionary = cudf::make_dictionary_column(keys, values, cudf::test::get_default_stream()); + cudf::dictionary_column_view view(dictionary->view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.keys(), keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.indices(), values); +} + +TEST_F(DictionaryTest, FactoryColumns) +{ + std::vector h_keys{"aaa", "ccc", "ddd", "www"}; + cudf::test::strings_column_wrapper 
keys(h_keys.begin(), h_keys.end()); + std::vector h_values{2, 0, 3, 1, 2, 2, 2, 3, 0}; + cudf::test::fixed_width_column_wrapper values(h_values.begin(), h_values.end()); + + auto dictionary = cudf::make_dictionary_column( + keys.release(), values.release(), cudf::test::get_default_stream()); + cudf::dictionary_column_view view(dictionary->view()); + + cudf::test::strings_column_wrapper keys_expected(h_keys.begin(), h_keys.end()); + cudf::test::fixed_width_column_wrapper values_expected(h_values.begin(), h_values.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.keys(), keys_expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.indices(), values_expected); +} + +TEST_F(DictionaryTest, FactoryColumnsNullMaskCount) +{ + std::vector h_keys{"aaa", "ccc", "ddd", "www"}; + cudf::test::strings_column_wrapper keys(h_keys.begin(), h_keys.end()); + std::vector h_values{2, 0, 3, 1, 2, 2, 2, 3, 0}; + cudf::test::fixed_width_column_wrapper values(h_values.begin(), h_values.end()); + + auto dictionary = cudf::make_dictionary_column( + keys.release(), values.release(), rmm::device_buffer{}, 0, cudf::test::get_default_stream()); + cudf::dictionary_column_view view(dictionary->view()); + + cudf::test::strings_column_wrapper keys_expected(h_keys.begin(), h_keys.end()); + cudf::test::fixed_width_column_wrapper values_expected(h_values.begin(), h_values.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.keys(), keys_expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.indices(), values_expected); +} + TEST_F(DictionaryTest, Encode) { cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4, 5}); diff --git a/cpp/tests/streams/interop_test.cpp b/cpp/tests/streams/interop_test.cpp index 9e4ee5a4a93..9ba862585d0 100644 --- a/cpp/tests/streams/interop_test.cpp +++ b/cpp/tests/streams/interop_test.cpp @@ -14,6 +14,13 @@ * limitations under the License. */ +// These interop functions are deprecated. We keep the code in this +// test and will migrate the tests to export via the arrow C data +// interface with to_arrow_host which arrow can consume. For now, the +// test is commented out. + +#if 0 + #include #include #include @@ -67,3 +74,5 @@ TEST_F(ArrowTest, FromArrowScalar) auto arrow_scalar = arrow::MakeScalar(value); cudf::from_arrow(*arrow_scalar, cudf::test::get_default_stream()); } + +#endif diff --git a/cpp/tests/streams/lists_test.cpp b/cpp/tests/streams/lists_test.cpp index 711e20e4b17..7963dced292 100644 --- a/cpp/tests/streams/lists_test.cpp +++ b/cpp/tests/streams/lists_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -212,3 +213,57 @@ TEST_F(ListTest, HaveOverlap) cudf::nan_equality::ALL_EQUAL, cudf::test::get_default_stream()); } + +TEST_F(ListTest, Explode) +{ + cudf::test::fixed_width_column_wrapper list_col_a{100, 200, 300}; + cudf::test::lists_column_wrapper list_col_b{ + cudf::test::lists_column_wrapper{1, 2, 7}, + cudf::test::lists_column_wrapper{5, 6}, + cudf::test::lists_column_wrapper{0, 3}}; + cudf::test::strings_column_wrapper list_col_c{"string0", "string1", "string2"}; + cudf::table_view lists_table({list_col_a, list_col_b, list_col_c}); + cudf::explode(lists_table, 1, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, ExplodePosition) +{ + cudf::test::fixed_width_column_wrapper list_col_a{100, 200, 300}; + cudf::test::lists_column_wrapper list_col_b{ + cudf::test::lists_column_wrapper{1, 2, 7}, + cudf::test::lists_column_wrapper{5, 6}, + cudf::test::lists_column_wrapper{0, 3}}; + cudf::test::strings_column_wrapper list_col_c{"string0", "string1", "string2"}; + cudf::table_view lists_table({list_col_a, list_col_b, list_col_c}); + cudf::explode_position(lists_table, 1, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, ExplodeOuter) +{ + constexpr auto null = 0; + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); + cudf::test::lists_column_wrapper list_col_a{ + cudf::test::lists_column_wrapper({1, null, 7}, valids), + cudf::test::lists_column_wrapper({5, null, 0, null}, valids), + cudf::test::lists_column_wrapper{}, + cudf::test::lists_column_wrapper({0, null, 8}, valids)}; + cudf::test::fixed_width_column_wrapper list_col_b{100, 200, 300, 400}; + cudf::table_view lists_table({list_col_a, list_col_b}); + cudf::explode_outer(lists_table, 0, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, ExplodeOuterPosition) +{ + constexpr auto null = 0; + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); + cudf::test::lists_column_wrapper list_col_a{ + cudf::test::lists_column_wrapper({1, null, 7}, valids), + cudf::test::lists_column_wrapper({5, null, 0, null}, valids), + cudf::test::lists_column_wrapper{}, + cudf::test::lists_column_wrapper({0, null, 8}, valids)}; + cudf::test::fixed_width_column_wrapper list_col_b{100, 200, 300, 400}; + cudf::table_view lists_table({list_col_a, list_col_b}); + cudf::explode_outer_position(lists_table, 0, cudf::test::get_default_stream()); +} diff --git a/cpp/tests/streams/reshape_test.cpp b/cpp/tests/streams/reshape_test.cpp new file mode 100644 index 00000000000..d7c5da91bca --- /dev/null +++ b/cpp/tests/streams/reshape_test.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +class ReshapeTest : public cudf::test::BaseFixture {}; + +TEST_F(ReshapeTest, InterleaveColumns) +{ + auto a = cudf::test::fixed_width_column_wrapper({0, 3, 6}); + auto b = cudf::test::fixed_width_column_wrapper({1, 4, 7}); + auto c = cudf::test::fixed_width_column_wrapper({2, 5, 8}); + cudf::table_view in(std::vector{a, b, c}); + cudf::interleave_columns(in, cudf::test::get_default_stream()); +} + +TEST_F(ReshapeTest, Tile) +{ + auto a = cudf::test::fixed_width_column_wrapper({-1, 0, 1}); + cudf::table_view in(std::vector{a}); + cudf::tile(in, 2, cudf::test::get_default_stream()); +} + +TEST_F(ReshapeTest, ByteCast) +{ + auto a = cudf::test::fixed_width_column_wrapper({0, 100, -100, 1000, 1000}); + cudf::byte_cast(a, cudf::flip_endianness::YES, cudf::test::get_default_stream()); + cudf::byte_cast(a, cudf::flip_endianness::NO, cudf::test::get_default_stream()); +} diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp index 51e9b3bd0a0..7a038fa6d75 100644 --- a/cpp/tests/strings/integers_tests.cpp +++ b/cpp/tests/strings/integers_tests.cpp @@ -294,7 +294,7 @@ TYPED_TEST(StringsIntegerConvertTest, FromToInteger) std::iota(h_integers.begin(), h_integers.end(), -(TypeParam)(h_integers.size() / 2)); h_integers.push_back(std::numeric_limits::min()); h_integers.push_back(std::numeric_limits::max()); - auto d_integers = cudf::detail::make_device_uvector_sync( + auto const d_integers = cudf::detail::make_device_uvector_sync( h_integers, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto integers = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, (cudf::size_type)d_integers.size()); @@ -308,8 +308,6 @@ TYPED_TEST(StringsIntegerConvertTest, FromToInteger) // convert to strings auto results_strings = cudf::strings::from_integers(integers->view()); - // copy back to host - h_integers = cudf::detail::make_host_vector_sync(d_integers, cudf::get_default_stream()); std::vector h_strings; for (auto itr = h_integers.begin(); itr != h_integers.end(); ++itr) h_strings.push_back(std::to_string(*itr)); diff --git a/cpp/tests/utilities/random_seed.cpp b/cpp/tests/utilities/random_seed.cpp index 4d5035e5a22..ab5a31ce161 100644 --- a/cpp/tests/utilities/random_seed.cpp +++ b/cpp/tests/utilities/random_seed.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,7 +23,7 @@ namespace detail { /** * @copydoc cudf::test::detail::random_generator_incrementing_seed() */ -uint64_t random_generator_incrementing_seed() +CUDF_EXPORT uint64_t random_generator_incrementing_seed() { static uint64_t seed = 0; return ++seed; diff --git a/cpp/tests/utilities_tests/pinned_memory_tests.cpp b/cpp/tests/utilities_tests/pinned_memory_tests.cpp index df9103640f4..93259fd63ee 100644 --- a/cpp/tests/utilities_tests/pinned_memory_tests.cpp +++ b/cpp/tests/utilities_tests/pinned_memory_tests.cpp @@ -18,16 +18,33 @@ #include #include +#include #include +#include #include #include #include #include -class PinnedMemoryTest : public cudf::test::BaseFixture {}; +class PinnedMemoryTest : public cudf::test::BaseFixture { + size_t prev_copy_threshold; + size_t prev_alloc_threshold; -TEST(PinnedMemoryTest, MemoryResourceGetAndSet) + public: + PinnedMemoryTest() + : prev_copy_threshold{cudf::get_kernel_pinned_copy_threshold()}, + prev_alloc_threshold{cudf::get_allocate_host_as_pinned_threshold()} + { + } + ~PinnedMemoryTest() override + { + cudf::set_kernel_pinned_copy_threshold(prev_copy_threshold); + cudf::set_allocate_host_as_pinned_threshold(prev_alloc_threshold); + } +}; + +TEST_F(PinnedMemoryTest, MemoryResourceGetAndSet) { // Global environment for temporary files auto const temp_env = static_cast( @@ -63,3 +80,49 @@ TEST(PinnedMemoryTest, MemoryResourceGetAndSet) // reset memory resource back cudf::set_pinned_memory_resource(last_mr); } + +TEST_F(PinnedMemoryTest, KernelCopyThresholdGetAndSet) +{ + cudf::set_kernel_pinned_copy_threshold(12345); + EXPECT_EQ(cudf::get_kernel_pinned_copy_threshold(), 12345); +} + +TEST_F(PinnedMemoryTest, HostAsPinnedThresholdGetAndSet) +{ + cudf::set_allocate_host_as_pinned_threshold(12345); + EXPECT_EQ(cudf::get_allocate_host_as_pinned_threshold(), 12345); +} + +TEST_F(PinnedMemoryTest, MakePinnedVector) +{ + cudf::set_allocate_host_as_pinned_threshold(0); + + // should always use pinned memory + { + auto const vec = cudf::detail::make_pinned_vector_async(1, cudf::get_default_stream()); + EXPECT_TRUE(vec.get_allocator().is_device_accessible()); + } +} + +TEST_F(PinnedMemoryTest, MakeHostVector) +{ + cudf::set_allocate_host_as_pinned_threshold(7); + + // allocate smaller than the threshold + { + auto const vec = cudf::detail::make_host_vector(1, cudf::get_default_stream()); + EXPECT_TRUE(vec.get_allocator().is_device_accessible()); + } + + // allocate the same size as the threshold + { + auto const vec = cudf::detail::make_host_vector(7, cudf::get_default_stream()); + EXPECT_TRUE(vec.get_allocator().is_device_accessible()); + } + + // allocate larger than the threshold + { + auto const vec = cudf::detail::make_host_vector(2, cudf::get_default_stream()); + EXPECT_FALSE(vec.get_allocator().is_device_accessible()); + } +} diff --git a/dependencies.yaml b/dependencies.yaml index b5104024e18..3401362ad9a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -112,6 +112,13 @@ files: includes: - test_python_common - test_python_cudf + py_rapids_build_cudf: + output: pyproject + pyproject_dir: python/pylibcudf + extras: + table: build-system + includes: + - rapids_build_skbuild py_build_pylibcudf: output: pyproject pyproject_dir: python/pylibcudf @@ -130,6 +137,7 @@ files: includes: - run_pylibcudf - pyarrow_run + # TODO: finishme # py_test_pylibcudf: # output: pyproject # pyproject_dir: python/pylibcudf @@ -314,8 +322,8 @@ dependencies: - output_types: conda packages: - fmt>=10.1.1,<11 - - librmm==24.8.*,>=0.0.0a0 - - 
libkvikio==24.8.*,>=0.0.0a0 + - librmm==24.10.*,>=0.0.0a0 + - libkvikio==24.10.*,>=0.0.0a0 - librdkafka>=1.9.0,<1.10.0a0 # Align nvcomp version with rapids-cmake - nvcomp==3.0.6 @@ -356,7 +364,7 @@ dependencies: common: - output_types: conda packages: - - &rmm_conda rmm==24.8.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==24.10.*,>=0.0.0a0 - pip - pip: - git+https://github.com/python-streamz/streamz.git@master @@ -370,13 +378,17 @@ dependencies: specific: - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.*"} - packages: &build_python_packages_cu12 - - rmm-cu12==24.8.*,>=0.0.0a0 - - matrix: {cuda: "11.*"} - packages: &build_python_packages_cu11 - - rmm-cu11==24.8.*,>=0.0.0a0 - - {matrix: null, packages: [*rmm_conda] } + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - rmm-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - rmm-cu11==24.10.*,>=0.0.0a0 + - {matrix: null, packages: [*rmm_unsuffixed]} libarrow_build: common: - output_types: conda @@ -532,7 +544,7 @@ dependencies: - output_types: [conda] packages: - breathe>=4.35.0 - - dask-cuda==24.8.*,>=0.0.0a0 + - dask-cuda==24.10.*,>=0.0.0a0 - *doxygen - make - myst-nb @@ -591,7 +603,7 @@ dependencies: - typing_extensions>=4.0.0 - output_types: conda packages: - - *rmm_conda + - *rmm_unsuffixed - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -610,13 +622,27 @@ dependencies: - {matrix: null, packages: *run_pylibcudf_packages_all_cu11} - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.*"} + - matrix: + cuda: "12.*" + cuda_suffixed: "true" packages: - - rmm-cu12==24.8.*,>=0.0.0a0 - - matrix: {cuda: "11.*"} + - rmm-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "12.*" + cuda_suffixed: "false" + packages: + - *rmm_unsuffixed + - matrix: + cuda: "11.*" + cuda_suffixed: "true" packages: - - rmm-cu11==24.8.*,>=0.0.0a0 - - {matrix: null, packages: [*rmm_conda]} + - rmm-cu11==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "false" + packages: &run_pylibcudf_cu11_unsuffixed + - *rmm_unsuffixed + - {matrix: null, packages: *run_pylibcudf_cu11_unsuffixed} run_cudf: common: - output_types: [conda, requirements, pyproject] @@ -629,7 +655,7 @@ dependencies: - typing_extensions>=4.0.0 - output_types: conda packages: - - *rmm_conda + - *rmm_unsuffixed - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -650,23 +676,40 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - pynvjitlink>=0.0.0a0 + - &pynvjitlink_unsuffixed pynvjitlink>=0.0.0a0 - matrix: {cuda: "11.*"} packages: - - cubinlinker - - ptxcompiler + - &cubinlinker_unsuffixed cubinlinker + - &ptxcompiler_unsuffixed ptxcompiler - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.*"} + - matrix: + cuda: "12.*" + cuda_suffixed: "true" packages: - - rmm-cu12==24.8.*,>=0.0.0a0 + - rmm-cu12==24.10.*,>=0.0.0a0 - pynvjitlink-cu12>=0.0.0a0 - - matrix: {cuda: "11.*"} + - matrix: + cuda: "12.*" + cuda_suffixed: "false" packages: - - rmm-cu11==24.8.*,>=0.0.0a0 + - *rmm_unsuffixed + - *pynvjitlink_unsuffixed + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - rmm-cu11==24.10.*,>=0.0.0a0 - cubinlinker-cu11 - ptxcompiler-cu11 - - {matrix: null, packages: [cubinlinker, ptxcompiler, *rmm_conda]} + - matrix: + cuda: "11.*" + cuda_suffixed: "false" + packages: &run_cudf_cu11_unsuffixed + - *cubinlinker_unsuffixed + - 
*ptxcompiler_unsuffixed + - *rmm_unsuffixed + - {matrix: null, packages: *run_cudf_cu11_unsuffixed} run_cudf_polars: common: - output_types: [conda, requirements, pyproject] @@ -676,7 +719,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - rapids-dask-dependency==24.8.*,>=0.0.0a0 + - rapids-dask-dependency==24.10.*,>=0.0.0a0 run_custreamz: common: - output_types: conda @@ -762,13 +805,13 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - dask-cuda==24.8.*,>=0.0.0a0 + - dask-cuda==24.10.*,>=0.0.0a0 - *numba depends_on_cudf: common: - output_types: conda packages: - - &cudf_conda cudf==24.8.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==24.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -778,18 +821,22 @@ dependencies: specific: - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.*"} + - matrix: + cuda: "12.*" + cuda_suffixed: "true" packages: - - cudf-cu12==24.8.*,>=0.0.0a0 - - matrix: {cuda: "11.*"} + - cudf-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" packages: - - cudf-cu11==24.8.*,>=0.0.0a0 - - {matrix: null, packages: [*cudf_conda]} + - cudf-cu11==24.10.*,>=0.0.0a0 + - {matrix: null, packages: [*cudf_unsuffixed]} depends_on_cudf_kafka: common: - output_types: conda packages: - - &cudf_kafka_conda cudf_kafka==24.8.*,>=0.0.0a0 + - &cudf_kafka_unsuffixed cudf_kafka==24.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -799,13 +846,17 @@ dependencies: specific: - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.*"} + - matrix: + cuda: "12.*" + cuda_suffixed: "true" packages: - - cudf_kafka-cu12==24.8.*,>=0.0.0a0 - - matrix: {cuda: "11.*"} + - cudf_kafka-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" packages: - - cudf_kafka-cu11==24.8.*,>=0.0.0a0 - - {matrix: null, packages: [*cudf_kafka_conda]} + - cudf_kafka-cu11==24.10.*,>=0.0.0a0 + - {matrix: null, packages: [*cudf_kafka_unsuffixed]} depends_on_cupy: common: - output_types: conda diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index c3c14ac8cad..7421d9be298 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -556,10 +556,16 @@ def on_missing_reference(app, env, node, contnode): ("py:class", "Dtype"), # The following are erroneously warned due to # https://github.com/sphinx-doc/sphinx/issues/11225 + ("py:obj", "cudf.Index.values_host"), ("py:class", "pa.Array"), ("py:class", "ScalarLike"), ("py:class", "ParentType"), ("py:class", "ColumnLike"), + ("py:class", "ColumnLike"), + ("py:obj", "cudf.Index.transpose"), + ("py:obj", "cudf.Index.T"), + ("py:obj", "cudf.Index.to_flat_index"), + ("py:obj", "cudf.MultiIndex.to_flat_index"), # TODO: Remove this when we figure out why typing_extensions doesn't seem # to map types correctly for intersphinx ("py:class", "typing_extensions.Self"), diff --git a/docs/cudf/source/cudf_pandas/how-it-works.md b/docs/cudf/source/cudf_pandas/how-it-works.md index 75f57742ac9..8efd9d7e063 100644 --- a/docs/cudf/source/cudf_pandas/how-it-works.md +++ b/docs/cudf/source/cudf_pandas/how-it-works.md @@ -36,3 +36,19 @@ transfers. When using `cudf.pandas`, cuDF's [pandas compatibility mode](api.options) is automatically enabled, ensuring consistency with pandas-specific semantics like default sort ordering. 
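+
+As a hedged sketch of how the memory modes documented below can be selected
+(assuming only the public `cudf.pandas.install()` entry point and the
+`CUDF_PANDAS_RMM_MODE` variable it reads; the frame is illustrative):
+
+```python
+import os
+
+# Must be set before cudf.pandas is installed; any mode listed below works.
+os.environ["CUDF_PANDAS_RMM_MODE"] = "managed"
+
+import cudf.pandas
+
+cudf.pandas.install()
+
+import pandas as pd  # now GPU-accelerated where possible
+
+df = pd.DataFrame({"a": [1, 2, 3]})
+print(df["a"].sum())
+```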
+
+`cudf.pandas` uses a managed memory pool by default. This allows `cudf.pandas` to process datasets larger than the memory of the GPU it is running on. Managed memory prefetching is also enabled by default to improve memory access performance. For more information on CUDA Unified Memory (managed memory), performance, and prefetching, see [this NVIDIA Developer blog post](https://developer.nvidia.com/blog/improving-gpu-memory-oversubscription-performance/).
+
+Pool allocators improve allocation performance. Without using one, memory
+allocation may be a bottleneck depending on the workload. Managed memory
+enables oversubscribing GPU memory. This allows `cudf.pandas` to process
+data larger than GPU memory in many cases, without CPU (pandas) fallback.
+
+Other memory allocators can be used by changing the environment
+variable `CUDF_PANDAS_RMM_MODE` to one of the following.
+
+1. "managed_pool" (default): CUDA Unified Memory (managed memory) with RMM's pool allocator.
+2. "managed": CUDA Unified Memory (managed memory) with no pool allocator.
+3. "async": CUDA's built-in asynchronous pool allocator with normal CUDA device memory.
+4. "pool": RMM's pool allocator with normal CUDA device memory.
+5. "cuda": normal CUDA device memory with no pool allocator.
diff --git a/docs/cudf/source/developer_guide/documentation.md b/docs/cudf/source/developer_guide/documentation.md
index c8da689479c..4f5a57fec02 100644
--- a/docs/cudf/source/developer_guide/documentation.md
+++ b/docs/cudf/source/developer_guide/documentation.md
@@ -164,7 +164,7 @@ The directive should be used inside docstrings like so:
     Docstring body
 
     .. pandas-compat::
-        **$API_NAME**
+        :meth:`pandas.DataFrame.METHOD`
 
         Explanation of differences
 ```
diff --git a/docs/cudf/source/user_guide/api_docs/groupby.rst b/docs/cudf/source/user_guide/api_docs/groupby.rst
index 80811efa33f..ca29087cbf9 100644
--- a/docs/cudf/source/user_guide/api_docs/groupby.rst
+++ b/docs/cudf/source/user_guide/api_docs/groupby.rst
@@ -68,7 +68,6 @@ Computations / descriptive stats
    GroupBy.std
    GroupBy.sum
    GroupBy.var
-   GroupBy.corr
    GroupBy.cov
 
 The following methods are available in both ``SeriesGroupBy`` and
@@ -81,6 +80,7 @@ application to columns of a specific data type.
    :toctree: api/
 
    DataFrameGroupBy.bfill
+   DataFrameGroupBy.corr
    DataFrameGroupBy.count
    DataFrameGroupBy.cumcount
    DataFrameGroupBy.cummax
@@ -102,5 +102,6 @@ The following methods are available only for ``SeriesGroupBy`` objects.
 .. autosummary::
    :toctree: api/
 
+   SeriesGroupBy.corr
    SeriesGroupBy.nunique
    SeriesGroupBy.unique
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst
index 1c7e364c40f..c8933981736 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst
@@ -18,3 +18,4 @@ I/O Functions
    avro
    csv
    json
+   parquet
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst
new file mode 100644
index 00000000000..9dfbadfa216
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst
@@ -0,0 +1,6 @@
+=======
+Parquet
+=======
+
+..
automodule:: cudf._lib.pylibcudf.io.parquet + :members: diff --git a/docs/cudf/source/user_guide/io/read-json.md b/docs/cudf/source/user_guide/io/read-json.md index 7049c75d1c1..d2bb021a5b5 100644 --- a/docs/cudf/source/user_guide/io/read-json.md +++ b/docs/cudf/source/user_guide/io/read-json.md @@ -218,11 +218,11 @@ reads a JSON object as a single line and then extracts the # first read the JSON object with line=True >>> df = cudf.read_json(j, lines=True) >>> df - metadata records + metadata results 0 {'vehicle': 'car'} [{'id': 0, 'distance': 1.2}, {'id': 1, 'distan... -# then explode the 'records' column ->>> df = df['records'].explode().struct.explode() +# then explode the 'results' column +>>> df = df['results'].explode().struct.explode() >>> df id distance 0 0 1.2 diff --git a/java/ci/README.md b/java/ci/README.md index 49481efab6b..ccb9efb50b6 100644 --- a/java/ci/README.md +++ b/java/ci/README.md @@ -34,7 +34,7 @@ nvidia-docker run -it cudf-build:11.8.0-devel-rocky8 bash You can download the cuDF repo in the docker container or you can mount it into the container. Here I choose to download again in the container. ```bash -git clone --recursive https://github.com/rapidsai/cudf.git -b branch-24.08 +git clone --recursive https://github.com/rapidsai/cudf.git -b branch-24.10 ``` ### Build cuDF jar with devtoolset @@ -47,4 +47,4 @@ scl enable gcc-toolset-11 "java/ci/build-in-docker.sh" ### The output -You can find the cuDF jar in java/target/ like cudf-24.08.0-SNAPSHOT-cuda11.jar. +You can find the cuDF jar in java/target/ like cudf-24.10.0-SNAPSHOT-cuda11.jar. diff --git a/java/pom.xml b/java/pom.xml index 70230e6bc71..9694e741f16 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -21,7 +21,7 @@ ai.rapids cudf - 24.08.0-SNAPSHOT + 24.10.0-SNAPSHOT cudfjni diff --git a/java/src/main/java/ai/rapids/cudf/Cudf.java b/java/src/main/java/ai/rapids/cudf/Cudf.java new file mode 100644 index 00000000000..d09e2f87ed4 --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/Cudf.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ai.rapids.cudf; + +public class Cudf { + + static { + NativeDepsLoader.loadNativeDeps(); + } + + /** + * cuDF copies that are smaller than the threshold will use a kernel to copy, instead + * of cudaMemcpyAsync. + */ + public static native void setKernelPinnedCopyThreshold(long kernelPinnedCopyThreshold); + + /** + * cudf allocations that are smaller than the threshold will use the pinned host + * memory resource. 
+   */
+  public static native void setPinnedAllocationThreshold(long pinnedAllocationThreshold);
+}
diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt
index 56f8f9d0472..22059c5bc7f 100644
--- a/java/src/main/native/CMakeLists.txt
+++ b/java/src/main/native/CMakeLists.txt
@@ -210,6 +210,7 @@ target_compile_definitions(
   cudfjni PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_DEFINITIONS}>"
                  "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_DEFINITIONS}>"
 )
+target_link_options(cudfjni PRIVATE "-Wl,--no-undefined")
 
 if(USE_GDS)
   add_library(cufilejni src/CuFileJni.cpp)
diff --git a/java/src/main/native/include/jni_utils.hpp b/java/src/main/native/include/jni_utils.hpp
index ea04c1cda83..a3b4bfcb63e 100644
--- a/java/src/main/native/include/jni_utils.hpp
+++ b/java/src/main/native/include/jni_utils.hpp
@@ -284,7 +284,7 @@ class native_jArray {
     return data()[index];
   }
 
-  const N_TYPE* const data() const
+  N_TYPE const* data() const
   {
     init_data_ptr();
     return data_ptr;
@@ -296,17 +296,15 @@ class native_jArray {
     return data_ptr;
   }
 
-  const N_TYPE* const begin() const { return data(); }
+  N_TYPE const* begin() const { return data(); }
   N_TYPE* begin() { return data(); }
 
-  const N_TYPE* const end() const { return data() + size(); }
+  N_TYPE const* end() const { return data() + size(); }
   N_TYPE* end() { return data() + size(); }
 
-  const J_ARRAY_TYPE get_jArray() const { return orig; }
-
-  J_ARRAY_TYPE get_jArray() { return orig; }
+  J_ARRAY_TYPE get_jArray() const { return orig; }
 
   /**
   * @brief Conversion to std::vector
@@ -430,9 +428,7 @@ class native_jpointerArray {
   T* const* begin() const { return data(); }
   T* const* end() const { return data() + size(); }
 
-  const jlongArray get_jArray() const { return wrapped.get_jArray(); }
-
-  jlongArray get_jArray() { return wrapped.get_jArray(); }
+  jlongArray get_jArray() const { return wrapped.get_jArray(); }
 
   void assert_no_nulls() const
   {
@@ -624,7 +620,7 @@ class native_jstring {
     return true;
   }
 
-  const jstring get_jstring() const { return orig; }
+  jstring get_jstring() const { return orig; }
 
   ~native_jstring()
   {
@@ -753,13 +749,13 @@ class native_jstringArray {
     return cache[index];
   }
 
-  char const** const as_c_array() const
+  char const** as_c_array() const
   {
     init_c_cache();
     return c_cache.data();
   }
 
-  const std::vector<std::string> as_cpp_vector() const
+  std::vector<std::string> as_cpp_vector() const
   {
     init_cpp_cache();
     return cpp_cache;
diff --git a/java/src/main/native/src/CudfJni.cpp b/java/src/main/native/src/CudfJni.cpp
index 698a8f6ff02..2860dc2e4b2 100644
--- a/java/src/main/native/src/CudfJni.cpp
+++ b/java/src/main/native/src/CudfJni.cpp
@@ -18,6 +18,7 @@
 #include
 #include
+#include
 
 #include
 
@@ -201,4 +202,28 @@ JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_Cuda_isPtdsEnabled(JNIEnv* env, j
   return cudf::jni::is_ptds_enabled;
 }
 
+JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cudf_setKernelPinnedCopyThreshold(JNIEnv* env,
+                                                                             jclass clazz,
+                                                                             jlong jthreshold)
+{
+  try {
+    cudf::jni::auto_set_device(env);
+    auto threshold = static_cast<std::size_t>(jthreshold);
+    cudf::set_kernel_pinned_copy_threshold(threshold);
+  }
+  CATCH_STD(env, )
+}
+
+JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cudf_setPinnedAllocationThreshold(JNIEnv* env,
+                                                                             jclass clazz,
+                                                                             jlong jthreshold)
+{
+  try {
+    cudf::jni::auto_set_device(env);
+    auto threshold = static_cast<std::size_t>(jthreshold);
+    cudf::set_allocate_host_as_pinned_threshold(threshold);
+  }
+  CATCH_STD(env, )
+}
+
 } // extern "C"
diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp
index 5842a980fc4..09c04a77590 100644
--- a/java/src/main/native/src/RmmJni.cpp
+++ 
b/java/src/main/native/src/RmmJni.cpp
@@ -154,13 +154,6 @@ class tracking_resource_adaptor final : public base_tracking_resource_adaptor {
   }
 };
 
-template <typename Upstream>
-tracking_resource_adaptor<Upstream>* make_tracking_adaptor(Upstream* upstream,
-                                                           std::size_t size_alignment)
-{
-  return new tracking_resource_adaptor{upstream, size_alignment};
-}
-
 /**
  * @brief An RMM device memory resource adaptor that delegates to the wrapped resource
  * for most operations but will call Java to handle certain situations (e.g.: allocation failure).
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index c58cd732b39..a9ace1398e4 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -45,6 +45,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -2789,7 +2790,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftDistinctJoinGatherMap
   auto has_nulls = cudf::has_nested_nulls(left) || cudf::has_nested_nulls(right)
                      ? cudf::nullable_join::YES
                      : cudf::nullable_join::NO;
-  if (cudf::detail::has_nested_columns(right)) {
+  if (cudf::has_nested_columns(right)) {
     cudf::distinct_hash_join<cudf::has_nested::YES> hash(right, left, has_nulls, nulleq);
     return hash.left_join();
   } else {
@@ -3010,7 +3011,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerDistinctJoinGatherMa
   std::pair<std::unique_ptr<rmm::device_uvector<cudf::size_type>>,
             std::unique_ptr<rmm::device_uvector<cudf::size_type>>>
     maps;
-  if (cudf::detail::has_nested_columns(right)) {
+  if (cudf::has_nested_columns(right)) {
     cudf::distinct_hash_join<cudf::has_nested::YES> hash(right, left, has_nulls, nulleq);
     maps = hash.inner_join();
   } else {
diff --git a/java/src/main/native/src/aggregation128_utils.cu b/java/src/main/native/src/aggregation128_utils.cu
index a32e7d27085..631df58b017 100644
--- a/java/src/main/native/src/aggregation128_utils.cu
+++ b/java/src/main/native/src/aggregation128_utils.cu
@@ -34,7 +34,7 @@ namespace {
 // Functor to reassemble a 128-bit value from four 64-bit chunks with overflow detection.
-class chunk_assembler : public thrust::unary_function<cudf::size_type, __int128_t> {
+class chunk_assembler {
 public:
   chunk_assembler(bool* overflows,
                   uint64_t const* chunks0,
diff --git a/pyproject.toml b/pyproject.toml
index 2f59864894b..e15cb7b3cdd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,69 @@ quiet-level = 3
 line-length = 79
 
 [tool.ruff.lint]
-select = ["E", "F", "W", "D201", "D204", "D206", "D207", "D208", "D209", "D210", "D211", "D214", "D215", "D300", "D301", "D403", "D405", "D406", "D407", "D408", "D409", "D410", "D411", "D412", "D414", "D418", "TCH", "FA", "UP006", "UP007"]
+typing-modules = ["cudf._typing"]
+select = [
+    # pycodestyle Error
+    "E",
+    # Pyflakes
+    "F",
+    # pycodestyle Warning
+    "W",
+    # no-blank-line-before-function
+    "D201",
+    # one-blank-line-after-class
+    "D204",
+    # indent-with-spaces
+    "D206",
+    # under-indentation
+    "D207",
+    # over-indentation
+    "D208",
+    # new-line-after-last-paragraph
+    "D209",
+    # surrounding-whitespace
+    "D210",
+    # blank-line-before-class
+    "D211",
+    # section-not-over-indented
+    "D214",
+    # section-underline-not-over-indented
+    "D215",
+    # triple-single-quotes
+    "D300",
+    # escape-sequence-in-docstring
+    "D301",
+    # first-line-capitalized
+    "D403",
+    # capitalize-section-name
+    "D405",
+    # new-line-after-section-name
+    "D406",
+    # dashed-underline-after-section
+    "D407",
+    # section-underline-after-name
+    "D408",
+    # section-underline-matches-section-length
+    "D409",
+    # no-blank-line-after-section
+    "D410",
+    # no-blank-line-before-section
+    "D411",
+    # blank-lines-between-header-and-content
+    "D412",
+    # empty-docstring-section
+    "D414",
+    # overload-with-docstring
+    "D418",
+    # flake8-type-checking
+    "TCH",
+    # flake8-future-annotations
+    "FA",
+    # non-pep585-annotation
+    "UP006",
+    # non-pep604-annotation
+    "UP007"
+]
 ignore = [
     # whitespace before :
     "E203",
diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt
new file mode 100644
index 00000000000..ecadbf5cbbc
--- /dev/null
+++ b/python/cudf/CMakeLists.txt
@@ -0,0 +1,101 @@
+# =============================================================================
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+
+include(../../rapids_config.cmake)
+include(rapids-cuda)
+rapids_cuda_init_architectures(cudf-python)
+
+project(
+  cudf-python
+  VERSION "${RAPIDS_VERSION}"
+  LANGUAGES CXX CUDA
+)
+
+option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files"
+       OFF
+)
+option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
+mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
+
+# Find Python early so that later commands can use it
+find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
+
+# If the user requested it, we attempt to find CUDF.
+if(FIND_CUDF_CPP)
+  include(rapids-cpm)
+  include(rapids-export)
+  include(rapids-find)
+  rapids_cpm_init()
+
+  if(USE_LIBARROW_FROM_PYARROW)
+    # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow
+    # libraries. These variables have no effect because we are always searching for arrow via
+    # pyarrow, but they must be set as they are required arguments to the function in
+    # get_arrow.cmake.
+    set(CUDF_USE_ARROW_STATIC OFF)
+    set(CUDF_ENABLE_ARROW_S3 OFF)
+    set(CUDF_ENABLE_ARROW_ORC OFF)
+    set(CUDF_ENABLE_ARROW_PYTHON OFF)
+    set(CUDF_ENABLE_ARROW_PARQUET OFF)
+    include(../../cpp/cmake/thirdparty/get_arrow.cmake)
+  endif()
+
+  find_package(cudf "${RAPIDS_VERSION}" REQUIRED)
+
+  # An installed version of libcudf doesn't provide the dlpack headers, so we need to download
+  # dlpack for interop.pyx.
+  include(../../cpp/cmake/thirdparty/get_dlpack.cmake)
+else()
+  set(cudf_FOUND OFF)
+endif()
+
+include(rapids-cython-core)
+
+if(NOT cudf_FOUND)
+  set(BUILD_TESTS OFF)
+  set(BUILD_BENCHMARKS OFF)
+  set(CUDF_BUILD_TESTUTIL OFF)
+  set(CUDF_BUILD_STREAMS_TEST_UTIL OFF)
+  set(CUDA_STATIC_RUNTIME ON)
+
+  add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL)
+
+  # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous
+  # components like headers from libcudacxx, but we do need the libraries. However, we want to
+  # control where they are installed to. Since there are multiple subpackages of cudf._lib that
+  # require access to libcudf, we place the library and all its dependent artifacts in the cudf
+  # directory as a single source of truth and modify the other rpaths appropriately.
+  set(cython_lib_dir cudf)
+  include(cmake/Modules/WheelHelpers.cmake)
+  # TODO: This install is currently overzealous. We should only install the libraries that are
+  # downloaded by CPM during the build, not libraries that were found on the system. However, in
+  # practice this would only be a problem if libcudf was not found but some of the dependencies
+  # were, and we have no real use cases where that happens.
+ install_aliased_imported_targets( + TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp + DESTINATION ${cython_lib_dir} + ) +endif() + +rapids_cython_init() + +include(cmake/Modules/LinkPyarrowHeaders.cmake) +add_subdirectory(cudf/_lib) +add_subdirectory(udf_cpp) + +if(DEFINED cython_lib_dir) + rapids_cython_add_rpath_entries(TARGET cudf PATHS "${cython_lib_dir}") +endif() diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index ceae1b148aa..f6d9c8c404c 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -3,30 +3,9 @@ from cudf.core.buffer import acquire_spill_lock from libcpp cimport bool -from libcpp.memory cimport make_shared, shared_ptr, unique_ptr -from libcpp.utility cimport move from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.lists.count_elements cimport ( - count_elements as cpp_count_elements, -) -from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport ( - lists_column_view, -) -from cudf._lib.pylibcudf.libcudf.lists.sorting cimport ( - sort_lists as cpp_sort_lists, -) -from cudf._lib.pylibcudf.libcudf.lists.stream_compaction cimport ( - distinct as cpp_distinct, -) -from cudf._lib.pylibcudf.libcudf.types cimport ( - nan_equality, - null_equality, - null_order, - order, - size_type, -) +from cudf._lib.pylibcudf.libcudf.types cimport null_order, size_type from cudf._lib.utils cimport columns_from_pylibcudf_table from cudf._lib import pylibcudf @@ -36,19 +15,10 @@ from cudf._lib.pylibcudf cimport Scalar @acquire_spill_lock() def count_elements(Column col): - - # shared_ptr required because lists_column_view has no default - # ctor - cdef shared_ptr[lists_column_view] list_view = ( - make_shared[lists_column_view](col.view()) + return Column.from_pylibcudf( + pylibcudf.lists.count_elements( + col.to_pylibcudf(mode="read")) ) - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_count_elements(list_view.get()[0])) - - result = Column.from_unique_ptr(move(c_result)) - return result @acquire_spill_lock() @@ -63,53 +33,25 @@ def explode_outer(list source_columns, int explode_column_idx): @acquire_spill_lock() def distinct(Column col, bool nulls_equal, bool nans_all_equal): - """ - nulls_equal == True indicates that libcudf should treat any two nulls as - equal, and as unequal otherwise. - nans_all_equal == True indicates that libcudf should treat any two - elements from {-nan, +nan} as equal, and as unequal otherwise. 
- """ - cdef shared_ptr[lists_column_view] list_view = ( - make_shared[lists_column_view](col.view()) - ) - cdef null_equality c_nulls_equal = ( - null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL - ) - cdef nan_equality c_nans_equal = ( - nan_equality.ALL_EQUAL if nans_all_equal else nan_equality.UNEQUAL - ) - - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_distinct(list_view.get()[0], - c_nulls_equal, - c_nans_equal) + return Column.from_pylibcudf( + pylibcudf.lists.distinct( + col.to_pylibcudf(mode="read"), + nulls_equal, + nans_all_equal, ) - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def sort_lists(Column col, bool ascending, str na_position): - cdef shared_ptr[lists_column_view] list_view = ( - make_shared[lists_column_view](col.view()) - ) - cdef order c_sort_order = ( - order.ASCENDING if ascending else order.DESCENDING - ) - cdef null_order c_null_prec = ( - null_order.BEFORE if na_position == "first" else null_order.AFTER - ) - - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_sort_lists(list_view.get()[0], c_sort_order, c_null_prec) + return Column.from_pylibcudf( + pylibcudf.lists.sort_lists( + col.to_pylibcudf(mode="read"), + ascending, + null_order.BEFORE if na_position == "first" else null_order.AFTER, + False, ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index e7959d21e01..a2eed94bb3c 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -18,16 +18,14 @@ from cython.operator cimport dereference from cudf.api.types import is_list_like -from cudf._lib.utils cimport data_from_unique_ptr +from cudf._lib.utils cimport _data_from_columns, data_from_pylibcudf_io -from cudf._lib import pylibcudf from cudf._lib.utils import _index_level_name, generate_pandas_metadata from libc.stdint cimport uint8_t from libcpp cimport bool from libcpp.map cimport map from libcpp.memory cimport make_unique, unique_ptr -from libcpp.pair cimport pair from libcpp.string cimport string from libcpp.unordered_map cimport unordered_map from libcpp.utility cimport move @@ -35,25 +33,20 @@ from libcpp.vector cimport vector cimport cudf._lib.pylibcudf.libcudf.io.data_sink as cudf_io_data_sink cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types -cimport cudf._lib.pylibcudf.libcudf.types as cudf_types from cudf._lib.column cimport Column from cudf._lib.io.utils cimport ( + add_df_col_struct_names, make_sinks_info, make_source_info, - update_struct_field_names, ) from cudf._lib.pylibcudf.expressions cimport Expression from cudf._lib.pylibcudf.io.datasource cimport NativeFileDatasource -from cudf._lib.pylibcudf.libcudf.expressions cimport expression +from cudf._lib.pylibcudf.io.parquet cimport ChunkedParquetReader from cudf._lib.pylibcudf.libcudf.io.parquet cimport ( - chunked_parquet_reader as cpp_chunked_parquet_reader, chunked_parquet_writer_options, merge_row_group_metadata as parquet_merge_metadata, parquet_chunked_writer as cpp_parquet_chunked_writer, - parquet_reader_options, - parquet_reader_options_builder, parquet_writer_options, - read_parquet as parquet_reader, write_parquet as parquet_writer, ) from cudf._lib.pylibcudf.libcudf.io.parquet_metadata cimport ( @@ -63,19 +56,17 @@ from cudf._lib.pylibcudf.libcudf.io.parquet_metadata cimport ( from cudf._lib.pylibcudf.libcudf.io.types cimport ( column_in_metadata, table_input_metadata, - table_metadata, 
) from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view -from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type +from cudf._lib.pylibcudf.libcudf.types cimport size_type from cudf._lib.utils cimport table_view_from_table from pyarrow.lib import NativeFile -from cudf._lib.concat import concat_columns +import cudf._lib.pylibcudf as plc +from cudf._lib.pylibcudf cimport Table from cudf.utils.ioutils import _ROW_GROUP_SIZE_BYTES_DEFAULT -from cudf._lib.utils cimport data_from_pylibcudf_table - cdef class BufferArrayFromVector: cdef Py_ssize_t length @@ -133,71 +124,37 @@ def _parse_metadata(meta): return file_is_range_index, file_index_cols, file_column_dtype -cdef pair[parquet_reader_options, bool] _setup_parquet_reader_options( - cudf_io_types.source_info source, - vector[vector[size_type]] row_groups, - bool use_pandas_metadata, - Expression filters, - object columns): - - cdef parquet_reader_options args - cdef parquet_reader_options_builder builder - cdef data_type cpp_timestamp_type = cudf_types.data_type( - cudf_types.type_id.EMPTY - ) - builder = ( - parquet_reader_options.builder(source) - .row_groups(row_groups) - .use_pandas_metadata(use_pandas_metadata) - .use_arrow_schema(True) - .timestamp_type(cpp_timestamp_type) - ) - if filters is not None: - builder = builder.filter(dereference(filters.c_obj.get())) - - args = move(builder.build()) - cdef vector[string] cpp_columns - allow_range_index = True - if columns is not None: - cpp_columns.reserve(len(columns)) - allow_range_index = len(columns) > 0 - for col in columns: - cpp_columns.push_back(str(col).encode()) - args.set_columns(cpp_columns) - allow_range_index &= filters is None - - return pair[parquet_reader_options, bool](args, allow_range_index) - cdef object _process_metadata(object df, - table_metadata table_meta, list names, + dict child_names, + list per_file_user_data, object row_groups, object filepaths_or_buffers, list pa_buffers, bool allow_range_index, bool use_pandas_metadata): - update_struct_field_names(df, table_meta.schema_info) + + add_df_col_struct_names(df, child_names) index_col = None is_range_index = True column_index_type = None index_col_names = None meta = None - cdef vector[unordered_map[string, string]] per_file_user_data = \ - table_meta.per_file_user_data for single_file in per_file_user_data: + if b'pandas' not in single_file: + continue json_str = single_file[b'pandas'].decode('utf-8') - if json_str != "": - meta = json.loads(json_str) - file_is_range_index, index_col, column_index_type = _parse_metadata(meta) - is_range_index &= file_is_range_index - - if not file_is_range_index and index_col is not None \ - and index_col_names is None: - index_col_names = {} - for idx_col in index_col: - for c in meta['columns']: - if c['field_name'] == idx_col: - index_col_names[idx_col] = c['name'] + meta = json.loads(json_str) + file_is_range_index, index_col, column_index_type = _parse_metadata(meta) + is_range_index &= file_is_range_index + + if not file_is_range_index and index_col is not None \ + and index_col_names is None: + index_col_names = {} + for idx_col in index_col: + for c in meta['columns']: + if c['field_name'] == idx_col: + index_col_names[idx_col] = c['name'] if meta is not None: # Book keep each column metadata as the order @@ -297,6 +254,76 @@ cdef object _process_metadata(object df, return df +def read_parquet_chunked( + filepaths_or_buffers, + columns=None, + row_groups=None, + use_pandas_metadata=True, + size_t chunk_read_limit=0, + size_t 
pass_read_limit=1024000000
+):
+    # Convert NativeFile buffers to NativeFileDatasource,
+    # but save original buffers in case we need to use
+    # pyarrow for metadata processing
+    # (See: https://github.com/rapidsai/cudf/issues/9599)
+
+    pa_buffers = []
+
+    new_bufs = []
+    for i, datasource in enumerate(filepaths_or_buffers):
+        if isinstance(datasource, NativeFile):
+            new_bufs.append(NativeFileDatasource(datasource))
+        else:
+            new_bufs.append(datasource)
+
+    # Note: If this function ever accepts filters,
+    # allow_range_index needs to be False when a filter is passed
+    # (see read_parquet)
+    allow_range_index = columns is not None and len(columns) != 0
+
+    reader = ChunkedParquetReader(
+        plc.io.SourceInfo(new_bufs),
+        columns,
+        row_groups,
+        use_pandas_metadata,
+        chunk_read_limit=chunk_read_limit,
+        pass_read_limit=pass_read_limit
+    )
+
+    tbl_w_meta = reader.read_chunk()
+    column_names = tbl_w_meta.column_names(include_children=False)
+    child_names = tbl_w_meta.child_names
+    per_file_user_data = tbl_w_meta.per_file_user_data
+    concatenated_columns = tbl_w_meta.tbl.columns()
+
+    # save memory
+    del tbl_w_meta
+
+    cdef Table tbl
+    while reader.has_next():
+        tbl = reader.read_chunk().tbl
+
+        for i in range(tbl.num_columns()):
+            concatenated_columns[i] = plc.concatenate.concatenate(
+                [concatenated_columns[i], tbl._columns[i]]
+            )
+            # Drop residual columns to save memory
+            tbl._columns[i] = None
+
+    df = cudf.DataFrame._from_data(
+        *_data_from_columns(
+            columns=[Column.from_pylibcudf(col) for col in concatenated_columns],
+            column_names=column_names,
+            index_names=None
+        )
+    )
+    df = _process_metadata(df, column_names, child_names,
+                           per_file_user_data, row_groups,
+                           filepaths_or_buffers, pa_buffers,
+                           allow_range_index, use_pandas_metadata)
+    return df
+
+
 cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
                    use_pandas_metadata=True, Expression filters=None):
@@ -322,33 +349,28 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
             pa_buffers.append(datasource)
             filepaths_or_buffers[i] = NativeFileDatasource(datasource)
 
-    cdef cudf_io_types.source_info source = make_source_info(
-        filepaths_or_buffers)
-
-    cdef vector[vector[size_type]] cpp_row_groups
-    if row_groups is not None:
-        cpp_row_groups = row_groups
-
-    # Setup parquet reader arguments
-    cdef parquet_reader_options args
-    cdef pair[parquet_reader_options, bool] c_res = _setup_parquet_reader_options(
-        source, cpp_row_groups, use_pandas_metadata, filters, columns)
-    args, allow_range_index = c_res.first, c_res.second
+    allow_range_index = True
+    if (columns is not None and len(columns) == 0) or filters:
+        allow_range_index = False
 
     # Read Parquet
-    cdef cudf_io_types.table_with_metadata c_result
-
-    with nogil:
-        c_result = move(parquet_reader(args))
+    tbl_w_meta = plc.io.parquet.read_parquet(
+        plc.io.SourceInfo(filepaths_or_buffers),
+        columns,
+        row_groups,
+        filters,
+        convert_strings_to_categories = False,
+        use_pandas_metadata = use_pandas_metadata,
+    )
 
-    names = [info.name.decode() for info in c_result.metadata.schema_info]
+    df = cudf.DataFrame._from_data(
+        *data_from_pylibcudf_io(tbl_w_meta)
+    )
 
-    df = cudf.DataFrame._from_data(*data_from_unique_ptr(
-        move(c_result.tbl),
-        column_names=names
-    ))
-    df = _process_metadata(df, c_result.metadata, names, row_groups,
-                           filepaths_or_buffers, pa_buffers,
+    df = _process_metadata(df, tbl_w_meta.column_names(include_children=False),
+                           tbl_w_meta.child_names, tbl_w_meta.per_file_user_data,
+                           row_groups, filepaths_or_buffers,
pa_buffers, allow_range_index, use_pandas_metadata) return df @@ -804,120 +826,6 @@ cdef class ParquetWriter: self.initialized = True -cdef class ParquetReader: - cdef bool initialized - cdef unique_ptr[cpp_chunked_parquet_reader] reader - cdef size_t chunk_read_limit - cdef size_t pass_read_limit - cdef size_t row_group_size_bytes - cdef table_metadata result_meta - cdef vector[unordered_map[string, string]] per_file_user_data - cdef object pandas_meta - cdef list pa_buffers - cdef bool allow_range_index - cdef object row_groups - cdef object filepaths_or_buffers - cdef object names - cdef object column_index_type - cdef object index_col_names - cdef bool is_range_index - cdef object index_col - cdef bool cpp_use_pandas_metadata - - def __cinit__(self, filepaths_or_buffers, columns=None, row_groups=None, - use_pandas_metadata=True, - size_t chunk_read_limit=0, - size_t pass_read_limit=1024000000): - - # Convert NativeFile buffers to NativeFileDatasource, - # but save original buffers in case we need to use - # pyarrow for metadata processing - # (See: https://github.com/rapidsai/cudf/issues/9599) - - pa_buffers = [] - for i, datasource in enumerate(filepaths_or_buffers): - if isinstance(datasource, NativeFile): - pa_buffers.append(datasource) - filepaths_or_buffers[i] = NativeFileDatasource(datasource) - self.pa_buffers = pa_buffers - cdef cudf_io_types.source_info source = make_source_info( - filepaths_or_buffers) - - self.cpp_use_pandas_metadata = use_pandas_metadata - - cdef vector[vector[size_type]] cpp_row_groups - if row_groups is not None: - cpp_row_groups = row_groups - cdef parquet_reader_options args - cdef pair[parquet_reader_options, bool] c_res = _setup_parquet_reader_options( - source, cpp_row_groups, use_pandas_metadata, None, columns) - args, self.allow_range_index = c_res.first, c_res.second - - with nogil: - self.reader.reset( - new cpp_chunked_parquet_reader( - chunk_read_limit, - pass_read_limit, - args - ) - ) - self.initialized = False - self.row_groups = row_groups - self.filepaths_or_buffers = filepaths_or_buffers - - def _has_next(self): - cdef bool res - with nogil: - res = self.reader.get()[0].has_next() - return res - - def _read_chunk(self): - # Read Parquet - cdef cudf_io_types.table_with_metadata c_result - - with nogil: - c_result = move(self.reader.get()[0].read_chunk()) - - if not self.initialized: - self.names = [info.name.decode() for info in c_result.metadata.schema_info] - self.result_meta = c_result.metadata - - df = cudf.DataFrame._from_data(*data_from_unique_ptr( - move(c_result.tbl), - column_names=self.names, - )) - - self.initialized = True - return df - - def read(self): - dfs = self._read_chunk() - column_names = dfs._column_names - concatenated_columns = list(dfs._columns) - del dfs - while self._has_next(): - new_chunk = list(self._read_chunk()._columns) - for i in range(len(column_names)): - concatenated_columns[i] = concat_columns( - [concatenated_columns[i], new_chunk[i]] - ) - # Must drop any residual GPU columns to save memory - new_chunk[i] = None - - dfs = cudf.DataFrame._from_data( - *data_from_pylibcudf_table( - pylibcudf.Table( - [col.to_pylibcudf(mode="read") for col in concatenated_columns] - ), - column_names=column_names, - index_names=None - ) - ) - - return _process_metadata(dfs, self.result_meta, self.names, self.row_groups, - self.filepaths_or_buffers, self.pa_buffers, - self.allow_range_index, self.cpp_use_pandas_metadata) - cpdef merge_filemetadata(object filemetadata_list): """ Cython function to call into 
libcudf API, see `merge_row_group_metadata`.
diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx
index 56bfa0ba332..64634b7a6f9 100644
--- a/python/cudf/cudf/_lib/reduce.pyx
+++ b/python/cudf/cudf/_lib/reduce.pyx
@@ -1,4 +1,5 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+import warnings
 
 import cudf
 from cudf.core.buffer import acquire_spill_lock
@@ -26,11 +27,15 @@ def reduce(reduction_op, Column incol, dtype=None, **kwargs):
         A numpy data type to use for the output, defaults
         to the same type as the input column
     """
-
-    col_dtype = (
-        dtype if dtype is not None
-        else incol._reduction_result_dtype(reduction_op)
-    )
+    if dtype is not None:
+        warnings.warn(
+            "dtype is deprecated and will be removed in a future release. "
+            "Cast the result (e.g. .astype) after the operation instead.",
+            FutureWarning
+        )
+        col_dtype = dtype
+    else:
+        col_dtype = incol._reduction_result_dtype(reduction_op)
 
     # check empty case
     if len(incol) <= incol.null_count:
diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
index fc672caa574..253fdf7b0d9 100644
--- a/python/cudf/cudf/_lib/types.pyx
+++ b/python/cudf/cudf/_lib/types.pyx
@@ -21,8 +21,6 @@ from cudf._lib.types cimport (
 import cudf
 from cudf._lib import pylibcudf
 
-size_type_dtype = np.dtype("int32")
-
 
 class TypeId(IntEnum):
     EMPTY = libcudf_types.type_id.EMPTY
@@ -150,6 +148,8 @@ datetime_unit_map = {
     TypeId.TIMESTAMP_NANOSECONDS: "ns",
 }
 
+size_type_dtype = LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[pylibcudf.types.SIZE_TYPE_ID]
+
 
 class Interpolation(IntEnum):
     LINEAR = (
diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index d97e9c815b6..294ae2fd985 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -90,7 +90,7 @@ def is_integer(obj):
     bool
     """
     if isinstance(obj, cudf.Scalar):
-        return pd.api.types.is_integer_dtype(obj.dtype)
+        return obj.dtype.kind in "iu"
     return pd.api.types.is_integer(obj)
 
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 479f87bb78b..c91514202c5 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -19,14 +19,7 @@
 )
 from cudf._lib.types import size_type_dtype
 from cudf.api.extensions import no_default
-from cudf.api.types import (
-    is_integer,
-    is_integer_dtype,
-    is_list_like,
-    is_scalar,
-    is_signed_integer_dtype,
-    is_unsigned_integer_dtype,
-)
+from cudf.api.types import is_integer, is_list_like, is_scalar
 from cudf.core.abc import Serializable
 from cudf.core.column import ColumnBase, column
 from cudf.errors import MixedTypeError
@@ -62,6 +55,12 @@ def copy(self, deep: bool = True) -> Self:
     def __len__(self):
         raise NotImplementedError
 
+    def __bool__(self):
+        raise ValueError(
+            f"The truth value of a {type(self).__name__} is ambiguous. Use "
+            "a.empty, a.bool(), a.item(), a.any() or a.all()."
+        )
+
     @property
     def size(self):
         # The size of an index is always its length irrespective of dimension.
@@ -99,7 +98,7 @@ def astype(self, dtype, copy: bool = True):
         """
         raise NotImplementedError
 
-    def argsort(self, *args, **kwargs):
+    def argsort(self, *args, **kwargs) -> cupy.ndarray:
         """Return the integer indices that would sort the index.
 
         Parameters vary by subclass.
@@ -615,12 +614,8 @@ def union(self, other, sort=None):
                 # Bools + other types will result in mixed type.
                 # This is not yet consistent in pandas and specific to APIs.
                raise MixedTypeError("Cannot perform union with mixed types")
-            if (
-                is_signed_integer_dtype(self.dtype)
-                and is_unsigned_integer_dtype(other.dtype)
-            ) or (
-                is_unsigned_integer_dtype(self.dtype)
-                and is_signed_integer_dtype(other.dtype)
+            if (self.dtype.kind == "i" and other.dtype.kind == "u") or (
+                self.dtype.kind == "u" and other.dtype.kind == "i"
             ):
                 # signed + unsigned types will result in
                 # mixed type for union in pandas.
@@ -873,6 +868,24 @@ def to_numpy(self):
         """Convert to a numpy array."""
         raise NotImplementedError
 
+    def to_flat_index(self) -> Self:
+        """
+        Identity method.
+
+        This is implemented for compatibility with subclass implementations
+        when chaining.
+
+        Returns
+        -------
+        pd.Index
+            Caller.
+
+        See Also
+        --------
+        MultiIndex.to_flat_index : Subclass implementation.
+        """
+        return self
+
     def any(self):
         """
         Return whether any element is True in the Index.
@@ -950,7 +963,7 @@ def to_pandas(self, *, nullable: bool = False, arrow_type: bool = False):
         """
         raise NotImplementedError
 
-    def isin(self, values):
+    def isin(self, values, level=None):
         """Return a boolean array where the index values are in values.
 
         Compute boolean array of whether each index value is found in
@@ -961,6 +974,9 @@
         ----------
         values : set, list-like, Index
             Sought values.
+        level : str or int, optional
+            Name or position of the index level to use (if the index is a
+            `MultiIndex`).
 
         Returns
         -------
@@ -984,7 +1000,7 @@
         # ColumnBase.isin).
         raise NotImplementedError
 
-    def unique(self):
+    def unique(self, level: int | None = None):
         """
         Return unique values in the index.
 
@@ -1525,7 +1541,7 @@ def sort_values(
         ascending=True,
         na_position="last",
         key=None,
-    ):
+    ) -> Self | tuple[Self, cupy.ndarray]:
         """
         Return a sorted copy of the index, and optionally return the
         indices that sorted the index itself.
@@ -2097,7 +2113,7 @@ def _gather(self, gather_map, nullify=False, check_bounds=True):
 
         # TODO: For performance, the check and conversion of gather map should
         # be done by the caller. This check will be removed in a future release.
- if not is_integer_dtype(gather_map.dtype): + if gather_map.dtype.kind not in "iu": gather_map = gather_map.astype(size_type_dtype) if not _gather_map_is_valid( diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py index 6003a0f6aea..18ab32d2c9e 100644 --- a/python/cudf/cudf/core/_internals/where.py +++ b/python/cudf/cudf/core/_internals/where.py @@ -47,7 +47,7 @@ def _check_and_cast_columns_with_other( other_is_scalar = is_scalar(other) if other_is_scalar: - if isinstance(other, float) and not np.isnan(other): + if isinstance(other, (float, np.floating)) and not np.isnan(other): try: is_safe = source_dtype.type(other) == other except OverflowError: diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 9aaccca349d..9433a91b9c6 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -262,37 +262,10 @@ def add_categories(self, new_categories: Any) -> SeriesOrIndex | None: dtype: category Categories (2, int64): [1, 2] """ - old_categories = self._column.categories - new_categories = column.as_column( - new_categories, - dtype=old_categories.dtype if len(new_categories) == 0 else None, - ) - - if is_mixed_with_object_dtype(old_categories, new_categories): - raise TypeError( - f"cudf does not support adding categories with existing " - f"categories of dtype `{old_categories.dtype}` and new " - f"categories of dtype `{new_categories.dtype}`, please " - f"type-cast new_categories to the same type as " - f"existing categories." - ) - common_dtype = find_common_type( - [old_categories.dtype, new_categories.dtype] + return self._return_or_inplace( + self._column.add_categories(new_categories=new_categories) ) - new_categories = new_categories.astype(common_dtype) - old_categories = old_categories.astype(common_dtype) - - if old_categories.isin(new_categories).any(): - raise ValueError("new categories must not include old categories") - - new_categories = old_categories.append(new_categories) - out_col = self._column - if not out_col._categories_equal(new_categories): - out_col = out_col._set_categories(new_categories) - - return self._return_or_inplace(out_col) - def remove_categories( self, removals: Any, @@ -349,23 +322,9 @@ def remove_categories( dtype: category Categories (3, int64): [1, 2, 10] """ - - cats = self.categories.to_series() - removals = cudf.Series(removals, dtype=cats.dtype) - removals_mask = removals.isin(cats) - - # ensure all the removals are in the current categories - # list. If not, raise an error to match Pandas behavior - if not removals_mask.all(): - vals = removals[~removals_mask].to_numpy() - raise ValueError(f"removals must all be in old categories: {vals}") - - new_categories = cats[~cats.isin(removals)]._column - out_col = self._column - if not out_col._categories_equal(new_categories): - out_col = out_col._set_categories(new_categories) - - return self._return_or_inplace(out_col) + return self._return_or_inplace( + self._column.remove_categories(removals=removals) + ) def set_categories( self, @@ -1319,7 +1278,7 @@ def _set_categories( new_categories: Any, is_unique: bool = False, ordered: bool = False, - ) -> CategoricalColumn: + ) -> Self: """Returns a new CategoricalColumn with the categories set to the specified *new_categories*. 
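As a quick, hedged illustration of the accessor-level semantics the refactored
`add_categories`/`remove_categories` in the hunks above and below preserve (a
sketch, not part of the patch; the series and printed values are illustrative,
though the ValueError message matches the new column code):

```python
import cudf

s = cudf.Series([1, 2, 2], dtype="category")

# add_categories appends new categories without changing existing values.
added = s.cat.add_categories([10])
print(added.cat.categories)  # [1, 2, 10]

# remove_categories nulls out values whose category was removed.
removed = added.cat.remove_categories([2])
print(removed.isnull().sum())  # 2

# Removals outside the current categories raise, matching pandas behavior.
try:
    s.cat.remove_categories([99])
except ValueError as err:
    print(err)  # removals must all be in old categories
```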
@@ -1376,17 +1335,68 @@ def _set_categories( new_codes = df._data["new_codes"] # codes can't have masks, so take mask out before moving in - return column.build_categorical_column( - categories=new_cats, - codes=column.build_column( - new_codes.base_data, dtype=new_codes.dtype + return cast( + Self, + column.build_categorical_column( + categories=new_cats, + codes=column.build_column( + new_codes.base_data, dtype=new_codes.dtype + ), + mask=new_codes.base_mask, + size=new_codes.size, + offset=new_codes.offset, + ordered=ordered, ), - mask=new_codes.base_mask, - size=new_codes.size, - offset=new_codes.offset, - ordered=ordered, ) + def add_categories(self, new_categories: Any) -> Self: + old_categories = self.categories + new_categories = column.as_column( + new_categories, + dtype=old_categories.dtype if len(new_categories) == 0 else None, + ) + if is_mixed_with_object_dtype(old_categories, new_categories): + raise TypeError( + f"cudf does not support adding categories with existing " + f"categories of dtype `{old_categories.dtype}` and new " + f"categories of dtype `{new_categories.dtype}`, please " + f"type-cast new_categories to the same type as " + f"existing categories." + ) + common_dtype = find_common_type( + [old_categories.dtype, new_categories.dtype] + ) + + new_categories = new_categories.astype(common_dtype) + old_categories = old_categories.astype(common_dtype) + + if old_categories.isin(new_categories).any(): + raise ValueError("new categories must not include old categories") + + new_categories = old_categories.append(new_categories) + if not self._categories_equal(new_categories): + return self._set_categories(new_categories) + return self + + def remove_categories( + self, + removals: Any, + ) -> Self: + removals = column.as_column(removals).astype(self.categories.dtype) + removals_mask = removals.isin(self.categories) + + # ensure all the removals are in the current categories + # list. If not, raise an error to match Pandas behavior + if not removals_mask.all(): + raise ValueError("removals must all be in old categories") + + new_categories = self.categories.apply_boolean_mask( + self.categories.isin(removals).unary_operator("not") + ) + if not self._categories_equal(new_categories): + return self._set_categories(new_categories) + return self + def reorder_categories( self, new_categories: Any, @@ -1404,6 +1414,16 @@ def reorder_categories( ) return self._set_categories(new_categories, ordered=ordered) + def rename_categories(self, new_categories) -> CategoricalColumn: + raise NotImplementedError( + "rename_categories is currently not supported." + ) + + def remove_unused_categories(self) -> Self: + raise NotImplementedError( + "remove_unused_categories is currently not supported." + ) + def as_ordered(self, ordered: bool): if self.dtype.ordered == ordered: return self diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 9467bbeed15..32e6aade65b 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -71,7 +71,7 @@ get_time_unit, is_column_like, is_mixed_with_object_dtype, - min_scalar_type, + min_signed_type, min_unsigned_type, ) from cudf.utils.utils import _array_ufunc, mask_dtype @@ -261,7 +261,7 @@ def all(self, skipna: bool = True) -> bool: if self.null_count == self.size: return True - return libcudf.reduce.reduce("all", self, dtype=np.bool_) + return libcudf.reduce.reduce("all", self) def any(self, skipna: bool = True) -> bool: # Early exit for fast cases. 
@@ -271,7 +271,7 @@ def any(self, skipna: bool = True) -> bool: elif skipna and self.null_count == self.size: return False - return libcudf.reduce.reduce("any", self, dtype=np.bool_) + return libcudf.reduce.reduce("any", self) def dropna(self) -> Self: if self.has_nulls(): @@ -1305,7 +1305,10 @@ def _reduce( skipna=skipna, min_count=min_count ) if isinstance(preprocessed, ColumnBase): - return libcudf.reduce.reduce(op, preprocessed, **kwargs) + dtype = kwargs.pop("dtype", None) + return libcudf.reduce.reduce( + op, preprocessed, dtype=dtype, **kwargs + ) return preprocessed def _process_for_reduction( @@ -1336,6 +1339,8 @@ def _reduction_result_dtype(self, reduction_op: str) -> Dtype: Determine the correct dtype to pass to libcudf based on the input dtype, data dtype, and specific reduction op """ + if reduction_op in {"any", "all"}: + return np.dtype(np.bool_) return self.dtype def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase: @@ -1351,7 +1356,7 @@ def _label_encoding( self, cats: ColumnBase, dtype: Dtype | None = None, - na_sentinel: ScalarLike | None = None, + na_sentinel: cudf.Scalar | None = None, ): """ Convert each value in `self` into an integer code, with `cats` @@ -1391,7 +1396,7 @@ def _return_sentinel_column(): return as_column(na_sentinel, dtype=dtype, length=len(self)) if dtype is None: - dtype = min_scalar_type(max(len(cats), na_sentinel), 8) + dtype = min_signed_type(max(len(cats), na_sentinel.value), 8) if is_mixed_with_object_dtype(self, cats): return _return_sentinel_column() @@ -1453,9 +1458,10 @@ def column_empty_like( return column_empty(row_count, dtype, masked) -def _has_any_nan(arbitrary): +def _has_any_nan(arbitrary: pd.Series | np.ndarray) -> bool: + """Check if an object dtype Series or array contains NaN.""" return any( - ((isinstance(x, float) or isinstance(x, np.floating)) and np.isnan(x)) + isinstance(x, (float, np.floating)) and np.isnan(x) for x in np.asarray(arbitrary) ) @@ -2213,25 +2219,26 @@ def as_column( and arbitrary.null_count > 0 ): arbitrary = arbitrary.cast(pa.float64()) - if cudf.get_option( - "default_integer_bitwidth" - ) and pa.types.is_integer(arbitrary.type): - dtype = _maybe_convert_to_default_type("int") - elif cudf.get_option( - "default_float_bitwidth" - ) and pa.types.is_floating(arbitrary.type): - dtype = _maybe_convert_to_default_type("float") + if ( + cudf.get_option("default_integer_bitwidth") + and pa.types.is_integer(arbitrary.type) + ) or ( + cudf.get_option("default_float_bitwidth") + and pa.types.is_floating(arbitrary.type) + ): + dtype = _maybe_convert_to_default_type( + cudf.dtype(arbitrary.type.to_pandas_dtype()) + ) except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError): arbitrary = pd.Series(arbitrary) - if cudf.get_option( - "default_integer_bitwidth" - ) and arbitrary.dtype.kind in set("iu"): - dtype = _maybe_convert_to_default_type("int") - elif ( + if ( + cudf.get_option("default_integer_bitwidth") + and arbitrary.dtype.kind in set("iu") + ) or ( cudf.get_option("default_float_bitwidth") and arbitrary.dtype.kind == "f" ): - dtype = _maybe_convert_to_default_type("float") + dtype = _maybe_convert_to_default_type(arbitrary.dtype) return as_column(arbitrary, nan_as_null=nan_as_null, dtype=dtype) @@ -2307,9 +2314,8 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: # Notice, we can always cast pure null columns not_null_col_dtypes = [o.dtype for o in objs if o.null_count != len(o)] if len(not_null_col_dtypes) and all( - _is_non_decimal_numeric_dtype(dtyp) - and 
np.issubdtype(dtyp, np.datetime64) - for dtyp in not_null_col_dtypes + _is_non_decimal_numeric_dtype(dtype) and dtype.kind == "M" + for dtype in not_null_col_dtypes ): common_dtype = find_common_type(not_null_col_dtypes) # Cast all columns to the common dtype diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 004a059af95..73902789c11 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -485,13 +485,11 @@ def as_string_column(self) -> cudf.core.column.StringColumn: format = format.split(" ")[0] return self.strftime(format) - def mean( - self, skipna=None, min_count: int = 0, dtype=np.float64 - ) -> ScalarLike: + def mean(self, skipna=None, min_count: int = 0) -> ScalarLike: return pd.Timestamp( cast( "cudf.core.column.NumericalColumn", self.astype("int64") - ).mean(skipna=skipna, min_count=min_count, dtype=dtype), + ).mean(skipna=skipna, min_count=min_count), unit=self.time_unit, ).as_unit(self.time_unit) @@ -499,12 +497,11 @@ def std( self, skipna: bool | None = None, min_count: int = 0, - dtype: Dtype = np.float64, ddof: int = 1, ) -> pd.Timedelta: return pd.Timedelta( cast("cudf.core.column.NumericalColumn", self.astype("int64")).std( - skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof + skipna=skipna, min_count=min_count, ddof=ddof ) * _unit_to_nanoseconds_conversion[self.time_unit], ).as_unit(self.time_unit) @@ -642,7 +639,7 @@ def isin(self, values: Sequence) -> ColumnBase: return cudf.core.tools.datetimes._isin_datetimelike(self, values) def can_cast_safely(self, to_dtype: Dtype) -> bool: - if np.issubdtype(to_dtype, np.datetime64): + if to_dtype.kind == "M": # type: ignore[union-attr] to_res, _ = np.datetime_data(to_dtype) self_res, _ = np.datetime_data(self.dtype) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index a63055ed527..6a7f338b065 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -15,7 +15,7 @@ from cudf._lib.strings.convert.convert_fixed_point import ( from_decimal as cpp_from_decimal, ) -from cudf.api.types import is_integer_dtype, is_scalar +from cudf.api.types import is_scalar from cudf.core.buffer import as_buffer from cudf.core.column import ColumnBase from cudf.core.dtypes import ( @@ -150,7 +150,7 @@ def _validate_fillna_value( def normalize_binop_value(self, other): if isinstance(other, ColumnBase): if isinstance(other, cudf.core.column.NumericalColumn): - if not is_integer_dtype(other.dtype): + if other.dtype.kind not in "iu": raise TypeError( "Decimal columns only support binary operations with " "integer numerical columns." diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index cc15e78314e..1b7cd95b3d0 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -564,10 +564,11 @@ def take(self, lists_indices: ColumnLike) -> ParentType: raise ValueError( "lists_indices and list column is of different " "size." ) - if not _is_non_decimal_numeric_dtype( - lists_indices_col.children[1].dtype - ) or not np.issubdtype( - lists_indices_col.children[1].dtype, np.integer + if ( + not _is_non_decimal_numeric_dtype( + lists_indices_col.children[1].dtype + ) + or lists_indices_col.children[1].dtype.kind not in "iu" ): raise TypeError( "lists_indices should be column of values of index types." @@ -646,9 +647,17 @@ def sort_values( dtype: list .. 
pandas-compat:: - **ListMethods.sort_values** + `pandas.Series.list.sort_values` + + This method does not exist in pandas but it can be run + as: - The ``inplace`` and ``kind`` arguments are currently not supported. + >>> import pandas as pd + >>> s = pd.Series([[3, 2, 1], [2, 4, 3]]) + >>> print(s.apply(sorted)) + 0 [1, 2, 3] + 1 [2, 3, 4] + dtype: object """ if inplace: raise NotImplementedError("`inplace` not currently implemented.") diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index cea68c88c90..f9404eb3b40 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -12,12 +12,7 @@ import cudf from cudf import _lib as libcudf from cudf._lib import pylibcudf -from cudf.api.types import ( - is_float_dtype, - is_integer, - is_integer_dtype, - is_scalar, -) +from cudf.api.types import is_integer, is_scalar from cudf.core.column import ( ColumnBase, as_column, @@ -29,10 +24,10 @@ from cudf.core.mixins import BinaryOperand from cudf.errors import MixedTypeError from cudf.utils.dtypes import ( + find_common_type, min_column_type, min_signed_type, np_dtypes_to_pandas_dtypes, - numeric_normalize_types, ) from .numerical_base import NumericalBaseColumn @@ -225,25 +220,17 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: tmp = self if reflect else other # Guard against division by zero for integers. if ( - (tmp.dtype.type in int_float_dtype_mapping) - and (tmp.dtype.type != np.bool_) - and ( - ( - ( - np.isscalar(tmp) - or ( - isinstance(tmp, cudf.Scalar) - # host to device copy - and tmp.is_valid() - ) - ) - and (0 == tmp) - ) - or ((isinstance(tmp, NumericalColumn)) and (0 in tmp)) - ) + tmp.dtype.type in int_float_dtype_mapping + and tmp.dtype.kind != "b" ): - out_dtype = cudf.dtype("float64") - + if isinstance(tmp, NumericalColumn) and 0 in tmp: + out_dtype = cudf.dtype("float64") + elif isinstance(tmp, cudf.Scalar): + if tmp.is_valid() and tmp == 0: + # tmp == 0 can return NA + out_dtype = cudf.dtype("float64") + elif is_scalar(tmp) and tmp == 0: + out_dtype = cudf.dtype("float64") if op in { "__lt__", "__gt__", @@ -257,7 +244,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: out_dtype = "bool" if op in {"__and__", "__or__", "__xor__"}: - if is_float_dtype(self.dtype) or is_float_dtype(other.dtype): + if self.dtype.kind == "f" or other.dtype.kind == "f": raise TypeError( f"Operation 'bitwise {op[2:-2]}' not supported between " f"{self.dtype.type.__name__} and " @@ -268,8 +255,8 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: if ( op == "__pow__" - and is_integer_dtype(self.dtype) - and (is_integer(other) or is_integer_dtype(other.dtype)) + and self.dtype.kind in "iu" + and (is_integer(other) or other.dtype.kind in "iu") ): op = "INT_POW" @@ -395,7 +382,7 @@ def all(self, skipna: bool = True) -> bool: if result_col.null_count == result_col.size: return True - return libcudf.reduce.reduce("all", result_col, dtype=np.bool_) + return libcudf.reduce.reduce("all", result_col) def any(self, skipna: bool = True) -> bool: # Early exit for fast cases. 
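The restructured zero-divisor guard above only widens the output dtype when the divisor is, or contains, zero. A small sketch of the behavior it targets (illustrative only; exact result values are not asserted by this patch):

    import cudf

    s = cudf.Series([1, 2, 3])       # int64 column
    s // 2                           # nonzero divisor: result stays integral
    s // 0                           # scalar zero divisor: promoted to float64
    s // cudf.Series([1, 0, 1])      # a zero anywhere in a column divisor also
                                     # takes the float64 path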
@@ -406,7 +393,7 @@ def any(self, skipna: bool = True) -> bool: elif skipna and result_col.null_count == result_col.size: return False - return libcudf.reduce.reduce("any", result_col, dtype=np.bool_) + return libcudf.reduce.reduce("any", result_col) @functools.cached_property def nan_count(self) -> int: @@ -517,11 +504,15 @@ def find_and_replace( ) elif len(replacement_col) == 1 and len(to_replace_col) == 0: return self.copy() - to_replace_col, replacement_col, replaced = numeric_normalize_types( - to_replace_col, replacement_col, self + common_type = find_common_type( + (to_replace_col.dtype, replacement_col.dtype, self.dtype) ) + replaced = self.astype(common_type) df = cudf.DataFrame._from_data( - {"old": to_replace_col, "new": replacement_col} + { + "old": to_replace_col.astype(common_type), + "new": replacement_col.astype(common_type), + } ) df = df.drop_duplicates(subset=["old"], keep="last", ignore_index=True) if df._data["old"].null_count == 1: @@ -684,15 +675,16 @@ def to_pandas( return super().to_pandas(nullable=nullable, arrow_type=arrow_type) def _reduction_result_dtype(self, reduction_op: str) -> Dtype: - col_dtype = self.dtype if reduction_op in {"sum", "product"}: - col_dtype = ( - col_dtype if col_dtype.kind == "f" else np.dtype("int64") - ) + if self.dtype.kind == "f": + return self.dtype + return np.dtype("int64") elif reduction_op == "sum_of_squares": - col_dtype = np.result_type(col_dtype, np.dtype("uint64")) + return np.result_type(self.dtype, np.dtype("uint64")) + elif reduction_op in {"var", "std", "mean"}: + return np.dtype("float64") - return col_dtype + return super()._reduction_result_dtype(reduction_op) def _normalize_find_and_replace_input( diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index 95c78c5efcb..f41010062c8 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -144,32 +144,27 @@ def mean( self, skipna: bool | None = None, min_count: int = 0, - dtype=np.float64, ): - return self._reduce( - "mean", skipna=skipna, min_count=min_count, dtype=dtype - ) + return self._reduce("mean", skipna=skipna, min_count=min_count) def var( self, skipna: bool | None = None, min_count: int = 0, - dtype=np.float64, ddof=1, ): return self._reduce( - "var", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof + "var", skipna=skipna, min_count=min_count, ddof=ddof ) def std( self, skipna: bool | None = None, min_count: int = 0, - dtype=np.float64, ddof=1, ): return self._reduce( - "std", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof + "std", skipna=skipna, min_count=min_count, ddof=ddof ) def median(self, skipna: bool | None = None) -> NumericalBaseColumn: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 96f9cdfd655..ec95c50f455 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -612,7 +612,7 @@ def extract( dtype: object .. pandas-compat:: - **StringMethods.extract** + :meth:`pandas.Series.str.extract` The `flags` parameter currently only supports re.DOTALL and re.MULTILINE. @@ -738,7 +738,7 @@ def contains( dtype: bool .. pandas-compat:: - **StringMethods.contains** + :meth:`pandas.Series.str.contains` The parameters `case` and `na` are not yet supported and will raise a NotImplementedError if anything other than the default @@ -974,7 +974,7 @@ def replace( dtype: object ..
pandas-compat:: - **StringMethods.replace** + :meth:`pandas.Series.str.replace` The parameters `case` and `flags` are not yet supported and will raise a `NotImplementedError` if anything other than the default @@ -2803,7 +2803,7 @@ def partition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: ) .. pandas-compat:: - **StringMethods.partition** + :meth:`pandas.Series.str.partition` The parameter `expand` is not yet supported and will raise a `NotImplementedError` if anything other than the default @@ -3527,7 +3527,7 @@ def count(self, pat: str, flags: int = 0) -> SeriesOrIndex: Index([0, 0, 2, 1], dtype='int64') .. pandas-compat:: - **StringMethods.count** + :meth:`pandas.Series.str.count` - `flags` parameter currently only supports re.DOTALL and re.MULTILINE. @@ -3607,7 +3607,7 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex: dtype: list .. pandas-compat:: - **StringMethods.findall** + :meth:`pandas.Series.str.findall` The `flags` parameter currently only supports re.DOTALL and re.MULTILINE. @@ -3811,7 +3811,7 @@ def endswith(self, pat: str) -> SeriesOrIndex: dtype: bool .. pandas-compat:: - **StringMethods.endswith** + :meth:`pandas.Series.str.endswith` `na` parameter is not yet supported, as cudf uses native strings instead of Python objects. @@ -4264,7 +4264,7 @@ def match( dtype: bool .. pandas-compat:: - **StringMethods.match** + :meth:`pandas.Series.str.match` Parameters `case` and `na` are currently not supported. The `flags` parameter currently only supports re.DOTALL and diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 36d7d9f9614..59ea1cc002c 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -287,11 +287,11 @@ def as_timedelta_column(self, dtype: Dtype) -> TimeDeltaColumn: return self return libcudf.unary.cast(self, dtype=dtype) - def mean(self, skipna=None, dtype: Dtype = np.float64) -> pd.Timedelta: + def mean(self, skipna=None) -> pd.Timedelta: return pd.Timedelta( cast( "cudf.core.column.NumericalColumn", self.astype("int64") - ).mean(skipna=skipna, dtype=dtype), + ).mean(skipna=skipna), unit=self.time_unit, ).as_unit(self.time_unit) @@ -345,12 +345,11 @@ def std( self, skipna: bool | None = None, min_count: int = 0, - dtype: Dtype = np.float64, ddof: int = 1, ) -> pd.Timedelta: return pd.Timedelta( cast("cudf.core.column.NumericalColumn", self.astype("int64")).std( - skipna=skipna, min_count=min_count, ddof=ddof, dtype=dtype + skipna=skipna, min_count=min_count, ddof=ddof ), unit=self.time_unit, ).as_unit(self.time_unit) diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index f30a557efb0..819d351b2c4 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -16,6 +16,8 @@ from cudf.core import column if TYPE_CHECKING: + from typing_extensions import Self + from cudf._typing import Dtype from cudf.core.column import ColumnBase @@ -86,58 +88,58 @@ class ColumnAccessor(abc.MutableMapping): (default=None). verify : bool, optional For non ColumnAccessor inputs, whether to verify - column length and type + column lengths and that all data.values() are Columns """ - _data: "dict[Any, ColumnBase]" - multiindex: bool + _data: dict[Any, ColumnBase] _level_names: tuple[Any, ...]
def __init__( self, - data: abc.MutableMapping | ColumnAccessor | None = None, + data: abc.MutableMapping[Any, ColumnBase] | Self, multiindex: bool = False, level_names=None, rangeindex: bool = False, label_dtype: Dtype | None = None, verify: bool = True, ): - self.rangeindex = rangeindex - self.label_dtype = label_dtype - if data is None: - data = {} - # TODO: we should validate the keys of `data` if isinstance(data, ColumnAccessor): - multiindex = multiindex or data.multiindex - level_names = level_names or data.level_names self._data = data._data - self.multiindex = multiindex - self._level_names = level_names - self.rangeindex = data.rangeindex - self.label_dtype = data.label_dtype - else: + self._level_names = data.level_names + self.multiindex: bool = data.multiindex + self.rangeindex: bool = data.rangeindex + self.label_dtype: Dtype | None = data.label_dtype + elif isinstance(data, abc.MutableMapping): # This code path is performance-critical for copies and should be # modified with care. - data = dict(data) if data and verify: - result = {} # Faster than next(iter(data.values())) column_length = len(data[next(iter(data))]) - for k, v in data.items(): - # Much faster to avoid the function call if possible; the - # extra isinstance is negligible if we do have to make a - # column from something else. - if not isinstance(v, column.ColumnBase): - v = column.as_column(v) - if len(v) != column_length: + # TODO: we should validate the keys of `data` + for col in data.values(): + if not isinstance(col, column.ColumnBase): + raise ValueError( + f"All data.values() must be Column, not {type(col).__name__}" + ) + if len(col) != column_length: raise ValueError("All columns must be of equal length") - result[k] = v - self._data = result - else: - self._data = data + if not isinstance(data, dict): + data = dict(data) + self._data = data + + if rangeindex and multiindex: + raise ValueError( + f"{rangeindex=} and {multiindex=} cannot both be True." 
+ ) + self.rangeindex = rangeindex self.multiindex = multiindex + self.label_dtype = label_dtype self._level_names = level_names + else: + raise ValueError( + f"data must be a ColumnAccessor or MutableMapping, not {type(data).__name__}" + ) def __iter__(self): return iter(self._data) @@ -161,7 +163,9 @@ def __repr__(self) -> str: type_info = ( f"{self.__class__.__name__}(" f"multiindex={self.multiindex}, " - f"level_names={self.level_names})" + f"level_names={self.level_names}, " + f"rangeindex={self.rangeindex}, " + f"label_dtype={self.label_dtype})" ) column_info = "\n".join( [f"{name}: {col.dtype}" for name, col in self.items()] diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b3d938829c9..6ea11fe9f64 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -83,8 +83,7 @@ cudf_dtype_from_pydata_dtype, find_common_type, is_column_like, - min_scalar_type, - numeric_normalize_types, + min_signed_type, ) from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import GetAttrGetItemMixin, _external_only_api @@ -103,20 +102,6 @@ "var": "nanvar", } -_numeric_reduction_ops = ( - "mean", - "min", - "max", - "sum", - "product", - "prod", - "std", - "var", - "kurtosis", - "kurt", - "skew", -) - def _shape_mismatch_error(x, y): raise ValueError( @@ -490,6 +475,7 @@ def __getitem__(self, arg): {key: ca._data[key] for key in column_names}, multiindex=ca.multiindex, level_names=ca.level_names, + verify=False, ), index=index, ) @@ -500,6 +486,7 @@ def __getitem__(self, arg): {key: ca._data[key] for key in column_names}, multiindex=ca.multiindex, level_names=ca.level_names, + verify=False, ), index=index, ) @@ -609,6 +596,9 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin): dtype : dtype, default None Data type to force. Only a single dtype is allowed. If None, infer. + copy : bool or None, default None + Copy data from inputs. + Currently not implemented. nan_as_null : bool, Default True If ``None``/``True``, converts ``np.nan`` values to ``null`` values. @@ -695,8 +685,11 @@ def __init__( index=None, columns=None, dtype=None, + copy=None, nan_as_null=no_default, ): + if copy is not None: + raise NotImplementedError("copy is not currently implemented.") super().__init__() if nan_as_null is no_default: nan_as_null = not cudf.get_option("mode.pandas_compatible") @@ -780,6 +773,7 @@ def __init__( else None, rangeindex=rangeindex, label_dtype=label_dtype, + verify=False, ) elif isinstance(data, ColumnAccessor): raise TypeError( @@ -923,7 +917,8 @@ def _init_from_series_list(self, data, columns, index): final_index = ensure_index(index) series_lengths = list(map(len, data)) - data = numeric_normalize_types(*data) + common_dtype = find_common_type([obj.dtype for obj in data]) + data = [obj.astype(common_dtype) for obj in data] if series_lengths.count(series_lengths[0]) == len(series_lengths): # Calculating the final dataframe columns by # getting union of all `index` of the Series objects. 
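A short sketch of the stricter `ColumnAccessor` construction contract introduced above (internal API, shown purely for illustration; the import paths are assumed from the diff context):

    from cudf.core.column import as_column
    from cudf.core.column_accessor import ColumnAccessor

    ColumnAccessor({"a": as_column([1, 2, 3])})  # values must already be Columns
    ColumnAccessor({"a": [1, 2, 3]})             # raises ValueError: a list is not a Column
    ColumnAccessor({}, rangeindex=True, multiindex=True)  # raises ValueError: the two
                                                          # flags are mutually exclusive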
@@ -939,7 +934,7 @@ def _init_from_series_list(self, data, columns, index): ) if not series.index.equals(final_columns): series = series.reindex(final_columns) - self._data[idx] = column.as_column(series._column) + self._data[idx] = series._column # Setting `final_columns` to self._index so # that the resulting `transpose` will have @@ -1538,6 +1533,25 @@ def __array_function__(self, func, types, args, kwargs): pass return NotImplemented + def __arrow_c_stream__(self, requested_schema=None): + """ + Export the cudf DataFrame as an Arrow C stream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the dataframe should be cast, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. Currently not implemented. + + Returns + ------- + PyCapsule + """ + if requested_schema is not None: + raise NotImplementedError("requested_schema is not supported") + return self.to_arrow().__arrow_c_stream__() + # The _get_numeric_data method is necessary for dask compatibility. @_performance_tracking def _get_numeric_data(self): @@ -2249,6 +2263,7 @@ def to_dict( self, orient: str = "dict", into: type[dict] = dict, + index: bool = True, ) -> dict | list[dict]: """ Convert the DataFrame to a dictionary. @@ -2282,6 +2297,13 @@ instance of the mapping type you want. If you want a collections.defaultdict, you must pass it initialized. + index : bool, default True + Whether to include the index item (and index_names item if `orient` + is 'tight') in the returned dictionary. Can only be ``False`` + when `orient` is 'split' or 'tight'. Note that when `orient` is + 'records', this parameter does not take effect (the index item is + always excluded). + Returns ------- dict, list or collections.abc.Mapping @@ -2363,7 +2385,7 @@ def to_dict( raise TypeError(f"unsupported type: {into}") return cons(self.items()) # type: ignore[misc] - return self.to_pandas().to_dict(orient=orient, into=into) + return self.to_pandas().to_dict(orient=orient, into=into, index=index) @_performance_tracking def scatter_by_map( @@ -2750,7 +2772,7 @@ def reindex( Chrome 200 0.02 .. pandas-compat:: - **DataFrame.reindex** + :meth:`pandas.DataFrame.reindex` Note: One difference from Pandas is that ``NA`` is used for rows that do not match, rather than ``NaN``.
One side effect of this is @@ -2822,6 +2844,10 @@ def reindex( index=index, inplace=False, fill_value=fill_value, + level=level, + method=method, + limit=limit, + tolerance=tolerance, ) @_performance_tracking @@ -2939,7 +2965,7 @@ def set_index( # label-like if is_scalar(col) or isinstance(col, tuple): if col in self._column_names: - data_to_add.append(self[col]) + data_to_add.append(self[col]._column) names.append(col) if drop: to_drop.append(col) @@ -2954,7 +2980,7 @@ def set_index( elif isinstance( col, (cudf.Series, cudf.Index, pd.Series, pd.Index) ): - data_to_add.append(col) + data_to_add.append(as_column(col)) names.append(col.name) else: try: @@ -3018,7 +3044,12 @@ def fillna( ) @_performance_tracking - def where(self, cond, other=None, inplace=False): + def where(self, cond, other=None, inplace=False, axis=None, level=None): + if axis is not None: + raise NotImplementedError("axis is not supported.") + elif level is not None: + raise NotImplementedError("level is not supported.") + from cudf.core._internals.where import ( _check_and_cast_columns_with_other, _make_categorical_like, @@ -3160,7 +3191,14 @@ class speed type ) ) def reset_index( - self, level=None, drop=False, inplace=False, col_level=0, col_fill="" + self, + level=None, + drop=False, + inplace=False, + col_level=0, + col_fill="", + allow_duplicates: bool = False, + names: abc.Hashable | abc.Sequence[abc.Hashable] | None = None, ): return self._mimic_inplace( DataFrame._from_data( @@ -3169,32 +3207,45 @@ def reset_index( drop=drop, col_level=col_level, col_fill=col_fill, + allow_duplicates=allow_duplicates, + names=names, ) ), inplace=inplace, ) @_performance_tracking - def insert(self, loc, name, value, nan_as_null=no_default): + def insert( + self, + loc, + column, + value, + allow_duplicates: bool = False, + nan_as_null=no_default, + ): """Add a column to DataFrame at the index specified by loc. Parameters ---------- loc : int location to insert by index, cannot be greater than num columns + 1 - name : number or string - name or label of column to be inserted + column : number or string + Name or label of column to be inserted value : Series or array-like nan_as_null : bool, Default None If ``None``/``True``, converts ``np.nan`` values to ``null`` values. If ``False``, leaves ``np.nan`` values as is. """ + if allow_duplicates is not False: + raise NotImplementedError( + "allow_duplicates is currently not implemented." + ) if nan_as_null is no_default: nan_as_null = not cudf.get_option("mode.pandas_compatible") return self._insert( loc=loc, - name=name, + name=column, value=value, nan_as_null=nan_as_null, ignore_index=False, @@ -3350,7 +3401,7 @@ def diff(self, periods=1, axis=0): 5 2 5 20 .. pandas-compat:: - **DataFrame.diff** + :meth:`pandas.DataFrame.diff` Diff currently only supports numeric dtype columns. """ @@ -3555,7 +3606,7 @@ def rename( 30 3 6 ..
pandas-compat:: - **DataFrame.rename** + :meth:`pandas.DataFrame.rename` * Not Supporting: level @@ -3628,7 +3679,9 @@ def rename( return result @_performance_tracking - def add_prefix(self, prefix): + def add_prefix(self, prefix, axis=None): + if axis is not None: + raise NotImplementedError("axis is currently not implemented.") # TODO: Change to deep=False when copy-on-write is default out = self.copy(deep=True) out.columns = [ @@ -3637,7 +3690,9 @@ def add_prefix(self, prefix): return out @_performance_tracking - def add_suffix(self, suffix): + def add_suffix(self, suffix, axis=None): + if axis is not None: + raise NotImplementedError("axis is currently not implemented.") # TODO: Change to deep=False when copy-on-write is default out = self.copy(deep=True) out.columns = [ @@ -3670,7 +3725,7 @@ def agg(self, aggs, axis=None): ``DataFrame`` is returned. .. pandas-compat:: - **DataFrame.agg** + :meth:`pandas.DataFrame.agg` * Not supporting: ``axis``, ``*args``, ``**kwargs`` @@ -3843,7 +3898,7 @@ def nlargest(self, n, columns, keep="first"): Brunei 434000 12128 BN .. pandas-compat:: - **DataFrame.nlargest** + :meth:`pandas.DataFrame.nlargest` - Only a single column is supported in *columns* """ @@ -3915,7 +3970,7 @@ def nsmallest(self, n, columns, keep="first"): Nauru 337000 182 NR .. pandas-compat:: - **DataFrame.nsmallest** + :meth:`pandas.DataFrame.nsmallest` - Only a single column is supported in *columns* """ @@ -3997,7 +4052,7 @@ def transpose(self): a new (ncol x nrow) dataframe. self is (nrow x ncol) .. pandas-compat:: - **DataFrame.transpose, DataFrame.T** + :meth:`pandas.DataFrame.transpose`, :attr:`pandas.DataFrame.T` Not supporting *copy* because default and only behavior is copy=True @@ -4053,7 +4108,15 @@ def transpose(self): T = property(transpose, doc=transpose.__doc__) @_performance_tracking - def melt(self, **kwargs): + def melt( + self, + id_vars=None, + value_vars=None, + var_name=None, + value_name="value", + col_level=None, + ignore_index: bool = True, + ): """Unpivots a DataFrame from wide format to long format, optionally leaving identifier variables set. @@ -4080,23 +4143,30 @@ def melt(self, **kwargs): """ from cudf.core.reshape import melt - return melt(self, **kwargs) + return melt( + self, + id_vars=id_vars, + value_vars=value_vars, + var_name=var_name, + value_name=value_name, + col_level=col_level, + ignore_index=ignore_index, + ) @_performance_tracking def merge( self, right, + how="inner", on=None, left_on=None, right_on=None, left_index=False, right_index=False, - how="inner", sort=False, - lsuffix=None, - rsuffix=None, - indicator=False, suffixes=("_x", "_y"), + indicator=False, + validate=None, ): """Merge GPU DataFrame objects by performing a database-style join operation by columns or indexes. @@ -4188,7 +4258,7 @@ def merge( from both sides. .. pandas-compat:: - **DataFrame.merge** + :meth:`pandas.DataFrame.merge` DataFrames merges in cuDF result in non-deterministic row ordering. @@ -4197,17 +4267,8 @@ def merge( raise NotImplementedError( "Only indicator=False is currently supported" ) - - if lsuffix or rsuffix: - raise ValueError( - "The lsuffix and rsuffix keywords have been replaced with the " - "``suffixes=`` keyword. 
" - "Please provide the following instead: \n\n" - " suffixes=('%s', '%s')" - % (lsuffix or "_x", rsuffix or "_y") - ) - else: - lsuffix, rsuffix = suffixes + if validate is not None: + raise NotImplementedError("validate is currently not supported.") lhs, rhs = self, right merge_cls = Merge @@ -4244,6 +4305,7 @@ def join( lsuffix="", rsuffix="", sort=False, + validate: str | None = None, ): """Join columns with other DataFrame on index or on a key column. @@ -4257,19 +4319,33 @@ def join( column names when avoiding conflicts. sort : bool Set to True to ensure sorted ordering. + validate : str, optional + If specified, checks if join is of specified type. + + * "one_to_one" or "1:1": check if join keys are unique in both left + and right datasets. + * "one_to_many" or "1:m": check if join keys are unique in left dataset. + * "many_to_one" or "m:1": check if join keys are unique in right dataset. + * "many_to_many" or "m:m": allowed, but does not result in checks. + + Currently not supported. Returns ------- joined : DataFrame .. pandas-compat:: - **DataFrame.join** + :meth:`pandas.DataFrame.join` - *other* must be a single DataFrame for now. - *on* is not supported yet due to lack of multi-index support. """ if on is not None: raise NotImplementedError("The on parameter is not yet supported") + elif validate is not None: + raise NotImplementedError( + "The validate parameter is not yet supported" + ) df = self.merge( other, @@ -4306,7 +4382,6 @@ def groupby( as_index=True, sort=no_default, group_keys=False, - squeeze=False, observed=True, dropna=True, ): @@ -4317,7 +4392,6 @@ def groupby( as_index, sort, group_keys, - squeeze, observed, dropna, ) @@ -4385,7 +4459,7 @@ def query(self, expr, local_dict=None): 1 2018-10-08 .. pandas-compat:: - **DataFrame.query** + :meth:`pandas.DataFrame.query` One difference from pandas is that ``query`` currently only supports numeric, datetime, timedelta, or bool dtypes. @@ -4420,7 +4494,16 @@ def query(self, expr, local_dict=None): @_performance_tracking def apply( - self, func, axis=1, raw=False, result_type=None, args=(), **kwargs + self, + func, + axis=1, + raw=False, + result_type=None, + args=(), + by_row: Literal[False, "compat"] = "compat", + engine: Literal["python", "numba"] = "python", + engine_kwargs: dict[str, bool] | None = None, + **kwargs, ): """ Apply a function along an axis of the DataFrame. @@ -4448,6 +4531,24 @@ def apply( Not yet supported args: tuple Positional arguments to pass to func in addition to the dataframe. + by_row : False or "compat", default "compat" + Only has an effect when ``func`` is a listlike or dictlike of funcs + and the func isn't a string. + If "compat", will if possible first translate the func into pandas + methods (e.g. ``Series().apply(np.sum)`` will be translated to + ``Series().sum()``). If that doesn't work, will try call to apply again with + ``by_row=True`` and if that fails, will call apply again with + ``by_row=False`` (backward compatible). + If False, the funcs will be passed the whole Series at once. + + Currently not supported. + engine : {'python', 'numba'}, default 'python' + Unused. Added for compatibility with pandas. + engine_kwargs : dict + Unused. Added for compatibility with pandas. + **kwargs + Additional keyword arguments to pass as keywords arguments to + `func`. 
Examples -------- @@ -4598,13 +4699,17 @@ def apply( """ if axis != 1: - raise ValueError( + raise NotImplementedError( "DataFrame.apply currently only supports row wise ops" ) if raw: - raise ValueError("The `raw` kwarg is not yet supported.") + raise NotImplementedError("The `raw` kwarg is not yet supported.") if result_type is not None: - raise ValueError("The `result_type` kwarg is not yet supported.") + raise NotImplementedError( + "The `result_type` kwarg is not yet supported." + ) + if by_row != "compat": + raise NotImplementedError("by_row is currently not supported.") return self._apply(func, _get_row_kernel, *args, **kwargs) @@ -4698,7 +4803,7 @@ def _func(x): # pragma: no cover result = {} for name, col in self._data.items(): apply_sr = Series._from_data({None: col}) - result[name] = apply_sr.apply(_func) + result[name] = apply_sr.apply(_func)._column return DataFrame._from_data(result, index=self.index) @@ -5447,10 +5552,11 @@ def from_arrow(cls, table): 2 3 6 .. pandas-compat:: - **DataFrame.from_arrow** + `pandas.DataFrame.from_arrow` - - Does not support automatically setting index column(s) similar - to how ``to_pandas`` works for PyArrow Tables. + This method does not exist in pandas but it is similar to + how :meth:`pyarrow.Table.to_pandas` works for PyArrow Tables, i.e. + it does not support automatically setting index column(s). """ index_col = None col_index_names = None @@ -5504,7 +5610,7 @@ def from_arrow(cls, table): return out @_performance_tracking - def to_arrow(self, preserve_index=None): + def to_arrow(self, preserve_index=None) -> pa.Table: """ Convert to a PyArrow Table. @@ -5594,18 +5700,36 @@ def to_arrow(self, preserve_index=None): return out.replace_schema_metadata(metadata) @_performance_tracking - def to_records(self, index=True): + def to_records(self, index=True, column_dtypes=None, index_dtypes=None): """Convert to a numpy recarray Parameters ---------- index : bool Whether to include the index in the output. + column_dtypes : str, type, dict, default None + If a string or type, the data type to store all columns. If + a dictionary, a mapping of column names and indices (zero-indexed) + to specific data types. Currently not supported. + index_dtypes : str, type, dict, default None + If a string or type, the data type to store all index levels. If + a dictionary, a mapping of index level names and indices + (zero-indexed) to specific data types. + This mapping is applied only if `index=True`. + Currently not supported. Returns ------- numpy recarray """ + if column_dtypes is not None: + raise NotImplementedError( + "column_dtypes is currently not supported." + ) + elif index_dtypes is not None: + raise NotImplementedError( + "index_dtypes is currently not supported." + ) members = [("index", self.index.dtype)] if index else [] members += [(col, self[col].dtype) for col in self._data.names] dtype = np.dtype(members) @@ -5618,7 +5742,16 @@ def to_records(self, index=True): @classmethod @_performance_tracking - def from_records(cls, data, index=None, columns=None, nan_as_null=False): + def from_records( + cls, + data, + index=None, + exclude=None, + columns=None, + coerce_float: bool = False, + nrows: int | None = None, + nan_as_null=False, + ): """ Convert structured or record ndarray to DataFrame. @@ -5628,13 +5761,32 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False): index : str, array-like The name of the index column in *data*. If None, the default index is used.
+ exclude : sequence, default None + Columns or fields to exclude. + Currently not implemented. columns : list of str List of column names to include. + coerce_float : bool, default False + Attempt to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets. + Currently not implemented. + nrows : int, default None + Number of rows to read if data is an iterator. + Currently not implemented. Returns ------- DataFrame """ + if exclude is not None: + raise NotImplementedError("exclude is currently not supported.") + if coerce_float is not False: + raise NotImplementedError( + "coerce_float is currently not supported." + ) + if nrows is not None: + raise NotImplementedError("nrows is currently not supported.") + if data.ndim != 1 and data.ndim != 2: raise ValueError( f"records dimension expected 1 or 2 but found {data.ndim}" ) @@ -5688,6 +5840,7 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False): ), level_names=level_names, label_dtype=getattr(columns, "dtype", None), + verify=False, ), index=new_index, ) @@ -5774,6 +5927,7 @@ def _from_arrays( ), level_names=level_names, label_dtype=getattr(columns, "dtype", None), + verify=False, ), index=index, ) @@ -5815,9 +5969,9 @@ def quantile( axis=0, numeric_only=True, interpolation=None, + method="single", columns=None, exact=True, - method="single", ): """ Return values at the given quantile. @@ -5843,14 +5997,14 @@ def quantile( * higher: `j`. * nearest: `i` or `j` whichever is nearest. * midpoint: (`i` + `j`) / 2. - columns : list of str - List of column names to include. - exact : boolean - Whether to use approximate or exact quantile algorithm. method : {'single', 'table'}, default `'single'` Whether to compute quantiles per-column ('single') or over all columns ('table'). When 'table', the only allowed interpolation methods are 'nearest', 'lower', and 'higher'. + columns : list of str + List of column names to include. + exact : boolean + Whether to use approximate or exact quantile algorithm. Returns ------- @@ -5884,7 +6038,7 @@ def quantile( 0.5 2.5 55.0 .. pandas-compat:: - **DataFrame.quantile** + :meth:`pandas.DataFrame.quantile` One notable difference from Pandas is when DataFrame is of non-numeric types and result is expected to be a Series in case of @@ -6174,7 +6328,7 @@ def count(self, axis=0, numeric_only=False): dtype: int64 .. pandas-compat:: - **DataFrame.count** + :meth:`pandas.DataFrame.count` Parameters currently not supported are `axis` and `numeric_only`. """ @@ -6184,10 +6338,9 @@ def count(self, axis=0, numeric_only=False): length = len(self) return Series._from_data( { - None: [ - length - self._data[col].null_count - for col in self._data.names - ] + None: as_column( + [length - col.null_count for col in self._columns] + ) }, cudf.Index(self._data.names), ) @@ -6412,7 +6565,7 @@ def mode(self, axis=0, numeric_only=False, dropna=True): 1 2.0 .. pandas-compat:: - **DataFrame.mode** + :meth:`pandas.DataFrame.mode` ``axis`` parameter is currently not supported. """ @@ -7173,25 +7326,47 @@ def unnamed_group_generator(): return result @_performance_tracking - def cov(self, **kwargs): + def cov(self, min_periods=None, ddof: int = 1, numeric_only: bool = False): """Compute the covariance matrix of a DataFrame. Parameters ---------- - **kwargs - Keyword arguments to be passed to cupy.cov + min_periods : int, optional + Minimum number of observations required per pair of columns to + have a valid result. + Currently not supported.
+ + ddof : int, default 1 + Delta degrees of freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + Currently not supported. Returns ------- cov : DataFrame """ - cov = cupy.cov(self.values, rowvar=False) + if min_periods is not None: + raise NotImplementedError( + "min_periods is currently not supported." + ) + + if numeric_only is not False: + raise NotImplementedError( + "numeric_only is currently not supported." + ) + + cov = cupy.cov(self.values, ddof=ddof, rowvar=False) cols = self._data.to_pandas_index() df = DataFrame(cupy.asfortranarray(cov)).set_index(cols) df._set_columns_like(self._data) return df - def corr(self, method="pearson", min_periods=None): + def corr( + self, method="pearson", min_periods=None, numeric_only: bool = False + ): """Compute the correlation matrix of a DataFrame. Parameters @@ -7221,6 +7396,11 @@ def corr(self, method="pearson", min_periods=None): if min_periods is not None: raise NotImplementedError("Unsupported argument 'min_periods'") + if numeric_only is not False: + raise NotImplementedError( + "numeric_only is currently not supported." + ) + corr = cupy.corrcoef(values, rowvar=False) cols = self._data.to_pandas_index() df = DataFrame(cupy.asfortranarray(corr)).set_index(cols) @@ -7256,7 +7436,9 @@ def to_struct(self, name=None): offset=0, ) return cudf.Series._from_data( - cudf.core.column_accessor.ColumnAccessor({name: col}), + cudf.core.column_accessor.ColumnAccessor( + {name: col}, verify=False + ), index=self.index, name=name, ) @@ -7359,9 +7541,9 @@ def pivot_table( @_performance_tracking @copy_docstring(reshape.unstack) - def unstack(self, level=-1, fill_value=None): + def unstack(self, level=-1, fill_value=None, sort: bool = True): return cudf.core.reshape.unstack( - self, level=level, fill_value=fill_value + self, level=level, fill_value=fill_value, sort=sort ) @_performance_tracking @@ -7407,7 +7589,12 @@ def explode(self, column, ignore_index=False): return super()._explode(column, ignore_index) def pct_change( - self, periods=1, fill_method=no_default, limit=no_default, freq=None + self, + periods=1, + fill_method=no_default, + limit=no_default, + freq=None, + **kwargs, ): """ Calculates the percent change between sequential elements @@ -7432,6 +7619,9 @@ def pct_change( freq : str, optional Increment to use from time series API. Not yet implemented. + **kwargs + Additional keyword arguments are passed into + `DataFrame.shift`. Returns ------- @@ -7477,7 +7667,7 @@ def pct_change( data = self.fillna(method=fill_method, limit=limit) return data.diff(periods=periods) / data.shift( - periods=periods, freq=freq + periods=periods, freq=freq, **kwargs ) def __dataframe__( @@ -7594,7 +7784,7 @@ def interleave_columns(self): The interleaved columns as a single column .. pandas-compat:: - **DataFrame.interleave_columns** + `pandas.DataFrame.interleave_columns` This method does not exist in pandas but it can be run as ``pd.Series(np.vstack(df.to_numpy()).reshape((-1,)))``. @@ -7696,7 +7886,7 @@ def eval(self, expr: str, inplace: bool = False, **kwargs): 4 5 2 7 3 .. pandas-compat:: - **DataFrame.eval** + :meth:`pandas.DataFrame.eval` * Additional kwargs are not supported. * Bitwise and logical operators are not dtype-dependent. @@ -8305,7 +8495,7 @@ def _find_common_dtypes_and_categories(non_null_columns, dtypes): )._column.unique() # Set the column dtype to the codes' dtype. 
The categories # will be re-assigned at the end - dtypes[idx] = min_scalar_type(len(categories[idx])) + dtypes[idx] = min_signed_type(len(categories[idx])) # Otherwise raise an error if columns have different dtypes elif not all(is_dtype_equal(c.dtype, dtypes[idx]) for c in cols): raise ValueError("All columns must be the same type") diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index de715191c08..27afec18b4e 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -17,10 +17,15 @@ from pandas.core.arrays.arrow.extension_types import ArrowIntervalType import cudf -from cudf.core._compat import PANDAS_LT_300 +from cudf.core._compat import PANDAS_GE_210, PANDAS_LT_300 from cudf.core.abc import Serializable from cudf.utils.docutils import doc_apply +if PANDAS_GE_210: + PANDAS_NUMPY_DTYPE = pd.core.dtypes.dtypes.NumpyEADtype +else: + PANDAS_NUMPY_DTYPE = pd.core.dtypes.dtypes.PandasDtype + if TYPE_CHECKING: from cudf._typing import Dtype from cudf.core.buffer import Buffer @@ -72,7 +77,7 @@ def dtype(arbitrary): return np.dtype("object") else: return dtype(pd_dtype.numpy_dtype) - elif isinstance(pd_dtype, pd.core.dtypes.dtypes.NumpyEADtype): + elif isinstance(pd_dtype, PANDAS_NUMPY_DTYPE): return dtype(pd_dtype.numpy_dtype) elif isinstance(pd_dtype, pd.CategoricalDtype): return cudf.CategoricalDtype.from_pandas(pd_dtype) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 802751e47ad..32c313e42d3 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -389,7 +389,7 @@ def values_host(self) -> np.ndarray: return self.to_numpy() @_performance_tracking - def __array__(self, dtype=None): + def __array__(self, dtype=None, copy=None): raise TypeError( "Implicit conversion to a host NumPy array via __array__ is not " "allowed, To explicitly construct a GPU matrix, consider using " @@ -591,7 +591,7 @@ def where(self, cond, other=None, inplace: bool = False) -> Self | None: dtype: int64 .. pandas-compat:: - **DataFrame.where, Series.where** + :meth:`pandas.DataFrame.where`, :meth:`pandas.Series.where` Note that ``where`` treats missing values as falsy, in parallel with pandas treatment of nullable data: @@ -1187,6 +1187,7 @@ def searchsorted( self, values, side: Literal["left", "right"] = "left", + sorter=None, ascending: bool = True, na_position: Literal["first", "last"] = "last", ) -> ScalarLike | cupy.ndarray: @@ -1199,6 +1200,10 @@ def searchsorted( side : str {'left', 'right'} optional, default 'left' If 'left', the index of the first suitable location found is given If 'right', return the last such index + sorter : 1-D array-like, optional + Optional array of integer indices that sort `self` into ascending + order. They are typically the result of ``np.argsort``. + Currently not supported. 
ascending : bool optional, default True Sorted Frame is in ascending order (otherwise descending) na_position : str {'last', 'first'} optional, default 'last' @@ -1245,10 +1250,12 @@ def searchsorted( >>> df.searchsorted(values_df, ascending=False) array([4, 4, 4, 0], dtype=int32) """ - # Call libcudf search_sorted primitive + # Note: pandas.DataFrame does not support searchsorted if na_position not in {"first", "last"}: raise ValueError(f"invalid na_position: {na_position}") + elif sorter is not None: + raise NotImplementedError("sorter is currently not supported.") scalar_flag = None if is_scalar(values): @@ -1305,7 +1312,7 @@ def argsort( order=None, ascending=True, na_position="last", - ): + ) -> cupy.ndarray: """Return the integer indices that would sort the Series values. Parameters @@ -1587,6 +1594,12 @@ def __pos__(self): def __abs__(self): return self._unaryop("abs") + def __bool__(self): + raise ValueError( + f"The truth value of a {type(self).__name__} is ambiguous. Use " + "a.empty, a.bool(), a.item(), a.any() or a.all()." + ) + # Reductions @classmethod @_performance_tracking @@ -1641,7 +1654,7 @@ def min( 1 .. pandas-compat:: - **DataFrame.min, Series.min** + :meth:`pandas.DataFrame.min`, :meth:`pandas.Series.min` Parameters currently not supported are `level`, `numeric_only`. """ @@ -1689,7 +1702,7 @@ def max( dtype: int64 .. pandas-compat:: - **DataFrame.max, Series.max** + :meth:`pandas.DataFrame.max`, :meth:`pandas.Series.max` Parameters currently not supported are `level`, `numeric_only`. """ @@ -1742,7 +1755,7 @@ def all(self, axis=0, skipna=True, **kwargs): dtype: bool .. pandas-compat:: - **DataFrame.all, Series.all** + :meth:`pandas.DataFrame.all`, :meth:`pandas.Series.all` Parameters currently not supported are `axis`, `bool_only`, `level`. @@ -1795,7 +1808,7 @@ def any(self, axis=0, skipna=True, **kwargs): dtype: bool .. pandas-compat:: - **DataFrame.any, Series.any** + :meth:`pandas.DataFrame.any`, :meth:`pandas.Series.any` Parameters currently not supported are `axis`, `bool_only`, `level`. diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index d2c75715be2..3cfbd1d736a 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -8,7 +8,7 @@ import warnings from collections import abc from functools import cached_property -from typing import TYPE_CHECKING, Any, Iterable +from typing import TYPE_CHECKING, Any, Iterable, Literal import cupy as cp import numpy as np @@ -306,6 +306,18 @@ def __iter__(self): grouped_values[offsets[i] : offsets[i + 1]], ) + def __len__(self) -> int: + return self.ngroups + + @property + def ngroups(self) -> int: + _, offsets, _, _ = self._grouped() + return len(offsets) - 1 + + @property + def ndim(self) -> int: + return self.obj.ndim + @property def dtypes(self): """ @@ -457,10 +469,20 @@ def size(self): ) @_performance_tracking - def cumcount(self): + def cumcount(self, ascending: bool = True): """ Return the cumulative count of keys in each group. + + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from length of group - 1 to 0. + Currently not supported """ + if ascending is not True: + raise NotImplementedError( + "ascending is currently not implemented." 
+ ) return ( cudf.Series( cudf.core.column.column_empty( @@ -527,7 +549,7 @@ def _groupby(self): ) @_performance_tracking - def agg(self, func): + def agg(self, func, *args, engine=None, engine_kwargs=None, **kwargs): """ Apply aggregation(s) to the groups. @@ -615,6 +637,22 @@ def agg(self, func): 1 1.5 1.75 2.0 2.0 2 3.0 3.00 1.0 1.0 """ + if engine is not None: + raise NotImplementedError( + "engine is non-functional and added for compatibility with pandas" + ) + if engine_kwargs is not None: + raise NotImplementedError( + "engine_kwargs is non-functional added for compatibility with pandas" + ) + if args: + raise NotImplementedError( + "Passing args to func is currently not supported." + ) + if kwargs: + raise NotImplementedError( + "Passing kwargs to func is currently not supported." + ) column_names, columns, normalized_aggs = self._normalize_aggs(func) orig_dtypes = tuple(c.dtype for c in columns) @@ -744,7 +782,8 @@ def _reduce( Computed {op} of values within each group. .. pandas-compat:: - **{cls}.{op}** + :meth:`pandas.core.groupby.DataFrameGroupBy.{op}`, + :meth:`pandas.core.groupby.SeriesGroupBy.{op}` The numeric_only, min_count """ @@ -934,12 +973,13 @@ def tail(self, n: int = 5, *, preserve_order: bool = True): ) @_performance_tracking - def nth(self, n): + def nth(self, n, dropna: Literal["any", "all", None] = None): """ Return the nth row from each group. """ - - self.obj["__groupbynth_order__"] = range(0, len(self.obj)) + if dropna is not None: + raise NotImplementedError("dropna is not currently supported.") + self.obj["__groupbynth_order__"] = range(0, len(self.obj)) # type: ignore[index] # We perform another groupby here to have the grouping columns # be a part of dataframe columns. result = self.obj.groupby(self.grouping.keys).agg(lambda x: x.nth(n)) @@ -1359,7 +1399,9 @@ def _post_process_chunk_results( if isinstance(chunk_results, ColumnBase) or cudf.api.types.is_scalar( chunk_results[0] ): - data = {None: chunk_results} + data = ColumnAccessor( + {None: as_column(chunk_results)}, verify=False + ) ty = cudf.Series if self._as_index else cudf.DataFrame result = ty._from_data(data, index=group_names) result.index.names = self.grouping.names @@ -1420,13 +1462,13 @@ def _post_process_chunk_results( @_performance_tracking def apply( - self, function, *args, engine="auto", include_groups: bool = True + self, func, *args, engine="auto", include_groups: bool = True, **kwargs ): """Apply a python transformation function over the grouped chunk. Parameters ---------- - function : callable + func : callable The python transformation function that will be applied on the grouped chunk. args : tuple @@ -1449,6 +1491,9 @@ def apply( When True, will attempt to apply ``func`` to the groupings in the case that they are columns of the DataFrame. In the future, this will default to ``False``. + kwargs : dict + Optional keyword arguments to pass to the function. + Currently not supported Examples -------- @@ -1482,7 +1527,8 @@ def mult(df): 6 2 6 12 .. pandas-compat:: - **GroupBy.apply** + :meth:`pandas.core.groupby.DataFrameGroupBy.apply`, + :meth:`pandas.core.groupby.SeriesGroupBy.apply` cuDF's ``groupby.apply`` is limited compared to pandas. In some situations, Pandas returns the grouped keys as part of @@ -1524,13 +1570,17 @@ def mult(df): dtype: int64 """ + if kwargs: + raise NotImplementedError( + "Passing kwargs to func is currently not supported." 
+ ) if self.obj.empty: - if function in {"count", "size", "idxmin", "idxmax"}: + if func in {"count", "size", "idxmin", "idxmax"}: res = cudf.Series([], dtype="int64") else: res = self.obj.copy(deep=True) res.index = self.grouping.keys - if function in {"sum", "product"}: + if func in {"sum", "product"}: # For `sum` & `product`, boolean types # will need to result in `int64` type. for name, col in res._data.items(): @@ -1538,20 +1588,20 @@ def mult(df): res._data[name] = col.astype("int") return res - if not callable(function): - raise TypeError(f"type {type(function)} is not callable") + if not callable(func): + raise TypeError(f"type {type(func)} is not callable") group_names, offsets, group_keys, grouped_values = self._grouped( include_groups=include_groups ) if engine == "auto": - if _can_be_jitted(grouped_values, function, args): + if _can_be_jitted(grouped_values, func, args): engine = "jit" else: engine = "cudf" if engine == "jit": result = self._jit_groupby_apply( - function, + func, group_names, offsets, group_keys, @@ -1560,7 +1610,7 @@ def mult(df): ) elif engine == "cudf": result = self._iterative_groupby_apply( - function, + func, group_names, offsets, group_keys, @@ -1740,12 +1790,14 @@ def _broadcast(self, values: cudf.Series) -> cudf.Series: return values @_performance_tracking - def transform(self, function): + def transform( + self, func, *args, engine=None, engine_kwargs=None, **kwargs + ): """Apply an aggregation, then broadcast the result to the group size. Parameters ---------- - function: str or callable + func: str or callable Aggregation to apply to each group. Note that the set of operations currently supported by `transform` is identical to that supported by the `agg` method. @@ -1774,18 +1826,35 @@ def transform(self, function): -------- agg """ - if not (isinstance(function, str) or callable(function)): + if engine is not None: + raise NotImplementedError( + "engine is non-functional and added for compatibility with pandas" + ) + if engine_kwargs is not None: + raise NotImplementedError( + "engine_kwargs is non-functional added for compatibility with pandas" + ) + if args: + raise NotImplementedError( + "Passing args to func is currently not supported." + ) + if kwargs: + raise NotImplementedError( + "Passing kwargs to func is currently not supported." + ) + + if not (isinstance(func, str) or callable(func)): raise TypeError( "Aggregation must be a named aggregation or a callable" ) try: - result = self.agg(function) + result = self.agg(func) except TypeError as e: raise NotImplementedError( "Currently, `transform()` supports only aggregations." ) from e # If the aggregation is a scan, don't broadcast - if libgroupby._is_all_scan_aggregate([[function]]): + if libgroupby._is_all_scan_aggregate([[func]]): if len(result) != len(self.obj): raise AssertionError( "Unexpected result length for scan transform" @@ -1820,7 +1889,7 @@ def func(x): return self.agg(func) @_performance_tracking - def describe(self, include=None, exclude=None): + def describe(self, percentiles=None, include=None, exclude=None): """ Generate descriptive statistics that summarizes the central tendency, dispersion and shape of a dataset's distribution, excluding NaN values. @@ -1829,6 +1898,10 @@ def describe(self, include=None, exclude=None): Parameters ---------- + percentiles : list-like of numbers, optional + The percentiles to include in the output. + Currently not supported. + include: 'all', list-like of dtypes or None (default), optional list of data types to include in the result. 
Ignored for Series. @@ -1865,8 +1938,12 @@ def describe(self, include=None, exclude=None): 90 1 24.0 24.0 24.0 24.0 24.0 24.0 """ - if exclude is not None and include is not None: - raise NotImplementedError + if percentiles is not None: + raise NotImplementedError("percentiles is currently not supported") + if exclude is not None: + raise NotImplementedError("exclude is currently not supported") + if include is not None: + raise NotImplementedError("include is currently not supported") res = self.agg( [ @@ -1892,69 +1969,7 @@ def describe(self, include=None, exclude=None): return res @_performance_tracking - def corr(self, method="pearson", min_periods=1): - """ - Compute pairwise correlation of columns, excluding NA/null values. - - Parameters - ---------- - method: {"pearson", "kendall", "spearman"} or callable, - default "pearson". Currently only the pearson correlation - coefficient is supported. - - min_periods: int, optional - Minimum number of observations required per pair of columns - to have a valid result. - - Returns - ------- - DataFrame - Correlation matrix. - - Examples - -------- - >>> import cudf - >>> gdf = cudf.DataFrame({ - ... "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], - ... "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], - ... "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], - ... "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1]}) - >>> gdf - id val1 val2 val3 - 0 a 5 4 4 - 1 a 4 5 5 - 2 a 6 6 6 - 3 b 4 1 1 - 4 b 8 2 2 - 5 b 7 9 9 - 6 c 4 8 8 - 7 c 5 5 5 - 8 c 2 1 1 - >>> gdf.groupby("id").corr(method="pearson") - val1 val2 val3 - id - a val1 1.000000 0.500000 0.500000 - val2 0.500000 1.000000 1.000000 - val3 0.500000 1.000000 1.000000 - b val1 1.000000 0.385727 0.385727 - val2 0.385727 1.000000 1.000000 - val3 0.385727 1.000000 1.000000 - c val1 1.000000 0.714575 0.714575 - val2 0.714575 1.000000 1.000000 - val3 0.714575 1.000000 1.000000 - """ - - if method.lower() not in ("pearson",): - raise NotImplementedError( - "Only pearson correlation is currently supported" - ) - - return self._cov_or_corr( - lambda x: x.corr(method, min_periods), "Correlation" - ) - - @_performance_tracking - def cov(self, min_periods=0, ddof=1): + def cov(self, min_periods=0, ddof=1, numeric_only: bool = False): """ Compute the pairwise covariance among the columns of a DataFrame, excluding NA/null values. @@ -2038,6 +2053,10 @@ def cov(self, min_periods=0, ddof=1): val2 3.833333 12.333333 12.333333 val3 3.833333 12.333333 12.333333 """ + if numeric_only is not False: + raise NotImplementedError( + "numeric_only is currently not supported." + ) return self._cov_or_corr( lambda x: x.cov(min_periods, ddof), "Covariance" @@ -2133,7 +2152,13 @@ def _cov_or_corr(self, func, method_name): return res @_performance_tracking - def var(self, ddof=1): + def var( + self, + ddof=1, + engine=None, + engine_kwargs=None, + numeric_only: bool = False, + ): """Compute the column-wise variance of the values in each group. Parameters @@ -2142,6 +2167,18 @@ def var(self, ddof=1): The delta degrees of freedom. N - ddof is the divisor used to normalize the variance. """ + if engine is not None: + raise NotImplementedError( + "engine is non-functional and added for compatibility with pandas" + ) + if engine_kwargs is not None: + raise NotImplementedError( + "engine_kwargs is non-functional added for compatibility with pandas" + ) + if numeric_only is not False: + raise NotImplementedError( + "numeric_only is currently not supported." 
+ )
 def func(x):
 return getattr(x, "var")(ddof=ddof)
@@ -2149,7 +2186,13 @@ def func(x):
 return self.agg(func)
 @_performance_tracking
- def std(self, ddof=1):
+ def std(
+ self,
+ ddof=1,
+ engine=None,
+ engine_kwargs=None,
+ numeric_only: bool = False,
+ ):
 """Compute the column-wise std of the values in each group.
 Parameters
@@ -2158,6 +2201,18 @@ def std(self, ddof=1):
 The delta degrees of freedom. N - ddof is the divisor used to
 normalize the standard deviation.
 """
+ if engine is not None:
+ raise NotImplementedError(
+ "engine is non-functional and added for compatibility with pandas"
+ )
+ if engine_kwargs is not None:
+ raise NotImplementedError(
+ "engine_kwargs is non-functional and added for compatibility with pandas"
+ )
+ if numeric_only is not False:
+ raise NotImplementedError(
+ "numeric_only is currently not supported."
+ )
 def func(x):
 return getattr(x, "std")(ddof=ddof)
@@ -2165,7 +2220,9 @@ def func(x):
 return self.agg(func)
 @_performance_tracking
- def quantile(self, q=0.5, interpolation="linear"):
+ def quantile(
+ self, q=0.5, interpolation="linear", numeric_only: bool = False
+ ):
 """Compute the column-wise quantiles of the values in each group.
 Parameters
@@ -2175,7 +2232,14 @@
 interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}
 The interpolation method to use when the desired quantile lies between
 two data points. Defaults to "linear".
+ numeric_only : bool, default False
+ Include only `float`, `int` or `boolean` data.
+ Currently not supported.
 """
+ if numeric_only is not False:
+ raise NotImplementedError(
+ "numeric_only is currently not supported."
+ )
 def func(x):
 return getattr(x, "quantile")(q=q, interpolation=interpolation)
@@ -2329,7 +2393,14 @@ def fillna(
 )
 @_performance_tracking
- def shift(self, periods=1, freq=None, axis=0, fill_value=None):
+ def shift(
+ self,
+ periods=1,
+ freq=None,
+ axis=0,
+ fill_value=None,
+ suffix: str | None = None,
+ ):
 """
 Shift each group by ``periods`` positions.
@@ -2351,6 +2422,10 @@
 the list. The length of the list should match the number of columns
 shifted. Each value should match the data type of the column to fill.
+ suffix : str, optional
+ A string to add to each shifted column if there are multiple periods.
+ Ignored otherwise.
+ Currently not supported.
 Returns
 -------
@@ -2358,7 +2433,8 @@
 Object shifted within each group.
 .. pandas-compat::
- **GroupBy.shift**
+ :meth:`pandas.core.groupby.DataFrameGroupBy.shift`,
+ :meth:`pandas.core.groupby.SeriesGroupBy.shift`
 Parameter ``freq`` is unsupported.
 """
@@ -2369,6 +2445,9 @@
 if not axis == 0:
 raise NotImplementedError("Only axis=0 is supported.")
+ if suffix is not None:
+ raise NotImplementedError("suffix is not currently supported.")
+
 values = self.grouping.values
 if is_list_like(fill_value):
 if len(fill_value) != len(values._data):
@@ -2468,6 +2547,142 @@ def pct_change(
 shifted = fill_grp.shift(periods=periods, freq=freq)
 return (filled / shifted) - 1
+ def _mimic_pandas_order(
+ self, result: DataFrameOrSeries
+ ) -> DataFrameOrSeries:
+ """Given a groupby result from libcudf, reconstruct the row orders
+ matching that of pandas. This also adds appropriate indices.
+ """
+ # TODO: copy metadata after this method is a common pattern, should
+ # merge in this method.
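# --- Illustrative usage, not part of the patch --------------------------
# Sketch of the extended groupby ``shift`` above: rows shift within each
# group, ``fill_value`` fills the vacated slots, and the new pandas-compat
# ``suffix`` parameter only raises for now. Data are made up.
import cudf

df = cudf.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 2, 3, 4]})
gb = df.groupby("key")

# The first row of each group receives fill_value:
print(gb.shift(periods=1, fill_value=0))

try:
    gb.shift(periods=1, suffix="_lag")
except NotImplementedError as err:
    print(err)  # suffix is guarded above
# -------------------------------------------------------------------------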
+
+ # This function is used to reorder the results of scan-based
+ # groupbys which have the same output size as input size.
+ # However, if the grouping key has NAs and dropna=True, the
+ # result coming back from libcudf has null_count fewer rows than
+ # the input, so we must produce an ordering from the full
+ # input range.
+ _, _, (ordering,) = self._groupby.groups(
+ [as_column(range(0, len(self.obj)))]
+ )
+ if self._dropna and any(
+ c.has_nulls(include_nan=True) > 0
+ for c in self.grouping._key_columns
+ ):
+ # Scan aggregations with null/nan keys put nulls in the
+ # corresponding output rows in pandas; to do that here,
+ # expand the result by reindexing.
+ ri = cudf.RangeIndex(0, len(self.obj))
+ result.index = cudf.Index(ordering)
+ # This reorders and expands
+ result = result.reindex(ri)
+ else:
+ # Just reorder according to the groupings
+ result = result.take(ordering.argsort())
+ # Now produce the actual index we first thought of
+ result.index = self.obj.index
+ return result
+
+ def ohlc(self):
+ """
+ Compute open, high, low and close values of a group, excluding missing values.
+
+ Currently not implemented.
+ """
+ raise NotImplementedError("ohlc is currently not implemented")
+
+ @property
+ def plot(self):
+ """
+ Make plots of a grouped Series or DataFrame.
+
+ Currently not implemented.
+ """
+ raise NotImplementedError("plot is currently not implemented")
+
+ def resample(self, rule, *args, include_groups: bool = True, **kwargs):
+ """
+ Provide resampling when using a TimeGrouper.
+
+ Currently not implemented.
+ """
+ raise NotImplementedError("resample is currently not implemented")
+
+ def take(self, indices):
+ """
+ Return the elements in the given *positional* indices in each group.
+
+ Currently not implemented.
+ """
+ raise NotImplementedError("take is currently not implemented")
+
+ def filter(self, func, dropna: bool = True, *args, **kwargs):
+ """
+ Filter elements from groups that don't satisfy a criterion.
+
+ Currently not implemented.
+ """
+ raise NotImplementedError("filter is currently not implemented")
+
+ def expanding(self, *args, **kwargs):
+ """
+ Return an expanding grouper, providing expanding
+ functionality per group.
+
+ Currently not implemented.
+ """
+ raise NotImplementedError("expanding is currently not implemented")
+
+ def ewm(self, *args, **kwargs):
+ """
+ Return an ewm grouper, providing ewm functionality per group.
+
+ Currently not implemented.
+ """
+ raise NotImplementedError("ewm is currently not implemented")
+
+ def any(self, skipna: bool = True):
+ """
+ Return True if any value in the group is truthful, else False.
+
+ Currently not implemented.
+ """
+ raise NotImplementedError("any is currently not implemented")
+
+ def all(self, skipna: bool = True):
+ """
+ Return True if all values in the group are truthful, else False.
+
+ Currently not implemented.
+ """ + raise NotImplementedError("all is currently not implemented") + + +class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin): + obj: "cudf.core.dataframe.DataFrame" + + _PROTECTED_KEYS = frozenset(("obj",)) + + def _reduce_numeric_only(self, op: str): + columns = list( + name + for name in self.obj._data.names + if ( + is_numeric_dtype(self.obj._data[name].dtype) + and name not in self.grouping.names + ) + ) + return self[columns].agg(op) + + def __getitem__(self, key): + return self.obj[key].groupby( + by=self.grouping.keys, + dropna=self._dropna, + sort=self._sort, + group_keys=self._group_keys, + as_index=self._as_index, + ) + def value_counts( self, subset=None, @@ -2632,68 +2847,112 @@ def value_counts( return result - def _mimic_pandas_order( - self, result: DataFrameOrSeries - ) -> DataFrameOrSeries: - """Given a groupby result from libcudf, reconstruct the row orders - matching that of pandas. This also adds appropriate indices. + @_performance_tracking + def corr( + self, method="pearson", min_periods=1, numeric_only: bool = False + ): """ - # TODO: copy metadata after this method is a common pattern, should - # merge in this method. + Compute pairwise correlation of columns, excluding NA/null values. - # This function is used to reorder the results of scan-based - # groupbys which have the same output size as input size. - # However, if the grouping key has NAs and dropna=True, the - # result coming back from libcudf has null_count few rows than - # the input, so we must produce an ordering from the full - # input range. - _, _, (ordering,) = self._groupby.groups( - [as_column(range(0, len(self.obj)))] - ) - if self._dropna and any( - c.has_nulls(include_nan=True) > 0 - for c in self.grouping._key_columns - ): - # Scan aggregations with null/nan keys put nulls in the - # corresponding output rows in pandas, to do that here - # expand the result by reindexing. - ri = cudf.RangeIndex(0, len(self.obj)) - result.index = cudf.Index(ordering) - # This reorders and expands - result = result.reindex(ri) - else: - # Just reorder according to the groupings - result = result.take(ordering.argsort()) - # Now produce the actual index we first thought of - result.index = self.obj.index - return result + Parameters + ---------- + method: {"pearson", "kendall", "spearman"} or callable, + default "pearson". Currently only the pearson correlation + coefficient is supported. + min_periods: int, optional + Minimum number of observations required per pair of columns + to have a valid result. -class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin): - obj: "cudf.core.dataframe.DataFrame" + Returns + ------- + DataFrame + Correlation matrix. - _PROTECTED_KEYS = frozenset(("obj",)) + Examples + -------- + >>> import cudf + >>> gdf = cudf.DataFrame({ + ... "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + ... "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], + ... "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], + ... 
"val3": [4, 5, 6, 1, 2, 9, 8, 5, 1]}) + >>> gdf + id val1 val2 val3 + 0 a 5 4 4 + 1 a 4 5 5 + 2 a 6 6 6 + 3 b 4 1 1 + 4 b 8 2 2 + 5 b 7 9 9 + 6 c 4 8 8 + 7 c 5 5 5 + 8 c 2 1 1 + >>> gdf.groupby("id").corr(method="pearson") + val1 val2 val3 + id + a val1 1.000000 0.500000 0.500000 + val2 0.500000 1.000000 1.000000 + val3 0.500000 1.000000 1.000000 + b val1 1.000000 0.385727 0.385727 + val2 0.385727 1.000000 1.000000 + val3 0.385727 1.000000 1.000000 + c val1 1.000000 0.714575 0.714575 + val2 0.714575 1.000000 1.000000 + val3 0.714575 1.000000 1.000000 + """ - def _reduce_numeric_only(self, op: str): - columns = list( - name - for name in self.obj._data.names - if ( - is_numeric_dtype(self.obj._data[name].dtype) - and name not in self.grouping.names + if method != "pearson": + raise NotImplementedError( + "Only pearson correlation is currently supported" + ) + if numeric_only is not False: + raise NotImplementedError( + "numeric_only is currently not supported." ) - ) - return self[columns].agg(op) - def __getitem__(self, key): - return self.obj[key].groupby( - by=self.grouping.keys, - dropna=self._dropna, - sort=self._sort, - group_keys=self._group_keys, - as_index=self._as_index, + return self._cov_or_corr( + lambda x: x.corr(method, min_periods), "Correlation" ) + def hist( + self, + column=None, + by=None, + grid: bool = True, + xlabelsize: int | None = None, + xrot: float | None = None, + ylabelsize: int | None = None, + yrot: float | None = None, + ax=None, + sharex: bool = False, + sharey: bool = False, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, + bins: int | abc.Sequence[int] = 10, + backend: str | None = None, + legend: bool = False, + **kwargs, + ): + raise NotImplementedError("hist is not currently implemented") + + def boxplot( + self, + subplots: bool = True, + column=None, + fontsize: int | None = None, + rot: int = 0, + grid: bool = True, + ax=None, + figsize: tuple[float, float] | None = None, + layout=None, + sharex: bool = False, + sharey: bool = True, + backend=None, + **kwargs, + ): + raise NotImplementedError("boxplot is not currently implemented") + DataFrameGroupBy.__doc__ = groupby_doc_template.format(ret="") @@ -2701,8 +2960,10 @@ def __getitem__(self, key): class SeriesGroupBy(GroupBy): obj: "cudf.core.series.Series" - def agg(self, func): - result = super().agg(func) + def agg(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + result = super().agg( + func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) # downcast the result to a Series: if len(result._data): @@ -2717,14 +2978,95 @@ def agg(self, func): aggregate = agg - def apply(self, func, *args): - result = super().apply(func, *args) + def apply(self, func, *args, **kwargs): + result = super().apply(func, *args, **kwargs) # apply Series name to result result.name = self.obj.name return result + @property + def dtype(self) -> pd.Series: + raise NotImplementedError("dtype is currently not implemented.") + + def hist( + self, + by=None, + ax=None, + grid: bool = True, + xlabelsize: int | None = None, + xrot: float | None = None, + ylabelsize: int | None = None, + yrot: float | None = None, + figsize: tuple[float, float] | None = None, + bins: int | abc.Sequence[int] = 10, + backend: str | None = None, + legend: bool = False, + **kwargs, + ): + raise NotImplementedError("hist is currently not implemented.") + + @property + def is_monotonic_increasing(self) -> cudf.Series: + """ + Return whether each group's values are monotonically 
increasing. + + Currently not implemented + """ + raise NotImplementedError( + "is_monotonic_increasing is currently not implemented." + ) + + @property + def is_monotonic_decreasing(self) -> cudf.Series: + """ + Return whether each group's values are monotonically decreasing. + + Currently not implemented + """ + raise NotImplementedError( + "is_monotonic_decreasing is currently not implemented." + ) + + def nlargest( + self, n: int = 5, keep: Literal["first", "last", "all"] = "first" + ) -> cudf.Series: + """ + Return the largest n elements. + + Currently not implemented + """ + raise NotImplementedError("nlargest is currently not implemented.") + + def nsmallest( + self, n: int = 5, keep: Literal["first", "last", "all"] = "first" + ) -> cudf.Series: + """ + Return the smallest n elements. + + Currently not implemented + """ + raise NotImplementedError("nsmallest is currently not implemented.") + + def value_counts( + self, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + bins=None, + dropna: bool = True, + ) -> cudf.Series | cudf.DataFrame: + raise NotImplementedError("value_counts is currently not implemented.") + + def corr( + self, + other: cudf.Series, + method: str = "pearson", + min_periods: int | None = None, + ) -> cudf.Series: + raise NotImplementedError("corr is currently not implemented.") + SeriesGroupBy.__doc__ = groupby_doc_template.format(ret="") diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 4164f981fca..8c3b091abec 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -52,11 +52,9 @@ from cudf.core.single_column_frame import SingleColumnFrame from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( - _NUMPY_SCTYPES, _maybe_convert_to_default_type, find_common_type, is_mixed_with_object_dtype, - numeric_normalize_types, ) from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import _warn_no_dask_cudf, search_range @@ -80,6 +78,11 @@ class IndexMeta(type): """Custom metaclass for Index that overrides instance/subclass tests.""" def __call__(cls, data, *args, **kwargs): + if kwargs.get("tupleize_cols", True) is not True: + raise NotImplementedError( + "tupleize_cols is currently not supported." 
+ ) + if cls is Index: return as_index( arbitrary=data, @@ -351,18 +354,16 @@ def hasnans(self) -> bool: @_performance_tracking def _data(self): return cudf.core.column_accessor.ColumnAccessor( - {self.name: self._values} + {self.name: self._values}, verify=False ) @_performance_tracking def __contains__(self, item): hash(item) - if isinstance(item, bool) or not isinstance( - item, - tuple( - _NUMPY_SCTYPES["int"] + _NUMPY_SCTYPES["float"] + [int, float] - ), - ): + if not isinstance(item, (np.floating, np.integer, int, float)): + return False + elif isinstance(item, (np.timedelta64, np.datetime64, bool)): + # Cases that would pass the above check return False try: int_item = int(item) @@ -539,8 +540,12 @@ def memory_usage(self, deep: bool = False) -> int: ) return 0 - def unique(self) -> Self: + def unique(self, level: int | None = None) -> Self: # RangeIndex always has unique values + if level is not None and level > 0: + raise IndexError( + f"Too many levels: Index has only 1 level, not {level + 1}" + ) return self.copy() @_performance_tracking @@ -963,7 +968,11 @@ def _indices_of(self, value) -> cudf.core.column.NumericalColumn: i = [] return as_column(i, dtype=size_type_dtype) - def isin(self, values): + def isin(self, values, level=None): + if level is not None and level > 0: + raise IndexError( + f"Too many levels: Index has only 1 level, not {level + 1}" + ) if is_scalar(values): raise TypeError( "only list-like objects are allowed to be passed " @@ -1001,21 +1010,23 @@ def __dask_tokenize__(self): class Index(SingleColumnFrame, BaseIndex, metaclass=IndexMeta): """ - An array of orderable values that represent the indices of another Column + Immutable sequence used for indexing and alignment. - Attributes - ---------- - _values: A Column object - name: A string + The basic object storing axis labels for all pandas objects. Parameters ---------- - data : Column - The Column of data for this index - name : str optional - The name of the Index. If not provided, the Index adopts the value - Column's name. Otherwise if this name is different from the value - Column's, the data Column will be cloned to adopt this name. + data : array-like (1-dimensional) + dtype : str, numpy.dtype, or ExtensionDtype, optional + Data type for the output Index. If not specified, this will be + inferred from `data`. + copy : bool, default False + Copy input data. + name : object + Name to be stored in the index. + tupleize_cols : bool (default: True) + When True, attempt to create a MultiIndex if possible. + Currently not supported. """ @_performance_tracking @@ -1460,18 +1471,19 @@ def notna(self): notnull = notna def _is_numeric(self): - return isinstance( - self._values, cudf.core.column.NumericalColumn - ) and self.dtype != cudf.dtype("bool") + return ( + isinstance(self._values, cudf.core.column.NumericalColumn) + and self.dtype.kind != "b" + ) def _is_boolean(self): - return self.dtype == cudf.dtype("bool") + return self.dtype.kind == "b" def _is_integer(self): - return cudf.api.types.is_integer_dtype(self.dtype) + return self.dtype.kind in "iu" def _is_floating(self): - return cudf.api.types.is_float_dtype(self.dtype) + return self.dtype.kind == "f" def _is_object(self): return isinstance(self._values, cudf.core.column.StringColumn) @@ -1495,7 +1507,7 @@ def argsort( order=None, ascending=True, na_position="last", - ): + ) -> cupy.ndarray: """Return the integer indices that would sort the index. Parameters @@ -1601,19 +1613,31 @@ def append(self, other): f"either one of them to same dtypes." 
) - if isinstance(self._values, cudf.core.column.NumericalColumn): - if self.dtype != other.dtype: - this, other = numeric_normalize_types(self, other) + if ( + isinstance(self._column, cudf.core.column.NumericalColumn) + and self.dtype != other.dtype + ): + common_type = find_common_type((self.dtype, other.dtype)) + this = this.astype(common_type) + other = other.astype(common_type) to_concat = [this, other] return self._concat(to_concat) - def unique(self): + def unique(self, level: int | None = None) -> Self: + if level is not None and level > 0: + raise IndexError( + f"Too many levels: Index has only 1 level, not {level + 1}" + ) return cudf.core.index._index_from_data( {self.name: self._values.unique()}, name=self.name ) - def isin(self, values): + def isin(self, values, level=None): + if level is not None and level > 0: + raise IndexError( + f"Too many levels: Index has only 1 level, not {level + 1}" + ) if is_scalar(values): raise TypeError( "only list-like objects are allowed to be passed " @@ -1734,8 +1758,18 @@ def __init__( if tz is not None: raise NotImplementedError("tz is not yet supported") if normalize is not False: + warnings.warn( + "The 'normalize' keyword is " + "deprecated and will be removed in a future version. ", + FutureWarning, + ) raise NotImplementedError("normalize == True is not yet supported") if closed is not None: + warnings.warn( + "The 'closed' keyword is " + "deprecated and will be removed in a future version. ", + FutureWarning, + ) raise NotImplementedError("closed is not yet supported") if ambiguous != "raise": raise NotImplementedError("ambiguous is not yet supported") @@ -2479,6 +2513,14 @@ def __init__( if freq is not None: raise NotImplementedError("freq is not yet supported") + if closed is not None: + warnings.warn( + "The 'closed' keyword is " + "deprecated and will be removed in a future version. ", + FutureWarning, + ) + raise NotImplementedError("closed is not yet supported") + if unit is not None: warnings.warn( "The 'unit' keyword is " @@ -2679,6 +2721,10 @@ def __init__( data = data.as_ordered(ordered=False) super().__init__(data, name=name) + @property + def ordered(self) -> bool: + return self._column.ordered + @property # type: ignore @_performance_tracking def codes(self): @@ -2701,6 +2747,118 @@ def _is_boolean(self): def _is_categorical(self): return True + def add_categories(self, new_categories) -> Self: + """ + Add new categories. + + `new_categories` will be included at the last/highest place in the + categories and will be unused directly after this call. + """ + return type(self)._from_data( + {self.name: self._column.add_categories(new_categories)} + ) + + def as_ordered(self) -> Self: + """ + Set the Categorical to be ordered. + """ + return type(self)._from_data( + {self.name: self._column.as_ordered(ordered=True)} + ) + + def as_unordered(self) -> Self: + """ + Set the Categorical to be unordered. + """ + return type(self)._from_data( + {self.name: self._column.as_ordered(ordered=False)} + ) + + def remove_categories(self, removals) -> Self: + """ + Remove the specified categories. + + `removals` must be included in the old categories. + + Parameters + ---------- + removals : category or list of categories + The categories which should be removed. + """ + return type(self)._from_data( + {self.name: self._column.remove_categories(removals)} + ) + + def remove_unused_categories(self) -> Self: + """ + Remove categories which are not used. + + This method is currently not supported. 
+ """ + return type(self)._from_data( + {self.name: self._column.remove_unused_categories()} + ) + + def rename_categories(self, new_categories) -> Self: + """ + Rename categories. + + This method is currently not supported. + """ + return type(self)._from_data( + {self.name: self._column.rename_categories(new_categories)} + ) + + def reorder_categories(self, new_categories, ordered=None) -> Self: + """ + Reorder categories as specified in new_categories. + + ``new_categories`` need to include all old categories and no new category + items. + + Parameters + ---------- + new_categories : Index-like + The categories in new order. + ordered : bool, optional + Whether or not the categorical is treated as a ordered categorical. + If not given, do not change the ordered information. + """ + return type(self)._from_data( + { + self.name: self._column.reorder_categories( + new_categories, ordered=ordered + ) + } + ) + + def set_categories( + self, new_categories, ordered=None, rename: bool = False + ) -> Self: + """ + Set the categories to the specified new_categories. + + Parameters + ---------- + new_categories : list-like + The categories in new order. + ordered : bool, default None + Whether or not the categorical is treated as + a ordered categorical. If not given, do + not change the ordered information. + rename : bool, default False + Whether or not the `new_categories` should be + considered as a rename of the old categories + or as reordered categories. + """ + return type(self)._from_data( + { + self.name: self._column.set_categories( + new_categories, ordered=ordered, rename=rename + ) + } + ) + @_performance_tracking def interval_range( @@ -2862,6 +3020,7 @@ def __init__( dtype=None, copy: bool = False, name=None, + verify_integrity: bool = True, ): name = _getdefault_name(data, name=name) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 30b68574960..0678ebfdd81 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -84,6 +84,9 @@ {argument} inplace : bool, default False Modify the DataFrame in place (do not create a new object). + allow_duplicates : bool, default False + Allow duplicate column labels to be created. + Currently not supported. Returns ------- @@ -497,7 +500,7 @@ def empty(self): True .. pandas-compat:: - **DataFrame.empty, Series.empty** + :attr:`pandas.DataFrame.empty`, :attr:`pandas.Series.empty` If DataFrame/Series contains only `null` values, it is still not considered empty. See the example above. @@ -831,7 +834,7 @@ def replace( 4 4 9 e .. pandas-compat:: - **DataFrame.replace, Series.replace** + :meth:`pandas.DataFrame.replace`, :meth:`pandas.Series.replace` Parameters that are currently not supported are: `limit`, `regex`, `method` @@ -902,7 +905,7 @@ def replace( return self._mimic_inplace(result, inplace=inplace) @_performance_tracking - def clip(self, lower=None, upper=None, inplace=False, axis=1): + def clip(self, lower=None, upper=None, axis=1, inplace=False): """ Trim values at input threshold(s). @@ -1372,7 +1375,7 @@ def sum( dtype: int64 .. pandas-compat:: - **DataFrame.sum, Series.sum** + :meth:`pandas.DataFrame.sum`, :meth:`pandas.Series.sum` Parameters currently not supported are `level`, `numeric_only`. """ @@ -1433,7 +1436,7 @@ def product( dtype: int64 .. pandas-compat:: - **DataFrame.product, Series.product** + :meth:`pandas.DataFrame.product`, :meth:`pandas.Series.product` Parameters currently not supported are level`, `numeric_only`. 
""" @@ -1492,9 +1495,7 @@ def mean(self, axis=0, skipna=True, numeric_only=False, **kwargs): **kwargs, ) - def median( - self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs - ): + def median(self, axis=None, skipna=True, numeric_only=None, **kwargs): """ Return the median of the values for the requested axis. @@ -1530,7 +1531,7 @@ def median( 17.0 .. pandas-compat:: - **DataFrame.median, Series.median** + :meth:`pandas.DataFrame.median`, :meth:`pandas.Series.median` Parameters currently not supported are `level` and `numeric_only`. """ @@ -1586,7 +1587,7 @@ def std( dtype: float64 .. pandas-compat:: - **DataFrame.std, Series.std** + :meth:`pandas.DataFrame.std`, :meth:`pandas.Series.std` Parameters currently not supported are `level` and `numeric_only` @@ -1645,7 +1646,7 @@ def var( dtype: float64 .. pandas-compat:: - **DataFrame.var, Series.var** + :meth:`pandas.DataFrame.var`, :meth:`pandas.Series.var` Parameters currently not supported are `level` and `numeric_only` @@ -1701,7 +1702,7 @@ def kurtosis(self, axis=0, skipna=True, numeric_only=False, **kwargs): dtype: float64 .. pandas-compat:: - **DataFrame.kurtosis** + :meth:`pandas.DataFrame.kurtosis` Parameters currently not supported are `level` and `numeric_only` """ @@ -1763,7 +1764,7 @@ def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs): dtype: float64 .. pandas-compat:: - **DataFrame.skew, Series.skew, Frame.skew** + :meth:`pandas.DataFrame.skew`, :meth:`pandas.Series.skew` The `axis` parameter is not currently supported. """ @@ -1779,7 +1780,14 @@ def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs): ) @_performance_tracking - def mask(self, cond, other=None, inplace: bool = False) -> Self | None: + def mask( + self, + cond, + other=None, + inplace: bool = False, + axis=None, + level=None, + ) -> Self | None: """ Replace values where the condition is True. 
@@ -1831,6 +1839,10 @@ def mask(self, cond, other=None, inplace: bool = False) -> Self | None: 4 0 dtype: int64 """ + if axis is not None: + raise NotImplementedError("axis is not supported.") + elif level is not None: + raise NotImplementedError("level is not supported.") if not hasattr(cond, "__invert__"): # We Invert `cond` below and call `where`, so @@ -1843,7 +1855,16 @@ def mask(self, cond, other=None, inplace: bool = False) -> Self | None: @_performance_tracking @copy_docstring(Rolling) def rolling( - self, window, min_periods=None, center=False, axis=0, win_type=None + self, + window, + min_periods=None, + center: bool = False, + win_type: str | None = None, + on=None, + axis=0, + closed: str | None = None, + step: int | None = None, + method: str = "single", ): return Rolling( self, @@ -1851,7 +1872,11 @@ def rolling( min_periods=min_periods, center=center, axis=axis, + on=on, win_type=win_type, + closed=closed, + step=step, + method=method, ) @copy_docstring(ExponentialMovingWindow) @@ -1866,6 +1891,7 @@ def ewm( ignore_na: bool = False, axis: int = 0, times: str | np.ndarray | None = None, + method: Literal["single", "table"] = "single", ): return ExponentialMovingWindow( self, @@ -1878,6 +1904,7 @@ def ewm( ignore_na=ignore_na, axis=axis, times=times, + method=method, ) @_performance_tracking @@ -2042,13 +2069,26 @@ def interpolate( ) @_performance_tracking - def shift(self, periods=1, freq=None, axis=0, fill_value=None): + def shift( + self, + periods=1, + freq=None, + axis=0, + fill_value=None, + suffix: str | None = None, + ): """Shift values by `periods` positions.""" axis = self._get_axis_from_axis_arg(axis) if axis != 0: - raise ValueError("Only axis=0 is supported.") + raise NotImplementedError("Only axis=0 is supported.") if freq is not None: - raise ValueError("The freq argument is not yet supported.") + raise NotImplementedError( + "The freq argument is not yet supported." + ) + if suffix is not None: + raise NotImplementedError( + "The suffix argument is not yet supported." + ) data_columns = ( col.shift(periods, fill_value) for col in self._columns @@ -2229,7 +2269,7 @@ def truncate(self, before=None, after=None, axis=0, copy=True): 2021-01-01 23:45:27 1 2 .. pandas-compat:: - **DataFrame.truncate, Series.truncate** + :meth:`pandas.DataFrame.truncate`, :meth:`pandas.Series.truncate` The ``copy`` parameter is only present for API compatibility, but ``copy=False`` is not supported. This method always generates a @@ -2665,7 +2705,7 @@ def sort_index( 2 3 1 .. pandas-compat:: - **DataFrame.sort_index, Series.sort_index** + :meth:`pandas.DataFrame.sort_index`, :meth:`pandas.Series.sort_index` * Not supporting: kind, sort_remaining=False """ @@ -3225,7 +3265,9 @@ def _split(self, splits, keep_index=True): ] @_performance_tracking - def bfill(self, value=None, axis=None, inplace=None, limit=None): + def bfill( + self, value=None, axis=None, inplace=None, limit=None, limit_area=None + ): """ Synonym for :meth:`Series.fillna` with ``method='bfill'``. @@ -3233,6 +3275,9 @@ def bfill(self, value=None, axis=None, inplace=None, limit=None): ------- Object with missing values filled or None if ``inplace=True``. 
""" + if limit_area is not None: + raise NotImplementedError("limit_area is currently not supported.") + with warnings.catch_warnings(): warnings.simplefilter("ignore", FutureWarning) return self.fillna( @@ -3264,7 +3309,14 @@ def backfill(self, value=None, axis=None, inplace=None, limit=None): return self.bfill(value=value, axis=axis, inplace=inplace, limit=limit) @_performance_tracking - def ffill(self, value=None, axis=None, inplace=None, limit=None): + def ffill( + self, + value=None, + axis=None, + inplace=None, + limit=None, + limit_area: Literal["inside", "outside", None] = None, + ): """ Synonym for :meth:`Series.fillna` with ``method='ffill'``. @@ -3272,6 +3324,9 @@ def ffill(self, value=None, axis=None, inplace=None, limit=None): ------- Object with missing values filled or None if ``inplace=True``. """ + if limit_area is not None: + raise NotImplementedError("limit_area is currently not supported.") + with warnings.catch_warnings(): warnings.simplefilter("ignore", FutureWarning) return self.fillna( @@ -3302,7 +3357,7 @@ def pad(self, value=None, axis=None, inplace=None, limit=None): ) return self.ffill(value=value, axis=axis, inplace=inplace, limit=limit) - def add_prefix(self, prefix): + def add_prefix(self, prefix, axis=None): """ Prefix labels with string `prefix`. @@ -3363,7 +3418,7 @@ def add_prefix(self, prefix): Use `Series.add_prefix` or `DataFrame.add_prefix`" ) - def add_suffix(self, suffix): + def add_suffix(self, suffix, axis=None): """ Suffix labels with string `suffix`. @@ -3464,6 +3519,7 @@ def sort_values( kind="quicksort", na_position="last", ignore_index=False, + key=None, ): """Sort by the values along either axis. @@ -3479,6 +3535,14 @@ def sort_values( 'first' puts nulls at the beginning, 'last' puts nulls at the end ignore_index : bool, default False If True, index will not be sorted. + key : callable, optional + Apply the key function to the values + before sorting. This is similar to the ``key`` argument in the + builtin ``sorted`` function, with the notable difference that + this ``key`` function should be *vectorized*. It should expect a + ``Series`` and return a Series with the same shape as the input. + It will be applied to each column in `by` independently. + Currently not supported. Returns ------- @@ -3497,7 +3561,7 @@ def sort_values( 1 1 2 .. pandas-compat:: - **DataFrame.sort_values, Series.sort_values** + :meth:`pandas.DataFrame.sort_values`, :meth:`pandas.Series.sort_values` * Support axis='index' only. 
* Not supporting: inplace, kind
@@ -3518,6 +3582,8 @@ def sort_values(
 )
 if axis != 0:
 raise NotImplementedError("`axis` not currently implemented.")
+ if key is not None:
+ raise NotImplementedError("key is not currently supported.")
 if len(self) == 0:
 return self
@@ -3642,6 +3708,10 @@ def _reindex(
 index=None,
 inplace=False,
 fill_value=NA,
+ level=None,
+ method=None,
+ limit=None,
+ tolerance=None,
 ):
 """
 Helper for `.reindex`
@@ -3666,6 +3736,15 @@ def _reindex(
 -------
 Series or DataFrame
 """
+ if method is not None:
+ raise NotImplementedError("method is not currently supported.")
+ if level is not None:
+ raise NotImplementedError("level is not currently supported.")
+ if limit is not None:
+ raise NotImplementedError("limit is not currently supported.")
+ if tolerance is not None:
+ raise NotImplementedError("tolerance is not currently supported.")
+
 if dtypes is None:
 dtypes = {}
@@ -3877,16 +3956,15 @@ def resample(
 self,
 rule,
 axis=0,
- closed=None,
- label=None,
- convention="start",
+ closed: Literal["right", "left"] | None = None,
+ label: Literal["right", "left"] | None = None,
+ convention: Literal["start", "end", "s", "e"] = "start",
 kind=None,
- loffset=None,
- base=None,
 on=None,
 level=None,
 origin="start_day",
 offset=None,
+ group_keys: bool = False,
 ):
 """
 Convert the frequency of ("resample") the given time series data.
@@ -4008,7 +4086,7 @@ def resample(
 .. pandas-compat::
- **DataFrame.resample, Series.resample**
+ :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample`
 Note that the dtype of the index (or the 'on' column if using
 'on=') in the result will be of a frequency closest to the
@@ -4024,26 +4102,27 @@ def resample(
 "deprecated and will be removed in a future version. ",
 FutureWarning,
 )
- if (axis, convention, kind, loffset, base, origin, offset) != (
- 0,
- "start",
- None,
- None,
- None,
- "start_day",
- None,
- ):
- raise NotImplementedError(
- "The following arguments are not "
- "currently supported by resample:\n\n"
- "- axis\n"
- "- convention\n"
- "- kind\n"
- "- loffset\n"
- "- base\n"
- "- origin\n"
- "- offset"
+ raise NotImplementedError("kind is currently not supported.")
+ if axis != 0:
+ warnings.warn(
+ "The 'axis' keyword is "
+ "deprecated and will be removed in a future version. ",
+ FutureWarning,
 )
+ raise NotImplementedError("axis is currently not supported.")
+ if convention != "start":
+ warnings.warn(
+ "The 'convention' keyword is "
+ "deprecated and will be removed in a future version. ",
+ FutureWarning,
+ )
+ raise NotImplementedError("convention is currently not supported.")
+ if origin != "start_day":
+ raise NotImplementedError("origin is currently not supported.")
+ if offset is not None:
+ raise NotImplementedError("offset is currently not supported.")
+ if group_keys is not False:
+ raise NotImplementedError("group_keys is currently not supported.")
 by = cudf.Grouper(
 key=on, freq=rule, closed=closed, label=label, level=level
 )
@@ -4054,7 +4133,13 @@ def resample(
 )
 def dropna(
- self, axis=0, how="any", thresh=None, subset=None, inplace=False
+ self,
+ axis=0,
+ how="any",
+ thresh=None,
+ subset=None,
+ inplace=False,
+ ignore_index: bool = False,
 ):
 """
 Drop rows (or columns) containing nulls from a Column.
@@ -4078,6 +4163,8 @@ def dropna(
 columns, subset is a list of rows to consider.
 inplace : bool, default False
 If True, do operation inplace and return None.
+ ignore_index : bool, default ``False``
+ If ``True``, the resulting axis will be labeled 0, 1, …, n - 1.
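# --- Illustrative usage, not part of the patch --------------------------
# Sketch of the new ``ignore_index`` flag on ``dropna`` above: when True,
# the surviving rows are relabeled 0..n-1 instead of keeping their
# original index labels. Data are made up.
import cudf

df = cudf.DataFrame({"a": [1.0, None, 3.0]})
print(df.dropna().index)                   # keeps the labels 0 and 2
print(df.dropna(ignore_index=True).index)  # RangeIndex over 0..1
# -------------------------------------------------------------------------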
Returns ------- @@ -4154,6 +4241,8 @@ def dropna( """ if axis == 0: result = self._drop_na_rows(how=how, subset=subset, thresh=thresh) + if ignore_index: + result.index = RangeIndex(len(result)) else: result = self._drop_na_columns( how=how, subset=subset, thresh=thresh @@ -4292,8 +4381,22 @@ def take(self, indices, axis=0): return self._gather(GatherMap(indices, len(self), nullify=False)) - def _reset_index(self, level, drop, col_level=0, col_fill=""): + def _reset_index( + self, + level, + drop, + col_level=0, + col_fill="", + allow_duplicates: bool = False, + names: abc.Hashable | abc.Sequence[abc.Hashable] | None = None, + ): """Shared path for DataFrame.reset_index and Series.reset_index.""" + if allow_duplicates is not False: + raise NotImplementedError( + "allow_duplicates is not currently supported." + ) + elif names is not None: + raise NotImplementedError("names is not currently supported.") if level is not None: if ( isinstance(level, int) @@ -4564,7 +4667,7 @@ def sample( 1 2 4 .. pandas-compat:: - **DataFrame.sample, Series.sample** + :meth:`pandas.DataFrame.sample`, :meth:`pandas.Series.sample` When sampling from ``axis=0/'index'``, ``random_state`` can be either a numpy random state (``numpy.random.RandomState``) @@ -5249,7 +5352,6 @@ def groupby( as_index=True, sort=no_default, group_keys=False, - squeeze=False, observed=True, dropna=True, ): @@ -5259,11 +5361,6 @@ def groupby( if axis not in (0, "index"): raise NotImplementedError("axis parameter is not yet implemented") - if squeeze is not False: - raise NotImplementedError( - "squeeze parameter is not yet implemented" - ) - if not observed: raise NotImplementedError( "observed parameter is not yet implemented" @@ -6224,6 +6321,7 @@ def rank( multiindex=self._data.multiindex, level_names=self._data.level_names, label_dtype=self._data.label_dtype, + verify=False, ), ) else: diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py index 9c81b0eb607..a0089242909 100644 --- a/python/cudf/cudf/core/indexing_utils.py +++ b/python/cudf/cudf/core/indexing_utils.py @@ -8,11 +8,7 @@ from typing_extensions import TypeAlias import cudf -from cudf.api.types import ( - _is_scalar_or_zero_d_array, - is_integer, - is_integer_dtype, -) +from cudf.api.types import _is_scalar_or_zero_d_array, is_integer from cudf.core.copy_types import BooleanMask, GatherMap @@ -233,7 +229,7 @@ def parse_row_iloc_indexer(key: Any, n: int) -> IndexingSpec: return MaskIndexer(BooleanMask(key, n)) elif len(key) == 0: return EmptyIndexer() - elif is_integer_dtype(key.dtype): + elif key.dtype.kind in "iu": return MapIndexer(GatherMap(key, n, nullify=False)) else: raise TypeError( diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py index dd0a4f666a1..32c84763401 100644 --- a/python/cudf/cudf/core/join/_join_helpers.py +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -9,7 +9,7 @@ import numpy as np import cudf -from cudf.api.types import is_decimal_dtype, is_dtype_equal +from cudf.api.types import is_decimal_dtype, is_dtype_equal, is_numeric_dtype from cudf.core.column import CategoricalColumn from cudf.core.dtypes import CategoricalDtype @@ -88,38 +88,25 @@ def _match_join_keys( ) if ( - np.issubdtype(ltype, np.number) - and np.issubdtype(rtype, np.number) - and not ( - np.issubdtype(ltype, np.timedelta64) - or np.issubdtype(rtype, np.timedelta64) - ) + is_numeric_dtype(ltype) + and is_numeric_dtype(rtype) + and not (ltype.kind == "m" or rtype.kind == "m") ): common_type 
= ( max(ltype, rtype) if ltype.kind == rtype.kind else np.result_type(ltype, rtype) ) - elif ( - np.issubdtype(ltype, np.datetime64) - and np.issubdtype(rtype, np.datetime64) - ) or ( - np.issubdtype(ltype, np.timedelta64) - and np.issubdtype(rtype, np.timedelta64) + elif (ltype.kind == "M" and rtype.kind == "M") or ( + ltype.kind == "m" and rtype.kind == "m" ): common_type = max(ltype, rtype) - elif ( - np.issubdtype(ltype, np.datetime64) - or np.issubdtype(ltype, np.timedelta64) - ) and not rcol.fillna(0).can_cast_safely(ltype): + elif ltype.kind in "mM" and not rcol.fillna(0).can_cast_safely(ltype): raise TypeError( f"Cannot join between {ltype} and {rtype}, please type-cast both " "columns to the same type." ) - elif ( - np.issubdtype(rtype, np.datetime64) - or np.issubdtype(rtype, np.timedelta64) - ) and not lcol.fillna(0).can_cast_safely(rtype): + elif rtype.kind in "mM" and not lcol.fillna(0).can_cast_safely(rtype): raise TypeError( f"Cannot join between {rtype} and {ltype}, please type-cast both " "columns to the same type." diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index ff4b06c6334..2788455aebf 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -150,7 +150,7 @@ def __init__( dtype=None, copy=False, name=None, - **kwargs, + verify_integrity=True, ): if sortorder is not None: raise NotImplementedError("sortorder is not yet supported") @@ -524,8 +524,10 @@ def codes(self): col.values for col in self._codes ) - def get_slice_bound(self, label, side, kind=None): - raise NotImplementedError() + def get_slice_bound(self, label, side): + raise NotImplementedError( + "get_slice_bound is not currently implemented." + ) @property # type: ignore @_performance_tracking @@ -1108,7 +1110,7 @@ def _concat(cls, objs): @classmethod @_performance_tracking - def from_tuples(cls, tuples, names=None): + def from_tuples(cls, tuples, sortorder: int | None = None, names=None): """ Convert list of tuples to MultiIndex. @@ -1116,6 +1118,9 @@ def from_tuples(cls, tuples, names=None): ---------- tuples : list / sequence of tuple-likes Each tuple is the index of one row/column. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). names : list / sequence of str, optional Names for the levels in the index. @@ -1142,13 +1147,24 @@ def from_tuples(cls, tuples, names=None): names=['number', 'color']) """ # Use Pandas for handling Python host objects - pdi = pd.MultiIndex.from_tuples(tuples, names=names) + pdi = pd.MultiIndex.from_tuples( + tuples, sortorder=sortorder, names=names + ) return cls.from_pandas(pdi) @_performance_tracking def to_numpy(self): return self.values_host + def to_flat_index(self): + """ + Convert a MultiIndex to an Index of Tuples containing the level values. + + This is not currently implemented + """ + # TODO: Could implement as Index of ListDtype? + raise NotImplementedError("to_flat_index is not currently supported.") + @property # type: ignore @_performance_tracking def values_host(self): @@ -1215,7 +1231,12 @@ def values(self): @classmethod @_performance_tracking - def from_frame(cls, df: pd.DataFrame | cudf.DataFrame, names=None): + def from_frame( + cls, + df: pd.DataFrame | cudf.DataFrame, + sortorder: int | None = None, + names=None, + ): """ Make a MultiIndex from a DataFrame. @@ -1223,6 +1244,9 @@ def from_frame(cls, df: pd.DataFrame | cudf.DataFrame, names=None): ---------- df : DataFrame DataFrame to be converted to MultiIndex. 
+ sortorder : int, optional + Level of sortedness (must be lexicographically sorted by that + level). names : list-like, optional If no names are provided, use the column names, or tuple of column names if the columns is a MultiIndex. If a sequence, overwrite @@ -1273,11 +1297,13 @@ def from_frame(cls, df: pd.DataFrame | cudf.DataFrame, names=None): else: source_data = df names = names if names is not None else source_data._column_names - return cls.from_arrays(source_data._columns, names=names) + return cls.from_arrays( + source_data._columns, sortorder=sortorder, names=names + ) @classmethod @_performance_tracking - def from_product(cls, arrays, names=None): + def from_product(cls, iterables, sortorder: int | None = None, names=None): """ Make a MultiIndex from the cartesian product of multiple iterables. @@ -1285,6 +1311,9 @@ def from_product(cls, arrays, names=None): ---------- iterables : list / sequence of iterables Each iterable has unique labels for each level of the index. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). names : list / sequence of str, optional Names for the levels in the index. If not explicitly provided, names will be inferred from the @@ -1314,7 +1343,9 @@ def from_product(cls, arrays, names=None): names=['number', 'color']) """ # Use Pandas for handling Python host objects - pdi = pd.MultiIndex.from_product(arrays, names=names) + pdi = pd.MultiIndex.from_product( + iterables, sortorder=sortorder, names=names + ) return cls.from_pandas(pdi) @classmethod @@ -1712,8 +1743,11 @@ def fillna(self, value): return super().fillna(value=value) @_performance_tracking - def unique(self): - return self.drop_duplicates(keep="first") + def unique(self, level: int | None = None) -> Self | cudf.Index: + if level is None: + return self.drop_duplicates(keep="first") + else: + return self.get_level_values(level).unique() @_performance_tracking def nunique(self, dropna: bool = True) -> int: diff --git a/python/cudf/cudf/core/resample.py b/python/cudf/cudf/core/resample.py index cdd4ec6f8e5..715bbf89b15 100644 --- a/python/cudf/cudf/core/resample.py +++ b/python/cudf/cudf/core/resample.py @@ -13,9 +13,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
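# --- Illustrative usage, not part of the patch --------------------------
# Sketch of ``MultiIndex.unique`` with the new ``level`` argument above:
# without ``level`` it deduplicates whole tuples, with ``level`` it
# returns the unique values of that single level. Data are made up.
import cudf

midx = cudf.MultiIndex.from_tuples(
    [("a", 1), ("a", 1), ("b", 2)], names=["k1", "k2"]
)
print(midx.unique())         # two distinct tuples remain
print(midx.unique(level=0))  # unique values of level 'k1': 'a', 'b'
# -------------------------------------------------------------------------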
+from __future__ import annotations import pickle import warnings +from typing import TYPE_CHECKING import numpy as np import pandas as pd @@ -23,7 +25,6 @@ import cudf import cudf._lib.labeling import cudf.core.index -from cudf._typing import DataFrameOrSeries from cudf.core.groupby.groupby import ( DataFrameGroupBy, GroupBy, @@ -31,6 +32,9 @@ _Grouping, ) +if TYPE_CHECKING: + from cudf._typing import DataFrameOrSeries + class _Resampler(GroupBy): grouping: "_ResampleGrouping" @@ -39,8 +43,10 @@ def __init__(self, obj, by, axis=None, kind=None): by = _ResampleGrouping(obj, by) super().__init__(obj, by=by) - def agg(self, func): - result = super().agg(func) + def agg(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + result = super().agg( + func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) if len(self.grouping.bin_labels) != len(result): index = cudf.core.index.Index( self.grouping.bin_labels, name=self.grouping.names[0] diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 1120642947b..e7248977b1d 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -502,6 +502,7 @@ def melt( var_name=None, value_name="value", col_level=None, + ignore_index: bool = True, ): """Unpivots a DataFrame from wide format to long format, optionally leaving identifier variables set. @@ -566,6 +567,8 @@ def melt( """ if col_level is not None: raise NotImplementedError("col_level != None is not supported yet.") + if ignore_index is not True: + raise NotImplementedError("ignore_index is currently not supported.") # Arg cleaning @@ -932,14 +935,10 @@ def _pivot(df, index, columns): index_labels, index_idx = index._encode() column_labels = columns_labels.to_pandas().to_flat_index() - # the result of pivot always has a multicolumn - result = cudf.core.column_accessor.ColumnAccessor( - multiindex=True, level_names=(None,) + columns._data.names - ) - def as_tuple(x): return x if isinstance(x, tuple) else (x,) + result = {} for v in df: names = [as_tuple(v) + as_tuple(name) for name in column_labels] nrows = len(index_labels) @@ -964,8 +963,12 @@ def as_tuple(x): } ) + # the result of pivot always has a multicolumn + ca = cudf.core.column_accessor.ColumnAccessor( + result, multiindex=True, level_names=(None,) + columns._data.names + ) return cudf.DataFrame._from_data( - result, index=cudf.Index(index_labels, name=index.name) + ca, index=cudf.Index(index_labels, name=index.name) ) @@ -1060,7 +1063,7 @@ def pivot(data, columns=None, index=no_default, values=no_default): return result -def unstack(df, level, fill_value=None): +def unstack(df, level, fill_value=None, sort: bool = True): """ Pivot one or more levels of the (necessarily hierarchical) index labels. @@ -1080,6 +1083,9 @@ def unstack(df, level, fill_value=None): levels of the index to pivot fill_value Non-functional argument provided for compatibility with Pandas. + sort : bool, default True + Sort the level(s) in the resulting MultiIndex columns. 
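# --- Illustrative usage, not part of the patch --------------------------
# Sketch of ``unstack`` with the new ``sort`` guard, assuming the function
# is exposed as ``cudf.unstack`` as in recent releases: the default
# ``sort=True`` pivots an index level into columns, while ``sort=False``
# raises. Data are made up.
import cudf

df = cudf.DataFrame(
    {"a": ["x", "x", "y"], "b": [1, 2, 1], "v": [10, 20, 30]}
).set_index(["a", "b"])

print(cudf.unstack(df, level="b"))  # columns 1 and 2 taken from level 'b'

try:
    cudf.unstack(df, level="b", sort=False)
except NotImplementedError as err:
    print(err)  # sort=False is not supported.
# -------------------------------------------------------------------------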
+ Returns ------- @@ -1156,6 +1162,8 @@ def unstack(df, level, fill_value=None): if fill_value is not None: raise NotImplementedError("fill_value is not supported.") + elif sort is False: + raise NotImplementedError(f"{sort=} is not supported.") if pd.api.types.is_list_like(level): if not level: return df diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index e12cc3d52fb..10ac1fdfc1e 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -24,7 +24,6 @@ _is_scalar_or_zero_d_array, is_dict_like, is_integer, - is_integer_dtype, is_scalar, ) from cudf.core import indexing_utils @@ -213,7 +212,7 @@ def __setitem__(self, key, value): and self._frame.dtype.categories.dtype.kind == "f" ) ) - and isinstance(value, (np.float32, np.float64)) + and isinstance(value, np.floating) and np.isnan(value) ): raise MixedTypeError( @@ -356,12 +355,10 @@ def _loc_to_iloc(self, arg): ) if not _is_non_decimal_numeric_dtype(index_dtype) and not ( isinstance(index_dtype, cudf.CategoricalDtype) - and is_integer_dtype(index_dtype.categories.dtype) + and index_dtype.categories.dtype.kind in "iu" ): # TODO: switch to cudf.utils.dtypes.is_integer(arg) - if isinstance(arg, cudf.Scalar) and is_integer_dtype( - arg.dtype - ): + if isinstance(arg, cudf.Scalar) and arg.dtype.kind in "iu": # Do not remove until pandas 3.0 support is added. assert ( PANDAS_LT_300 @@ -921,7 +918,18 @@ def to_dict(self, into: type[dict] = dict) -> dict: return self.to_pandas().to_dict(into=into) @_performance_tracking - def reindex(self, *args, **kwargs): + def reindex( + self, + index=None, + *, + axis=None, + method: str | None = None, + copy: bool = True, + level=None, + fill_value: ScalarLike | None = None, + limit: int | None = None, + tolerance=None, + ): """ Conform Series to new index. @@ -930,6 +938,8 @@ def reindex(self, *args, **kwargs): index : Index, Series-convertible, default None New labels / index to conform to, should be specified using keywords. + axis: int, default None + Unused. method: Not Supported copy : boolean, default True level: Not Supported @@ -960,7 +970,7 @@ def reindex(self, *args, **kwargs): dtype: int64 .. pandas-compat:: - **Series.reindex** + :meth:`pandas.Series.reindex` Note: One difference from Pandas is that ``NA`` is used for rows that do not match, rather than ``NaN``. One side effect of this is @@ -968,27 +978,23 @@ def reindex(self, *args, **kwargs): where it is cast to float in Pandas. 
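# --- Illustrative usage, not part of the patch --------------------------
# Sketch of the rewritten ``Series.reindex`` below: ``index`` is the only
# positional argument, the remaining options are keyword-only, and the
# still-unsupported pandas options such as ``method`` raise.
# Data are made up.
import cudf

s = cudf.Series([10, 20, 30], index=["a", "b", "c"])
print(s.reindex(["b", "d"], fill_value=-1))  # b -> 20, d -> -1

try:
    s.reindex(["b", "d"], method="ffill")
except NotImplementedError as err:
    print(err)  # method is not currently supported.
# -------------------------------------------------------------------------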
""" - if len(args) > 1: - raise TypeError( - "Only one positional argument ('index') is allowed" - ) - if args: - (index,) = args - if "index" in kwargs: - raise TypeError( - "'index' passed as both positional and keyword argument" - ) - else: - index = kwargs.get("index", self.index) + if index is None: + index = self.index + if fill_value is None: + fill_value = cudf.NA name = self.name or 0 series = self._reindex( - deep=kwargs.get("copy", True), + deep=copy, dtypes={name: self.dtype}, index=index, column_names=[name], inplace=False, - fill_value=kwargs.get("fill_value", cudf.NA), + fill_value=fill_value, + level=level, + method=method, + limit=limit, + tolerance=tolerance, ) series.name = self.name return series @@ -1057,14 +1063,21 @@ def reindex(self, *args, **kwargs): ) ) def reset_index( - self, level=None, drop=False, name=no_default, inplace=False + self, + level=None, + drop=False, + name=no_default, + inplace=False, + allow_duplicates=False, ): if not drop and inplace: raise TypeError( "Cannot reset_index inplace on a Series " "to create a DataFrame" ) - data, index = self._reset_index(level=level, drop=drop) + data, index = self._reset_index( + level=level, drop=drop, allow_duplicates=allow_duplicates + ) if not drop: if name is no_default: name = 0 if self.name is None else self.name @@ -1243,7 +1256,7 @@ def map(self, arg, na_action=None) -> "Series": dtype: int64 .. pandas-compat:: - **Series.map** + :meth:`pandas.Series.map` Please note map currently only supports fixed-width numeric type functions. @@ -1635,7 +1648,9 @@ def has_nulls(self): return self._column.has_nulls() @_performance_tracking - def dropna(self, axis=0, inplace=False, how=None): + def dropna( + self, axis=0, inplace=False, how=None, ignore_index: bool = False + ): """ Return a Series with null values removed. @@ -1647,6 +1662,8 @@ def dropna(self, axis=0, inplace=False, how=None): If True, do operation inplace and return None. how : str, optional Not in use. Kept for compatibility. + ignore_index : bool, default ``False`` + If ``True``, the resulting axis will be labeled 0, 1, …, n - 1. Returns ------- @@ -1712,6 +1729,9 @@ def dropna(self, axis=0, inplace=False, how=None): result = super().dropna(axis=axis) + if ignore_index: + result.index = RangeIndex(len(result)) + return self._mimic_inplace(result, inplace=inplace) @_performance_tracking @@ -2049,10 +2069,31 @@ def astype( return super().astype(dtype, copy, errors) @_performance_tracking - def sort_index(self, axis=0, *args, **kwargs): + def sort_index( + self, + axis=0, + level=None, + ascending=True, + inplace=False, + kind=None, + na_position="last", + sort_remaining=True, + ignore_index=False, + key=None, + ): if axis not in (0, "index"): raise ValueError("Only axis=0 is valid for Series.") - return super().sort_index(axis=axis, *args, **kwargs) + return super().sort_index( + axis=axis, + level=level, + ascending=ascending, + inplace=inplace, + kind=kind, + na_position=na_position, + sort_remaining=sort_remaining, + ignore_index=ignore_index, + key=key, + ) @_performance_tracking def sort_values( @@ -2063,6 +2104,7 @@ def sort_values( kind="quicksort", na_position="last", ignore_index=False, + key=None, ): """Sort by the values along either axis. @@ -2076,6 +2118,14 @@ def sort_values( 'first' puts nulls at the beginning, 'last' puts nulls at the end ignore_index : bool, default False If True, index will not be sorted. + key : callable, optional + Apply the key function to the values + before sorting. 
This is similar to the ``key`` argument in the + builtin ``sorted`` function, with the notable difference that + this ``key`` function should be *vectorized*. It should expect a + ``Series`` and return a Series with the same shape as the input. + It will be applied to each column in `by` independently. + Currently not supported. Returns ------- @@ -2094,7 +2144,7 @@ def sort_values( dtype: int64 .. pandas-compat:: - **Series.sort_values** + :meth:`pandas.Series.sort_values` * Support axis='index' only. * The inplace and kind argument is currently unsupported @@ -2107,6 +2157,7 @@ def sort_values( kind=kind, na_position=na_position, ignore_index=ignore_index, + key=key, ) @_performance_tracking @@ -2256,30 +2307,44 @@ def argsort( order=None, ascending=True, na_position="last", - ): - obj = self.__class__._from_data( - { - None: super().argsort( - axis=axis, - kind=kind, - order=order, - ascending=ascending, - na_position=na_position, - ) - } + ) -> Self: + col = as_column( + super().argsort( + axis=axis, + kind=kind, + order=order, + ascending=ascending, + na_position=na_position, + ) + ) + return self._from_data_like_self( + self._data._from_columns_like_self([col]) ) - obj.name = self.name - return obj @_performance_tracking - def replace(self, to_replace=None, value=no_default, *args, **kwargs): + def replace( + self, + to_replace=None, + value=no_default, + inplace=False, + limit=None, + regex=False, + method=no_default, + ): if is_dict_like(to_replace) and value not in {None, no_default}: raise ValueError( "Series.replace cannot use dict-like to_replace and non-None " "value" ) - return super().replace(to_replace, value, *args, **kwargs) + return super().replace( + to_replace, + value, + inplace=inplace, + limit=limit, + regex=regex, + method=method, + ) @_performance_tracking def update(self, other): @@ -2388,7 +2453,14 @@ def update(self, other): # UDF related @_performance_tracking - def apply(self, func, convert_dtype=True, args=(), **kwargs): + def apply( + self, + func, + convert_dtype=True, + args=(), + by_row: Literal[False, "compat"] = "compat", + **kwargs, + ): """ Apply a scalar function to the values of a Series. Similar to ``pandas.Series.apply``. @@ -2415,6 +2487,18 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): See examples for details. args : tuple Positional arguments passed to func after the series value. + by_row : False or "compat", default "compat" + If ``"compat"`` and func is a callable, func will be passed each element of + the Series, like ``Series.map``. If func is a list or dict of + callables, will first try to translate each func into pandas methods. If + that doesn't work, will try call to apply again with ``by_row="compat"`` + and if that fails, will call apply again with ``by_row=False`` + (backward compatible). + If False, the func will be passed the whole Series at once. + + ``by_row`` has no effect when ``func`` is a string. + + Currently not implemented. **kwargs Not supported @@ -2524,6 +2608,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): """ if convert_dtype is not True: raise ValueError("Series.apply only supports convert_dtype=True") + elif by_row != "compat": + raise NotImplementedError("by_row is currently not supported.") result = self._apply(func, _get_scalar_kernel, *args, **kwargs) result.name = self.name @@ -2550,7 +2636,7 @@ def count(self): 5 .. pandas-compat:: - **Series.count** + :meth:`pandas.Series.count` Parameters currently not supported is `level`. 
""" @@ -2624,7 +2710,7 @@ def mode(self, dropna=True): val_counts = val_counts[val_counts == val_counts.iloc[0]] return Series._from_data( - {self.name: val_counts.index.sort_values()}, name=self.name + {self.name: val_counts.index.sort_values()._column}, name=self.name ) @_performance_tracking @@ -2637,7 +2723,7 @@ def round(self, decimals=0, how="half_even"): return super().round(decimals, how) @_performance_tracking - def cov(self, other, min_periods=None): + def cov(self, other, min_periods=None, ddof: int | None = None): """ Compute covariance with Series, excluding missing values. @@ -2661,7 +2747,7 @@ def cov(self, other, min_periods=None): -0.015750000000000004 .. pandas-compat:: - **Series.cov** + :meth:`pandas.Series.cov` `min_periods` parameter is not yet supported. """ @@ -2670,6 +2756,8 @@ def cov(self, other, min_periods=None): raise NotImplementedError( "min_periods parameter is not implemented yet" ) + if ddof is not None: + raise NotImplementedError("ddof parameter is not implemented yet") if self.empty or other.empty: return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) @@ -2687,14 +2775,6 @@ def cov(self, other, min_periods=None): f"{other.dtype}" ) - @_performance_tracking - def transpose(self): - """Return the transpose, which is by definition self.""" - - return self - - T = property(transpose, doc=transpose.__doc__) - @_performance_tracking def duplicated(self, keep="first"): """ @@ -3368,7 +3448,6 @@ def groupby( as_index=True, sort=no_default, group_keys=False, - squeeze=False, observed=True, dropna=True, ): @@ -3379,13 +3458,20 @@ def groupby( as_index, sort, group_keys, - squeeze, observed, dropna, ) @_performance_tracking - def rename(self, index=None, copy=True): + def rename( + self, + index=None, + axis=None, + copy: bool = True, + inplace: bool = False, + level=None, + errors: Literal["ignore", "raise"] = "ignore", + ): """ Alter Series name @@ -3395,8 +3481,21 @@ def rename(self, index=None, copy=True): ---------- index : Scalar, optional Scalar to alter the Series.name attribute + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. copy : boolean, default True Also copy underlying data + inplace : bool, default False + Whether to return a new Series. If True the value of copy is ignored. + Currently not supported. + level : int or level name, default None + In case of MultiIndex, only rename labels in the specified level. + Currently not supported. + errors : {'ignore', 'raise'}, default 'ignore' + If 'raise', raise `KeyError` when a `dict-like mapper` or + `index` contains labels that are not present in the index being transformed. + If 'ignore', existing keys will be renamed and extra keys will be ignored. + Currently not supported. Returns ------- @@ -3422,16 +3521,23 @@ def rename(self, index=None, copy=True): 'numeric_series' .. 
pandas-compat:: - **Series.rename** + :meth:`pandas.Series.rename` - Supports scalar values only for changing name attribute - - The ``inplace`` and ``level`` is not supported """ + if inplace is not False: + raise NotImplementedError("inplace is currently not supported.") + if level is not None: + raise NotImplementedError("level is currently not supported.") + if errors != "ignore": + raise NotImplementedError("errors is currently not supported.") out_data = self._data.copy(deep=copy) return Series._from_data(out_data, self.index, name=index) @_performance_tracking - def add_prefix(self, prefix): + def add_prefix(self, prefix, axis=None): + if axis is not None: + raise NotImplementedError("axis is currently not implemented.") return Series._from_data( # TODO: Change to deep=False when copy-on-write is default data=self._data.copy(deep=True), @@ -3439,7 +3545,9 @@ def add_prefix(self, prefix): ) @_performance_tracking - def add_suffix(self, suffix): + def add_suffix(self, suffix, axis=None): + if axis is not None: + raise NotImplementedError("axis is currently not implemented.") return Series._from_data( # TODO: Change to deep=False when copy-on-write is default data=self._data.copy(deep=True), @@ -3529,7 +3637,12 @@ def explode(self, ignore_index=False): @_performance_tracking def pct_change( - self, periods=1, fill_method=no_default, limit=no_default, freq=None + self, + periods=1, + fill_method=no_default, + limit=no_default, + freq=None, + **kwargs, ): """ Calculates the percent change between sequential elements @@ -3554,6 +3667,9 @@ def pct_change( freq : str, optional Increment to use from time series API. Not yet implemented. + **kwargs + Additional keyword arguments are passed into + `Series.shift`. Returns ------- @@ -3598,11 +3714,15 @@ def pct_change( warnings.simplefilter("ignore") data = self.fillna(method=fill_method, limit=limit) diff = data.diff(periods=periods) - change = diff / data.shift(periods=periods, freq=freq) + change = diff / data.shift(periods=periods, freq=freq, **kwargs) return change @_performance_tracking - def where(self, cond, other=None, inplace=False): + def where(self, cond, other=None, inplace=False, axis=None, level=None): + if axis is not None: + raise NotImplementedError("axis is not supported.") + elif level is not None: + raise NotImplementedError("level is not supported.") result_col = super().where(cond, other, inplace) return self._mimic_inplace( self._from_data_like_self( @@ -4702,7 +4822,7 @@ def strftime(self, date_format: str, *args, **kwargs) -> Series: dtype: object .. pandas-compat:: - **series.DatetimeProperties.strftime** + :meth:`pandas.DatetimeIndex.strftime` The following date format identifiers are not yet supported: ``%c``, ``%x``,``%X`` diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 04c7db7a53c..a5ff1223791 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -12,7 +12,6 @@ from cudf.api.types import ( _is_scalar_or_zero_d_array, is_integer, - is_integer_dtype, is_numeric_dtype, ) from cudf.core.column import ColumnBase, as_column @@ -91,12 +90,6 @@ def shape(self) -> tuple[int]: """Get a tuple representing the dimensionality of the Index.""" return (len(self),) - def __bool__(self): - raise TypeError( - f"The truth value of a {type(self)} is ambiguous. Use " - "a.empty, a.bool(), a.item(), a.any() or a.all()." 
- ) - @property # type: ignore @_performance_tracking def _num_columns(self) -> int: @@ -358,7 +351,7 @@ def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase: arg = as_column(arg) if len(arg) == 0: arg = cudf.core.column.column_empty(0, dtype="int32") - if is_integer_dtype(arg.dtype): + if arg.dtype.kind in "iu": return self._column.take(arg) if arg.dtype.kind == "b": if (bn := len(arg)) != (n := len(self)): @@ -396,3 +389,10 @@ def where(self, cond, other=None, inplace=False): result = cudf._lib.copying.copy_if_else(input_col, other, cond) return _make_categorical_like(result, self_column) + + @_performance_tracking + def transpose(self): + """Return the transpose, which is by definition self.""" + return self + + T = property(transpose, doc=transpose.__doc__) diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index 466d46f7dca..07158e4ee61 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -80,7 +80,7 @@ def to_numeric(arg, errors="raise", downcast=None): dtype: float64 .. pandas-compat:: - **cudf.to_numeric** + :func:`pandas.to_numeric` An important difference from pandas is that this function does not accept mixed numeric/non-numeric type sequences. diff --git a/python/cudf/cudf/core/window/ewm.py b/python/cudf/cudf/core/window/ewm.py index 21693e106bd..ef0f6958aeb 100644 --- a/python/cudf/cudf/core/window/ewm.py +++ b/python/cudf/cudf/core/window/ewm.py @@ -1,7 +1,9 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. - from __future__ import annotations +import warnings +from typing import Literal + import numpy as np from cudf._lib.reduce import scan @@ -56,7 +58,7 @@ class ExponentialMovingWindow(_RollingBase): the equivalent pandas method. .. pandas-compat:: - **cudf.core.window.ExponentialMovingWindow** + :meth:`pandas.DataFrame.ewm` The parameters ``min_periods``, ``ignore_na``, ``axis``, and ``times`` are not yet supported. Behavior is defined only for data that begins @@ -103,34 +105,79 @@ def __init__( ignore_na: bool = False, axis: int = 0, times: str | np.ndarray | None = None, + method: Literal["single", "table"] = "single", ): - if (min_periods, ignore_na, axis, times) != (0, False, 0, None): + if min_periods != 0: raise NotImplementedError( - "The parameters `min_periods`, `ignore_na`, " - "`axis`, and `times` are not yet supported." + "min_periods is currently not supported." ) - + if ignore_na is not False: + raise NotImplementedError("ignore_na is currently not supported.") + if axis != 0: + warnings.warn( + "axis is deprecated and will be removed in a future version. " + "Transpose the DataFrame first instead." + ) + raise NotImplementedError("axis is currently not supported.") + if times is not None: + raise NotImplementedError("times is currently not supported.") + if method != "single": + raise NotImplementedError("method is currently not supported.") self.obj = obj self.adjust = adjust self.com = get_center_of_mass(com, span, halflife, alpha) - def mean(self): + def online(self, engine: str = "numba", engine_kwargs=None): + """ + Return an ``OnlineExponentialMovingWindow`` object to calculate + exponentially moving window aggregations in an online method. + + Currently not supported. + """ + raise NotImplementedError("online is currently not supported.") + + def mean( + self, numeric_only: bool = False, engine=None, engine_kwargs=None ): """ Calculate the ewm (exponential weighted moment) mean.
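        For example, a minimal illustrative sketch (hypothetical data; the
        output values assume the standard adjusted EWM recurrence with
        ``com=0.5``, i.e. ``alpha = 2/3``):

        >>> import cudf
        >>> s = cudf.Series([1.0, 2.0, 3.0])
        >>> s.ewm(com=0.5).mean()
        0    1.000000
        1    1.750000
        2    2.615385
        dtype: float64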
""" + if numeric_only is not False: + raise NotImplementedError( + "numeric_only is currently not supported." + ) + if engine is not None: + raise NotImplementedError( + "engine is non-functional and added for compatibility with pandas." + ) + if engine_kwargs is not None: + raise NotImplementedError( + "engine_kwargs is non-functional and added for compatibility with pandas." + ) return self._apply_agg("ewma") - def var(self, bias): - raise NotImplementedError("ewmvar not yet supported.") + def sum(self, numeric_only: bool = False, engine=None, engine_kwargs=None): + raise NotImplementedError("sum not yet supported.") - def std(self, bias): - raise NotImplementedError("ewmstd not yet supported.") + def var(self, bias: bool = False, numeric_only: bool = False): + raise NotImplementedError("var not yet supported.") - def corr(self, other): - raise NotImplementedError("ewmcorr not yet supported.") + def std(self, bias: bool = False, numeric_only: bool = False): + raise NotImplementedError("std not yet supported.") - def cov(self, other): - raise NotImplementedError("ewmcov not yet supported.") + def corr( + self, other, pairwise: bool | None = None, numeric_only: bool = False + ): + raise NotImplementedError("corr not yet supported.") + + def cov( + self, + other, + pairwise: bool | None = None, + bias: bool = False, + numeric_only: bool = False, + ): + raise NotImplementedError("cov not yet supported.") def _apply_agg_series(self, sr, agg_name): if not is_numeric_dtype(sr.dtype): diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index 29391c68471..043a41145e5 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -1,4 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION +from __future__ import annotations + +import warnings import numba import pandas as pd @@ -196,17 +199,26 @@ def __init__( obj, window, min_periods=None, - center=False, + center: bool = False, + win_type: str | None = None, + on=None, axis=0, - win_type=None, + closed: str | None = None, + step: int | None = None, + method: str = "single", ): self.obj = obj self.window = window self.min_periods = min_periods self.center = center self._normalize() - self.agg_params = {} + # for var & std only? + self.agg_params: dict[str, int] = {} if axis != 0: + warnings.warn( + "axis is deprecated with will be removed in a future version. " + "Transpose the DataFrame first instead." 
+ ) raise NotImplementedError("axis != 0 is not supported yet.") self.axis = axis @@ -217,6 +229,15 @@ def __init__( ) self.win_type = win_type + if on is not None: + raise NotImplementedError("on is currently not supported") + if closed not in (None, "right"): + raise NotImplementedError("closed is currently not supported") + if step is not None: + raise NotImplementedError("step is currently not supported") + if method != "single": + raise NotImplementedError("method is currently not supported") + def __getitem__(self, arg): if isinstance(arg, tuple): arg = list(arg) diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index e909d96309e..0f2820a01e9 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -50,7 +50,7 @@ def read_csv( comment=None, delim_whitespace=False, byte_range=None, - use_python_file_object=True, + use_python_file_object=None, storage_options=None, bytes_per_thread=None, ): diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py index 7082a85237a..289292b5182 100644 --- a/python/cudf/cudf/io/orc.py +++ b/python/cudf/cudf/io/orc.py @@ -10,6 +10,7 @@ from cudf._lib import orc as liborc from cudf.api.types import is_list_like from cudf.utils import ioutils +from cudf.utils.utils import maybe_filter_deprecation def _make_empty_df(filepath_or_buffer, columns): @@ -280,7 +281,7 @@ def read_orc( num_rows=None, use_index=True, timestamp_type=None, - use_python_file_object=True, + use_python_file_object=None, storage_options=None, bytes_per_thread=None, ): @@ -320,6 +321,9 @@ def read_orc( ) filepaths_or_buffers = [] + have_nativefile = any( + isinstance(source, pa.NativeFile) for source in filepath_or_buffer + ) for source in filepath_or_buffer: if ioutils.is_directory( path_or_data=source, storage_options=storage_options @@ -360,17 +364,24 @@ def read_orc( stripes = selected_stripes if engine == "cudf": - return DataFrame._from_data( - *liborc.read_orc( - filepaths_or_buffers, - columns, - stripes, - skiprows, - num_rows, - use_index, - timestamp_type, + # Don't want to warn if use_python_file_object causes us to get + # a NativeFile (there is a separate deprecation warning for that) + with maybe_filter_deprecation( + not have_nativefile, + message="Support for reading pyarrow's NativeFile is deprecated", + category=FutureWarning, + ): + return DataFrame._from_data( + *liborc.read_orc( + filepaths_or_buffers, + columns, + stripes, + skiprows, + num_rows, + use_index, + timestamp_type, + ) ) - ) else: from pyarrow import orc diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 02b26ea1c01..7dab2f20100 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -15,6 +15,7 @@ import numpy as np import pandas as pd +import pyarrow as pa from pyarrow import dataset as ds import cudf @@ -23,6 +24,7 @@ from cudf.core.column import as_column, build_categorical_column, column_empty from cudf.utils import ioutils from cudf.utils.performance_tracking import _performance_tracking +from cudf.utils.utils import maybe_filter_deprecation BYTE_SIZES = { "kb": 1000, @@ -350,7 +352,7 @@ def read_parquet_metadata(filepath_or_buffer): path_or_data=source, compression=None, fs=fs, - use_python_file_object=True, + use_python_file_object=None, open_file_options=None, storage_options=None, bytes_per_thread=None, @@ -532,7 +534,7 @@ def read_parquet( filters=None, row_groups=None, use_pandas_metadata=True, - use_python_file_object=True, + use_python_file_object=None, categorical_partitions=True, 
open_file_options=None, bytes_per_thread=None, @@ -615,6 +617,9 @@ def read_parquet( row_groups=row_groups, fs=fs, ) + have_nativefile = any( + isinstance(source, pa.NativeFile) for source in filepath_or_buffer + ) for source in filepath_or_buffer: tmp_source, compression = ioutils.get_reader_filepath_or_buffer( path_or_data=source, @@ -662,19 +667,26 @@ def read_parquet( ) # Convert parquet data to a cudf.DataFrame - df = _parquet_to_frame( - filepaths_or_buffers, - engine, - *args, - columns=columns, - row_groups=row_groups, - use_pandas_metadata=use_pandas_metadata, - partition_keys=partition_keys, - partition_categories=partition_categories, - dataset_kwargs=dataset_kwargs, - **kwargs, - ) + # Don't want to warn if use_python_file_object causes us to get + # a NativeFile (there is a separate deprecation warning for that) + with maybe_filter_deprecation( + not have_nativefile, + message="Support for reading pyarrow's NativeFile is deprecated", + category=FutureWarning, + ): + df = _parquet_to_frame( + filepaths_or_buffers, + engine, + *args, + columns=columns, + row_groups=row_groups, + use_pandas_metadata=use_pandas_metadata, + partition_keys=partition_keys, + partition_categories=partition_categories, + dataset_kwargs=dataset_kwargs, + **kwargs, + ) # Apply filters row-wise (if any are defined), and return df = _apply_post_filters(df, filters) if projected_columns: @@ -917,12 +929,12 @@ def _read_parquet( f"following positional arguments: {list(args)}" ) if cudf.get_option("io.parquet.low_memory"): - return libparquet.ParquetReader( + return libparquet.read_parquet_chunked( filepaths_or_buffers, columns=columns, row_groups=row_groups, use_pandas_metadata=use_pandas_metadata, - ).read() + ) else: return libparquet.read_parquet( filepaths_or_buffers, diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index ff445a63f74..bf88c950385 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -25,41 +25,39 @@ def install(): global LOADED LOADED = loader is not None - if (rmm_mode := os.getenv("CUDF_PANDAS_RMM_MODE", None)) is not None: - # Check if a non-default memory resource is set - current_mr = rmm.mr.get_current_device_resource() - if not isinstance(current_mr, rmm.mr.CudaMemoryResource): - warnings.warn( - f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}", - UserWarning, - ) - free_memory, _ = rmm.mr.available_device_memory() - free_memory = int(round(float(free_memory) * 0.80 / 256) * 256) + rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", "managed_pool") + # Check if a non-default memory resource is set + current_mr = rmm.mr.get_current_device_resource() + if not isinstance(current_mr, rmm.mr.CudaMemoryResource): + warnings.warn( + f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}", + UserWarning, + ) + return rmm_mode - if rmm_mode == "cuda": - mr = rmm.mr.CudaMemoryResource() - rmm.mr.set_current_device_resource(mr) - elif rmm_mode == "pool": - rmm.mr.set_current_device_resource( - rmm.mr.PoolMemoryResource( - rmm.mr.get_current_device_resource(), - initial_pool_size=free_memory, - ) - ) - elif rmm_mode == "async": - mr = rmm.mr.CudaAsyncMemoryResource(initial_pool_size=free_memory) - rmm.mr.set_current_device_resource(mr) - elif rmm_mode == "managed": - mr = rmm.mr.ManagedMemoryResource() - rmm.mr.set_current_device_resource(mr) - elif rmm_mode == "managed_pool": - mr = 
rmm.mr.PoolMemoryResource( + free_memory, _ = rmm.mr.available_device_memory() + free_memory = int(round(float(free_memory) * 0.80 / 256) * 256) + new_mr = current_mr + if rmm_mode == "pool": + new_mr = rmm.mr.PoolMemoryResource( + current_mr, + initial_pool_size=free_memory, + ) + elif rmm_mode == "async": + new_mr = rmm.mr.CudaAsyncMemoryResource(initial_pool_size=free_memory) + elif rmm_mode == "managed": + new_mr = rmm.mr.PrefetchResourceAdaptor(rmm.mr.ManagedMemoryResource()) + elif rmm_mode == "managed_pool": + new_mr = rmm.mr.PrefetchResourceAdaptor( + rmm.mr.PoolMemoryResource( rmm.mr.ManagedMemoryResource(), initial_pool_size=free_memory, ) - rmm.mr.set_current_device_resource(mr) - else: - raise ValueError(f"Unsupported rmm mode: {rmm_mode}") + ) + elif rmm_mode != "cuda": + raise ValueError(f"Unsupported {rmm_mode=}") + rmm.mr.set_current_device_resource(new_mr) + return rmm_mode def pytest_load_initial_conftests(early_config, parser, args): diff --git a/python/cudf/cudf/pandas/__main__.py b/python/cudf/cudf/pandas/__main__.py index fb8569fa1d0..591744ce793 100644 --- a/python/cudf/cudf/pandas/__main__.py +++ b/python/cudf/cudf/pandas/__main__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -72,7 +72,17 @@ def main(): args = parser.parse_args() - install() + rmm_mode = install() + if "managed" in rmm_mode: + for key in { + "column_view::get_data", + "mutable_column_view::get_data", + "gather", + "hash_join", + }: + from cudf._lib import pylibcudf + + pylibcudf.experimental.enable_prefetching(key) with profile(args.profile, args.line_profile, args.args[0]) as fn: args.args[0] = fn if args.module: diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 59a243dd7c4..478108f36f1 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -26,6 +26,7 @@ ) import cudf +import cudf.core._compat from ..annotation import nvtx from ..fast_slow_proxy import ( @@ -556,13 +557,14 @@ def Index__setattr__(self, name, value): }, ) -ArrowStringArrayNumpySemantics = make_final_proxy_type( - "ArrowStringArrayNumpySemantics", - _Unusable, - pd.core.arrays.string_arrow.ArrowStringArrayNumpySemantics, - fast_to_slow=_Unusable(), - slow_to_fast=_Unusable(), -) +if cudf.core._compat.PANDAS_GE_210: + ArrowStringArrayNumpySemantics = make_final_proxy_type( + "ArrowStringArrayNumpySemantics", + _Unusable, + pd.core.arrays.string_arrow.ArrowStringArrayNumpySemantics, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + ) ArrowStringArray = make_final_proxy_type( "ArrowStringArray", diff --git a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh index a66f63c09b3..9c65b74d081 100755 --- a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh +++ b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh @@ -137,7 +137,7 @@ and not test_eof_states \ and not test_array_tz" # TODO: Remove "not db" once a postgres & mysql container is set up on the CI -PANDAS_CI="1" timeout 30m python -m pytest -p cudf.pandas \ +PANDAS_CI="1" timeout 60m python -m pytest -p cudf.pandas \ -v -m "not single_cpu and not db" \ -k "$TEST_THAT_NEED_MOTO_SERVER and $TEST_THAT_CRASH_PYTEST_WORKERS and not test_groupby_raises_category_on_category and not test_constructor_no_pandas_array 
and not test_is_monotonic_na and not test_index_contains and not test_index_contains and not test_frame_op_subclass_nonclass_constructor and not test_round_trip_current" \ --import-mode=importlib \ diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index e56c8d867cb..c2072d90e98 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -158,12 +158,12 @@ def assert_column_equal( return True if check_datetimelike_compat: - if np.issubdtype(left.dtype, np.datetime64): + if left.dtype.kind == "M": right = right.astype(left.dtype) - elif np.issubdtype(right.dtype, np.datetime64): + elif right.dtype.kind == "M": left = left.astype(right.dtype) - if np.issubdtype(left.dtype, np.datetime64): + if left.dtype.kind == "M": if not left.equals(right): raise AssertionError( f"[datetimelike_compat=True] {left.values} " @@ -779,9 +779,7 @@ def assert_eq(left, right, **kwargs): tm.assert_index_equal(left, right, **kwargs) elif isinstance(left, np.ndarray) and isinstance(right, np.ndarray): - if np.issubdtype(left.dtype, np.floating) and np.issubdtype( - right.dtype, np.floating - ): + if left.dtype.kind == "f" and right.dtype.kind == "f": assert np.allclose(left, right, equal_nan=True) else: assert np.array_equal(left, right) diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 9b6029582ce..ae58af8ebce 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -891,3 +891,59 @@ def test_categorical_maxima(op): result = getattr(ser.cat.as_ordered(), op)() result_pd = getattr(ser_pd.cat.as_ordered(), op)() assert_eq(result, result_pd) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_index_ordered(ordered): + pd_ci = pd.CategoricalIndex([1, 2, 3], ordered=ordered) + cudf_ci = cudf.from_pandas(pd_ci) + assert pd_ci.ordered == cudf_ci.ordered + + +@pytest.mark.parametrize("method", ["as_ordered", "as_unordered"]) +@pytest.mark.parametrize("ordered", [True, False]) +def test_index_as_ordered(method, ordered): + pd_ci = pd.CategoricalIndex([1, 2, 3], ordered=ordered) + cudf_ci = cudf.from_pandas(pd_ci) + + expected = getattr(pd_ci, method)() + result = getattr(cudf_ci, method)() + assert_eq(result, expected) + + +def test_index_add_categories(): + pd_ci = pd.CategoricalIndex([1, 2, 3]) + cudf_ci = cudf.from_pandas(pd_ci) + + expected = pd_ci.add_categories([4]) + result = cudf_ci.add_categories([4]) + assert_eq(result, expected) + + +def test_index_remove_categories(): + pd_ci = pd.CategoricalIndex([1, 2, 3], categories=[1, 2, 3, 4]) + cudf_ci = cudf.from_pandas(pd_ci) + + expected = pd_ci.remove_categories([4]) + result = cudf_ci.remove_categories([4]) + assert_eq(result, expected) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_index_reorder_categories(ordered): + pd_ci = pd.CategoricalIndex([1, 2, 3], categories=[1, 3, 2, 4]) + cudf_ci = cudf.from_pandas(pd_ci) + + expected = pd_ci.reorder_categories([1, 2, 3, 4], ordered=ordered) + result = cudf_ci.reorder_categories([1, 2, 3, 4], ordered=ordered) + assert_eq(result, expected) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_index_set_categories(ordered): + pd_ci = pd.CategoricalIndex([1, 2, 3]) + cudf_ci = cudf.from_pandas(pd_ci) + + expected = pd_ci.set_categories([1, 2, 3, 4], ordered=ordered) + result = cudf_ci.set_categories([1, 2, 3, 4], ordered=ordered) + assert_eq(result, expected) diff --git 
a/python/cudf/cudf/tests/test_column_accessor.py b/python/cudf/cudf/tests/test_column_accessor.py index f3343c37d1d..e84e1433c10 100644 --- a/python/cudf/cudf/tests/test_column_accessor.py +++ b/python/cudf/cudf/tests/test_column_accessor.py @@ -5,28 +5,35 @@ import pytest import cudf +from cudf.core.column import as_column from cudf.core.column_accessor import ColumnAccessor from cudf.testing import assert_eq simple_test_data = [ {}, - {"a": []}, - {"a": [1]}, - {"a": ["a"]}, - {"a": [1, 2, 3], "b": ["a", "b", "c"]}, + {"a": as_column([])}, + {"a": as_column([1])}, + {"a": as_column(["a"])}, + {"a": as_column([1, 2, 3]), "b": as_column(["a", "b", "c"])}, ] mi_test_data = [ - {("a", "b"): [1, 2, 4], ("a", "c"): [2, 3, 4]}, - {("a", "b"): [1, 2, 3], ("a", ""): [2, 3, 4]}, - {("a", "b"): [1, 2, 4], ("c", "d"): [2, 3, 4]}, - {("a", "b"): [1, 2, 3], ("a", "c"): [2, 3, 4], ("b", ""): [4, 5, 6]}, + {("a", "b"): as_column([1, 2, 4]), ("a", "c"): as_column([2, 3, 4])}, + {("a", "b"): as_column([1, 2, 3]), ("a", ""): as_column([2, 3, 4])}, + {("a", "b"): as_column([1, 2, 4]), ("c", "d"): as_column([2, 3, 4])}, + { + ("a", "b"): as_column([1, 2, 3]), + ("a", "c"): as_column([2, 3, 4]), + ("b", ""): as_column([4, 5, 6]), + }, ] def check_ca_equal(lhs, rhs): assert lhs.level_names == rhs.level_names assert lhs.multiindex == rhs.multiindex + assert lhs.rangeindex == rhs.rangeindex + assert lhs.label_dtype == rhs.label_dtype for l_key, r_key in zip(lhs, rhs): assert l_key == r_key assert_eq(lhs[l_key], rhs[r_key]) @@ -58,19 +65,26 @@ def test_to_pandas_simple(simple_data): # to ignore this `inferred_type` comparison, we pass exact=False. assert_eq( ca.to_pandas_index(), - pd.DataFrame(simple_data).columns, + pd.DataFrame( + {key: value.values_host for key, value in simple_data.items()} + ).columns, exact=False, ) def test_to_pandas_multiindex(mi_data): ca = ColumnAccessor(mi_data, multiindex=True) - assert_eq(ca.to_pandas_index(), pd.DataFrame(mi_data).columns) + assert_eq( + ca.to_pandas_index(), + pd.DataFrame( + {key: value.values_host for key, value in mi_data.items()} + ).columns, + ) def test_to_pandas_multiindex_names(): ca = ColumnAccessor( - {("a", "b"): [1, 2, 3], ("c", "d"): [3, 4, 5]}, + {("a", "b"): as_column([1, 2, 3]), ("c", "d"): as_column([3, 4, 5])}, multiindex=True, level_names=("foo", "bar"), ) @@ -108,16 +122,20 @@ def test_column_size_mismatch(): differing sizes throws an error. 
""" with pytest.raises(ValueError): - ColumnAccessor({"a": [1], "b": [1, 2]}) + ColumnAccessor({"a": as_column([1]), "b": as_column([1, 2])}) def test_select_by_label_simple(): """ Test getting a column by label """ - ca = ColumnAccessor({"a": [1, 2, 3], "b": [2, 3, 4]}) - check_ca_equal(ca.select_by_label("a"), ColumnAccessor({"a": [1, 2, 3]})) - check_ca_equal(ca.select_by_label("b"), ColumnAccessor({"b": [2, 3, 4]})) + ca = ColumnAccessor({"a": as_column([1, 2, 3]), "b": as_column([2, 3, 4])}) + check_ca_equal( + ca.select_by_label("a"), ColumnAccessor({"a": as_column([1, 2, 3])}) + ) + check_ca_equal( + ca.select_by_label("b"), ColumnAccessor({"b": as_column([2, 3, 4])}) + ) def test_select_by_label_multiindex(): @@ -126,40 +144,62 @@ def test_select_by_label_multiindex(): """ ca = ColumnAccessor( { - ("a", "b", "c"): [1, 2, 3], - ("a", "b", "e"): [2, 3, 4], - ("b", "x", ""): [4, 5, 6], - ("a", "d", "e"): [3, 4, 5], + ("a", "b", "c"): as_column([1, 2, 3]), + ("a", "b", "e"): as_column([2, 3, 4]), + ("b", "x", ""): as_column([4, 5, 6]), + ("a", "d", "e"): as_column([3, 4, 5]), }, multiindex=True, ) expect = ColumnAccessor( - {("b", "c"): [1, 2, 3], ("b", "e"): [2, 3, 4], ("d", "e"): [3, 4, 5]}, + { + ("b", "c"): as_column([1, 2, 3]), + ("b", "e"): as_column([2, 3, 4]), + ("d", "e"): as_column([3, 4, 5]), + }, multiindex=True, ) got = ca.select_by_label("a") check_ca_equal(expect, got) - expect = ColumnAccessor({"c": [1, 2, 3], "e": [2, 3, 4]}, multiindex=False) + expect = ColumnAccessor( + {"c": as_column([1, 2, 3]), "e": as_column([2, 3, 4])}, + multiindex=False, + ) got = ca.select_by_label(("a", "b")) check_ca_equal(expect, got) expect = ColumnAccessor( - {("b", "c"): [1, 2, 3], ("b", "e"): [2, 3, 4], ("d", "e"): [3, 4, 5]}, + { + ("b", "c"): as_column([1, 2, 3]), + ("b", "e"): as_column([2, 3, 4]), + ("d", "e"): as_column([3, 4, 5]), + }, multiindex=True, ) got = ca.select_by_label("a") check_ca_equal(expect, got) - expect = ColumnAccessor({"c": [1, 2, 3], "e": [2, 3, 4]}, multiindex=False) + expect = ColumnAccessor( + {"c": as_column([1, 2, 3]), "e": as_column([2, 3, 4])}, + multiindex=False, + ) got = ca.select_by_label(("a", "b")) check_ca_equal(expect, got) def test_select_by_label_simple_slice(): - ca = ColumnAccessor({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5]}) - expect = ColumnAccessor({"b": [2, 3, 4], "c": [3, 4, 5]}) + ca = ColumnAccessor( + { + "a": as_column([1, 2, 3]), + "b": as_column([2, 3, 4]), + "c": as_column([3, 4, 5]), + } + ) + expect = ColumnAccessor( + {"b": as_column([2, 3, 4]), "c": as_column([3, 4, 5])} + ) got = ca.select_by_label(slice("b", "c")) check_ca_equal(expect, got) @@ -167,10 +207,10 @@ def test_select_by_label_simple_slice(): def test_select_by_label_multiindex_slice(): ca = ColumnAccessor( { - ("a", "b", "c"): [1, 2, 3], - ("a", "b", "e"): [2, 3, 4], - ("a", "d", "e"): [3, 4, 5], - ("b", "x", ""): [4, 5, 6], + ("a", "b", "c"): as_column([1, 2, 3]), + ("a", "b", "e"): as_column([2, 3, 4]), + ("a", "d", "e"): as_column([3, 4, 5]), + ("b", "x", ""): as_column([4, 5, 6]), }, multiindex=True, ) # pandas needs columns to be sorted to do slicing with multiindex @@ -180,9 +220,9 @@ def test_select_by_label_multiindex_slice(): expect = ColumnAccessor( { - ("a", "b", "e"): [2, 3, 4], - ("a", "d", "e"): [3, 4, 5], - ("b", "x", ""): [4, 5, 6], + ("a", "b", "e"): as_column([2, 3, 4]), + ("a", "d", "e"): as_column([3, 4, 5]), + ("b", "x", ""): as_column([4, 5, 6]), }, multiindex=True, ) @@ -191,8 +231,16 @@ def test_select_by_label_multiindex_slice(): 
def test_by_label_list(): - ca = ColumnAccessor({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5]}) - expect = ColumnAccessor({"b": [2, 3, 4], "c": [3, 4, 5]}) + ca = ColumnAccessor( + { + "a": as_column([1, 2, 3]), + "b": as_column([2, 3, 4]), + "c": as_column([3, 4, 5]), + } + ) + expect = ColumnAccessor( + {"b": as_column([2, 3, 4]), "c": as_column([3, 4, 5])} + ) got = ca.select_by_label(["b", "c"]) check_ca_equal(expect, got) @@ -201,9 +249,13 @@ def test_select_by_index_simple(): """ Test getting a column by label """ - ca = ColumnAccessor({"a": [1, 2, 3], "b": [2, 3, 4]}) - check_ca_equal(ca.select_by_index(0), ColumnAccessor({"a": [1, 2, 3]})) - check_ca_equal(ca.select_by_index(1), ColumnAccessor({"b": [2, 3, 4]})) + ca = ColumnAccessor({"a": as_column([1, 2, 3]), "b": as_column([2, 3, 4])}) + check_ca_equal( + ca.select_by_index(0), ColumnAccessor({"a": as_column([1, 2, 3])}) + ) + check_ca_equal( + ca.select_by_index(1), ColumnAccessor({"b": as_column([2, 3, 4])}) + ) check_ca_equal(ca.select_by_index([0, 1]), ca) check_ca_equal(ca.select_by_index(slice(0, None)), ca) @@ -214,19 +266,19 @@ def test_select_by_index_multiindex(): """ ca = ColumnAccessor( { - ("a", "b", "c"): [1, 2, 3], - ("a", "b", "e"): [2, 3, 4], - ("b", "x", ""): [4, 5, 6], - ("a", "d", "e"): [3, 4, 5], + ("a", "b", "c"): as_column([1, 2, 3]), + ("a", "b", "e"): as_column([2, 3, 4]), + ("b", "x", ""): as_column([4, 5, 6]), + ("a", "d", "e"): as_column([3, 4, 5]), }, multiindex=True, ) expect = ColumnAccessor( { - ("a", "b", "c"): [1, 2, 3], - ("a", "b", "e"): [2, 3, 4], - ("b", "x", ""): [4, 5, 6], + ("a", "b", "c"): as_column([1, 2, 3]), + ("a", "b", "e"): as_column([2, 3, 4]), + ("b", "x", ""): as_column([4, 5, 6]), }, multiindex=True, ) @@ -235,9 +287,9 @@ def test_select_by_index_multiindex(): expect = ColumnAccessor( { - ("a", "b", "c"): [1, 2, 3], - ("a", "b", "e"): [2, 3, 4], - ("a", "d", "e"): [3, 4, 5], + ("a", "b", "c"): as_column([1, 2, 3]), + ("a", "b", "e"): as_column([2, 3, 4]), + ("a", "d", "e"): as_column([3, 4, 5]), }, multiindex=True, ) @@ -248,10 +300,10 @@ def test_select_by_index_multiindex(): def test_select_by_index_empty(): ca = ColumnAccessor( { - ("a", "b", "c"): [1, 2, 3], - ("a", "b", "e"): [2, 3, 4], - ("b", "x", ""): [4, 5, 6], - ("a", "d", "e"): [3, 4, 5], + ("a", "b", "c"): as_column([1, 2, 3]), + ("a", "b", "e"): as_column([2, 3, 4]), + ("b", "x", ""): as_column([4, 5, 6]), + ("a", "d", "e"): as_column([3, 4, 5]), }, multiindex=True, ) @@ -267,12 +319,20 @@ def test_select_by_index_empty(): def test_replace_level_values_RangeIndex(): ca = ColumnAccessor( - {("a"): [1, 2, 3], ("b"): [2, 3, 4], ("c"): [3, 4, 5]}, + { + ("a"): as_column([1, 2, 3]), + ("b"): as_column([2, 3, 4]), + ("c"): as_column([3, 4, 5]), + }, multiindex=False, ) expect = ColumnAccessor( - {("f"): [1, 2, 3], ("b"): [2, 3, 4], ("c"): [3, 4, 5]}, + { + ("f"): as_column([1, 2, 3]), + ("b"): as_column([2, 3, 4]), + ("c"): as_column([3, 4, 5]), + }, multiindex=False, ) @@ -282,12 +342,20 @@ def test_replace_level_values_RangeIndex(): def test_replace_level_values_MultiColumn(): ca = ColumnAccessor( - {("a", 1): [1, 2, 3], ("a", 2): [2, 3, 4], ("b", 1): [3, 4, 5]}, + { + ("a", 1): as_column([1, 2, 3]), + ("a", 2): as_column([2, 3, 4]), + ("b", 1): as_column([3, 4, 5]), + }, multiindex=True, ) expect = ColumnAccessor( - {("f", 1): [1, 2, 3], ("f", 2): [2, 3, 4], ("b", 1): [3, 4, 5]}, + { + ("f", 1): as_column([1, 2, 3]), + ("f", 2): as_column([2, 3, 4]), + ("b", 1): as_column([3, 4, 5]), + }, multiindex=True, ) @@ 
-303,7 +371,17 @@ def test_clear_nrows_empty_before(): def test_clear_nrows_empty_after(): - ca = ColumnAccessor({"new": [1]}) + ca = ColumnAccessor({"new": as_column([1])}) assert ca.nrows == 1 del ca["new"] assert ca.nrows == 0 + + +def test_not_rangeindex_and_multiindex(): + with pytest.raises(ValueError): + ColumnAccessor({}, multiindex=True, rangeindex=True) + + +def test_data_values_not_column_raises(): + with pytest.raises(ValueError): + ColumnAccessor({"a": [1]}) diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index a22a627523f..6a21cb1b9d7 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -1085,8 +1085,9 @@ def test_csv_reader_arrow_nativefile(path_or_buf): # Arrow FileSystem interface expect = cudf.read_csv(path_or_buf("filepath")) fs, path = pa_fs.FileSystem.from_uri(path_or_buf("filepath")) - with fs.open_input_file(path) as fil: - got = cudf.read_csv(fil) + with pytest.warns(FutureWarning): + with fs.open_input_file(path) as fil: + got = cudf.read_csv(fil) assert_eq(expect, got) @@ -1617,7 +1618,7 @@ def test_csv_reader_partial_dtype(dtype): StringIO('"A","B","C"\n0,1,2'), dtype=dtype, usecols=["A", "C"] ) - assert names_df == header_df + assert_eq(names_df, header_df) assert all(names_df.dtypes == ["int16", "int64"]) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 2009fc49ce5..e2ce5c03b70 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -10833,7 +10833,7 @@ def test_dataframe_contains(name, contains, other_names): expectation = contains is cudf.NA and name is cudf.NA assert (contains in pdf) == expectation assert (contains in gdf) == expectation - elif pd.api.types.is_float_dtype(gdf.columns.dtype): + elif gdf.columns.dtype.kind == "f": # In some cases, the columns are converted to an Index[float] based on # the other column names. That casts name values from None to np.nan. 
expectation = contains is np.nan and (name is None or name is np.nan) @@ -11100,3 +11100,12 @@ def test_from_records_with_index_no_shallow_copy(): data = np.array([(1.0, 2), (3.0, 4)], dtype=[("x", " PythonFile conversion diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 826a0e52f57..74f04c0584f 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -3885,3 +3885,28 @@ def test_group_by_raises_category_error(op): with pytest.raises(TypeError): df.groupby(df.a).agg(op) + + +def test_ngroups(): + pdf = pd.DataFrame({"a": [1, 1, 3], "b": range(3)}) + gdf = cudf.DataFrame.from_pandas(pdf) + + pgb = pdf.groupby("a") + ggb = gdf.groupby("a") + assert pgb.ngroups == ggb.ngroups + assert len(pgb) == len(ggb) + + +def test_ndim(): + pdf = pd.DataFrame({"a": [1, 1, 3], "b": range(3)}) + gdf = cudf.DataFrame.from_pandas(pdf) + + pgb = pdf.groupby("a") + ggb = gdf.groupby("a") + assert pgb.ndim == ggb.ndim + + pser = pd.Series(range(3)) + gser = cudf.Series.from_pandas(pser) + pgb = pser.groupby([0, 0, 1]) + ggb = gser.groupby(cudf.Series([0, 0, 1])) + assert pgb.ndim == ggb.ndim diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 9eba6122d26..722a64cb553 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -3294,3 +3294,12 @@ def test_index_assignment_no_shallow_copy(index): df = cudf.DataFrame(range(1)) df.index = index assert df.index is index + + +def test_bool_rangeindex_raises(): + assert_exceptions_equal( + lfunc=bool, + rfunc=bool, + lfunc_args_and_kwargs=[[pd.RangeIndex(0)]], + rfunc_args_and_kwargs=[[cudf.RangeIndex(0)]], + ) diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index 1941eec91eb..b7314a36e73 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -2161,3 +2161,21 @@ def test_nunique(array, dropna): result = gidx.nunique(dropna=dropna) expected = pidx.nunique(dropna=dropna) assert result == expected + + +def test_bool_raises(): + assert_exceptions_equal( + lfunc=bool, + rfunc=bool, + lfunc_args_and_kwargs=[[cudf.MultiIndex.from_arrays([range(1)])]], + rfunc_args_and_kwargs=[[pd.MultiIndex.from_arrays([range(1)])]], + ) + + +def test_unique_level(): + pd_mi = pd.MultiIndex.from_arrays([[1, 1, 2], [3, 3, 2]]) + cudf_mi = cudf.MultiIndex.from_pandas(pd_mi) + + result = pd_mi.unique(level=1) + expected = cudf_mi.unique(level=1) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index ecb7fd44422..3806b901b10 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -22,7 +22,7 @@ from pyarrow import fs as pa_fs, parquet as pq import cudf -from cudf._lib.parquet import ParquetReader +from cudf._lib.parquet import read_parquet_chunked from cudf.io.parquet import ( ParquetDatasetWriter, ParquetWriter, @@ -711,7 +711,8 @@ def test_parquet_reader_arrow_nativefile(parquet_path_or_buf): expect = cudf.read_parquet(parquet_path_or_buf("filepath")) fs, path = pa_fs.FileSystem.from_uri(parquet_path_or_buf("filepath")) with fs.open_input_file(path) as fil: - got = cudf.read_parquet(fil) + with pytest.warns(FutureWarning): + got = cudf.read_parquet(fil) assert_eq(expect, got) @@ -726,16 +727,18 @@ def test_parquet_reader_use_python_file_object( fs, _, paths = get_fs_token_paths(parquet_path_or_buf("filepath")) 
# Pass open fsspec file - with fs.open(paths[0], mode="rb") as fil: - got1 = cudf.read_parquet( - fil, use_python_file_object=use_python_file_object - ) + with pytest.warns(FutureWarning): + with fs.open(paths[0], mode="rb") as fil: + got1 = cudf.read_parquet( + fil, use_python_file_object=use_python_file_object + ) assert_eq(expect, got1) # Pass path only - got2 = cudf.read_parquet( - paths[0], use_python_file_object=use_python_file_object - ) + with pytest.warns(FutureWarning): + got2 = cudf.read_parquet( + paths[0], use_python_file_object=use_python_file_object + ) assert_eq(expect, got2) @@ -3752,7 +3755,7 @@ def test_parquet_chunked_reader( ) buffer = BytesIO() df.to_parquet(buffer) - reader = ParquetReader( + actual = read_parquet_chunked( [buffer], chunk_read_limit=chunk_read_limit, pass_read_limit=pass_read_limit, @@ -3762,7 +3765,6 @@ def test_parquet_chunked_reader( expected = cudf.read_parquet( buffer, use_pandas_metadata=use_pandas_metadata, row_groups=row_groups ) - actual = reader.read() assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py index 1247fa362ce..8be6463c699 100644 --- a/python/cudf/cudf/tests/test_reductions.py +++ b/python/cudf/cudf/tests/test_reductions.py @@ -248,16 +248,11 @@ def test_sum_masked(nelem): def test_sum_boolean(): s = Series(np.arange(100000)) - got = (s > 1).sum(dtype=np.int32) + got = (s > 1).sum() expect = 99998 assert expect == got - got = (s > 1).sum(dtype=np.bool_) - expect = True - - assert expect == got - def test_date_minmax(): np_data = np.random.normal(size=10**3) @@ -371,3 +366,11 @@ def test_reduction_column_multiindex(): result = df.mean() expected = df.to_pandas().mean() assert_eq(result, expected) + + +@pytest.mark.parametrize("op", ["sum", "product"]) +def test_dtype_deprecated(op): + ser = cudf.Series(range(5)) + with pytest.warns(FutureWarning): + result = getattr(ser, op)(dtype=np.dtype(np.int8)) + assert isinstance(result, np.int8) diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index a44bf791767..3ae318d3bf5 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -138,22 +138,24 @@ def test_read_csv(s3_base, s3so, pdf, bytes_per_thread): buffer = pdf.to_csv(index=False) # Use fsspec file object - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_csv( - f"s3://{bucket}/{fname}", - storage_options=s3so, - bytes_per_thread=bytes_per_thread, - use_python_file_object=False, - ) + with pytest.warns(FutureWarning): + with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): + got = cudf.read_csv( + f"s3://{bucket}/{fname}", + storage_options=s3so, + bytes_per_thread=bytes_per_thread, + use_python_file_object=False, + ) assert_eq(pdf, got) # Use Arrow PythonFile object - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_csv( - f"s3://{bucket}/{fname}", - storage_options=s3so, - use_python_file_object=True, - ) + with pytest.warns(FutureWarning): + with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): + got = cudf.read_csv( + f"s3://{bucket}/{fname}", + storage_options=s3so, + use_python_file_object=True, + ) assert_eq(pdf, got) @@ -166,8 +168,9 @@ def test_read_csv_arrow_nativefile(s3_base, s3so, pdf): fs = pa_fs.S3FileSystem( endpoint_override=s3so["client_kwargs"]["endpoint_url"], ) - with fs.open_input_file(f"{bucket}/{fname}") as fil: - got = cudf.read_csv(fil) + with 
pytest.warns(FutureWarning): + with fs.open_input_file(f"{bucket}/{fname}") as fil: + got = cudf.read_csv(fil) assert_eq(pdf, got) @@ -184,17 +187,18 @@ def test_read_csv_byte_range( # Use fsspec file object with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_csv( - f"s3://{bucket}/{fname}", - storage_options=s3so, - byte_range=(74, 73), - bytes_per_thread=bytes_per_thread - if not use_python_file_object - else None, - header=None, - names=["Integer", "Float", "Integer2", "String", "Boolean"], - use_python_file_object=use_python_file_object, - ) + with pytest.warns(FutureWarning): + got = cudf.read_csv( + f"s3://{bucket}/{fname}", + storage_options=s3so, + byte_range=(74, 73), + bytes_per_thread=bytes_per_thread + if not use_python_file_object + else None, + header=None, + names=["Integer", "Float", "Integer2", "String", "Boolean"], + use_python_file_object=use_python_file_object, + ) assert_eq(pdf.iloc[-2:].reset_index(drop=True), got) @@ -241,18 +245,19 @@ def test_read_parquet( # Check direct path handling buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got1 = cudf.read_parquet( - f"s3://{bucket}/{fname}", - open_file_options=( - {"precache_options": {"method": precache}} - if use_python_file_object - else None - ), - storage_options=s3so, - bytes_per_thread=bytes_per_thread, - columns=columns, - use_python_file_object=use_python_file_object, - ) + with pytest.warns(FutureWarning): + got1 = cudf.read_parquet( + f"s3://{bucket}/{fname}", + open_file_options=( + {"precache_options": {"method": precache}} + if use_python_file_object + else None + ), + storage_options=s3so, + bytes_per_thread=bytes_per_thread, + columns=columns, + use_python_file_object=use_python_file_object, + ) expect = pdf[columns] if columns else pdf assert_eq(expect, got1) @@ -263,12 +268,13 @@ def test_read_parquet( f"s3://{bucket}/{fname}", storage_options=s3so )[0] with fs.open(f"s3://{bucket}/{fname}", mode="rb") as f: - got2 = cudf.read_parquet( - f, - bytes_per_thread=bytes_per_thread, - columns=columns, - use_python_file_object=use_python_file_object, - ) + with pytest.warns(FutureWarning): + got2 = cudf.read_parquet( + f, + bytes_per_thread=bytes_per_thread, + columns=columns, + use_python_file_object=use_python_file_object, + ) assert_eq(expect, got2) @@ -353,11 +359,12 @@ def test_read_parquet_arrow_nativefile(s3_base, s3so, pdf, columns): pdf.to_parquet(path=buffer) buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - fs = pa_fs.S3FileSystem( - endpoint_override=s3so["client_kwargs"]["endpoint_url"], - ) - with fs.open_input_file(f"{bucket}/{fname}") as fil: - got = cudf.read_parquet(fil, columns=columns) + with pytest.warns(FutureWarning): + fs = pa_fs.S3FileSystem( + endpoint_override=s3so["client_kwargs"]["endpoint_url"], + ) + with fs.open_input_file(f"{bucket}/{fname}") as fil: + got = cudf.read_parquet(fil, columns=columns) expect = pdf[columns] if columns else pdf assert_eq(expect, got) @@ -372,12 +379,13 @@ def test_read_parquet_filters(s3_base, s3so, pdf_ext, precache): buffer.seek(0) filters = [("String", "==", "Omega")] with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_parquet( - f"s3://{bucket}/{fname}", - storage_options=s3so, - filters=filters, - open_file_options={"precache_options": {"method": precache}}, - ) + with pytest.warns(FutureWarning): + got = cudf.read_parquet( + f"s3://{bucket}/{fname}", + storage_options=s3so, + 
filters=filters, + open_file_options={"precache_options": {"method": precache}}, + ) # All row-groups should be filtered out assert_eq(pdf_ext.iloc[:0], got.reset_index(drop=True)) @@ -449,12 +457,13 @@ def test_read_orc(s3_base, s3so, datadir, use_python_file_object, columns): buffer = f.read() with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_orc( - f"s3://{bucket}/{fname}", - columns=columns, - storage_options=s3so, - use_python_file_object=use_python_file_object, - ) + with pytest.warns(FutureWarning): + got = cudf.read_orc( + f"s3://{bucket}/{fname}", + columns=columns, + storage_options=s3so, + use_python_file_object=use_python_file_object, + ) if columns: expect = expect[columns] @@ -475,8 +484,9 @@ def test_read_orc_arrow_nativefile(s3_base, s3so, datadir, columns): fs = pa_fs.S3FileSystem( endpoint_override=s3so["client_kwargs"]["endpoint_url"], ) - with fs.open_input_file(f"{bucket}/{fname}") as fil: - got = cudf.read_orc(fil, columns=columns) + with pytest.warns(FutureWarning): + with fs.open_input_file(f"{bucket}/{fname}") as fil: + got = cudf.read_orc(fil, columns=columns) if columns: expect = expect[columns] diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index af912bee342..b0788bcc0fc 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -1,7 +1,9 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. +from __future__ import annotations import datetime from decimal import Decimal +from typing import TYPE_CHECKING import cupy as cp import numpy as np @@ -11,6 +13,9 @@ import cudf +if TYPE_CHECKING: + from cudf._typing import DtypeObj + """Map numpy dtype to pyarrow types. Note that np.bool_ bitwidth (8) is different from pa.bool_ (1). Special handling is required when converting a Boolean column into arrow. 
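A minimal sketch of the Boolean caveat above (plain NumPy/PyArrow, independent of the helpers in this module):

    import numpy as np
    import pyarrow as pa

    # NumPy stores np.bool_ in a full byte, while Arrow's boolean type is
    # bit-packed, so a naive bitwidth-based mapping would disagree here.
    assert np.dtype(np.bool_).itemsize * 8 == 8  # 8 bits per value
    assert pa.bool_().bit_width == 1  # 1 bit per value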
@@ -89,10 +94,6 @@ BOOL_TYPES = {"bool"} ALL_TYPES = NUMERIC_TYPES | DATETIME_TYPES | TIMEDELTA_TYPES | OTHER_TYPES -# The NumPy scalar types are a bit of a mess as they align with the C types -# so for now we use the `sctypes` dict (although it was made private in 2.0) -_NUMPY_SCTYPES = np.sctypes if hasattr(np, "sctypes") else np._core.sctypes - def np_to_pa_dtype(dtype): """Util to convert numpy dtype to PyArrow dtype.""" @@ -114,12 +115,6 @@ def np_to_pa_dtype(dtype): return _np_pa_dtypes[cudf.dtype(dtype).type] -def numeric_normalize_types(*args): - """Cast all args to a common type using numpy promotion logic""" - dtype = np.result_type(*[a.dtype for a in args]) - return [a.astype(dtype) for a in args] - - def _find_common_type_decimal(dtypes): # Find the largest scale and the largest difference between # precision and scale of the columns to be concatenated @@ -330,32 +325,28 @@ def can_convert_to_column(obj): return is_column_like(obj) or cudf.api.types.is_list_like(obj) -def min_scalar_type(a, min_size=8): - return min_signed_type(a, min_size=min_size) - - -def min_signed_type(x, min_size=8): +def min_signed_type(x: int, min_size: int = 8) -> np.dtype: """ Return the smallest *signed* integer dtype that can represent the integer ``x`` """ - for int_dtype in _NUMPY_SCTYPES["int"]: + for int_dtype in (np.int8, np.int16, np.int32, np.int64): if (cudf.dtype(int_dtype).itemsize * 8) >= min_size: if np.iinfo(int_dtype).min <= x <= np.iinfo(int_dtype).max: - return int_dtype + return np.dtype(int_dtype) # resort to using `int64` and let numpy raise appropriate exception: return np.int64(x).dtype -def min_unsigned_type(x, min_size=8): +def min_unsigned_type(x: int, min_size: int = 8) -> np.dtype: """ Return the smallest *unsigned* integer dtype that can represent the integer ``x`` """ - for int_dtype in _NUMPY_SCTYPES["uint"]: + for int_dtype in (np.uint8, np.uint16, np.uint32, np.uint64): if (cudf.dtype(int_dtype).itemsize * 8) >= min_size: if 0 <= x <= np.iinfo(int_dtype).max: - return int_dtype + return np.dtype(int_dtype) # resort to using `uint64` and let numpy raise appropriate exception: return np.uint64(x).dtype @@ -373,10 +364,10 @@ def min_column_type(x, expected_type): if x.null_count == len(x): return x.dtype - if np.issubdtype(x.dtype, np.floating): + if x.dtype.kind == "f": return get_min_float_dtype(x) - elif np.issubdtype(expected_type, np.integer): + elif cudf.dtype(expected_type).kind in "iu": max_bound_dtype = np.min_scalar_type(x.max()) min_bound_dtype = np.min_scalar_type(x.min()) result_type = np.promote_types(max_bound_dtype, min_bound_dtype) @@ -582,25 +573,18 @@ def _dtype_pandas_compatible(dtype): return dtype -def _maybe_convert_to_default_type(dtype): +def _maybe_convert_to_default_type(dtype: DtypeObj) -> DtypeObj: """Convert `dtype` to default if specified by user. If not specified, return as is. 
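    For example (an illustrative sketch; assumes the caller has set the
    option beforehand, following the ``dtype.kind`` dispatch below):

    >>> cudf.set_option("default_integer_bitwidth", 32)
    >>> _maybe_convert_to_default_type(cudf.dtype("int64"))
    dtype('int32')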
""" - if cudf.get_option("default_integer_bitwidth"): - if cudf.api.types.is_signed_integer_dtype(dtype): - return cudf.dtype( - f'i{cudf.get_option("default_integer_bitwidth")//8}' - ) - elif cudf.api.types.is_unsigned_integer_dtype(dtype): - return cudf.dtype( - f'u{cudf.get_option("default_integer_bitwidth")//8}' - ) - if cudf.get_option( - "default_float_bitwidth" - ) and cudf.api.types.is_float_dtype(dtype): - return cudf.dtype(f'f{cudf.get_option("default_float_bitwidth")//8}') - + if ib := cudf.get_option("default_integer_bitwidth"): + if dtype.kind == "i": + return cudf.dtype(f"i{ib//8}") + elif dtype.kind == "u": + return cudf.dtype(f"u{ib//8}") + if (fb := cudf.get_option("default_float_bitwidth")) and dtype.kind == "f": + return cudf.dtype(f"f{fb//8}") return dtype diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 76c7f2bfdb8..80555750b3a 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -6,6 +6,7 @@ import warnings from io import BufferedWriter, BytesIO, IOBase, TextIOWrapper from threading import Thread +from typing import Callable import fsspec import fsspec.implementations.local @@ -15,6 +16,7 @@ from pyarrow import PythonFile as ArrowPythonFile from pyarrow.lib import NativeFile +from cudf.api.extensions import no_default from cudf.core._compat import PANDAS_LT_300 from cudf.utils.docutils import docfmt_partial @@ -24,7 +26,6 @@ except ImportError: fsspec_parquet = None - _BYTES_PER_THREAD_DEFAULT = 256 * 1024 * 1024 _ROW_GROUP_SIZE_BYTES_DEFAULT = 128 * 1024 * 1024 @@ -86,7 +87,7 @@ 1 20 rapids 2 30 ai """.format(remote_data_sources=_docstring_remote_sources) -doc_read_avro = docfmt_partial(docstring=_docstring_read_avro) +doc_read_avro: Callable = docfmt_partial(docstring=_docstring_read_avro) _docstring_read_parquet_metadata = """ Read a Parquet file's metadata and schema @@ -174,15 +175,23 @@ columns are also loaded. use_python_file_object : boolean, default True If True, Arrow-backed PythonFile objects will be used in place of fsspec - AbstractBufferedFile objects at IO time. Setting this argument to `False` - will require the entire file to be copied to host memory, and is highly - discouraged. + AbstractBufferedFile objects at IO time. + + .. deprecated:: 24.08 + `use_python_file_object` is deprecated and will be removed in a future + version of cudf, as PyArrow NativeFiles will no longer be accepted as + input/output in cudf readers/writers in the future. open_file_options : dict, optional Dictionary of key-value pairs to pass to the function used to open remote files. By default, this will be `fsspec.parquet.open_parquet_file`. To deactivate optimized precaching, set the "method" to `None` under the "precache_options" key. Note that the `open_file_func` key can also be used to specify a custom file-open function. + + .. deprecated:: 24.08 + `open_file_options` is deprecated as it was intended for + pyarrow file inputs, which will no longer be accepted as + input/output cudf readers/writers in the future. bytes_per_thread : int, default None Determines the number of bytes to be allocated per thread to read the files in parallel. When there is a file of large size, we get slightly @@ -468,8 +477,12 @@ If True, use row index if available for faster seeking. use_python_file_object : boolean, default True If True, Arrow-backed PythonFile objects will be used in place of fsspec - AbstractBufferedFile objects at IO time. 
This option is likely to improve - performance when making small reads from larger ORC files. + AbstractBufferedFile objects at IO time. + + .. deprecated:: 24.08 + `use_python_file_object` is deprecated and will be removed in a future + version of cudf, as PyArrow NativeFiles will no longer be accepted as + input/output in cudf readers/writers in the future. storage_options : dict, optional, default None Extra options that make sense for a particular storage connection, e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value @@ -934,7 +947,7 @@ -------- cudf.DataFrame.to_hdf : Write a HDF file from a DataFrame. """ -doc_read_hdf = docfmt_partial(docstring=_docstring_read_hdf) +doc_read_hdf: Callable = docfmt_partial(docstring=_docstring_read_hdf) _docstring_to_hdf = """ Write the contained data to an HDF5 file using HDFStore. @@ -1006,7 +1019,7 @@ cudf.DataFrame.to_parquet : Write a DataFrame to the binary parquet format. cudf.DataFrame.to_feather : Write out feather-format for DataFrames. """ -doc_to_hdf = docfmt_partial(docstring=_docstring_to_hdf) +doc_to_hdf: Callable = docfmt_partial(docstring=_docstring_to_hdf) _docstring_read_feather = """ Load a feather object from the file path, returning a DataFrame. @@ -1188,8 +1201,12 @@ the end of the range. use_python_file_object : boolean, default True If True, Arrow-backed PythonFile objects will be used in place of fsspec - AbstractBufferedFile objects at IO time. This option is likely to improve - performance when making small reads from larger CSV files. + AbstractBufferedFile objects at IO time. + + .. deprecated:: 24.08 + `use_python_file_object` is deprecated and will be removed in a future + version of cudf, as PyArrow NativeFiles will no longer be accepted as + input/output in cudf readers/writers in the future. storage_options : dict, optional, default None Extra options that make sense for a particular storage connection, e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value @@ -1409,7 +1426,7 @@ result : Series """ -doc_read_text = docfmt_partial(docstring=_docstring_text_datasource) +doc_read_text: Callable = docfmt_partial(docstring=_docstring_text_datasource) _docstring_get_reader_filepath_or_buffer = """ @@ -1430,9 +1447,19 @@ use_python_file_object : boolean, default False If True, Arrow-backed PythonFile objects will be used in place of fsspec AbstractBufferedFile objects. + + .. deprecated:: 24.08 + `use_python_file_object` is deprecated and will be removed in a future + version of cudf, as PyArrow NativeFiles will no longer be accepted as + input/output in cudf readers/writers. open_file_options : dict, optional Optional dictionary of keyword arguments to pass to `_open_remote_files` (used for remote storage only). + + .. deprecated:: 24.08 + `open_file_options` is deprecated as it was intended for + pyarrow file inputs, which will no longer be accepted as + input/output in cudf readers/writers in the future. allow_raw_text_input : boolean, default False If True, this indicates the input `path_or_data` could be a raw text input and will not check for its existence in the filesystem.
If False, @@ -1708,7 +1735,8 @@ def get_reader_filepath_or_buffer( mode="rb", fs=None, iotypes=(BytesIO, NativeFile), - use_python_file_object=False, + # no_default aliases to False + use_python_file_object=no_default, open_file_options=None, allow_raw_text_input=False, storage_options=None, @@ -1720,6 +1748,30 @@ def get_reader_filepath_or_buffer( path_or_data = stringify_pathlike(path_or_data) + if use_python_file_object is no_default: + use_python_file_object = False + elif use_python_file_object is not None: + warnings.warn( + "The 'use_python_file_object' keyword is deprecated and " + "will be removed in a future version.", + FutureWarning, + ) + else: + # Preserve the readers (e.g. read_csv) default of True + # if no use_python_file_object option is specified by the user + # for now (note: this is different from the default for this + # function of False) + # TODO: when non-pyarrow file reading perf is good enough + # we can default this to False + use_python_file_object = True + + if open_file_options is not None: + warnings.warn( + "The 'open_file_options' keyword is deprecated and " + "will be removed in a future version.", + FutureWarning, + ) + if isinstance(path_or_data, str): # Get a filesystem object if one isn't already available paths = [path_or_data] diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 7347ec7866a..c9b343e0f9f 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -6,6 +6,7 @@ import os import traceback import warnings +from contextlib import contextmanager import numpy as np import pandas as pd @@ -403,3 +404,28 @@ def _all_bools_with_nulls(lhs, rhs, bool_fill_value): if result_mask is not None: result_col = result_col.set_mask(result_mask.as_mask()) return result_col + + +@contextmanager +def maybe_filter_deprecation( + condition: bool, message: str, category: type[Warning] +): + """Conditionally filter a warning category. + + Parameters + ---------- + condition + If true, filter the warning + message + Message to match, passed to :func:`warnings.filterwarnings` + category + Category of warning, passed to :func:`warnings.filterwarnings` + """ + with warnings.catch_warnings(): + if condition: + warnings.filterwarnings( + "ignore", + message, + category=category, + ) + yield diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index dcb33b1fc1a..b2ddb06d8c9 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "ptxcompiler", "pyarrow>=16.1.0,<16.2.0a0", "rich", - "rmm==24.8.*,>=0.0.0a0", + "rmm==24.10.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -120,13 +120,14 @@ skip = [ [tool.rapids-build-backend] build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", "ninja", "numpy==1.23.*", "pyarrow==16.1.0.*", - "rmm==24.8.*,>=0.0.0a0", + "rmm==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
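A minimal usage sketch of the `maybe_filter_deprecation` helper added in `python/cudf/cudf/utils/utils.py` above. The warning text matches the NativeFile deprecation message used later in this patch; the `user_passed_option` flag is illustrative only:

```python
import warnings

from cudf.utils.utils import maybe_filter_deprecation

# Only silence the deprecation when the caller did not opt in to
# pyarrow file objects themselves; otherwise let it propagate.
user_passed_option = False
with maybe_filter_deprecation(
    condition=not user_passed_option,
    message="Support for reading pyarrow's NativeFile is deprecated",
    category=FutureWarning,
):
    # A matching FutureWarning raised in this block is ignored; any
    # other warning is unaffected.
    warnings.warn(
        "Support for reading pyarrow's NativeFile is deprecated "
        "and will be removed in a future release of cudf.",
        FutureWarning,
    )
```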
[tool.scikit-build] diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index badfdf06d15..a9b60133f42 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -18,7 +18,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ - "cudf==24.8.*,>=0.0.0a0", + "cudf==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.optional-dependencies] @@ -100,6 +100,7 @@ regex = "(?P.*)" [tool.rapids-build-backend] build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", diff --git a/python/cudf_polars/cudf_polars/callback.py b/python/cudf_polars/cudf_polars/callback.py index 764cdd3b3ca..f31193aa938 100644 --- a/python/cudf_polars/cudf_polars/callback.py +++ b/python/cudf_polars/cudf_polars/callback.py @@ -5,11 +5,15 @@ from __future__ import annotations +import os +import warnings from functools import partial from typing import TYPE_CHECKING import nvtx +from polars.exceptions import PerformanceWarning + from cudf_polars.dsl.translate import translate_ir if TYPE_CHECKING: @@ -61,6 +65,12 @@ def execute_with_cudf( try: with nvtx.annotate(message="ConvertIR", domain="cudf_polars"): nt.set_udf(partial(_callback, translate_ir(nt))) - except exception: + except exception as e: + if bool(int(os.environ.get("POLARS_VERBOSE", 0))): + warnings.warn( + f"Query execution with GPU not supported, reason: {type(e)}: {e}", + PerformanceWarning, + stacklevel=2, + ) if raise_on_fail: raise diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py index 42aba0fcdc0..02018548b2c 100644 --- a/python/cudf_polars/cudf_polars/containers/column.py +++ b/python/cudf_polars/cudf_polars/containers/column.py @@ -185,8 +185,7 @@ def nan_count(self) -> int: plc.reduce.reduce( plc.unary.is_nan(self.obj), plc.aggregation.sum(), - # TODO: pylibcudf needs to have a SizeType DataType singleton - plc.DataType(plc.TypeId.INT32), + plc.types.SIZE_TYPE, ) ).as_py() return 0 diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index cbeadf1426a..dba76855329 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -23,8 +23,6 @@ from typing_extensions import Self - import cudf - from cudf_polars.containers import Column @@ -83,16 +81,6 @@ def num_rows(self) -> int: """Number of rows.""" return 0 if len(self.columns) == 0 else self.table.num_rows() - @classmethod - def from_cudf(cls, df: cudf.DataFrame) -> Self: - """Create from a cudf dataframe.""" - return cls( - [ - NamedColumn(c.to_pylibcudf(mode="read"), name) - for name, c in df._data.items() - ] - ) - @classmethod def from_polars(cls, df: pl.DataFrame) -> Self: """ diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index f37cb3f475c..9e0fca3f52f 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -370,6 +370,10 @@ def do_evaluate( # datatype of pyarrow scalar is correct by construction. 
return Column(plc.Column.from_scalar(plc.interop.from_arrow(self.value), 1))
+    def collect_agg(self, *, depth: int) -> AggInfo:
+        """Collect information about aggregations in groupbys."""
+        return AggInfo([])
+
 class LiteralColumn(Expr):
     __slots__ = ("value",)
@@ -382,6 +386,13 @@ def __init__(self, dtype: plc.DataType, value: pl.Series) -> None:
         data = value.to_arrow()
         self.value = data.cast(dtypes.downcast_arrow_lists(data.type))
+    def get_hash(self) -> int:
+        """Compute a hash of the column."""
+        # This is stricter than necessary, but we only need this hash
+        # for identity in groupby replacements so it's OK. And this
+        # way we avoid doing potentially expensive compute.
+        return hash((type(self), self.dtype, id(self.value)))
+
     def do_evaluate(
         self,
         df: DataFrame,
@@ -393,6 +404,10 @@ def do_evaluate(
         # datatype of pyarrow array is correct by construction.
         return Column(plc.interop.from_arrow(self.value))
+    def collect_agg(self, *, depth: int) -> AggInfo:
+        """Collect information about aggregations in groupbys."""
+        return AggInfo([])
+
 class Col(Expr):
     __slots__ = ("name",)
@@ -867,7 +882,14 @@ def __init__(
         self.name = name
         self.options = options
         self.children = children
-        if self.name not in ("mask_nans", "round", "setsorted", "unique"):
+        if self.name not in (
+            "mask_nans",
+            "round",
+            "setsorted",
+            "unique",
+            "dropnull",
+            "fill_null",
+        ):
             raise NotImplementedError(f"Unary function {name=}")
     def do_evaluate(
@@ -953,6 +975,27 @@ def do_evaluate(
                 order=order,
                 null_order=null_order,
             )
+        elif self.name == "dropnull":
+            (column,) = (
+                child.evaluate(df, context=context, mapping=mapping)
+                for child in self.children
+            )
+            return Column(
+                plc.stream_compaction.drop_nulls(
+                    plc.Table([column.obj]), [0], 1
+                ).columns()[0]
+            )
+        elif self.name == "fill_null":
+            column = self.children[0].evaluate(df, context=context, mapping=mapping)
+            if isinstance(self.children[1], Literal):
+                arg = plc.interop.from_arrow(self.children[1].value)
+            else:
+                evaluated = self.children[1].evaluate(
+                    df, context=context, mapping=mapping
+                )
+                arg = evaluated.obj_scalar if evaluated.is_scalar else evaluated.obj
+            return Column(plc.replace.replace_nulls(column.obj, arg))
+
         raise NotImplementedError(
             f"Unimplemented unary function {self.name=}"
         )  # pragma: no cover; init trips first
@@ -1145,6 +1188,14 @@ class Cast(Expr):
     def __init__(self, dtype: plc.DataType, value: Expr) -> None:
         super().__init__(dtype)
         self.children = (value,)
+        if not (
+            plc.traits.is_fixed_width(self.dtype)
+            and plc.traits.is_fixed_width(value.dtype)
+            and plc.unary.is_supported_cast(value.dtype, self.dtype)
+        ):
+            raise NotImplementedError(
+                f"Can't cast {value.dtype.id().name} to {self.dtype.id().name}"
+            )
     def do_evaluate(
         self,
@@ -1377,13 +1428,14 @@ def __init__(
         super().__init__(dtype)
         self.op = op
         self.children = (left, right)
-        if (
-            op in (plc.binaryop.BinaryOperator.ADD, plc.binaryop.BinaryOperator.SUB)
-            and plc.traits.is_chrono(left.dtype)
-            and plc.traits.is_chrono(right.dtype)
-            and not dtypes.have_compatible_resolution(left.dtype.id(), right.dtype.id())
+        if not plc.binaryop.is_supported_operation(
+            self.dtype, left.dtype, right.dtype, op
         ):
-            raise NotImplementedError("Casting rules for timelike types")
+            raise NotImplementedError(
+                f"Operation {op.name} not supported "
+                f"for types {left.dtype.id().name} and {right.dtype.id().name} "
+                f"with output type {self.dtype.id().name}"
+            )
     _MAPPING: ClassVar[dict[pl_expr.Operator, plc.binaryop.BinaryOperator]] = {
         pl_expr.Operator.Eq:
plc.binaryop.BinaryOperator.EQUAL, diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index cce0c4a3d94..7f62dff4389 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -25,7 +25,6 @@ import polars as pl -import cudf import cudf._lib.pylibcudf as plc import cudf_polars.dsl.expr as expr @@ -205,12 +204,14 @@ class Scan(IR): def __post_init__(self) -> None: """Validate preconditions.""" - if self.file_options.n_rows is not None: - raise NotImplementedError("row limit in scan") - if self.typ not in ("csv", "parquet"): + if self.typ not in ("csv", "parquet", "ndjson"): # pragma: no cover + # This line is unhittable ATM since IPC/Anonymous scan raise + # on the polars side raise NotImplementedError(f"Unhandled scan type: {self.typ}") + if self.typ == "ndjson" and self.file_options.n_rows is not None: + raise NotImplementedError("row limit in scan") if self.cloud_options is not None and any( - self.cloud_options[k] is not None for k in ("aws", "azure", "gcp") + self.cloud_options.get(k) is not None for k in ("aws", "azure", "gcp") ): raise NotImplementedError( "Read from cloud storage" @@ -235,17 +236,21 @@ def __post_init__(self) -> None: # Need to do some file introspection to get the number # of columns so that column projection works right. raise NotImplementedError("Reading CSV without header") + elif self.typ == "ndjson": + # TODO: consider handling the low memory option here + # (maybe use chunked JSON reader) + if self.reader_options["ignore_errors"]: + raise NotImplementedError( + "ignore_errors is not supported in the JSON reader" + ) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" options = self.file_options with_columns = options.with_columns row_index = options.row_index + nrows = self.file_options.n_rows if self.file_options.n_rows is not None else -1 if self.typ == "csv": - dtype_map = { - name: cudf._lib.types.PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[typ.id()] - for name, typ in self.schema.items() - } parse_options = self.reader_options["parse_options"] sep = chr(parse_options["separator"]) quote = chr(parse_options["quote_char"]) @@ -280,35 +285,71 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: pieces = [] for p in self.paths: skiprows = self.reader_options["skip_rows"] - # TODO: read_csv expands globs which we should not do, - # because polars will already have handled them. 
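The removed TODO above records a subtlety of the new pylibcudf CSV path: polars expands globs before cudf-polars ever sees the paths, so the reader must treat each path literally. A sketch of the user-visible contract this preserves (hypothetical file names, mirroring the `test_scan_csv_multi` tests later in this patch):

```python
import polars as pl

# polars has already expanded "test*.csv" into concrete paths by the
# time Scan.evaluate runs, so re-globbing here would read files twice.
q_globbed = pl.scan_csv("test*.csv", glob=True)

# With glob=False the pattern is a literal file name and must be
# opened as-is rather than expanded.
q_literal = pl.scan_csv("test*.csv", glob=False)
```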
path = Path(p) with path.open() as f: while f.readline() == "\n": skiprows += 1 - pieces.append( - cudf.read_csv( - path, - sep=sep, - quotechar=quote, - lineterminator=eol, - names=column_names, - header=header, - usecols=usecols, - na_filter=True, - na_values=null_values, - keep_default_na=False, - skiprows=skiprows, - comment=comment, - decimal=decimal, - dtype=dtype_map, - ) + tbl_w_meta = plc.io.csv.read_csv( + plc.io.SourceInfo([path]), + delimiter=sep, + quotechar=quote, + lineterminator=eol, + col_names=column_names, + header=header, + usecols=usecols, + na_filter=True, + na_values=null_values, + keep_default_na=False, + skiprows=skiprows, + comment=comment, + decimal=decimal, + dtypes=self.schema, + nrows=nrows, + ) + pieces.append(tbl_w_meta) + tables, colnames = zip( + *( + (piece.tbl, piece.column_names(include_children=False)) + for piece in pieces ) - df = DataFrame.from_cudf(cudf.concat(pieces)) + ) + df = DataFrame.from_table( + plc.concatenate.concatenate(list(tables)), + colnames[0], + ) elif self.typ == "parquet": - cdf = cudf.read_parquet(self.paths, columns=with_columns) - assert isinstance(cdf, cudf.DataFrame) - df = DataFrame.from_cudf(cdf) + tbl_w_meta = plc.io.parquet.read_parquet( + plc.io.SourceInfo(self.paths), + columns=with_columns, + num_rows=nrows, + ) + df = DataFrame.from_table( + tbl_w_meta.tbl, + # TODO: consider nested column names? + tbl_w_meta.column_names(include_children=False), + ) + elif self.typ == "ndjson": + json_schema: list[tuple[str, str, list]] = [ + (name, typ, []) for name, typ in self.schema.items() + ] + plc_tbl_w_meta = plc.io.json.read_json( + plc.io.SourceInfo(self.paths), + lines=True, + dtypes=json_schema, + prune_columns=True, + ) + # TODO: I don't think cudf-polars supports nested types in general right now + # (but when it does, we should pass child column names from nested columns in) + df = DataFrame.from_table( + plc_tbl_w_meta.tbl, plc_tbl_w_meta.column_names(include_children=False) + ) + col_order = list(self.schema.keys()) + # TODO: remove condition when dropping support for polars 1.0 + # https://github.com/pola-rs/polars/pull/17363 + if row_index is not None and row_index[0] in self.schema: + col_order.remove(row_index[0]) + if col_order is not None: + df = df.select(col_order) else: raise NotImplementedError( f"Unhandled scan type: {self.typ}" @@ -335,13 +376,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: null_order=plc.types.NullOrder.AFTER, ) df = DataFrame([index, *df.columns]) - # TODO: should be true, but not the case until we get - # cudf-classic out of the loop for IO since it converts date32 - # to datetime. 
- # assert all( - # c.obj.type() == dtype - # for c, dtype in zip(df.columns, self.schema.values()) - # ) + assert all(c.obj.type() == self.schema[c.name] for c in df.columns) if self.predicate is None: return df else: @@ -514,7 +549,7 @@ def check_agg(agg: expr.Expr) -> int: return max(GroupBy.check_agg(child) for child in agg.children) elif isinstance(agg, expr.Agg): return 1 + max(GroupBy.check_agg(child) for child in agg.children) - elif isinstance(agg, (expr.Len, expr.Col, expr.Literal)): + elif isinstance(agg, (expr.Len, expr.Col, expr.Literal, expr.LiteralColumn)): return 0 else: raise NotImplementedError(f"No handler for {agg=}") @@ -574,7 +609,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: results = [ req.evaluate(result_subs, mapping=mapping) for req in self.agg_requests ] - return DataFrame([*result_keys, *results]).slice(self.options.slice) + return DataFrame(broadcast(*result_keys, *results)).slice(self.options.slice) @dataclasses.dataclass @@ -653,6 +688,59 @@ def _joiners( else: assert_never(how) + def _reorder_maps( + self, + left_rows: int, + lg: plc.Column, + left_policy: plc.copying.OutOfBoundsPolicy, + right_rows: int, + rg: plc.Column, + right_policy: plc.copying.OutOfBoundsPolicy, + ) -> list[plc.Column]: + """ + Reorder gather maps to satisfy polars join order restrictions. + + Parameters + ---------- + left_rows + Number of rows in left table + lg + Left gather map + left_policy + Nullify policy for left map + right_rows + Number of rows in right table + rg + Right gather map + right_policy + Nullify policy for right map + + Returns + ------- + list of reordered left and right gather maps. + + Notes + ----- + For a left join, the polars result preserves the order of the + left keys, and is stable wrt the right keys. For all other + joins, there is no order obligation. + """ + dt = plc.interop.to_arrow(plc.types.SIZE_TYPE) + init = plc.interop.from_arrow(pa.scalar(0, type=dt)) + step = plc.interop.from_arrow(pa.scalar(1, type=dt)) + left_order = plc.copying.gather( + plc.Table([plc.filling.sequence(left_rows, init, step)]), lg, left_policy + ) + right_order = plc.copying.gather( + plc.Table([plc.filling.sequence(right_rows, init, step)]), rg, right_policy + ) + return plc.sorting.stable_sort_by_key( + plc.Table([lg, rg]), + plc.Table([*left_order.columns(), *right_order.columns()]), + [plc.types.Order.ASCENDING, plc.types.Order.ASCENDING], + [plc.types.NullOrder.AFTER, plc.types.NullOrder.AFTER], + ).columns() + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" left = self.left.evaluate(cache=cache) @@ -693,6 +781,11 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: result = DataFrame.from_table(table, left.column_names) else: lg, rg = join_fn(left_on.table, right_on.table, null_equality) + if how == "left": + # Order of left table is preserved + lg, rg = self._reorder_maps( + left.num_rows, lg, left_policy, right.num_rows, rg, right_policy + ) if coalesce and how == "inner": right = right.discard_columns(right_on.column_names_set) left = DataFrame.from_table( @@ -1041,9 +1134,48 @@ class HConcat(IR): dfs: list[IR] """List of inputs.""" + @staticmethod + def _extend_with_nulls(table: plc.Table, *, nrows: int) -> plc.Table: + """ + Extend a table with nulls. + + Parameters + ---------- + table + Table to extend + nrows + Number of additional rows + + Returns + ------- + New pylibcudf table. 
+ """ + return plc.concatenate.concatenate( + [ + table, + plc.Table( + [ + plc.Column.all_null_like(column, nrows) + for column in table.columns() + ] + ), + ] + ) + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" dfs = [df.evaluate(cache=cache) for df in self.dfs] + max_rows = max(df.num_rows for df in dfs) + # Horizontal concatenation extends shorter tables with nulls + dfs = [ + df + if df.num_rows == max_rows + else DataFrame.from_table( + self._extend_with_nulls(df.table, nrows=max_rows - df.num_rows), + df.column_names, + ) + for df in dfs + ] return DataFrame( list(itertools.chain.from_iterable(df.columns for df in dfs)), ) diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py index a9a4ae5f0a6..d37c96a15de 100644 --- a/python/cudf_polars/cudf_polars/testing/asserts.py +++ b/python/cudf_polars/cudf_polars/testing/asserts.py @@ -14,8 +14,6 @@ from cudf_polars.dsl.translate import translate_ir if TYPE_CHECKING: - from collections.abc import Mapping - import polars as pl from cudf_polars.typing import OptimizationArgs @@ -26,7 +24,9 @@ def assert_gpu_result_equal( lazydf: pl.LazyFrame, *, - collect_kwargs: Mapping[OptimizationArgs, bool] | None = None, + collect_kwargs: dict[OptimizationArgs, bool] | None = None, + polars_collect_kwargs: dict[OptimizationArgs, bool] | None = None, + cudf_collect_kwargs: dict[OptimizationArgs, bool] | None = None, check_row_order: bool = True, check_column_order: bool = True, check_dtypes: bool = True, @@ -43,8 +43,17 @@ def assert_gpu_result_equal( lazydf frame to collect. collect_kwargs - Keyword arguments to pass to collect. Useful for controlling - optimization settings. + Common keyword arguments to pass to collect for both polars CPU and + cudf-polars. + Useful for controlling optimization settings. + polars_collect_kwargs + Keyword arguments to pass to collect for execution on polars CPU. + Overrides kwargs in collect_kwargs. + Useful for controlling optimization settings. + cudf_collect_kwargs + Keyword arguments to pass to collect for execution on cudf-polars. + Overrides kwargs in collect_kwargs. + Useful for controlling optimization settings. check_row_order Expect rows to be in same order check_column_order @@ -68,10 +77,19 @@ def assert_gpu_result_equal( NotImplementedError If GPU collection failed in some way. 
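A sketch of how the split collect kwargs above are intended to be used, assuming a trivial query; the `projection_pushdown` override mirrors the polars CPU workaround applied in `test_scan.py` later in this patch:

```python
import polars as pl

from cudf_polars.testing.asserts import assert_gpu_result_equal

q = pl.LazyFrame({"a": [1, 2, 3]}).select(pl.col("a") * 2)

# Both engines collect with the common kwargs; the polars CPU baseline
# additionally runs with projection pushdown disabled.
assert_gpu_result_equal(
    q,
    collect_kwargs={"predicate_pushdown": True},
    polars_collect_kwargs={"projection_pushdown": False},
)
```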
""" - collect_kwargs = {} if collect_kwargs is None else collect_kwargs - expect = lazydf.collect(**collect_kwargs) + if collect_kwargs is None: + collect_kwargs = {} + final_polars_collect_kwargs = collect_kwargs.copy() + final_cudf_collect_kwargs = collect_kwargs.copy() + if polars_collect_kwargs is not None: + final_polars_collect_kwargs.update(polars_collect_kwargs) + if cudf_collect_kwargs is not None: # pragma: no cover + # exclude from coverage since not used ATM + # but this is probably still useful + final_cudf_collect_kwargs.update(cudf_collect_kwargs) + expect = lazydf.collect(**final_polars_collect_kwargs) got = lazydf.collect( - **collect_kwargs, + **final_cudf_collect_kwargs, post_opt_callback=partial(execute_with_cudf, raise_on_fail=True), ) assert_frame_equal( diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py index 918cd024fa2..cd68d021286 100644 --- a/python/cudf_polars/cudf_polars/utils/dtypes.py +++ b/python/cudf_polars/cudf_polars/utils/dtypes.py @@ -14,43 +14,7 @@ import cudf._lib.pylibcudf as plc -__all__ = ["from_polars", "downcast_arrow_lists", "have_compatible_resolution"] - - -def have_compatible_resolution(lid: plc.TypeId, rid: plc.TypeId): - """ - Do two datetime typeids have matching resolution for a binop. - - Parameters - ---------- - lid - Left type id - rid - Right type id - - Returns - ------- - True if resolutions are compatible, False otherwise. - - Notes - ----- - Polars has different casting rules for combining - datetimes/durations than libcudf, and while we don't encode the - casting rules fully, just reject things we can't handle. - - Precondition for correctness: both lid and rid are timelike. - """ - if lid == rid: - return True - # Timestamps are smaller than durations in the libcudf enum. - lid, rid = sorted([lid, rid]) - if lid == plc.TypeId.TIMESTAMP_MILLISECONDS: - return rid == plc.TypeId.DURATION_MILLISECONDS - elif lid == plc.TypeId.TIMESTAMP_MICROSECONDS: - return rid == plc.TypeId.DURATION_MICROSECONDS - elif lid == plc.TypeId.TIMESTAMP_NANOSECONDS: - return rid == plc.TypeId.DURATION_NANOSECONDS - return False +__all__ = ["from_polars", "downcast_arrow_lists"] def downcast_arrow_lists(typ: pa.DataType) -> pa.DataType: @@ -153,7 +117,8 @@ def from_polars(dtype: pl.DataType) -> plc.DataType: # TODO: Hopefully return plc.DataType(plc.TypeId.EMPTY) elif isinstance(dtype, pl.List): - # TODO: This doesn't consider the value type. 
+ # Recurse to catch unsupported inner types + _ = from_polars(dtype.inner) return plc.DataType(plc.TypeId.LIST) else: raise NotImplementedError(f"{dtype=} conversion not supported") diff --git a/python/cudf_polars/cudf_polars/utils/versions.py b/python/cudf_polars/cudf_polars/utils/versions.py index a9ac14c25aa..9807cffb384 100644 --- a/python/cudf_polars/cudf_polars/utils/versions.py +++ b/python/cudf_polars/cudf_polars/utils/versions.py @@ -15,6 +15,7 @@ POLARS_VERSION_GE_10 = POLARS_VERSION >= parse("1.0") POLARS_VERSION_GE_11 = POLARS_VERSION >= parse("1.1") POLARS_VERSION_GE_12 = POLARS_VERSION >= parse("1.2") +POLARS_VERSION_GE_121 = POLARS_VERSION >= parse("1.2.1") POLARS_VERSION_GT_10 = POLARS_VERSION > parse("1.0") POLARS_VERSION_GT_11 = POLARS_VERSION > parse("1.1") POLARS_VERSION_GT_12 = POLARS_VERSION > parse("1.2") diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md index 874bb849747..6cd36136bf8 100644 --- a/python/cudf_polars/docs/overview.md +++ b/python/cudf_polars/docs/overview.md @@ -8,7 +8,7 @@ You will need: preferred configuration. Or else, use [rustup](https://www.rust-lang.org/tools/install) 2. A [cudf development - environment](https://github.com/rapidsai/cudf/blob/branch-24.08/CONTRIBUTING.md#setting-up-your-build-environment). + environment](https://github.com/rapidsai/cudf/blob/branch-24.10/CONTRIBUTING.md#setting-up-your-build-environment). The combined devcontainer works, or whatever your favourite approach is. > ![NOTE] These instructions will get simpler as we merge code in. diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 0b559f7a8e9..f8a1973bdbf 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -19,7 +19,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ - "cudf==24.8.*,>=0.0.0a0", + "cudf==24.10.*,>=0.0.0a0", "polars>=1.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -182,3 +182,4 @@ docstring-code-format = true [tool.rapids-build-backend] build-backend = "setuptools.build_meta" dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true" diff --git a/python/cudf_polars/tests/expressions/test_casting.py b/python/cudf_polars/tests/expressions/test_casting.py new file mode 100644 index 00000000000..3e003054338 --- /dev/null +++ b/python/cudf_polars/tests/expressions/test_casting.py @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import pytest + +import polars as pl + +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) + +_supported_dtypes = [(pl.Int8(), pl.Int64())] + +_unsupported_dtypes = [ + (pl.String(), pl.Int64()), +] + + +@pytest.fixture +def dtypes(request): + return request.param + + +@pytest.fixture +def tests(dtypes): + fromtype, totype = dtypes + if fromtype == pl.String(): + data = ["a", "b", "c"] + else: + data = [1, 2, 3] + return pl.DataFrame( + { + "a": pl.Series(data, dtype=fromtype), + } + ).lazy(), totype + + +@pytest.mark.parametrize("dtypes", _supported_dtypes, indirect=True) +def test_cast_supported(tests): + df, totype = tests + q = df.select(pl.col("a").cast(totype)) + assert_gpu_result_equal(q) + + +@pytest.mark.parametrize("dtypes", _unsupported_dtypes, indirect=True) +def test_cast_unsupported(tests): + df, totype = tests + assert_ir_translation_raises( + df.select(pl.col("a").cast(totype)), NotImplementedError + ) diff --git a/python/cudf_polars/tests/expressions/test_literal.py b/python/cudf_polars/tests/expressions/test_literal.py index 55e688428bd..5bd3131d1d7 100644 --- a/python/cudf_polars/tests/expressions/test_literal.py +++ b/python/cudf_polars/tests/expressions/test_literal.py @@ -6,6 +6,8 @@ import polars as pl +import cudf._lib.pylibcudf as plc + from cudf_polars.testing.asserts import ( assert_gpu_result_equal, assert_ir_translation_raises, @@ -64,11 +66,17 @@ def test_timelike_literal(timestamp, timedelta): adjusted=timestamp + timedelta, two_delta=timedelta + timedelta, ) - schema = q.collect_schema() - time_type = schema["time"] - delta_type = schema["delta"] - if dtypes.have_compatible_resolution( - dtypes.from_polars(time_type).id(), dtypes.from_polars(delta_type).id() + schema = {k: dtypes.from_polars(v) for k, v in q.collect_schema().items()} + if plc.binaryop.is_supported_operation( + schema["adjusted"], + schema["time"], + schema["delta"], + plc.binaryop.BinaryOperator.ADD, + ) and plc.binaryop.is_supported_operation( + schema["two_delta"], + schema["delta"], + schema["delta"], + plc.binaryop.BinaryOperator.ADD, ): assert_gpu_result_equal(q) else: diff --git a/python/cudf_polars/tests/expressions/test_numeric_binops.py b/python/cudf_polars/tests/expressions/test_numeric_binops.py index b6bcd0026fa..8f68bbc460c 100644 --- a/python/cudf_polars/tests/expressions/test_numeric_binops.py +++ b/python/cudf_polars/tests/expressions/test_numeric_binops.py @@ -6,7 +6,10 @@ import polars as pl -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) dtypes = [ pl.Int8, @@ -111,3 +114,12 @@ def test_binop_with_scalar(left_scalar, right_scalar): q = df.select(lop / rop) assert_gpu_result_equal(q) + + +def test_numeric_to_string_cast_fails(): + df = pl.DataFrame( + {"a": [1, 1, 2, 3, 3, 4, 1], "b": [None, 2, 3, 4, 5, 6, 7]} + ).lazy() + q = df.select(pl.col("a").cast(pl.String)) + + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py index 8cf65dd51ac..df08e15baa4 100644 --- a/python/cudf_polars/tests/expressions/test_stringfunction.py +++ b/python/cudf_polars/tests/expressions/test_stringfunction.py @@ -34,7 +34,9 @@ def ldf(with_nulls): if with_nulls: a[4] = None a[-3] = None - return pl.LazyFrame({"a": 
a, "b": range(len(a))}) + return pl.LazyFrame( + {"a": a, "b": range(len(a)), "c": [str(i) for i in range(len(a))]} + ) slice_cases = [ @@ -84,7 +86,7 @@ def test_contains_re_non_strict_raises(ldf): def test_contains_re_non_literal_raises(ldf): - q = ldf.select(pl.col("a").str.contains(pl.col("b"), literal=False)) + q = ldf.select(pl.col("a").str.contains(pl.col("c"), literal=False)) assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/test_config.py b/python/cudf_polars/tests/test_config.py new file mode 100644 index 00000000000..5b4bba55552 --- /dev/null +++ b/python/cudf_polars/tests/test_config.py @@ -0,0 +1,34 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import pytest + +import polars as pl + +from cudf_polars.dsl.ir import IR +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) + + +def test_polars_verbose_warns(monkeypatch): + def raise_unimplemented(self): + raise NotImplementedError("We don't support this") + + monkeypatch.setattr(IR, "__post_init__", raise_unimplemented) + q = pl.LazyFrame({}) + # Ensure that things raise + assert_ir_translation_raises(q, NotImplementedError) + with ( + pl.Config(verbose=True), + pytest.raises(pl.exceptions.ComputeError), + pytest.warns( + pl.exceptions.PerformanceWarning, + match="Query execution with GPU not supported", + ), + ): + # And ensure that collecting issues the correct warning. + assert_gpu_result_equal(q) diff --git a/python/cudf_polars/tests/test_drop_nulls.py b/python/cudf_polars/tests/test_drop_nulls.py new file mode 100644 index 00000000000..5dfe9f66a97 --- /dev/null +++ b/python/cudf_polars/tests/test_drop_nulls.py @@ -0,0 +1,65 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import pytest + +import polars as pl + +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) + + +@pytest.fixture( + params=[ + [1, 2, 1, 3, 5, None, None], + [1.5, 2.5, None, 1.5, 3, float("nan"), 3], + [], + [None, None], + [1, 2, 3, 4, 5], + ] +) +def null_data(request): + is_empty = pl.Series(request.param).dtype == pl.Null + return pl.DataFrame( + { + "a": pl.Series(request.param, dtype=pl.Float64 if is_empty else None), + "b": pl.Series(request.param, dtype=pl.Float64 if is_empty else None), + } + ).lazy() + + +def test_drop_null(null_data): + q = null_data.select(pl.col("a").drop_nulls()) + assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "value", + [0, pl.col("a").mean(), pl.col("b")], + ids=["scalar", "aggregation", "column_expression"], +) +def test_fill_null(null_data, value): + q = null_data.select(pl.col("a").fill_null(value)) + assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "strategy", ["forward", "backward", "min", "max", "mean", "zero", "one"] +) +def test_fill_null_with_strategy(null_data, strategy): + q = null_data.select(pl.col("a").fill_null(strategy=strategy)) + + # Not yet exposed to python from rust + assert_ir_translation_raises(q, NotImplementedError) + + +@pytest.mark.parametrize("strategy", ["forward", "backward"]) +@pytest.mark.parametrize("limit", [0, 1, 2]) +def test_fill_null_with_limit(null_data, strategy, limit): + q = null_data.select(pl.col("a").fill_null(strategy=strategy, limit=limit)) + + # Not yet exposed to python from rust + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index b07d8e38217..a75825ef3d3 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -155,3 +155,31 @@ def test_groupby_nan_minmax_raises(op): q = df.group_by("key").agg(op(pl.col("value"))) assert_ir_translation_raises(q, NotImplementedError) + + +@pytest.mark.parametrize( + "key", + [ + pytest.param( + 1, + marks=pytest.mark.xfail( + versions.POLARS_VERSION_GE_121, reason="polars 1.2.1 disallows this" + ), + ), + pl.col("key1"), + ], +) +@pytest.mark.parametrize( + "expr", + [ + pl.lit(1).alias("value"), + pl.lit([[4, 5, 6]]).alias("value"), + pl.col("float") * (1 - pl.col("int")), + [pl.lit(2).alias("value"), pl.col("float") * 2], + ], +) +def test_groupby_literal_in_agg(df, key, expr): + # check_row_order=False doesn't work for list aggregations + # so just sort by the group key + q = df.group_by(key).agg(expr).sort(key, maintain_order=True) + assert_gpu_result_equal(q) diff --git a/python/cudf_polars/tests/test_hconcat.py b/python/cudf_polars/tests/test_hconcat.py index 46cbb21b25a..4737aa18028 100644 --- a/python/cudf_polars/tests/test_hconcat.py +++ b/python/cudf_polars/tests/test_hconcat.py @@ -17,3 +17,12 @@ def test_hconcat(): ldf2 = ldf.select((pl.col("a") + pl.col("b")).alias("c")) query = pl.concat([ldf, ldf2], how="horizontal") assert_gpu_result_equal(query) + + +def test_hconcat_different_heights(): + left = pl.LazyFrame({"a": [1, 2, 3, 4]}) + + right = pl.LazyFrame({"b": [[1], [2]], "c": ["a", "bcde"]}) + + q = pl.concat([left, right], how="horizontal") + assert_gpu_result_equal(q) diff --git a/python/cudf_polars/tests/test_join.py b/python/cudf_polars/tests/test_join.py index 89f6fd3455b..1e880cdc6de 100644 --- a/python/cudf_polars/tests/test_join.py +++ 
b/python/cudf_polars/tests/test_join.py @@ -12,65 +12,68 @@ ) +@pytest.fixture(params=[False, True], ids=["nulls_not_equal", "nulls_equal"]) +def join_nulls(request): + return request.param + + +@pytest.fixture(params=["inner", "left", "semi", "anti", "full"]) +def how(request): + return request.param + + +@pytest.fixture +def left(): + return pl.LazyFrame( + { + "a": [1, 2, 3, 1, None], + "b": [1, 2, 3, 4, 5], + "c": [2, 3, 4, 5, 6], + } + ) + + +@pytest.fixture +def right(): + return pl.LazyFrame( + { + "a": [1, 4, 3, 7, None, None], + "c": [2, 3, 4, 5, 6, 7], + } + ) + + @pytest.mark.parametrize( - "how", + "join_expr", [ - "inner", - "left", - "semi", - "anti", - "full", + pl.col("a"), + pl.col("a") * 2, + [pl.col("a"), pl.col("c") + 1], + ["c", "a"], ], ) -@pytest.mark.parametrize("coalesce", [False, True]) -@pytest.mark.parametrize( - "join_nulls", [False, True], ids=["nulls_not_equal", "nulls_equal"] -) +def test_non_coalesce_join(left, right, how, join_nulls, join_expr): + query = left.join( + right, on=join_expr, how=how, join_nulls=join_nulls, coalesce=False + ) + assert_gpu_result_equal(query, check_row_order=how == "left") + + @pytest.mark.parametrize( "join_expr", [ pl.col("a"), - pl.col("a") * 2, - [pl.col("a"), pl.col("c") + 1], ["c", "a"], ], ) -def test_join(how, coalesce, join_nulls, join_expr): - left = pl.DataFrame( - { - "a": [1, 2, 3, 1, None], - "b": [1, 2, 3, 4, 5], - "c": [2, 3, 4, 5, 6], - } - ).lazy() - right = pl.DataFrame( - { - "a": [1, 4, 3, 7, None, None], - "c": [2, 3, 4, 5, 6, 7], - } - ).lazy() - +def test_coalesce_join(left, right, how, join_nulls, join_expr): query = left.join( - right, on=join_expr, how=how, join_nulls=join_nulls, coalesce=coalesce + right, on=join_expr, how=how, join_nulls=join_nulls, coalesce=True ) assert_gpu_result_equal(query, check_row_order=False) -def test_cross_join(): - left = pl.DataFrame( - { - "a": [1, 2, 3, 1, None], - "b": [1, 2, 3, 4, 5], - "c": [2, 3, 4, 5, 6], - } - ).lazy() - right = pl.DataFrame( - { - "a": [1, 4, 3, 7, None, None], - "c": [2, 3, 4, 5, 6, 7], - } - ).lazy() - +def test_cross_join(left, right): q = left.join(right, how="cross") assert_gpu_result_equal(q) @@ -79,9 +82,7 @@ def test_cross_join(): @pytest.mark.parametrize( "left_on,right_on", [(pl.col("a"), pl.lit(2)), (pl.lit(2), pl.col("a"))] ) -def test_join_literal_key_unsupported(left_on, right_on): - left = pl.LazyFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) - right = pl.LazyFrame({"a": [1, 2, 3], "b": [5, 6, 7]}) +def test_join_literal_key_unsupported(left, right, left_on, right_on): q = left.join(right, left_on=left_on, right_on=right_on, how="inner") assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/test_scan.py b/python/cudf_polars/tests/test_scan.py index d0c41090433..64acbb076ed 100644 --- a/python/cudf_polars/tests/test_scan.py +++ b/python/cudf_polars/tests/test_scan.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import os + import pytest import polars as pl @@ -22,48 +24,23 @@ def row_index(request): @pytest.fixture( - params=[ - None, - pytest.param( - 2, marks=pytest.mark.xfail(reason="No handling of row limit in scan") - ), - pytest.param( - 3, marks=pytest.mark.xfail(reason="No handling of row limit in scan") - ), - ], + params=[None, 2, 3], ids=["all-rows", "n_rows-with-skip", "n_rows-no-skip"], ) def n_rows(request): return request.param -@pytest.fixture(params=["csv", "parquet"]) -def df(request, tmp_path, row_index, n_rows): - df = pl.DataFrame( 
+@pytest.fixture(scope="module") +def df(): + # TODO: more dtypes + return pl.DataFrame( { - "a": [1, 2, 3, None], - "b": ["ẅ", "x", "y", "z"], - "c": [None, None, 4, 5], + "a": [1, 2, 3, None, 4, 5], + "b": ["ẅ", "x", "y", "z", "123", "abcd"], + "c": [None, None, 4, 5, -1, 0], } ) - name, offset = row_index - if request.param == "csv": - df.write_csv(tmp_path / "file.csv") - return pl.scan_csv( - tmp_path / "file.csv", - row_index_name=name, - row_index_offset=offset, - n_rows=n_rows, - ) - else: - df.write_parquet(tmp_path / "file.pq") - # parquet doesn't have skip_rows argument - return pl.scan_parquet( - tmp_path / "file.pq", - row_index_name=name, - row_index_offset=offset, - n_rows=n_rows, - ) @pytest.fixture(params=[None, ["a"], ["b", "a"]], ids=["all", "subset", "reordered"]) @@ -81,20 +58,72 @@ def mask(request): return request.param -def test_scan(df, columns, mask): - q = df +def make_source(df, path, format): + """ + Writes the passed polars df to a file of + the desired format + """ + if format == "csv": + df.write_csv(path) + elif format == "ndjson": + df.write_ndjson(path) + else: + df.write_parquet(path) + + +@pytest.mark.parametrize( + "format, scan_fn", + [ + ("csv", pl.scan_csv), + ("ndjson", pl.scan_ndjson), + ("parquet", pl.scan_parquet), + ], +) +def test_scan(tmp_path, df, format, scan_fn, row_index, n_rows, columns, mask, request): + name, offset = row_index + make_source(df, tmp_path / "file", format) + request.applymarker( + pytest.mark.xfail( + condition=(n_rows is not None and scan_fn is pl.scan_ndjson), + reason="libcudf does not support n_rows", + ) + ) + q = scan_fn( + tmp_path / "file", + row_index_name=name, + row_index_offset=offset, + n_rows=n_rows, + ) if mask is not None: q = q.filter(mask) if columns is not None: - q = df.select(*columns) - assert_gpu_result_equal(q) + q = q.select(*columns) + polars_collect_kwargs = {} + if versions.POLARS_VERSION_LT_12: + # https://github.com/pola-rs/polars/issues/17553 + polars_collect_kwargs = {"projection_pushdown": False} + assert_gpu_result_equal( + q, + polars_collect_kwargs=polars_collect_kwargs, + # This doesn't work in polars < 1.2 since the row-index + # is in the wrong order in previous polars releases + check_column_order=versions.POLARS_VERSION_LT_12, + ) def test_scan_unsupported_raises(tmp_path): df = pl.DataFrame({"a": [1, 2, 3]}) - df.write_ndjson(tmp_path / "df.json") - q = pl.scan_ndjson(tmp_path / "df.json") + df.write_ipc(tmp_path / "df.ipc") + q = pl.scan_ipc(tmp_path / "df.ipc") + assert_ir_translation_raises(q, NotImplementedError) + + +def test_scan_ndjson_nrows_notimplemented(tmp_path, df): + df = pl.DataFrame({"a": [1, 2, 3]}) + + df.write_ndjson(tmp_path / "df.jsonl") + q = pl.scan_ndjson(tmp_path / "df.jsonl", n_rows=1) assert_ir_translation_raises(q, NotImplementedError) @@ -129,6 +158,42 @@ def test_scan_csv_column_renames_projection_schema(tmp_path): assert_gpu_result_equal(q) +@pytest.mark.parametrize( + "filename,glob", + [ + (["test1.csv", "test2.csv"], True), + ("test*.csv", True), + # Make sure we don't expand glob when + # trying to read a file like test*.csv + # when glob=False + ("test*.csv", False), + ], +) +def test_scan_csv_multi(tmp_path, filename, glob): + with (tmp_path / "test1.csv").open("w") as f: + f.write("""foo,bar,baz\n1,2\n3,4,5""") + with (tmp_path / "test2.csv").open("w") as f: + f.write("""foo,bar,baz\n1,2\n3,4,5""") + with (tmp_path / "test*.csv").open("w") as f: + f.write("""foo,bar,baz\n1,2\n3,4,5""") + os.chdir(tmp_path) + q = pl.scan_csv(filename, 
glob=glob)
+
+    assert_gpu_result_equal(q)
+
+
+def test_scan_csv_multi_differing_colnames(tmp_path):
+    with (tmp_path / "test1.csv").open("w") as f:
+        f.write("""foo,bar,baz\n1,2\n3,4,5""")
+    with (tmp_path / "test2.csv").open("w") as f:
+        f.write("""abc,def,ghi\n1,2\n3,4,5""")
+    q = pl.scan_csv(
+        [tmp_path / "test1.csv", tmp_path / "test2.csv"],
+    )
+    with pytest.raises(pl.exceptions.ComputeError):
+        q.explain()
+
+
 def test_scan_csv_skip_after_header_not_implemented(tmp_path):
     with (tmp_path / "test.csv").open("w") as f:
         f.write("""foo,bar,baz\n1,2,3\n3,4,5""")
@@ -195,3 +260,23 @@ def test_scan_csv_skip_initial_empty_rows(tmp_path):
     q = pl.scan_csv(tmp_path / "test.csv", separator="|", skip_rows=1)
     assert_gpu_result_equal(q)
+
+
+@pytest.mark.parametrize(
+    "schema",
+    [
+        # List of colnames (basically like the names param in CSV)
+        {"b": pl.String, "a": pl.Float32},
+        {"a": pl.UInt64},
+    ],
+)
+def test_scan_ndjson_schema(df, tmp_path, schema):
+    make_source(df, tmp_path / "file", "ndjson")
+    q = pl.scan_ndjson(tmp_path / "file", schema=schema)
+    assert_gpu_result_equal(q)
+
+
+def test_scan_ndjson_unsupported(df, tmp_path):
+    make_source(df, tmp_path / "file", "ndjson")
+    q = pl.scan_ndjson(tmp_path / "file", ignore_errors=True)
+    assert_ir_translation_raises(q, NotImplementedError)
diff --git a/python/cudf_polars/tests/utils/test_dtypes.py b/python/cudf_polars/tests/utils/test_dtypes.py
index 535fdd846a0..bbdb4faa256 100644
--- a/python/cudf_polars/tests/utils/test_dtypes.py
+++ b/python/cudf_polars/tests/utils/test_dtypes.py
@@ -16,6 +16,7 @@
         pl.Time(),
         pl.Struct({"a": pl.Int8, "b": pl.Float32}),
         pl.Datetime("ms", time_zone="US/Pacific"),
+        pl.List(pl.Datetime("ms", time_zone="US/Pacific")),
         pl.Array(pl.Int8, 2),
         pl.Binary(),
         pl.Categorical(),
diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml
index 7b99e041b54..d6b88167262 100644
--- a/python/custreamz/pyproject.toml
+++ b/python/custreamz/pyproject.toml
@@ -20,8 +20,8 @@ license = { text = "Apache 2.0" }
 requires-python = ">=3.9"
 dependencies = [
     "confluent-kafka>=1.9.0,<1.10.0a0",
-    "cudf==24.8.*,>=0.0.0a0",
-    "cudf_kafka==24.8.*,>=0.0.0a0",
+    "cudf==24.10.*,>=0.0.0a0",
+    "cudf_kafka==24.10.*,>=0.0.0a0",
     "streamz",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
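For reference, a minimal sketch of the ndjson scan behavior exercised by the tests above (hypothetical file path): an explicit schema both subsets and types the columns, while unsupported reader options fail translation:

```python
import polars as pl

# The schema reorders and subsets columns, as in test_scan_ndjson_schema;
# prune_columns=True on the libcudf side drops everything else.
q = pl.scan_ndjson("file.jsonl", schema={"b": pl.String, "a": pl.Float32})

# ignore_errors=True is rejected by the cudf-polars JSON reader and the
# query fails translation with NotImplementedError.
q_bad = pl.scan_ndjson("file.jsonl", ignore_errors=True)
```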
classifiers = [ @@ -49,6 +49,7 @@ Homepage = "https://github.com/rapidsai/cudf" [tool.rapids-build-backend] build-backend = "setuptools.build_meta" dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true" [tool.setuptools] license-files = ["LICENSE"] @@ -57,7 +58,7 @@ zip-safe = false [tool.setuptools.dynamic] version = {file = "custreamz/VERSION"} -[tools.setuptools.packages.find] +[tool.setuptools.packages.find] include = [ "custreamz", "custreamz.*", diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 810a804e428..f0cab953458 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -33,6 +33,7 @@ _is_local_filesystem, _open_remote_files, ) +from cudf.utils.utils import maybe_filter_deprecation class CudfEngine(ArrowDatasetEngine): @@ -110,39 +111,50 @@ def _read_paths( ), ) - # Use cudf to read in data - try: - df = cudf.read_parquet( - paths_or_fobs, - engine="cudf", - columns=columns, - row_groups=row_groups if row_groups else None, - dataset_kwargs=dataset_kwargs, - categorical_partitions=False, - **kwargs, - ) - except RuntimeError as err: - # TODO: Remove try/except after null-schema issue is resolved - # (See: https://github.com/rapidsai/cudf/issues/12702) - if len(paths_or_fobs) > 1: - df = cudf.concat( - [ - cudf.read_parquet( - pof, - engine="cudf", - columns=columns, - row_groups=row_groups[i] - if row_groups - else None, - dataset_kwargs=dataset_kwargs, - categorical_partitions=False, - **kwargs, - ) - for i, pof in enumerate(paths_or_fobs) - ] + # Filter out deprecation warning unless the user + # specifies open_file_options and/or use_python_file_object. + # Otherwise, the FutureWarning is out of their control. 
+            # Filter out deprecation warning unless the user
+            # specifies open_file_options and/or use_python_file_object.
+            # Otherwise, the FutureWarning is out of their control.
+            with maybe_filter_deprecation(
+                (
+                    not open_file_options
+                    and "use_python_file_object" not in kwargs
+                ),
+                message="Support for reading pyarrow's NativeFile is deprecated",
+                category=FutureWarning,
+            ):
+                # Use cudf to read in data
+                try:
+                    df = cudf.read_parquet(
+                        paths_or_fobs,
+                        engine="cudf",
+                        columns=columns,
+                        row_groups=row_groups if row_groups else None,
+                        dataset_kwargs=dataset_kwargs,
+                        categorical_partitions=False,
+                        **kwargs,
                     )
-            else:
-                raise err
+                except RuntimeError as err:
+                    # TODO: Remove try/except after null-schema issue is resolved
+                    # (See: https://github.com/rapidsai/cudf/issues/12702)
+                    if len(paths_or_fobs) > 1:
+                        df = cudf.concat(
+                            [
+                                cudf.read_parquet(
+                                    pof,
+                                    engine="cudf",
+                                    columns=columns,
+                                    row_groups=row_groups[i]
+                                    if row_groups
+                                    else None,
+                                    dataset_kwargs=dataset_kwargs,
+                                    categorical_partitions=False,
+                                    **kwargs,
+                                )
+                                for i, pof in enumerate(paths_or_fobs)
+                            ]
+                        )
+                    else:
+                        raise err
         # Apply filters (if any are defined)
         df = _apply_post_filters(df, filters)
diff --git a/python/dask_cudf/dask_cudf/io/tests/test_s3.py b/python/dask_cudf/dask_cudf/io/tests/test_s3.py
index a67404da4fe..ac3245b3748 100644
--- a/python/dask_cudf/dask_cudf/io/tests/test_s3.py
+++ b/python/dask_cudf/dask_cudf/io/tests/test_s3.py
@@ -9,6 +9,8 @@
 import pyarrow.fs as pa_fs
 import pytest
+from dask.dataframe import assert_eq
+
 import dask_cudf
 moto = pytest.importorskip("moto", minversion="3.1.6")
@@ -102,6 +104,11 @@ def s3_context(s3_base, bucket, files=None):
         pass
+@pytest.fixture(scope="module")
+def pdf():
+    return pd.DataFrame({"a": [1, 2, 3, 4], "b": [2.1, 2.2, 2.3, 2.4]})
+
+
 def test_read_csv(s3_base, s3so):
     with s3_context(
         s3_base=s3_base, bucket="daskcsv", files={"a.csv": b"a,b\n1,2\n3,4\n"}
@@ -112,6 +119,22 @@
         assert df.a.sum().compute() == 4
+def test_read_csv_warns(s3_base, s3so):
+    with s3_context(
+        s3_base=s3_base,
+        bucket="daskcsv_warns",
+        files={"a.csv": b"a,b\n1,2\n3,4\n"},
+    ):
+        with pytest.warns(FutureWarning):
+            df = dask_cudf.read_csv(
+                "s3://daskcsv_warns/*.csv",
+                blocksize="50 B",
+                storage_options=s3so,
+                use_python_file_object=True,
+            )
+        assert df.a.sum().compute() == 4
+
+
 @pytest.mark.parametrize(
     "open_file_options",
     [
@@ -120,8 +143,7 @@
         {"open_file_func": None},
     ],
 )
-def test_read_parquet(s3_base, s3so, open_file_options):
-    pdf = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2.1, 2.2, 2.3, 2.4]})
+def test_read_parquet_open_file_options(s3_base, s3so, open_file_options, pdf):
     buffer = BytesIO()
     pdf.to_parquet(path=buffer)
     buffer.seek(0)
@@ -138,5 +160,67 @@
                 storage_options=s3so,
                 open_file_options=open_file_options,
             )
-            assert df.a.sum().compute() == 10
-            assert df.b.sum().compute() == 9
+            with pytest.warns(FutureWarning):
+                assert df.a.sum().compute() == 10
+            with pytest.warns(FutureWarning):
+                assert df.b.sum().compute() == 9
+
+
+def test_read_parquet(s3_base, s3so, pdf):
+    fname = "test_parquet_reader_dask.parquet"
+    bucket = "parquet"
+    buffer = BytesIO()
+    pdf.to_parquet(path=buffer)
+    buffer.seek(0)
+    with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}):
+        got = dask_cudf.read_parquet(
+            f"s3://{bucket}/{fname}",
+            storage_options=s3so,
+        )
+        assert_eq(pdf, got)
+
+
+def test_read_parquet_use_python_file_object(s3_base, s3so, pdf):
+    fname = "test_parquet_use_python_file_object.parquet"
+    bucket = "parquet"
+    buffer = BytesIO()
+    pdf.to_parquet(path=buffer)
+    buffer.seek(0)
+
with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): + with pytest.warns(FutureWarning): + got = dask_cudf.read_parquet( + f"s3://{bucket}/{fname}", + storage_options=s3so, + read={"use_python_file_object": True}, + ).head() + assert_eq(pdf, got) + + +def test_read_orc(s3_base, s3so, pdf): + fname = "test_orc_reader_dask.orc" + bucket = "orc" + buffer = BytesIO() + pdf.to_orc(path=buffer) + buffer.seek(0) + with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): + got = dask_cudf.read_orc( + f"s3://{bucket}/{fname}", + storage_options=s3so, + ) + assert_eq(pdf, got) + + +def test_read_orc_use_python_file_object(s3_base, s3so, pdf): + fname = "test_orc_use_python_file_object.orc" + bucket = "orc" + buffer = BytesIO() + pdf.to_orc(path=buffer) + buffer.seek(0) + with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): + with pytest.warns(FutureWarning): + got = dask_cudf.read_orc( + f"s3://{bucket}/{fname}", + storage_options=s3so, + use_python_file_object=True, + ).head() + assert_eq(pdf, got) diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 9b2e3a5a7b1..872ecd35c28 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -19,12 +19,12 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ - "cudf==24.8.*,>=0.0.0a0", + "cudf==24.10.*,>=0.0.0a0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", "numpy>=1.23,<2.0a0", "pandas>=2.0,<2.2.3dev0", - "rapids-dask-dependency==24.8.*,>=0.0.0a0", + "rapids-dask-dependency==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -45,7 +45,7 @@ cudf = "dask_cudf.backends:CudfDXBackendEntrypoint" [project.optional-dependencies] test = [ - "dask-cuda==24.8.*,>=0.0.0a0", + "dask-cuda==24.10.*,>=0.0.0a0", "numba>=0.57", "pytest-cov", "pytest-xdist", @@ -58,6 +58,7 @@ Homepage = "https://github.com/rapidsai/cudf" [tool.rapids-build-backend] build-backend = "setuptools.build_meta" dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true" [tool.setuptools] license-files = ["LICENSE"] diff --git a/python/pylibcudf/CMakeLists.txt b/python/pylibcudf/CMakeLists.txt index 021f8f593b3..fa52bc0780a 100644 --- a/python/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/CMakeLists.txt @@ -16,10 +16,10 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../../rapids_config.cmake) include(rapids-cuda) -rapids_cuda_init_architectures(cudf-python) +rapids_cuda_init_architectures(pylibcudf) project( - cudf-python + pylibcudf VERSION "${RAPIDS_VERSION}" LANGUAGES CXX CUDA ) @@ -96,5 +96,5 @@ include(cmake/Modules/LinkPyarrowHeaders.cmake) add_subdirectory(pylibcudf) if(DEFINED cython_lib_dir) - rapids_cython_add_rpath_entries(TARGET pylibcudf PATHS "${cython_lib_dir}") + rapids_cython_add_rpath_entries(TARGET cudf PATHS "${cython_lib_dir}") endif() diff --git a/python/pylibcudf/pylibcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/CMakeLists.txt index 0800fa18e94..df4591baa71 100644 --- a/python/pylibcudf/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/CMakeLists.txt @@ -20,6 +20,7 @@ set(cython_sources concatenate.pyx copying.pyx datetime.pyx + experimental.pyx expressions.pyx filling.pyx gpumemoryview.pyx diff --git a/python/pylibcudf/pylibcudf/__init__.pxd b/python/pylibcudf/pylibcudf/__init__.pxd index 
26e89b818d3..71f523fc3cd 100644
--- a/python/pylibcudf/pylibcudf/__init__.pxd
+++ b/python/pylibcudf/pylibcudf/__init__.pxd
@@ -8,6 +8,7 @@ from . cimport (
     concatenate,
     copying,
     datetime,
+    experimental,
     expressions,
     filling,
     groupby,
@@ -48,6 +49,8 @@ __all__ = [
     "concatenate",
     "copying",
     "datetime",
+    "experimental",
+    "expressions",
     "filling",
     "gpumemoryview",
     "groupby",
diff --git a/python/pylibcudf/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py
index e89a5ed9f96..9705eba84b1 100644
--- a/python/pylibcudf/pylibcudf/__init__.py
+++ b/python/pylibcudf/pylibcudf/__init__.py
@@ -7,6 +7,7 @@
     concatenate,
     copying,
     datetime,
+    experimental,
     expressions,
     filling,
     groupby,
@@ -48,6 +49,8 @@
     "concatenate",
     "copying",
     "datetime",
+    "experimental",
+    "expressions",
     "filling",
     "gpumemoryview",
     "groupby",
diff --git a/python/pylibcudf/pylibcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/binaryop.pxd
index 39bf183e7b0..06625e9e2db 100644
--- a/python/pylibcudf/pylibcudf/binaryop.pxd
+++ b/python/pylibcudf/pylibcudf/binaryop.pxd
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
+from libcpp cimport bool
 from pylibcudf.libcudf.binaryop cimport binary_operator
 from .column cimport Column
@@ -22,3 +23,10 @@ cpdef Column binary_operation(
     binary_operator op,
     DataType output_type
 )
+
+cpdef bool is_supported_operation(
+    DataType out,
+    DataType lhs,
+    DataType rhs,
+    binary_operator op
+)
diff --git a/python/pylibcudf/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx
index 8870bd2f2f0..5a67f4d6cdb 100644
--- a/python/pylibcudf/pylibcudf/binaryop.pyx
+++ b/python/pylibcudf/pylibcudf/binaryop.pyx
@@ -2,6 +2,7 @@
 from cython.operator import dereference
+from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 from pylibcudf.libcudf cimport binaryop as cpp_binaryop
@@ -83,3 +84,38 @@ cpdef Column binary_operation(
         raise ValueError(f"Invalid arguments {lhs} and {rhs}")
     return Column.from_libcudf(move(result))
+
+
+cpdef bool is_supported_operation(
+    DataType out,
+    DataType lhs,
+    DataType rhs,
+    binary_operator op
+):
+    """Check if an operation is supported for the given data types.
+
+    For details, see :cpp:func:`is_supported_operation`.
+
+    Parameters
+    ----------
+    out : DataType
+        The output data type.
+    lhs : DataType
+        The left hand side data type.
+    rhs : DataType
+        The right hand side data type.
+    op : BinaryOperator
+        The operation to check.
+
+    Returns
+    -------
+    bool
+        True if the operation is supported, False otherwise.
+    """
+
+    return cpp_binaryop.is_supported_operation(
+        out.c_obj,
+        lhs.c_obj,
+        rhs.c_obj,
+        op
+    )
diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx
index c987fa3af57..7177b321bbc 100644
--- a/python/pylibcudf/pylibcudf/column.pyx
+++ b/python/pylibcudf/pylibcudf/column.pyx
@@ -252,6 +252,28 @@ cdef class Column:
             c_result = move(make_column_from_scalar(dereference(c_scalar), size))
         return Column.from_libcudf(move(c_result))
+    @staticmethod
+    def all_null_like(Column like, size_type size):
+        """Create an all null column from a template.
+
+        Parameters
+        ----------
+        like : Column
+            Column whose type we should mimic
+        size : int
+            Number of rows in the resulting column.
+
+        Returns
+        -------
+        Column
+            An all-null column of `size` rows and type matching `like`.
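A sketch of querying the new `is_supported_operation` binding from Python, assuming the in-tree `cudf._lib.pylibcudf` alias used by the tests elsewhere in this patch:

```python
import cudf._lib.pylibcudf as plc

int64 = plc.DataType(plc.TypeId.INT64)
string = plc.DataType(plc.TypeId.STRING)

# INT64 + INT64 -> INT64 is supported, so this returns True.
plc.binaryop.is_supported_operation(
    int64, int64, int64, plc.binaryop.BinaryOperator.ADD
)

# Adding a string column to an integer column is not, so this is False.
plc.binaryop.is_supported_operation(
    int64, string, int64, plc.binaryop.BinaryOperator.ADD
)
```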
+ """ + cdef Scalar slr = Scalar.empty_like(like) + cdef unique_ptr[column] c_result + with nogil: + c_result = move(make_column_from_scalar(dereference(slr.get()), size)) + return Column.from_libcudf(move(c_result)) + @staticmethod def from_cuda_array_interface_obj(object obj): """Create a Column from an object with a CUDA array interface. diff --git a/python/pylibcudf/pylibcudf/experimental.pxd b/python/pylibcudf/pylibcudf/experimental.pxd new file mode 100644 index 00000000000..107c91c8365 --- /dev/null +++ b/python/pylibcudf/pylibcudf/experimental.pxd @@ -0,0 +1,10 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp cimport bool + + +cpdef enable_prefetching(str key) + +cpdef disable_prefetching(str key) + +cpdef prefetch_debugging(bool enable) diff --git a/python/pylibcudf/pylibcudf/experimental.pyx b/python/pylibcudf/pylibcudf/experimental.pyx new file mode 100644 index 00000000000..b25a53e13b2 --- /dev/null +++ b/python/pylibcudf/pylibcudf/experimental.pyx @@ -0,0 +1,42 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp cimport bool +from libcpp.string cimport string +from pylibcudf.libcudf cimport experimental as cpp_experimental + + +cpdef enable_prefetching(str key): + """Turn on prefetch instructions for the given key. + + Parameters + ---------- + key : str + The key to enable prefetching for. + """ + cdef string c_key = key.encode("utf-8") + cpp_experimental.enable_prefetching(c_key) + + +cpdef disable_prefetching(str key): + """Turn off prefetch instructions for the given key. + + Parameters + ---------- + key : str + The key to disable prefetching for. + """ + cdef string c_key = key.encode("utf-8") + cpp_experimental.disable_prefetching(c_key) + + +cpdef prefetch_debugging(bool enable): + """Enable or disable prefetch debugging. + + When enabled, any prefetch instructions will be logged to the console. + + Parameters + ---------- + enable : bool + Whether to enable or disable prefetch debugging. + """ + cpp_experimental.prefetch_debugging(enable) diff --git a/python/pylibcudf/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx index 18ee9d3ad83..a44c9e25987 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyx +++ b/python/pylibcudf/pylibcudf/expressions.pyx @@ -37,6 +37,17 @@ from .types cimport DataType # Aliases for simplicity ctypedef unique_ptr[libcudf_exp.expression] expression_ptr +# Define this class just to have a docstring for it +cdef class Expression: + """ + The base class for all expression types. + This class cannot be instantiated directly, please + instantiate one of its child classes instead. + + For details, see :cpp:class:`cudf::ast::expression`. + """ + pass + cdef class Literal(Expression): """ A literal value used in an abstract syntax tree. diff --git a/python/pylibcudf/pylibcudf/io/CMakeLists.txt b/python/pylibcudf/pylibcudf/io/CMakeLists.txt index 8dd08d11dc8..55bea4fc262 100644 --- a/python/pylibcudf/pylibcudf/io/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/io/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. 
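For reference, a minimal usage sketch of the prefetching controls wrapped above. The key string below is illustrative only; valid keys are whatever prefetch points libcudf registers internally.

import pylibcudf as plc

# Log each prefetch that fires so the effect of a key is visible.
plc.experimental.prefetch_debugging(True)

# Keys are plain strings matched against libcudf's internal prefetch points;
# "column_view::get_data" is an assumed example key, not an official list.
plc.experimental.enable_prefetching("column_view::get_data")
# ... run work that touches managed memory ...
plc.experimental.disable_prefetching("column_view::get_data")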
# ============================================================================= -set(cython_sources avro.pyx csv.pyx datasource.pyx json.pyx types.pyx) +set(cython_sources avro.pyx csv.pyx datasource.pyx json.pyx parquet.pyx types.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( @@ -22,6 +22,6 @@ rapids_cython_create_modules( ) set(targets_using_arrow_headers pylibcudf_io_avro pylibcudf_io_csv pylibcudf_io_datasource - pylibcudf_io_json pylibcudf_io_types + pylibcudf_io_json pylibcudf_io_parquet pylibcudf_io_types ) link_to_pyarrow_headers("${targets_using_arrow_headers}") diff --git a/python/pylibcudf/pylibcudf/io/__init__.pxd b/python/pylibcudf/pylibcudf/io/__init__.pxd index 5b3272d60e0..62820048584 100644 --- a/python/pylibcudf/pylibcudf/io/__init__.pxd +++ b/python/pylibcudf/pylibcudf/io/__init__.pxd @@ -1,5 +1,5 @@ # Copyright (c) 2024, NVIDIA CORPORATION. # CSV is removed since it is def not cpdef (to force kw-only arguments) -from . cimport avro, datasource, json, types +from . cimport avro, datasource, json, parquet, types from .types cimport SourceInfo, TableWithMetadata diff --git a/python/pylibcudf/pylibcudf/io/__init__.py b/python/pylibcudf/pylibcudf/io/__init__.py index e17deaa4663..27640f7d955 100644 --- a/python/pylibcudf/pylibcudf/io/__init__.py +++ b/python/pylibcudf/pylibcudf/io/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . import avro, csv, datasource, json, types +from . import avro, csv, datasource, json, parquet, types from .types import SinkInfo, SourceInfo, TableWithMetadata diff --git a/python/pylibcudf/pylibcudf/io/datasource.pyx b/python/pylibcudf/pylibcudf/io/datasource.pyx index d2de339bd6b..6cc509b74cb 100644 --- a/python/pylibcudf/pylibcudf/io/datasource.pyx +++ b/python/pylibcudf/pylibcudf/io/datasource.pyx @@ -6,6 +6,8 @@ from pyarrow.lib cimport NativeFile from pylibcudf.libcudf.io.arrow_io_source cimport arrow_io_source from pylibcudf.libcudf.io.datasource cimport datasource +import warnings + cdef class Datasource: cdef datasource* get_datasource(self) except * nogil: @@ -15,10 +17,16 @@ cdef class Datasource: cdef class NativeFileDatasource(Datasource): - def __cinit__(self, NativeFile native_file,): + def __cinit__(self, NativeFile native_file): cdef shared_ptr[CRandomAccessFile] ra_src + warnings.warn( + "Support for reading pyarrow's NativeFile is deprecated " + "and will be removed in a future release of cudf.", + FutureWarning, + ) + ra_src = native_file.get_random_access_file() self.c_datasource.reset(new arrow_io_source(ra_src)) diff --git a/python/pylibcudf/pylibcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/io/parquet.pxd new file mode 100644 index 00000000000..4e9dbdf78df --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/parquet.pxd @@ -0,0 +1,34 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
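Note on the deprecation added to datasource.pyx above: constructing a NativeFileDatasource now emits a FutureWarning. A short sketch of acknowledging it during migration, using only the standard warnings machinery:

import io
import warnings

import pyarrow as pa
from pylibcudf.io.datasource import NativeFileDatasource

with warnings.catch_warnings():
    # Silence the deprecation explicitly while moving off NativeFile
    # to plain paths, bytes, or buffers.
    warnings.simplefilter("ignore", FutureWarning)
    src = NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r"))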
+ +from libc.stdint cimport int64_t +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from pylibcudf.expressions cimport Expression +from pylibcudf.io.types cimport SourceInfo, TableWithMetadata +from pylibcudf.libcudf.io.parquet cimport ( + chunked_parquet_reader as cpp_chunked_parquet_reader, +) +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.types cimport DataType + + +cdef class ChunkedParquetReader: + cdef unique_ptr[cpp_chunked_parquet_reader] reader + + cpdef bool has_next(self) + cpdef TableWithMetadata read_chunk(self) + + +cpdef read_parquet( + SourceInfo source_info, + list columns = *, + list row_groups = *, + Expression filters = *, + bool convert_strings_to_categories = *, + bool use_pandas_metadata = *, + int64_t skip_rows = *, + size_type num_rows = *, + # disabled see comment in parquet.pyx for more + # ReaderColumnSchema reader_column_schema = *, + # DataType timestamp_type = * +) diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx new file mode 100644 index 00000000000..d48430fc958 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/parquet.pyx @@ -0,0 +1,203 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from cython.operator cimport dereference +from libc.stdint cimport int64_t +from libcpp cimport bool +from libcpp.string cimport string +from libcpp.utility cimport move +from libcpp.vector cimport vector +from pylibcudf.expressions cimport Expression +from pylibcudf.io.types cimport SourceInfo, TableWithMetadata +from pylibcudf.libcudf.expressions cimport expression +from pylibcudf.libcudf.io.parquet cimport ( + chunked_parquet_reader as cpp_chunked_parquet_reader, + parquet_reader_options, + read_parquet as cpp_read_parquet, +) +from pylibcudf.libcudf.io.types cimport table_with_metadata +from pylibcudf.libcudf.types cimport size_type + + +cdef parquet_reader_options _setup_parquet_reader_options( + SourceInfo source_info, + list columns = None, + list row_groups = None, + Expression filters = None, + bool convert_strings_to_categories = False, + bool use_pandas_metadata = True, + int64_t skip_rows = 0, + size_type num_rows = -1, + # ReaderColumnSchema reader_column_schema = None, + # DataType timestamp_type = DataType(type_id.EMPTY) +): + cdef vector[string] col_vec + cdef parquet_reader_options opts = ( + parquet_reader_options.builder(source_info.c_obj) + .convert_strings_to_categories(convert_strings_to_categories) + .use_pandas_metadata(use_pandas_metadata) + .use_arrow_schema(True) + .build() + ) + if row_groups is not None: + opts.set_row_groups(row_groups) + if num_rows != -1: + opts.set_num_rows(num_rows) + if skip_rows != 0: + opts.set_skip_rows(skip_rows) + if columns is not None: + col_vec.reserve(len(columns)) + for col in columns: + col_vec.push_back(str(col).encode()) + opts.set_columns(col_vec) + if filters is not None: + opts.set_filter(dereference(filters.c_obj.get())) + return opts + + +cdef class ChunkedParquetReader: + """ + Reads chunks of a Parquet file into a :py:class:`~.types.TableWithMetadata`. + + Parameters + ---------- + source_info : SourceInfo + The SourceInfo object to read the Parquet file from. + columns : list, default None + The names of the columns to be read + row_groups : list[list[size_type]], default None + List of row groups to be read. 
+ use_pandas_metadata : bool, default True + If True, return metadata about the index column in + the per-file user metadata of the ``TableWithMetadata`` + convert_strings_to_categories : bool, default False + Whether to convert string columns to the category type + skip_rows : int64_t, default 0 + The number of rows to skip from the start of the file. + num_rows : size_type, default -1 + The number of rows to read. By default, read the entire file. + chunk_read_limit : size_t, default 0 + Limit on total number of bytes to be returned per read, + or 0 if there is no limit. + pass_read_limit : size_t, default 1024000000 + Limit on the amount of memory used for reading and decompressing data, + or 0 if there is no limit. + """ + def __init__( + self, + SourceInfo source_info, + list columns=None, + list row_groups=None, + bool use_pandas_metadata=True, + bool convert_strings_to_categories=False, + int64_t skip_rows = 0, + size_type num_rows = -1, + size_t chunk_read_limit=0, + size_t pass_read_limit=1024000000 + ): + + cdef parquet_reader_options opts = _setup_parquet_reader_options( + source_info, + columns, + row_groups, + filters=None, + convert_strings_to_categories=convert_strings_to_categories, + use_pandas_metadata=use_pandas_metadata, + skip_rows=skip_rows, + num_rows=num_rows, + ) + + with nogil: + self.reader.reset( + new cpp_chunked_parquet_reader( + chunk_read_limit, + pass_read_limit, + opts + ) + ) + + cpdef bool has_next(self): + """ + Returns True if there is another chunk in the Parquet file + to be read. + + Returns + ------- + True if we have not finished reading the file. + """ + with nogil: + return self.reader.get()[0].has_next() + + cpdef TableWithMetadata read_chunk(self): + """ + Read the next chunk into a :py:class:`~.types.TableWithMetadata` + + Returns + ------- + TableWithMetadata + The Table and its corresponding metadata (column names) that were read in. + """ + # Read Parquet + cdef table_with_metadata c_result + + with nogil: + c_result = move(self.reader.get()[0].read_chunk()) + + return TableWithMetadata.from_libcudf(c_result) + +cpdef read_parquet( + SourceInfo source_info, + list columns = None, + list row_groups = None, + Expression filters = None, + bool convert_strings_to_categories = False, + bool use_pandas_metadata = True, + int64_t skip_rows = 0, + size_type num_rows = -1, + # Disabled, these aren't used by cudf-python + # we should only add them back in if there's user demand + # ReaderColumnSchema reader_column_schema = None, + # DataType timestamp_type = DataType(type_id.EMPTY) +): + """Reads a Parquet file into a :py:class:`~.types.TableWithMetadata`. + + Parameters + ---------- + source_info : SourceInfo + The SourceInfo object to read the Parquet file from. + columns : list, default None + The string names of the columns to be read. + row_groups : list[list[size_type]], default None + List of row groups to be read. + filters : Expression, default None + An AST :py:class:`pylibcudf.expressions.Expression` + to use for predicate pushdown. + convert_strings_to_categories : bool, default False + Whether to convert string columns to the category type + use_pandas_metadata : bool, default True + If True, return metadata about the index column in + the per-file user metadata of the ``TableWithMetadata`` + skip_rows : int64_t, default 0 + The number of rows to skip from the start of the file. + num_rows : size_type, default -1 + The number of rows to read. By default, read the entire file.
+ + Returns + ------- + TableWithMetadata + The Table and its corresponding metadata (column names) that were read in. + """ + cdef table_with_metadata c_result + cdef parquet_reader_options opts = _setup_parquet_reader_options( + source_info, + columns, + row_groups, + filters, + convert_strings_to_categories, + use_pandas_metadata, + skip_rows, + num_rows, + ) + + with nogil: + c_result = move(cpp_read_parquet(opts)) + + return TableWithMetadata.from_libcudf(c_result) diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx index 1edb753efc6..1600a805b37 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -121,6 +121,14 @@ cdef class TableWithMetadata: out.metadata = tbl_with_meta.metadata return out + @property + def per_file_user_data(self): + """ + Returns a list containing a dict + containing file-format specific metadata, + for each file being read in. + """ + return self.metadata.per_file_user_data cdef class SourceInfo: """A class containing details on a source to read from. diff --git a/python/pylibcudf/pylibcudf/join.pyx b/python/pylibcudf/pylibcudf/join.pyx index 925efface7b..25664286f19 100644 --- a/python/pylibcudf/pylibcudf/join.pyx +++ b/python/pylibcudf/pylibcudf/join.pyx @@ -7,12 +7,7 @@ from libcpp.utility cimport move from pylibcudf.libcudf cimport join as cpp_join from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table cimport table -from pylibcudf.libcudf.types cimport ( - data_type, - null_equality, - size_type, - type_id, -) +from pylibcudf.libcudf.types cimport null_equality from rmm._lib.device_buffer cimport device_buffer @@ -22,15 +17,11 @@ from .table cimport Table cdef Column _column_from_gather_map(cpp_join.gather_map_type gather_map): # helper to convert a gather map to a Column - cdef device_buffer c_empty - cdef size_type size = dereference(gather_map.get()).size() return Column.from_libcudf( move( make_unique[column]( - data_type(type_id.INT32), - size, - dereference(gather_map.get()).release(), - move(c_empty), + move(dereference(gather_map.get())), + device_buffer(), 0 ) ) diff --git a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd index a9ca4f5b708..b9480c66c52 100644 --- a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd @@ -1,10 +1,12 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
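For orientation, a sketch of driving the ChunkedParquetReader defined above end to end; "data.parquet" is a stand-in path and the limit value is arbitrary:

import pylibcudf as plc

reader = plc.io.parquet.ChunkedParquetReader(
    plc.io.SourceInfo(["data.parquet"]),  # assumed local file for illustration
    chunk_read_limit=1024**3,  # cap each returned chunk at ~1 GiB
)

chunks = []
while reader.has_next():
    # Each chunk is a TableWithMetadata; .tbl is the pylibcudf Table.
    chunks.append(reader.read_chunk().tbl)

# Stitch the chunks back into a single table.
result = plc.concatenate.concatenate(chunks)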
from libc.stdint cimport int32_t +from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.string cimport string from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.exception_handler cimport cudf_exception_handler from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport data_type @@ -18,9 +20,20 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: TRUE_DIV FLOOR_DIV MOD + PMOD PYMOD POW INT_POW + LOG_BASE + ATAN2 + SHIFT_LEFT + SHIFT_RIGHT + SHIFT_RIGHT_UNSIGNED + BITWISE_AND + BITWISE_OR + BITWISE_XOR + LOGICAL_AND + LOGICAL_OR EQUAL NOT_EQUAL LESS @@ -28,38 +41,46 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: LESS_EQUAL GREATER_EQUAL NULL_EQUALS + NULL_MAX + NULL_MIN NULL_NOT_EQUALS - BITWISE_AND - BITWISE_OR - BITWISE_XOR - LOGICAL_AND - LOGICAL_OR GENERIC_BINARY + NULL_LOGICAL_AND + NULL_LOGICAL_OR + INVALID_BINARY cdef unique_ptr[column] binary_operation ( const scalar& lhs, const column_view& rhs, binary_operator op, data_type output_type - ) except + + ) except +cudf_exception_handler cdef unique_ptr[column] binary_operation ( const column_view& lhs, const scalar& rhs, binary_operator op, data_type output_type - ) except + + ) except +cudf_exception_handler cdef unique_ptr[column] binary_operation ( const column_view& lhs, const column_view& rhs, binary_operator op, data_type output_type - ) except + + ) except +cudf_exception_handler cdef unique_ptr[column] binary_operation ( const column_view& lhs, const column_view& rhs, const string& op, data_type output_type - ) except + + ) except +cudf_exception_handler + +cdef extern from "cudf/binaryop.hpp" namespace "cudf::binops" nogil: + cdef bool is_supported_operation( + data_type output_type, + data_type lhs_type, + data_type rhs_type, + binary_operator op + ) except +cudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/copying.pxd b/python/pylibcudf/pylibcudf/libcudf/copying.pxd index a4b6525d4d1..97439145160 100644 --- a/python/pylibcudf/pylibcudf/libcudf/copying.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/copying.pxd @@ -10,6 +10,7 @@ from pylibcudf.libcudf.column.column_view cimport ( column_view, mutable_column_view, ) +from pylibcudf.libcudf.exception_handler cimport cudf_exception_handler from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view @@ -17,8 +18,6 @@ from pylibcudf.libcudf.types cimport size_type from rmm._lib.device_buffer cimport device_buffer -from cudf._lib.exception_handler cimport cudf_exception_handler - ctypedef const scalar constscalar cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: diff --git a/python/pylibcudf/pylibcudf/libcudf/exception_handler.pxd b/python/pylibcudf/pylibcudf/libcudf/exception_handler.pxd new file mode 100644 index 00000000000..4337d8db285 --- /dev/null +++ b/python/pylibcudf/pylibcudf/libcudf/exception_handler.pxd @@ -0,0 +1,69 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + + +# See +# https://github.com/cython/cython/blob/master/Cython/Utility/CppSupport.cpp +# for the original Cython exception handler. +cdef extern from *: + """ + #include + #include + #include + #include + + namespace { + + /** + * @brief Exception handler to map C++ exceptions to Python ones in Cython + * + * This exception handler extends the base exception handler provided by + * Cython. 
In addition to the exceptions that Cython itself supports, this + * file adds support for additional exceptions thrown by libcudf that need + * to be mapped to specific Python exceptions. + * + * Since this function interoperates with Python's exception state, it + * does not throw any C++ exceptions. + */ + void cudf_exception_handler() + { + // Catch a handful of different errors here and turn them into the + // equivalent Python errors. + try { + if (PyErr_Occurred()) + ; // let latest Python exn pass through and ignore the current one + throw; + } catch (const std::bad_alloc& exn) { + PyErr_SetString(PyExc_MemoryError, exn.what()); + } catch (const std::bad_cast& exn) { + PyErr_SetString(PyExc_TypeError, exn.what()); + } catch (const std::domain_error& exn) { + PyErr_SetString(PyExc_ValueError, exn.what()); + } catch (const cudf::data_type_error& exn) { + // Catch subclass (data_type_error) before parent (invalid_argument) + PyErr_SetString(PyExc_TypeError, exn.what()); + } catch (const std::invalid_argument& exn) { + PyErr_SetString(PyExc_ValueError, exn.what()); + } catch (const std::ios_base::failure& exn) { + // Unfortunately, in standard C++ we have no way of distinguishing EOF + // from other errors here; be careful with the exception mask + PyErr_SetString(PyExc_IOError, exn.what()); + } catch (const std::out_of_range& exn) { + // Change out_of_range to IndexError + PyErr_SetString(PyExc_IndexError, exn.what()); + } catch (const std::overflow_error& exn) { + PyErr_SetString(PyExc_OverflowError, exn.what()); + } catch (const std::range_error& exn) { + PyErr_SetString(PyExc_ArithmeticError, exn.what()); + } catch (const std::underflow_error& exn) { + PyErr_SetString(PyExc_ArithmeticError, exn.what()); + // The below is the default catch-all case. + } catch (const std::exception& exn) { + PyErr_SetString(PyExc_RuntimeError, exn.what()); + } catch (...) { + PyErr_SetString(PyExc_RuntimeError, "Unknown exception"); + } + } + + } // anonymous namespace + """ + cdef void cudf_exception_handler() diff --git a/python/pylibcudf/pylibcudf/libcudf/experimental.pxd b/python/pylibcudf/pylibcudf/libcudf/experimental.pxd new file mode 100644 index 00000000000..f280a382a04 --- /dev/null +++ b/python/pylibcudf/pylibcudf/libcudf/experimental.pxd @@ -0,0 +1,16 @@ +# Copyright (c) 2022-2024, NVIDIA CORPORATION. + +from libcpp cimport bool +from libcpp.string cimport string + + +cdef extern from "cudf/utilities/prefetch.hpp" \ + namespace "cudf::experimental::prefetch" nogil: + # Not technically the right signature, but it's good enough to let Cython + # generate valid C++ code. It just means we'll be copying a host string + # extra, but that's OK. If we care we could generate string_view bindings, + # but there's no real rush so if we go that route we might as well + # contribute them upstream to Cython itself. + void enable_prefetching(string key) + void disable_prefetching(string key) + void prefetch_debugging(bool enable) diff --git a/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd index a8e1364b54a..222d87defa0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd @@ -1,8 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
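The point of routing these declarations through except +cudf_exception_handler is that libcudf's C++ exceptions surface as idiomatic Python errors. A hedged sketch; which error actually fires depends on what libcudf throws for the given inputs:

import pyarrow as pa
import pylibcudf as plc

col = plc.interop.from_arrow(pa.array([1, 2, 3]))
bad_out = plc.interop.from_arrow(pa.string())  # nonsensical output type

try:
    plc.binaryop.binary_operation(
        col, col, plc.binaryop.BinaryOperator.ADD, bad_out
    )
except (TypeError, RuntimeError) as err:
    # Per the handler above, cudf::data_type_error maps to TypeError;
    # anything unrecognized falls through to RuntimeError.
    print(type(err).__name__, err)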
-cimport pylibcudf.libcudf.io.types as cudf_io_types -cimport pylibcudf.libcudf.table.table_view as cudf_table_view -from libc.stdint cimport uint8_t +from libc.stdint cimport int64_t, uint8_t from libcpp cimport bool from libcpp.functional cimport reference_wrapper from libcpp.map cimport map @@ -11,13 +9,24 @@ from libcpp.optional cimport optional from libcpp.string cimport string from libcpp.vector cimport vector from pylibcudf.libcudf.expressions cimport expression +from pylibcudf.libcudf.io.types cimport ( + compression_type, + dictionary_policy, + partition_info, + sink_info, + source_info, + statistics_freq, + table_input_metadata, + table_with_metadata, +) +from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport data_type, size_type cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: cdef cppclass parquet_reader_options: parquet_reader_options() except + - cudf_io_types.source_info get_source_info() except + + source_info get_source_info() except + vector[vector[size_type]] get_row_groups() except + const optional[reference_wrapper[expression]]& get_filter() except + data_type get_timestamp_type() except + @@ -26,21 +35,24 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: # setter + void set_filter(expression &filter) except + void set_columns(vector[string] col_names) except + + void set_num_rows(size_type val) except + void set_row_groups(vector[vector[size_type]] row_grp) except + + void set_skip_rows(int64_t val) except + void enable_use_arrow_schema(bool val) except + void enable_use_pandas_metadata(bool val) except + void set_timestamp_type(data_type type) except + @staticmethod parquet_reader_options_builder builder( - cudf_io_types.source_info src + source_info src ) except + cdef cppclass parquet_reader_options_builder: parquet_reader_options_builder() except + parquet_reader_options_builder( - cudf_io_types.source_info src + source_info src ) except + parquet_reader_options_builder& columns( vector[string] col_names @@ -48,6 +60,9 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: parquet_reader_options_builder& row_groups( vector[vector[size_type]] row_grp ) except + + parquet_reader_options_builder& convert_strings_to_categories( + bool val + ) except + parquet_reader_options_builder& use_pandas_metadata( bool val ) except + @@ -62,15 +77,15 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: ) except + parquet_reader_options build() except + - cdef cudf_io_types.table_with_metadata read_parquet( + cdef table_with_metadata read_parquet( parquet_reader_options args) except + cdef cppclass parquet_writer_options_base: parquet_writer_options_base() except + - cudf_io_types.sink_info get_sink_info() except + - cudf_io_types.compression_type get_compression() except + - cudf_io_types.statistics_freq get_stats_level() except + - const optional[cudf_io_types.table_input_metadata]& get_metadata( + sink_info get_sink_info() except + + compression_type get_compression() except + + statistics_freq get_stats_level() except + + const optional[table_input_metadata]& get_metadata( ) except + size_t get_row_group_size_bytes() except + size_type get_row_group_size_rows() except + @@ -80,16 +95,16 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: bool is_enabled_write_arrow_schema() except + void set_metadata( - cudf_io_types.table_input_metadata m + table_input_metadata m ) except + void set_key_value_metadata( vector[map[string, string]] kvm ) except + 
void set_stats_level( - cudf_io_types.statistics_freq sf + statistics_freq sf ) except + void set_compression( - cudf_io_types.compression_type compression + compression_type compression ) except + void set_int96_timestamps( bool enabled @@ -104,14 +119,14 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: void set_max_dictionary_size(size_t val) except + void enable_write_v2_headers(bool val) except + void enable_write_arrow_schema(bool val) except + - void set_dictionary_policy(cudf_io_types.dictionary_policy policy) except + + void set_dictionary_policy(dictionary_policy policy) except + cdef cppclass parquet_writer_options(parquet_writer_options_base): parquet_writer_options() except + - cudf_table_view.table_view get_table() except + + table_view get_table() except + string get_column_chunks_file_paths() except + void set_partitions( - vector[cudf_io_types.partition_info] partitions + vector[partition_info] partitions ) except + void set_column_chunks_file_paths( vector[string] column_chunks_file_paths @@ -119,24 +134,24 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: @staticmethod parquet_writer_options_builder builder( - cudf_io_types.sink_info sink_, - cudf_table_view.table_view table_ + sink_info sink_, + table_view table_ ) except + cdef cppclass parquet_writer_options_builder_base[BuilderT, OptionsT]: parquet_writer_options_builder_base() except + BuilderT& metadata( - cudf_io_types.table_input_metadata m + table_input_metadata m ) except + BuilderT& key_value_metadata( vector[map[string, string]] kvm ) except + BuilderT& stats_level( - cudf_io_types.statistics_freq sf + statistics_freq sf ) except + BuilderT& compression( - cudf_io_types.compression_type compression + compression_type compression ) except + BuilderT& int96_timestamps( bool enabled @@ -166,7 +181,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: bool val ) except + BuilderT& dictionary_policy( - cudf_io_types.dictionary_policy val + dictionary_policy val ) except + OptionsT build() except + @@ -175,11 +190,11 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: parquet_writer_options]): parquet_writer_options_builder() except + parquet_writer_options_builder( - cudf_io_types.sink_info sink_, - cudf_table_view.table_view table_ + sink_info sink_, + table_view table_ ) except + parquet_writer_options_builder& partitions( - vector[cudf_io_types.partition_info] partitions + vector[partition_info] partitions ) except + parquet_writer_options_builder& column_chunks_file_paths( vector[string] column_chunks_file_paths @@ -194,7 +209,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: @staticmethod chunked_parquet_writer_options_builder builder( - cudf_io_types.sink_info sink_, + sink_info sink_, ) except + cdef cppclass chunked_parquet_writer_options_builder( @@ -203,18 +218,18 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: ): chunked_parquet_writer_options_builder() except + chunked_parquet_writer_options_builder( - cudf_io_types.sink_info sink_, + sink_info sink_, ) except + cdef cppclass parquet_chunked_writer: parquet_chunked_writer() except + parquet_chunked_writer(chunked_parquet_writer_options args) except + parquet_chunked_writer& write( - cudf_table_view.table_view table_, + table_view table_, ) except + parquet_chunked_writer& write( - const cudf_table_view.table_view& table_, - const vector[cudf_io_types.partition_info]& partitions, + const table_view& table_, + const 
vector[partition_info]& partitions, ) except + unique_ptr[vector[uint8_t]] close( vector[string] column_chunks_file_paths, @@ -230,7 +245,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: size_t pass_read_limit, const parquet_reader_options& options) except + bool has_next() except + - cudf_io_types.table_with_metadata read_chunk() except + + table_with_metadata read_chunk() except + cdef unique_ptr[vector[uint8_t]] merge_row_group_metadata( const vector[unique_ptr[vector[uint8_t]]]& metadata_list diff --git a/python/pylibcudf/pylibcudf/libcudf/io/types.pxd b/python/pylibcudf/pylibcudf/libcudf/io/types.pxd index 4e83c56dcf0..a3d99807876 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/types.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/types.pxd @@ -80,6 +80,7 @@ cdef extern from "cudf/io/types.hpp" \ map[string, string] user_data vector[unordered_map[string, string]] per_file_user_data vector[column_name_info] schema_info + vector[size_t] num_rows_per_source cdef cppclass table_with_metadata: unique_ptr[table] tbl diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd index 05d4e7628e6..48c4ec70c8a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd @@ -4,11 +4,10 @@ from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.exception_handler cimport cudf_exception_handler from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.scalar.scalar cimport scalar -from cudf._lib.exception_handler cimport cudf_exception_handler - cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd index fff082d90ef..e283551ed0c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd @@ -6,4 +6,4 @@ from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view cdef extern from "cudf/lists/count_elements.hpp" namespace "cudf::lists" nogil: - cdef unique_ptr[column] count_elements(const lists_column_view) except + + cdef unique_ptr[column] count_elements(const lists_column_view&) except + diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd new file mode 100644 index 00000000000..54f5a8409b6 --- /dev/null +++ b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd @@ -0,0 +1,18 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. 
+ +from libcpp.memory cimport unique_ptr +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view + + +cdef extern from "cudf/lists/filling.hpp" namespace "cudf::lists" nogil: + cdef unique_ptr[column] sequences( + const column_view& starts, + const column_view& sizes, + ) except + + + cdef unique_ptr[column] sequences( + const column_view& starts, + const column_view& steps, + const column_view& sizes, + ) except + diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd new file mode 100644 index 00000000000..266f04ef6b3 --- /dev/null +++ b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd @@ -0,0 +1,36 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view +from pylibcudf.libcudf.types cimport nan_equality, null_equality + + +cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: + cdef unique_ptr[column] difference_distinct( + const lists_column_view& lhs, + const lists_column_view& rhs, + null_equality nulls_equal, + nan_equality nans_equal + ) except + + + cdef unique_ptr[column] have_overlap( + const lists_column_view& lhs, + const lists_column_view& rhs, + null_equality nulls_equal, + nan_equality nans_equal + ) except + + + cdef unique_ptr[column] intersect_distinct( + const lists_column_view& lhs, + const lists_column_view& rhs, + null_equality nulls_equal, + nan_equality nans_equal + ) except + + + cdef unique_ptr[column] union_distinct( + const lists_column_view& lhs, + const lists_column_view& rhs, + null_equality nulls_equal, + nan_equality nans_equal + ) except + diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd index 561b25ed0a9..ea45f999c47 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd @@ -12,3 +12,9 @@ cdef extern from "cudf/lists/sorting.hpp" namespace "cudf::lists" nogil: order column_order, null_order null_precedence ) except + + + cdef unique_ptr[column] stable_sort_lists( + const lists_column_view source_column, + order column_order, + null_order null_precedence + ) except + diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd index f9980765772..d9df7c3ca2e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd @@ -8,8 +8,13 @@ from pylibcudf.libcudf.types cimport nan_equality, null_equality cdef extern from "cudf/lists/stream_compaction.hpp" \ namespace "cudf::lists" nogil: + cdef unique_ptr[column] apply_boolean_mask( + const lists_column_view& lists_column, + const lists_column_view& boolean_mask, + ) except + + cdef unique_ptr[column] distinct( - const lists_column_view lists_column, + const lists_column_view& lists_column, null_equality nulls_equal, nan_equality nans_equal ) except + diff --git a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd index 3a2dd57f15c..ee4b47935b2 100644 --- a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd @@ -2,9 +2,12 @@ 
from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil: cdef unique_ptr[scalar] make_string_scalar(const string & _string) except + cdef unique_ptr[scalar] make_fixed_width_scalar[T](T value) except + + + cdef unique_ptr[scalar] make_empty_scalar_like(const column_view &) except + diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/type_dispatcher.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/type_dispatcher.pxd new file mode 100644 index 00000000000..fbeb6e9db90 --- /dev/null +++ b/python/pylibcudf/pylibcudf/libcudf/utilities/type_dispatcher.pxd @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.libcudf.types cimport type_id + + +cdef extern from "cudf/utilities/type_dispatcher.hpp" namespace "cudf" nogil: + cdef type_id type_to_id[T]() diff --git a/python/pylibcudf/pylibcudf/lists.pxd b/python/pylibcudf/pylibcudf/lists.pxd index a8c5d3a5a7f..e7d006e6e2e 100644 --- a/python/pylibcudf/pylibcudf/lists.pxd +++ b/python/pylibcudf/pylibcudf/lists.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from libcpp cimport bool -from pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.types cimport null_order, size_type from .column cimport Column from .scalar cimport Scalar @@ -32,3 +32,21 @@ cpdef Column reverse(Column) cpdef Column segmented_gather(Column, Column) cpdef Column extract_list_element(Column, ColumnOrSizeType) + +cpdef Column count_elements(Column) + +cpdef Column sequences(Column, Column, Column steps = *) + +cpdef Column sort_lists(Column, bool, null_order, bool stable = *) + +cpdef Column difference_distinct(Column, Column, bool nulls_equal=*, bool nans_equal=*) + +cpdef Column have_overlap(Column, Column, bool nulls_equal=*, bool nans_equal=*) + +cpdef Column intersect_distinct(Column, Column, bool nulls_equal=*, bool nans_equal=*) + +cpdef Column union_distinct(Column, Column, bool nulls_equal=*, bool nans_equal=*) + +cpdef Column apply_boolean_mask(Column, Column) + +cpdef Column distinct(Column, bool, bool) diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx index 4081249a0b7..947caddc485 100644 --- a/python/pylibcudf/pylibcudf/lists.pyx +++ b/python/pylibcudf/pylibcudf/lists.pyx @@ -8,19 +8,38 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists cimport ( contains as cpp_contains, explode as cpp_explode, + filling as cpp_filling, gather as cpp_gather, reverse as cpp_reverse, + set_operations as cpp_set_operations, ) from pylibcudf.libcudf.lists.combine cimport ( concatenate_list_elements as cpp_concatenate_list_elements, concatenate_null_policy, concatenate_rows as cpp_concatenate_rows, ) +from pylibcudf.libcudf.lists.count_elements cimport ( + count_elements as cpp_count_elements, +) from pylibcudf.libcudf.lists.extract cimport ( extract_list_element as cpp_extract_list_element, ) +from pylibcudf.libcudf.lists.sorting cimport ( + sort_lists as cpp_sort_lists, + stable_sort_lists as cpp_stable_sort_lists, +) +from pylibcudf.libcudf.lists.stream_compaction cimport ( + apply_boolean_mask as cpp_apply_boolean_mask, + distinct as cpp_distinct, +) from pylibcudf.libcudf.table.table cimport table -from pylibcudf.libcudf.types cimport size_type +from pylibcudf.libcudf.types cimport ( + nan_equality, + null_equality, + null_order, + order, + size_type, 
+) +from pylibcudf.lists cimport ColumnOrScalar, ColumnOrSizeType from .column cimport Column, ListColumnView @@ -292,3 +311,376 @@ cpdef Column extract_list_element(Column input, ColumnOrSizeType index): index.view() if ColumnOrSizeType is Column else index, )) return Column.from_libcudf(move(c_result)) + + +cpdef Column count_elements(Column input): + """Count the number of elements in each + list row of the given lists column. + For details, see :cpp:func:`count_elements`. + + Parameters + ---------- + input : Column + The input column. + + Returns + ------- + Column + A new Column with the number of elements in each list row + """ + cdef ListColumnView list_view = input.list_view() + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(cpp_count_elements(list_view.view())) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column sequences(Column starts, Column sizes, Column steps = None): + """Create a lists column in which each row contains a sequence of + values specified by a tuple of (start, step, size) parameters. + + For details, see :cpp:func:`sequences`. + + Parameters + ---------- + starts : Column + First values in the result sequences. + sizes : Column + Numbers of values in the result sequences. + steps : Optional[Column] + Increment values for the result sequences. + + Returns + ------- + Column + The result column containing generated sequences. + """ + cdef unique_ptr[column] c_result + + if steps is not None: + with nogil: + c_result = move(cpp_filling.sequences( + starts.view(), + steps.view(), + sizes.view(), + )) + else: + with nogil: + c_result = move(cpp_filling.sequences( + starts.view(), + sizes.view(), + )) + return Column.from_libcudf(move(c_result)) + +cpdef Column sort_lists( + Column input, + bool ascending, + null_order na_position, + bool stable = False +): + """Sort the elements within a list in each row of a list column. + + For details, see :cpp:func:`sort_lists`. + + Parameters + ---------- + input : Column + The input column. + ascending : bool + If true, the sort order is ascending. Otherwise, the sort order is descending. + na_position : NullOrder + If na_position equals NullOrder.BEFORE, then the null values in the output + column are placed first. Otherwise, they are placed after. + stable : bool + If true, :cpp:func:`stable_sort_lists` is used. Otherwise, + :cpp:func:`sort_lists` is used. + + Returns + ------- + Column + A new Column with elements in each list sorted. + """ + cdef unique_ptr[column] c_result + cdef ListColumnView list_view = input.list_view() + + cdef order c_sort_order = ( + order.ASCENDING if ascending else order.DESCENDING + ) + + with nogil: + if stable: + c_result = move(cpp_stable_sort_lists( + list_view.view(), + c_sort_order, + na_position, + )) + else: + c_result = move(cpp_sort_lists( + list_view.view(), + c_sort_order, + na_position, + )) + return Column.from_libcudf(move(c_result)) + + +cpdef Column difference_distinct( + Column lhs, + Column rhs, + bool nulls_equal=True, + bool nans_equal=True +): + """Create a lists column of the distinct elements in each row of lhs + that are not present in the corresponding row of rhs. + + For details, see :cpp:func:`difference_distinct`. + + Parameters + ---------- + lhs : Column + The input lists column of elements that may be included. + rhs : Column + The input lists column of elements to exclude. + nulls_equal : bool, default True + If true, null elements are considered equal. Otherwise, unequal.
+ nans_equal : bool, default True + If true, libcudf will treat nan elements from {-nan, +nan} + as equal. Otherwise, unequal. + + Returns + ------- + Column + A lists column containing the difference results. + """ + cdef unique_ptr[column] c_result + cdef ListColumnView lhs_view = lhs.list_view() + cdef ListColumnView rhs_view = rhs.list_view() + + cdef null_equality c_nulls_equal = ( + null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL + ) + cdef nan_equality c_nans_equal = ( + nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL + ) + + with nogil: + c_result = move(cpp_set_operations.difference_distinct( + lhs_view.view(), + rhs_view.view(), + c_nulls_equal, + c_nans_equal, + )) + return Column.from_libcudf(move(c_result)) + + +cpdef Column have_overlap( + Column lhs, + Column rhs, + bool nulls_equal=True, + bool nans_equal=True +): + """Check if lists at each row of the given lists columns overlap. + + For details, see :cpp:func:`have_overlap`. + + Parameters + ---------- + lhs : Column + The input lists column for one side. + rhs : Column + The input lists column for the other side. + nulls_equal : bool, default True + If true, null elements are considered equal. Otherwise, unequal. + nans_equal : bool, default True + If true, libcudf will treat nan elements from {-nan, +nan} + as equal. Otherwise, unequal. + + Returns + ------- + Column + A column containing the check results. + """ + cdef unique_ptr[column] c_result + cdef ListColumnView lhs_view = lhs.list_view() + cdef ListColumnView rhs_view = rhs.list_view() + + cdef null_equality c_nulls_equal = ( + null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL + ) + cdef nan_equality c_nans_equal = ( + nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL + ) + + with nogil: + c_result = move(cpp_set_operations.have_overlap( + lhs_view.view(), + rhs_view.view(), + c_nulls_equal, + c_nans_equal, + )) + return Column.from_libcudf(move(c_result)) + + +cpdef Column intersect_distinct( + Column lhs, + Column rhs, + bool nulls_equal=True, + bool nans_equal=True +): + """Create a lists column of distinct elements common to two input lists columns. + + For details, see :cpp:func:`intersect_distinct`. + + Parameters + ---------- + lhs : Column + The input lists column for one side. + rhs : Column + The input lists column for the other side. + nulls_equal : bool, default True + If true, null elements are considered equal. Otherwise, unequal. + nans_equal : bool, default True + If true, libcudf will treat nan elements from {-nan, +nan} + as equal. Otherwise, unequal. + + Returns + ------- + Column + A lists column containing the intersection results. + """ + cdef unique_ptr[column] c_result + cdef ListColumnView lhs_view = lhs.list_view() + cdef ListColumnView rhs_view = rhs.list_view() + + cdef null_equality c_nulls_equal = ( + null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL + ) + cdef nan_equality c_nans_equal = ( + nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL + ) + + with nogil: + c_result = move(cpp_set_operations.intersect_distinct( + lhs_view.view(), + rhs_view.view(), + c_nulls_equal, + c_nans_equal, + )) + return Column.from_libcudf(move(c_result)) + + +cpdef Column union_distinct( + Column lhs, + Column rhs, + bool nulls_equal=True, + bool nans_equal=True +): + """Create a lists column of distinct elements found in + either of two input lists columns.
+ + For details, see :cpp:func:`union_distinct`. + + Parameters + ---------- + lhs : Column + The input lists column for one side. + rhs : Column + The input lists column for the other side. + nulls_equal : bool, default True + If true, null elements are considered equal. Otherwise, unequal. + nans_equal : bool, default True + If true, libcudf will treat nan elements from {-nan, +nan} + as equal. Otherwise, unequal. + + Returns + ------- + Column + A lists column containing the union results. + """ + cdef unique_ptr[column] c_result + cdef ListColumnView lhs_view = lhs.list_view() + cdef ListColumnView rhs_view = rhs.list_view() + + cdef null_equality c_nulls_equal = ( + null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL + ) + cdef nan_equality c_nans_equal = ( + nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL + ) + + with nogil: + c_result = move(cpp_set_operations.union_distinct( + lhs_view.view(), + rhs_view.view(), + c_nulls_equal, + c_nans_equal, + )) + return Column.from_libcudf(move(c_result)) + + +cpdef Column apply_boolean_mask(Column input, Column boolean_mask): + """Filters elements in each row of the input lists column using a boolean mask. + + For details, see :cpp:func:`apply_boolean_mask`. + + Parameters + ---------- + input : Column + The input column. + boolean_mask : Column + The boolean mask. + + Returns + ------- + Column + A Column of filtered elements based upon the boolean mask. + """ + cdef unique_ptr[column] c_result + cdef ListColumnView list_view = input.list_view() + cdef ListColumnView mask_view = boolean_mask.list_view() + with nogil: + c_result = move(cpp_apply_boolean_mask( + list_view.view(), + mask_view.view(), + )) + return Column.from_libcudf(move(c_result)) + + +cpdef Column distinct(Column input, bool nulls_equal, bool nans_equal): + """Create a new list column without duplicate elements in each list. + + For details, see :cpp:func:`distinct`. + + Parameters + ---------- + input : Column + The input column. + nulls_equal : bool + If true, null elements are considered equal. Otherwise, unequal. + nans_equal : bool + If true, libcudf will treat nan elements from {-nan, +nan} + as equal. Otherwise, unequal. + + Returns + ------- + Column + A new list column without duplicate elements in each list.
+ """ + cdef unique_ptr[column] c_result + cdef ListColumnView list_view = input.list_view() + + cdef null_equality c_nulls_equal = ( + null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL + ) + cdef nan_equality c_nans_equal = ( + nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL + ) + + with nogil: + c_result = move(cpp_distinct( + list_view.view(), + c_nulls_equal, + c_nans_equal, + )) + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/scalar.pxd b/python/pylibcudf/pylibcudf/scalar.pxd index 1f10649c4e0..8664dfa4b7e 100644 --- a/python/pylibcudf/pylibcudf/scalar.pxd +++ b/python/pylibcudf/pylibcudf/scalar.pxd @@ -6,6 +6,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from rmm._lib.memory_resource cimport DeviceMemoryResource +from .column cimport Column from .types cimport DataType @@ -23,5 +24,8 @@ cdef class Scalar: cpdef DataType type(self) cpdef bool is_valid(self) + @staticmethod + cdef Scalar empty_like(Column column) + @staticmethod cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=*) diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx index c2b89b222cf..3e20938af0c 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyx +++ b/python/pylibcudf/pylibcudf/scalar.pyx @@ -2,10 +2,13 @@ from cython cimport no_gc_clear from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move from pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport make_empty_scalar_like from rmm._lib.memory_resource cimport get_current_device_resource +from .column cimport Column from .types cimport DataType @@ -45,6 +48,21 @@ cdef class Scalar: """True if the scalar is valid, false if not""" return self.get().is_valid() + @staticmethod + cdef Scalar empty_like(Column column): + """Construct a null scalar with the same type as column. + + Parameters + ---------- + column + Column to take type from + + Returns + ------- + New empty (null) scalar of the given type. + """ + return Scalar.from_libcudf(move(make_empty_scalar_like(column.view()))) + @staticmethod cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=None): """Construct a Scalar object from a libcudf scalar. diff --git a/python/pylibcudf/pylibcudf/tests/common/utils.py b/python/pylibcudf/pylibcudf/tests/common/utils.py index 798b14c01a8..e92a5fc655f 100644 --- a/python/pylibcudf/pylibcudf/tests/common/utils.py +++ b/python/pylibcudf/pylibcudf/tests/common/utils.py @@ -6,11 +6,11 @@ import numpy as np import pyarrow as pa +import pylibcudf as plc import pytest +from pyarrow.parquet import write_table as pq_write_table from pylibcudf.io.types import CompressionType -from cudf._lib import pylibcudf as plc - def metadata_from_arrow_type( pa_type: pa.Array, @@ -103,15 +103,68 @@ def _make_fields_nullable(typ): return pa.list_(new_fields[0]) return typ + def _contains_type(parent_typ, typ_checker): + """ + Check whether the parent or one of the children + satisfies the typ_checker. 
+ """ + if typ_checker(parent_typ): + return True + if pa.types.is_nested(parent_typ): + for i in range(parent_typ.num_fields): + if _contains_type(parent_typ.field(i).type, typ_checker): + return True + return False + if not check_field_nullability: rhs_type = _make_fields_nullable(rhs.type) rhs = rhs.cast(rhs_type) lhs_type = _make_fields_nullable(lhs.type) - lhs = rhs.cast(lhs_type) + lhs = lhs.cast(lhs_type) - if pa.types.is_floating(lhs.type) and pa.types.is_floating(rhs.type): - np.testing.assert_array_almost_equal(lhs, rhs) + assert lhs.type == rhs.type, f"{lhs.type} != {rhs.type}" + if _contains_type(lhs.type, pa.types.is_floating) and _contains_type( + rhs.type, pa.types.is_floating + ): + # Flatten nested arrays to lists to do comparisons if nested + # This is so we can do approximate comparisons + # for floats in numpy + def _flatten_arrays(arr): + if pa.types.is_nested(arr.type): + flattened = arr.flatten() + flat_arrs = [] + if isinstance(flattened, list): + for flat_arr in flattened: + flat_arrs += _flatten_arrays(flat_arr) + else: + flat_arrs = [flattened] + else: + flat_arrs = [arr] + return flat_arrs + + if isinstance(lhs, (pa.ListArray, pa.StructArray)): + lhs = _flatten_arrays(lhs) + rhs = _flatten_arrays(rhs) + else: + # Just a regular double array + lhs = [lhs] + rhs = [rhs] + + for lh_arr, rh_arr in zip(lhs, rhs): + # Check that NaN positions match + # and then filter out nans + lhs_nans = pa.compute.is_nan(lh_arr) + rhs_nans = pa.compute.is_nan(rh_arr) + assert lhs_nans.equals(rhs_nans) + + if pa.compute.any(lhs_nans) or pa.compute.any(rhs_nans): + # masks must be equal at this point + mask = pa.compute.fill_null(pa.compute.invert(lhs_nans), True) + lh_arr = lh_arr.filter(mask) + rh_arr = rh_arr.filter(mask) + + np.testing.assert_array_almost_equal(lh_arr, rh_arr) else: assert lhs.equals(rhs) @@ -266,6 +319,16 @@ def make_source(path_or_buf, pa_table, format, **kwargs): df.to_json(path_or_buf, mode=mode, **kwargs) elif format == "csv": df.to_csv(path_or_buf, mode=mode, **kwargs) + elif format == "parquet": + # The conversion to pandas is lossy (doesn't preserve + # nested types) so we + # will just use pyarrow directly to write this + pq_write_table( + pa_table, + pa.PythonFile(path_or_buf) + if isinstance(path_or_buf, io.IOBase) + else path_or_buf, + ) if isinstance(path_or_buf, io.IOBase): path_or_buf.seek(0) return path_or_buf diff --git a/python/pylibcudf/pylibcudf/tests/conftest.py b/python/pylibcudf/pylibcudf/tests/conftest.py index 83166bb4990..fdce6f353ca 100644 --- a/python/pylibcudf/pylibcudf/tests/conftest.py +++ b/python/pylibcudf/pylibcudf/tests/conftest.py @@ -169,6 +169,21 @@ def source_or_sink(request, tmp_path): return fp_or_buf() +@pytest.fixture( + params=["a.txt", pathlib.Path("a.txt"), io.BytesIO], +) +def binary_source_or_sink(request, tmp_path): + fp_or_buf = request.param + if isinstance(fp_or_buf, str): + return f"{tmp_path}/{fp_or_buf}" + elif isinstance(fp_or_buf, os.PathLike): + return tmp_path.joinpath(fp_or_buf) + elif issubclass(fp_or_buf, io.IOBase): + # Must construct io.StringIO/io.BytesIO inside + # fixture, or we'll end up re-using it + return fp_or_buf() + + unsupported_types = { # Not supported by pandas # TODO: find a way to test these diff --git a/python/pylibcudf/pylibcudf/tests/io/test_parquet.py b/python/pylibcudf/pylibcudf/tests/io/test_parquet.py new file mode 100644 index 00000000000..7c27115008e --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/io/test_parquet.py @@ -0,0 +1,108 @@ +# Copyright (c) 2024, NVIDIA CORPORATION.
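The tests below drive read_parquet through shared fixtures; stripped of the fixtures, a direct call looks roughly like this (the file name is illustrative):

import pyarrow as pa
import pyarrow.parquet as pq
import pylibcudf as plc

pq.write_table(pa.table({"a": [1, 2, 3, 4]}), "example.parquet")

tbl_w_meta = plc.io.parquet.read_parquet(
    plc.io.SourceInfo(["example.parquet"]),
    columns=["a"],
    skip_rows=1,  # skip the first row...
    num_rows=2,   # ...then read two rows
)
# .tbl holds the data; the metadata carries column names and
# per-file user data.
assert tbl_w_meta.tbl.num_rows() == 2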
+import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc +import pytest +from pyarrow.parquet import read_table +from pylibcudf.expressions import ( + ASTOperator, + ColumnNameReference, + ColumnReference, + Literal, + Operation, +) +from utils import assert_table_and_meta_eq, make_source + +# Shared kwargs to pass to make_source +_COMMON_PARQUET_SOURCE_KWARGS = {"format": "parquet"} + + +@pytest.mark.parametrize("columns", [None, ["col_int64", "col_bool"]]) +def test_read_parquet_basic( + table_data, binary_source_or_sink, nrows_skiprows, columns +): + _, pa_table = table_data + nrows, skiprows = nrows_skiprows + + source = make_source( + binary_source_or_sink, pa_table, **_COMMON_PARQUET_SOURCE_KWARGS + ) + + res = plc.io.parquet.read_parquet( + plc.io.SourceInfo([source]), + num_rows=nrows, + skip_rows=skiprows, + columns=columns, + ) + + if columns is not None: + pa_table = pa_table.select(columns) + + # Adapt to nrows/skiprows + pa_table = pa_table.slice( + offset=skiprows, length=nrows if nrows != -1 else None + ) + + assert_table_and_meta_eq(pa_table, res, check_field_nullability=False) + + +@pytest.mark.parametrize( + "pa_filters,plc_filters", + [ + ( + pc.field("col_int64") >= 10, + Operation( + ASTOperator.GREATER_EQUAL, + ColumnNameReference("col_int64"), + Literal(plc.interop.from_arrow(pa.scalar(10))), + ), + ), + ( + (pc.field("col_int64") >= 10) & (pc.field("col_double") < 0), + Operation( + ASTOperator.LOGICAL_AND, + Operation( + ASTOperator.GREATER_EQUAL, + ColumnNameReference("col_int64"), + Literal(plc.interop.from_arrow(pa.scalar(10))), + ), + Operation( + ASTOperator.LESS, + ColumnNameReference("col_double"), + Literal(plc.interop.from_arrow(pa.scalar(0.0))), + ), + ), + ), + ( + (pc.field(0) == 10), + Operation( + ASTOperator.EQUAL, + ColumnReference(0), + Literal(plc.interop.from_arrow(pa.scalar(10))), + ), + ), + ], +) +def test_read_parquet_filters( + table_data, binary_source_or_sink, pa_filters, plc_filters +): + _, pa_table = table_data + + source = make_source( + binary_source_or_sink, pa_table, **_COMMON_PARQUET_SOURCE_KWARGS + ) + + plc_table_w_meta = plc.io.parquet.read_parquet( + plc.io.SourceInfo([source]), filters=plc_filters + ) + exp = read_table(source, filters=pa_filters) + assert_table_and_meta_eq( + exp, plc_table_w_meta, check_field_nullability=False + ) + + +# TODO: Test these options +# list row_groups = None, +# ^^^ This one is not tested since it's not in pyarrow/pandas, deprecate? 
+# bool convert_strings_to_categories = False, +# bool use_pandas_metadata = True diff --git a/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py b/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py index 907e69d309a..747f58ec8cf 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py @@ -2,10 +2,8 @@ import io -import pyarrow as pa import pylibcudf as plc import pytest -from pylibcudf.io.datasource import NativeFileDatasource @pytest.fixture(params=[plc.io.SourceInfo, plc.io.SinkInfo]) @@ -17,10 +15,8 @@ def _skip_invalid_sinks(io_class, sink): """ Skip invalid sinks for SinkInfo """ - if io_class is plc.io.SinkInfo and isinstance( - sink, (bytes, NativeFileDatasource) - ): - pytest.skip(f"{sink} is not a valid input for SinkInfo") + if io_class is plc.io.SinkInfo and isinstance(sink, bytes): + pytest.skip("bytes is not a valid input for SinkInfo") @pytest.mark.parametrize( @@ -29,7 +25,6 @@ def _skip_invalid_sinks(io_class, sink): "a.txt", b"hello world", io.BytesIO(b"hello world"), - NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")), ], ) def test_source_info_ctor(io_class, source, tmp_path): @@ -46,13 +41,12 @@ def test_source_info_ctor(io_class, source, tmp_path): @pytest.mark.parametrize( "sources", [ + ["a.txt"], + [b"hello world"], + [io.BytesIO(b"hello world")], ["a.txt", "a.txt"], [b"hello world", b"hello there"], [io.BytesIO(b"hello world"), io.BytesIO(b"hello there")], - [ - NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")), - NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")), - ], ], ) def test_source_info_ctor_multiple(io_class, sources, tmp_path): @@ -78,11 +72,6 @@ def test_source_info_ctor_multiple(io_class, sources, tmp_path): io.BytesIO(b"hello there"), b"hello world", ], - [ - NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")), - "awef.txt", - b"hello world", - ], ], ) def test_source_info_ctor_mixing_invalid(io_class, sources, tmp_path): diff --git a/python/pylibcudf/pylibcudf/tests/test_binaryops.py b/python/pylibcudf/pylibcudf/tests/test_binaryops.py new file mode 100644 index 00000000000..f784cb3c191 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_binaryops.py @@ -0,0 +1,785 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
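+#
+# Tests for plc.binaryop.binary_operation: each parametrized case pairs a
+# libcudf binary operator with a pyarrow.compute or pure-Python reference;
+# combinations rejected by plc.binaryop.is_supported_operation must raise
+# TypeError.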
+ +import math + +import numpy as np +import pyarrow as pa +import pylibcudf as plc +import pytest +from utils import assert_column_eq + + +def idfn(param): + ltype, rtype, outtype, plc_op, _ = param + params = (plc_op.name, ltype, rtype, outtype) + return "-".join(map(str, params)) + + +@pytest.fixture(params=[True, False], ids=["nulls", "no_nulls"]) +def nulls(request): + return request.param + + +def make_col(dtype, nulls): + if dtype == "int64": + data = [1, 2, 3, 4, 5] + pa_type = pa.int64() + elif dtype == "uint64": + data = [1, 2, 3, 4, 5] + pa_type = pa.uint64() + elif dtype == "float64": + data = [1.0, 2.0, 3.0, 4.0, 5.0] + pa_type = pa.float64() + elif dtype == "bool": + data = [True, False, True, False, True] + pa_type = pa.bool_() + elif dtype == "timestamp64[ns]": + data = [ + np.datetime64("2022-01-01"), + np.datetime64("2022-01-02"), + np.datetime64("2022-01-03"), + np.datetime64("2022-01-04"), + np.datetime64("2022-01-05"), + ] + pa_type = pa.timestamp("ns") + elif dtype == "timedelta64[ns]": + data = [ + np.timedelta64(1, "ns"), + np.timedelta64(2, "ns"), + np.timedelta64(3, "ns"), + np.timedelta64(4, "ns"), + np.timedelta64(5, "ns"), + ] + pa_type = pa.duration("ns") + else: + raise ValueError("Unsupported dtype") + + if nulls: + data[3] = None + + return pa.array(data, type=pa_type) + + +@pytest.fixture +def pa_data(request, nulls): + ltype, rtype, outtype = request.param + values = make_col(ltype, nulls), make_col(rtype, nulls), outtype + return values + + +@pytest.fixture +def plc_data(pa_data): + lhs, rhs, outtype = pa_data + return ( + plc.interop.from_arrow(lhs), + plc.interop.from_arrow(rhs), + plc.interop.from_arrow(pa.from_numpy_dtype(np.dtype(outtype))), + ) + + +@pytest.fixture +def tests(request, nulls): + ltype, rtype, py_outtype, plc_op, py_op = request.param + pa_lhs, pa_rhs = make_col(ltype, nulls), make_col(rtype, nulls) + plc_lhs, plc_rhs = ( + plc.interop.from_arrow(pa_lhs), + plc.interop.from_arrow(pa_rhs), + ) + plc_dtype = plc.interop.from_arrow( + pa.from_numpy_dtype(np.dtype(py_outtype)) + ) + return ( + pa_lhs, + pa_rhs, + py_outtype, + plc_lhs, + plc_rhs, + plc_dtype, + py_op, + plc_op, + ) + + +def custom_pyop(func): + def wrapper(x, y): + x = x.to_pylist() + y = y.to_pylist() + + def inner(x, y): + if x is None or y is None: + return None + return func(x, y) + + return pa.array([inner(x, y) for x, y in zip(x, y)]) + + return wrapper + + +@custom_pyop +def py_floordiv(x, y): + return x // y + + +@custom_pyop +def py_pmod(x, y): + return (x % y + y) % y + + +@custom_pyop +def py_mod(x, y): + return x % y + + +@custom_pyop +def py_atan2(x, y): + return math.atan2(x, y) + + +@custom_pyop +def py_shift_right_unsigned(x, y): + unsigned_x = np.uint32(x) + result = unsigned_x >> y + return result + + +@pytest.mark.parametrize( + "tests", + [ + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.ADD, + pa.compute.add, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.ADD, + pa.compute.add, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.ADD, + pa.compute.add, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.SUB, + pa.compute.subtract, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.SUB, + pa.compute.subtract, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.SUB, + pa.compute.subtract, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.MUL, + pa.compute.multiply, + ), + ( + 
"int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.MUL, + pa.compute.multiply, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.MUL, + pa.compute.multiply, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.DIV, + pa.compute.divide, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.DIV, + pa.compute.divide, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.DIV, + pa.compute.divide, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.TRUE_DIV, + pa.compute.divide, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.TRUE_DIV, + pa.compute.divide, + ), + ( + "int64", + "int64", + "timedelta64[ns]", + plc.binaryop.BinaryOperator.TRUE_DIV, + pa.compute.divide, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.FLOOR_DIV, + py_floordiv, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.FLOOR_DIV, + py_floordiv, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.FLOOR_DIV, + py_floordiv, + ), + ("int64", "int64", "int64", plc.binaryop.BinaryOperator.MOD, py_mod), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.MOD, + py_mod, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.MOD, + py_mod, + ), + ("int64", "int64", "int64", plc.binaryop.BinaryOperator.PMOD, py_pmod), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.PMOD, + py_pmod, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.PMOD, + py_pmod, + ), + ("int64", "int64", "int64", plc.binaryop.BinaryOperator.PYMOD, py_mod), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.PYMOD, + py_mod, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.PYMOD, + py_mod, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.POW, + pa.compute.power, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.POW, + pa.compute.power, + ), + ( + "int64", + "int64", + "timedelta64[ns]", + plc.binaryop.BinaryOperator.POW, + pa.compute.power, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.INT_POW, + pa.compute.power, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.INT_POW, + pa.compute.power, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.INT_POW, + pa.compute.power, + ), + ( + "float64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LOG_BASE, + pa.compute.logb, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LOG_BASE, + pa.compute.logb, + ), + ( + "int64", + "int64", + "timedelta64[ns]", + plc.binaryop.BinaryOperator.LOG_BASE, + pa.compute.logb, + ), + ( + "float64", + "float64", + "float64", + plc.binaryop.BinaryOperator.ATAN2, + py_atan2, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.ATAN2, + py_atan2, + ), + ( + "int64", + "int64", + "timedelta64[ns]", + plc.binaryop.BinaryOperator.ATAN2, + py_atan2, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.SHIFT_LEFT, + pa.compute.shift_left, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.SHIFT_LEFT, + pa.compute.shift_left, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.SHIFT_LEFT, + pa.compute.shift_left, + ), + ( + "int64", + "int64", + "int64", + 
plc.binaryop.BinaryOperator.SHIFT_RIGHT, + pa.compute.shift_right, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.SHIFT_RIGHT, + pa.compute.shift_right, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.SHIFT_RIGHT, + pa.compute.shift_right, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, + py_shift_right_unsigned, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, + py_shift_right_unsigned, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, + py_shift_right_unsigned, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.BITWISE_AND, + pa.compute.bit_wise_and, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.BITWISE_AND, + pa.compute.bit_wise_and, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.BITWISE_AND, + pa.compute.bit_wise_and, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.BITWISE_OR, + pa.compute.bit_wise_or, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.BITWISE_OR, + pa.compute.bit_wise_or, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.BITWISE_OR, + pa.compute.bit_wise_or, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.BITWISE_XOR, + pa.compute.bit_wise_xor, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.BITWISE_XOR, + pa.compute.bit_wise_xor, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.BITWISE_XOR, + pa.compute.bit_wise_xor, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.LOGICAL_AND, + pa.compute.and_, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LOGICAL_AND, + pa.compute.and_, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.LOGICAL_AND, + pa.compute.and_, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.LOGICAL_OR, + pa.compute.or_, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LOGICAL_OR, + pa.compute.or_, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.LOGICAL_OR, + pa.compute.or_, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.EQUAL, + pa.compute.equal, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.EQUAL, + pa.compute.equal, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.NOT_EQUAL, + pa.compute.not_equal, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NOT_EQUAL, + pa.compute.not_equal, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.LESS, + pa.compute.less, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LESS, + pa.compute.less, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.GREATER, + pa.compute.greater, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.GREATER, + pa.compute.greater, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.LESS_EQUAL, + pa.compute.less_equal, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LESS_EQUAL, + pa.compute.less_equal, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.GREATER_EQUAL, + pa.compute.greater_equal, + ), + ( + "int64", + "float64", + "float64", + 
plc.binaryop.BinaryOperator.GREATER_EQUAL, + pa.compute.greater_equal, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.NULL_EQUALS, + pa.compute.equal, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NULL_EQUALS, + pa.compute.equal, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.NULL_MAX, + pa.compute.max_element_wise, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NULL_MAX, + pa.compute.max_element_wise, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.NULL_MIN, + pa.compute.min_element_wise, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NULL_MIN, + pa.compute.min_element_wise, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.NULL_NOT_EQUALS, + pa.compute.not_equal, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NULL_NOT_EQUALS, + pa.compute.not_equal, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.NULL_LOGICAL_AND, + pa.compute.and_, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NULL_LOGICAL_AND, + pa.compute.and_, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.NULL_LOGICAL_OR, + pa.compute.or_, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NULL_LOGICAL_OR, + pa.compute.or_, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.GENERIC_BINARY, + None, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.INVALID_BINARY, + None, + ), + ], + indirect=True, + ids=idfn, +) +def test_binaryops(tests): + ( + pa_lhs, + pa_rhs, + py_outtype, + plc_lhs, + plc_rhs, + plc_outtype, + py_op, + plc_op, + ) = tests + + def get_result(): + return plc.binaryop.binary_operation( + plc_lhs, + plc_rhs, + plc_op, + plc_outtype, + ) + + if not plc.binaryop.is_supported_operation( + plc_outtype, plc_lhs.type(), plc_rhs.type(), plc_op + ): + with pytest.raises(TypeError): + get_result() + else: + expect = py_op(pa_lhs, pa_rhs).cast(py_outtype) + got = get_result() + assert_column_eq(expect, got) diff --git a/python/pylibcudf/pylibcudf/tests/test_column_factories.py b/python/pylibcudf/pylibcudf/tests/test_column_factories.py index 4c05770a41f..8cedbc6d42f 100644 --- a/python/pylibcudf/pylibcudf/tests/test_column_factories.py +++ b/python/pylibcudf/pylibcudf/tests/test_column_factories.py @@ -1,11 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa +import pylibcudf as plc import pytest from utils import DEFAULT_STRUCT_TESTING_TYPE, assert_column_eq -from cudf._lib import pylibcudf as plc - EMPTY_COL_SIZE = 3 NUMERIC_TYPES = [ diff --git a/python/pylibcudf/pylibcudf/tests/test_column_from_device.py b/python/pylibcudf/pylibcudf/tests/test_column_from_device.py deleted file mode 100644 index c4ff7bb43a5..00000000000 --- a/python/pylibcudf/pylibcudf/tests/test_column_from_device.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. 
- -import pyarrow as pa -import pytest -from utils import assert_column_eq - -import cudf -from cudf._lib import pylibcudf as plc - -VALID_TYPES = [ - pa.int8(), - pa.int16(), - pa.int32(), - pa.int64(), - pa.uint8(), - pa.uint16(), - pa.uint32(), - pa.uint64(), - pa.float32(), - pa.float64(), - pa.bool_(), - pa.timestamp("s"), - pa.timestamp("ms"), - pa.timestamp("us"), - pa.timestamp("ns"), - pa.duration("s"), - pa.duration("ms"), - pa.duration("us"), - pa.duration("ns"), -] - - -@pytest.fixture(params=VALID_TYPES, ids=repr) -def valid_type(request): - return request.param - - -@pytest.fixture -def valid_column(valid_type): - if valid_type == pa.bool_(): - return pa.array([True, False, True], type=valid_type) - return pa.array([1, 2, 3], type=valid_type) - - -def test_from_cuda_array_interface(valid_column): - col = plc.column.Column.from_cuda_array_interface_obj( - cudf.Series(valid_column) - ) - expect = valid_column - - assert_column_eq(expect, col) diff --git a/python/pylibcudf/pylibcudf/tests/test_copying.py b/python/pylibcudf/pylibcudf/tests/test_copying.py index f27fe4e942e..628682d0a66 100644 --- a/python/pylibcudf/pylibcudf/tests/test_copying.py +++ b/python/pylibcudf/pylibcudf/tests/test_copying.py @@ -2,6 +2,7 @@ import pyarrow as pa import pyarrow.compute as pc +import pylibcudf as plc import pytest from utils import ( DEFAULT_STRUCT_TESTING_TYPE, @@ -15,8 +16,6 @@ metadata_from_arrow_type, ) -from cudf._lib import pylibcudf as plc - # TODO: consider moving this to conftest and "pairing" # it with pa_type, so that they don't get out of sync diff --git a/python/pylibcudf/pylibcudf/tests/test_join.py b/python/pylibcudf/pylibcudf/tests/test_join.py index eb25ed915b1..61e02f4d28d 100644 --- a/python/pylibcudf/pylibcudf/tests/test_join.py +++ b/python/pylibcudf/pylibcudf/tests/test_join.py @@ -2,10 +2,9 @@ import numpy as np import pyarrow as pa +import pylibcudf as plc from utils import assert_table_eq -from cudf._lib import pylibcudf as plc - def test_cross_join(): left = pa.Table.from_arrays([[0, 1, 2], [3, 4, 5]], names=["a", "b"]) diff --git a/python/pylibcudf/pylibcudf/tests/test_lists.py b/python/pylibcudf/pylibcudf/tests/test_lists.py index 07ecaed5012..9176bb536ef 100644 --- a/python/pylibcudf/pylibcudf/tests/test_lists.py +++ b/python/pylibcudf/pylibcudf/tests/test_lists.py @@ -1,27 +1,49 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
+import numpy as np import pyarrow as pa +import pylibcudf as plc import pytest from utils import assert_column_eq -from cudf._lib import pylibcudf as plc - @pytest.fixture def test_data(): return [[[[0, 1], [2], [5], [6, 7]], [[8], [9], [], [13, 14, 15]]]] +@pytest.fixture +def list_column(): + return [[0, 1], [2], [5], [6, 7]] + + @pytest.fixture def scalar(): return pa.scalar(1) @pytest.fixture -def column(): +def search_key_column(): return pa.array([3, 2, 5, 6]), pa.array([-1, 0, 0, 0], type=pa.int32()) +@pytest.fixture +def bool_column(): + return pa.array([[False, True], [True], [True], [True, True]]) + + +@pytest.fixture +def set_lists_column(): + lhs = [[np.nan, np.nan, 2, 1, 2], [1, 2, 3], None, [4, None, 5]] + rhs = [[np.nan, 1, 2, 3], [4, 5], [None, 7, 8], [None, None]] + return lhs, rhs + + +@pytest.fixture +def lists_column(): + return [[4, 2, 3, 1], [1, 2, None, 4], [-10, 10, 10, 0]] + + def test_concatenate_rows(test_data): arrow_tbl = pa.Table.from_arrays(test_data[0], names=["a", "b"]) plc_tbl = plc.interop.from_arrow(arrow_tbl) @@ -59,8 +81,7 @@ def test_concatenate_list_elements(test_data, dropna, expected): assert_column_eq(expect, res) -def test_contains_scalar(test_data, scalar): - list_column = test_data[0][0] +def test_contains_scalar(list_column, scalar): arr = pa.array(list_column) plc_column = plc.interop.from_arrow(arr) @@ -72,9 +93,9 @@ def test_contains_scalar(test_data, scalar): assert_column_eq(expect, res) -def test_contains_list_column(test_data): - list_column1 = test_data[0][0] - list_column2 = [1, 3, 5, 1] +def test_contains_list_column(list_column, search_key_column): + list_column1 = list_column + list_column2, _ = search_key_column arr1 = pa.array(list_column1) arr2 = pa.array(list_column2) @@ -82,7 +103,7 @@ def test_contains_list_column(test_data): plc_column2 = plc.interop.from_arrow(arr2) res = plc.lists.contains(plc_column1, plc_column2) - expect = pa.array([True, False, True, False]) + expect = pa.array([False, True, True, True]) assert_column_eq(expect, res) @@ -110,8 +131,7 @@ def test_contains_nulls(list_column, expected): assert_column_eq(expect, res) -def test_index_of_scalar(test_data, scalar): - list_column = test_data[0][0] +def test_index_of_scalar(list_column, scalar): arr = pa.array(list_column) plc_column = plc.interop.from_arrow(arr) @@ -123,21 +143,19 @@ def test_index_of_scalar(test_data, scalar): assert_column_eq(expect, res) -def test_index_of_list_column(test_data, column): - list_column = test_data[0][0] +def test_index_of_list_column(list_column, search_key_column): arr1 = pa.array(list_column) - arr2, expect = column + arr2, expect = search_key_column plc_column1 = plc.interop.from_arrow(arr1) plc_column2 = plc.interop.from_arrow(arr2) res = plc.lists.index_of(plc_column1, plc_column2, True) - expect = pa.array(column[1], type=pa.int32()) + expect = pa.array(search_key_column[1], type=pa.int32()) assert_column_eq(expect, res) -def test_reverse(test_data): - list_column = test_data[0][0] +def test_reverse(list_column): arr = pa.array(list_column) plc_column = plc.interop.from_arrow(arr) @@ -149,8 +167,7 @@ def test_reverse(test_data): def test_segmented_gather(test_data): - list_column1 = test_data[0][0] - list_column2 = test_data[0][1] + list_column1, list_column2 = test_data[0] plc_column1 = plc.interop.from_arrow(pa.array(list_column1)) plc_column2 = plc.interop.from_arrow(pa.array(list_column2)) @@ -162,22 +179,212 @@ def test_segmented_gather(test_data): assert_column_eq(expect, res) -def 
test_extract_list_element_scalar(test_data): - arr = pa.array(test_data[0][0]) - plc_column = plc.interop.from_arrow(arr) +def test_extract_list_element_scalar(list_column): + plc_column = plc.interop.from_arrow(pa.array(list_column)) res = plc.lists.extract_list_element(plc_column, 0) - expect = pa.compute.list_element(test_data[0][0], 0) + expect = pa.compute.list_element(list_column, 0) assert_column_eq(expect, res) -def test_extract_list_element_column(test_data): - arr = pa.array(test_data[0][0]) - plc_column = plc.interop.from_arrow(arr) +def test_extract_list_element_column(list_column): + plc_column = plc.interop.from_arrow(pa.array(list_column)) indices = plc.interop.from_arrow(pa.array([0, 1, -4, -1])) res = plc.lists.extract_list_element(plc_column, indices) expect = pa.array([0, None, None, 7]) assert_column_eq(expect, res) + + +def test_count_elements(test_data): + arr = pa.array(test_data[0][1]) + plc_column = plc.interop.from_arrow(arr) + res = plc.lists.count_elements(plc_column) + + expect = pa.array([1, 1, 0, 3], type=pa.int32()) + + assert_column_eq(expect, res) + + +def test_sequences(): + starts = plc.interop.from_arrow(pa.array([0, 1, 2, 3, 4])) + steps = plc.interop.from_arrow(pa.array([2, 1, 1, 1, -3])) + sizes = plc.interop.from_arrow(pa.array([0, 2, 2, 1, 3])) + + res1 = plc.lists.sequences(starts, sizes, steps) + res2 = plc.lists.sequences(starts, sizes) + + expect1 = pa.array([[], [1, 2], [2, 3], [3], [4, 1, -2]]) + expect2 = pa.array([[], [1, 2], [2, 3], [3], [4, 5, 6]]) + + assert_column_eq(expect1, res1) + + assert_column_eq(expect2, res2) + + +@pytest.mark.parametrize( + "ascending,na_position,expected", + [ + ( + True, + plc.types.NullOrder.BEFORE, + [[1, 2, 3, 4], [None, 1, 2, 4], [-10, 0, 10, 10]], + ), + ( + True, + plc.types.NullOrder.AFTER, + [[1, 2, 3, 4], [1, 2, 4, None], [-10, 0, 10, 10]], + ), + ( + False, + plc.types.NullOrder.BEFORE, + [[4, 3, 2, 1], [4, 2, 1, None], [10, 10, 0, -10]], + ), + ( + False, + plc.types.NullOrder.AFTER, + [[4, 3, 2, 1], [None, 4, 2, 1], [10, 10, 0, -10]], + ), + ], +) +def test_sort_lists(lists_column, ascending, na_position, expected): + plc_column = plc.interop.from_arrow(pa.array(lists_column)) + res = plc.lists.sort_lists(plc_column, ascending, na_position, False) + res_stable = plc.lists.sort_lists(plc_column, ascending, na_position, True) + + expect = pa.array(expected) + + assert_column_eq(expect, res) + assert_column_eq(expect, res_stable) + + +@pytest.mark.parametrize( + "set_operation,nans_equal,nulls_equal,expected", + [ + ( + plc.lists.difference_distinct, + True, + True, + [[], [1, 2, 3], None, [4, 5]], + ), + ( + plc.lists.difference_distinct, + False, + True, + [[], [1, 2, 3], None, [4, None, 5]], + ), + ( + plc.lists.have_overlap, + True, + True, + [True, False, None, True], + ), + ( + plc.lists.have_overlap, + False, + False, + [True, False, None, False], + ), + ( + plc.lists.intersect_distinct, + True, + True, + [[np.nan, 1, 2], [], None, [None]], + ), + ( + plc.lists.intersect_distinct, + True, + False, + [[1, 2], [], None, [None]], + ), + ( + plc.lists.union_distinct, + False, + True, + [ + [np.nan, 2, 1, 3], + [1, 2, 3, 4, 5], + None, + [4, None, 5, None, None], + ], + ), + ( + plc.lists.union_distinct, + False, + False, + [ + [np.nan, np.nan, 2, 1, np.nan, 3], + [1, 2, 3, 4, 5], + None, + [4, None, 5, None, None], + ], + ), + ], +) +def test_set_operations( + set_lists_column, set_operation, 
nans_equal, nulls_equal, expected +): + lhs, rhs = set_lists_column + + res = set_operation( + plc.interop.from_arrow(pa.array(lhs)), + plc.interop.from_arrow(pa.array(rhs)), + nans_equal, + nulls_equal, + ) + + if set_operation is not plc.lists.have_overlap: + expect = pa.array(expected, type=pa.list_(pa.float64())) + else: + expect = pa.array(expected) + assert_column_eq(expect, res) + + +@pytest.mark.parametrize( + "nans_equal,nulls_equal,expected", + [ + (True, True, [[np.nan, 0, 1, 2, 3], [3, 1, 2], None, [4, None, 5]]), + ( + False, + True, + [[np.nan, 0, 1, 2, 3], [3, 1, 2], None, [4, None, None, 5]], + ), + ( + True, + False, + [[np.nan, np.nan, 0, 1, 2, 3], [3, 1, 2], None, [4, None, 5]], + ), + ( + False, + False, + [ + [np.nan, np.nan, 0, 1, 2, 3], + [3, 1, 2], + None, + [4, None, None, 5], + ], + ), + ], +) +def test_distinct(nans_equal, nulls_equal, expected): + list_column = [ + [np.nan, np.nan, 0, 1, 2, 3, 2], + [3, 1, 2], + None, + [4, None, None, 5], + ] + arr = pa.array(list_column) + plc_column = plc.interop.from_arrow(arr) + + res = plc.lists.distinct(plc_column, nans_equal, nulls_equal) + + expect = pa.array(expected) + + assert_column_eq(expect, res) diff --git a/python/pylibcudf/pylibcudf/tests/test_reshape.py b/python/pylibcudf/pylibcudf/tests/test_reshape.py index da1157e5832..01115bc363a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_reshape.py +++ b/python/pylibcudf/pylibcudf/tests/test_reshape.py @@ -1,11 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa +import pylibcudf as plc import pytest from utils import assert_column_eq, assert_table_eq -from cudf._lib import pylibcudf as plc - @pytest.fixture(scope="module") def reshape_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_traits.py b/python/pylibcudf/pylibcudf/tests/test_traits.py index 6c22cb02f21..2570e8abd51 100644 --- a/python/pylibcudf/pylibcudf/tests/test_traits.py +++ b/python/pylibcudf/pylibcudf/tests/test_traits.py @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from cudf._lib import pylibcudf as plc +import pylibcudf as plc def test_is_relationally_comparable(): diff --git a/python/pylibcudf/pylibcudf/tests/test_transform.py b/python/pylibcudf/pylibcudf/tests/test_transform.py index 312939888dd..06fc35d8835 100644 --- a/python/pylibcudf/pylibcudf/tests/test_transform.py +++ b/python/pylibcudf/pylibcudf/tests/test_transform.py @@ -3,10 +3,9 @@ import math import pyarrow as pa +import pylibcudf as plc from utils import assert_column_eq -from cudf._lib import pylibcudf as plc - def test_nans_to_nulls(has_nans): if has_nans: diff --git a/python/pylibcudf/pylibcudf/tests/test_unary.py b/python/pylibcudf/pylibcudf/tests/test_unary.py index b5e4f0cb0e8..9b8085d5c52 100644 --- a/python/pylibcudf/pylibcudf/tests/test_unary.py +++ b/python/pylibcudf/pylibcudf/tests/test_unary.py @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from cudf._lib import pylibcudf as plc +import pylibcudf as plc def test_is_supported_cast(): diff --git a/python/pylibcudf/pylibcudf/types.pyx b/python/pylibcudf/pylibcudf/types.pyx index 0a8cf5fcb6a..d13365eebfb 100644 --- a/python/pylibcudf/pylibcudf/types.pyx +++ b/python/pylibcudf/pylibcudf/types.pyx @@ -1,7 +1,8 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION.
from libc.stdint cimport int32_t -from pylibcudf.libcudf.types cimport data_type, type_id +from pylibcudf.libcudf.types cimport data_type, size_type, type_id +from pylibcudf.libcudf.utilities.type_dispatcher cimport type_to_id from pylibcudf.libcudf.types import type_id as TypeId # no-cython-lint, isort:skip from pylibcudf.libcudf.types import nan_policy as NanPolicy # no-cython-lint, isort:skip @@ -66,3 +67,7 @@ cdef class DataType: cdef DataType ret = DataType.__new__(DataType, type_id.EMPTY) ret.c_obj = dt return ret + + +SIZE_TYPE = DataType(type_to_id[size_type]()) +SIZE_TYPE_ID = SIZE_TYPE.id() diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 2686e09269a..d9f4ffc0759 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ "nvtx>=0.2.1", "packaging", "pyarrow>=16.1.0,<16.2.0a0", - "rmm==24.8.*,>=0.0.0a0", + "rmm==24.10.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -111,13 +111,14 @@ skip = [ [tool.rapids-build-backend] build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", "ninja", "numpy==1.23.*", "pyarrow==16.1.0.*", - "rmm==24.8.*,>=0.0.0a0", + "rmm==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.scikit-build] @@ -127,7 +128,7 @@ cmake.minimum-version = "3.26.4" ninja.make-fallback = true sdist.exclude = ["*tests*"] sdist.reproducible = true -wheel.packages = ["cudf"] +wheel.packages = ["pylibcudf"] [tool.scikit-build.metadata.version] provider = "scikit_build_core.metadata.regex"
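
For reference, the parquet reader exercised by the new tests above can be driven end to end as in the following minimal sketch. This is illustrative only and not part of the patch; the table contents and the in-memory buffer are assumptions.

import io

import pyarrow as pa
import pyarrow.parquet as pq
import pylibcudf as plc

# Write a small pyarrow table to an in-memory parquet buffer, mirroring what
# make_source does for format="parquet".
pa_table = pa.table({"col_int64": [1, 2, 3], "col_double": [0.5, 1.5, 2.5]})
buf = io.BytesIO()
pq.write_table(pa_table, buf)
buf.seek(0)

# Read it back through pylibcudf, as test_read_parquet_basic does: select one
# column and skip the first row (num_rows=-1 reads to the end).
result = plc.io.parquet.read_parquet(
    plc.io.SourceInfo([buf]),
    num_rows=-1,
    skip_rows=1,
    columns=["col_int64"],
)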