diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 21f15ade458..cd7b8aea6d7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + repos: - repo: https://github.com/PyCQA/isort rev: 5.6.4 @@ -56,20 +58,28 @@ repos: hooks: - id: pydocstyle args: ["--config=python/.flake8"] - - repo: local + exclude: | + (?x)^( + ci| + cpp| + conda| + docs| + java| + notebooks| + python/dask_cudf| + python/cudf_kafka| + python/custreamz| + python/cudf/cudf/tests + ) + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v11.1.0 hooks: - id: clang-format - # Using the pre-commit stage to simplify invocation of all - # other hooks simultaneously (via any other hook stage). This - # can be removed if we also move to running clang-format - # entirely through pre-commit. - stages: [commit] - name: clang-format - description: Format files with ClangFormat. - entry: clang-format -i - language: system files: \.(cu|cuh|h|hpp|cpp|inl)$ - args: ['-fallback-style=none'] + types_or: [file] + args: ['-fallback-style=none', '-style=file', '-i'] + - repo: local + hooks: - id: cmake-format name: cmake-format entry: ./cpp/scripts/run-cmake-format.sh cmake-format @@ -78,7 +88,7 @@ repos: # Note that pre-commit autoupdate does not update the versions # of dependencies, so we'll have to update this manually. additional_dependencies: - - cmake-format==0.6.11 + - cmakelang==0.6.13 - id: cmake-lint name: cmake-lint entry: ./cpp/scripts/run-cmake-format.sh cmake-lint @@ -87,7 +97,7 @@ repos: # Note that pre-commit autoupdate does not update the versions # of dependencies, so we'll have to update this manually. additional_dependencies: - - cmake-format==0.6.11 + - cmakelang==0.6.13 - id: copyright-check name: copyright-check # This hook's use of Git tools appears to conflict with diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6d1c0528832..db8a8d88b99 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,69 +1,79 @@ # Contributing to cuDF -Contributions to cuDF fall into the following three categories. - -1. To report a bug, request a new feature, or report a problem with - documentation, please file an [issue](https://github.com/rapidsai/cudf/issues/new/choose) - describing in detail the problem or new feature. The RAPIDS team evaluates - and triages issues, and schedules them for a release. If you believe the - issue needs priority attention, please comment on the issue to notify the - team. -2. To propose and implement a new Feature, please file a new feature request - [issue](https://github.com/rapidsai/cudf/issues/new/choose). Describe the - intended feature and discuss the design and implementation with the team and - community. Once the team agrees that the plan looks good, go ahead and - implement it, using the [code contributions](#code-contributions) guide below. -3. To implement a feature or bug-fix for an existing outstanding issue, please - Follow the [code contributions](#code-contributions) guide below. If you - need more context on a particular issue, please ask in a comment. - -As contributors and maintainers to this project, -you are expected to abide by cuDF's code of conduct. -More information can be found at: [Contributor Code of Conduct](https://docs.rapids.ai/resources/conduct/). +Contributions to cuDF fall into the following categories: + +1. 
To report a bug, request a new feature, or report a problem with documentation, please file an + [issue](https://github.com/rapidsai/cudf/issues/new/choose) describing the problem or new feature + in detail. The RAPIDS team evaluates and triages issues, and schedules them for a release. If you + believe the issue needs priority attention, please comment on the issue to notify the team. +2. To propose and implement a new feature, please file a new feature request + [issue](https://github.com/rapidsai/cudf/issues/new/choose). Describe the intended feature and + discuss the design and implementation with the team and community. Once the team agrees that the + plan looks good, go ahead and implement it, using the [code contributions](#code-contributions) + guide below. +3. To implement a feature or bug fix for an existing issue, please follow the [code + contributions](#code-contributions) guide below. If you need more context on a particular issue, + please ask in a comment. + +As contributors and maintainers to this project, you are expected to abide by cuDF's code of +conduct. More information can be found at: +[Contributor Code of Conduct](https://docs.rapids.ai/resources/conduct/). ## Code contributions ### Your first issue -1. Follow the guide at the bottom of this page for [Setting Up Your Build Environment](#setting-up-your-build-environment). -2. Find an issue to work on. The best way is to look for the [good first issue](https://github.com/rapidsai/cudf/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) - or [help wanted](https://github.com/rapidsai/cudf/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels. +1. Follow the guide at the bottom of this page for + [Setting up your build environment](#setting-up-your-build-environment). +2. Find an issue to work on. The best way is to look for the + [good first issue](https://github.com/rapidsai/cudf/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) + or [help wanted](https://github.com/rapidsai/cudf/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) + labels. 3. Comment on the issue stating that you are going to work on it. -4. Code! Make sure to update unit tests! -5. When done, [create your pull request](https://github.com/rapidsai/cudf/compare). -6. Verify that CI passes all [status checks](https://help.github.com/articles/about-status-checks/). Fix if needed. -7. Wait for other developers to review your code and update code as needed. -8. Once reviewed and approved, a RAPIDS developer will merge your pull request. - -Remember, if you are unsure about anything, don't hesitate to comment on issues -and ask for clarifications! +4. Create a fork of the cudf repository and check out a branch with a name that + describes your planned work. For example, `fix-documentation`. +5. Write code to address the issue or implement the feature. +6. Add unit tests and unit benchmarks. +7. [Create your pull request](https://github.com/rapidsai/cudf/compare). +8. Verify that CI passes all [status checks](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks). + Fix if needed. +9. Wait for other developers to review your code and update code as needed. +10. Once reviewed and approved, a RAPIDS developer will merge your pull request. + +If you are unsure about anything, don't hesitate to comment on issues and ask for clarification! 
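The fork-and-branch setup in step 4 condenses to a few git commands. A minimal sketch, assuming the fork was already created through the GitHub UI (`<your-username>` is a placeholder for your GitHub handle, and the `upstream` remote name is just a common convention):

```bash
# Clone your fork and enter the repository
git clone https://github.com/<your-username>/cudf.git
cd cudf

# Track the main repository so you can keep your fork current
git remote add upstream https://github.com/rapidsai/cudf.git

# Create a branch whose name describes the planned work
git checkout -b fix-documentation
```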
### Seasoned developers -Once you have gotten your feet wet and are more comfortable with the code, you -can look at the prioritized issues for our next release in our [project boards](https://github.com/rapidsai/cudf/projects). - -> **Pro Tip:** Always look at the release board with the highest number for -issues to work on. This is where RAPIDS developers also focus their efforts. +Once you have gotten your feet wet and are more comfortable with the code, you can look at the +prioritized issues for our next release in our +[project boards](https://github.com/rapidsai/cudf/projects). -Look at the unassigned issues, and find an issue to which you are comfortable -contributing. Start with _Step 3_ above, commenting on the issue to let -others know you are working on it. If you have any questions related to the -implementation of the issue, ask them in the issue instead of the PR. +**Note:** Always look at the release board that is +[currently under development](https://docs.rapids.ai/maintainers) for issues to work on. This is +where RAPIDS developers also focus their efforts. -## Setting Up Your Build Environment +Look at the unassigned issues, and find an issue to which you are comfortable contributing. Start +with _Step 3_ above, commenting on the issue to let others know you are working on it. If you have +any questions related to the implementation of the issue, ask them in the issue instead of the PR. -The following instructions are for developers and contributors to cuDF OSS development. These instructions are tested on Linux Ubuntu 16.04 & 18.04. Use these instructions to build cuDF from source and contribute to its development. Other operating systems may be compatible, but are not currently tested. +## Setting up your build environment +The following instructions are for developers and contributors to cuDF development. These +instructions are tested on Ubuntu Linux LTS releases. Use these instructions to build cuDF from +source and contribute to its development. Other operating systems may be compatible, but are not +currently tested. +Building cudf with the provided conda environment is recommended for users who wish to enable all +library features. The following instructions are for building with a conda environment. Dependencies +for a minimal build of libcudf without using conda are also listed below. ### General requirements Compilers: -* `gcc` version 9.3+ -* `nvcc` version 11.5+ -* `cmake` version 3.20.1+ +* `gcc` version 9.3+ +* `nvcc` version 11.5+ +* `cmake` version 3.20.1+ CUDA/GPU: @@ -71,127 +81,166 @@ CUDA/GPU: * NVIDIA driver 450.80.02+ * Pascal architecture or better -You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). +You can obtain CUDA from +[https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). -### Create the build Environment +### Create the build environment + +- Clone the repository: -- Clone the repository and submodules ```bash CUDF_HOME=$(pwd)/cudf git clone https://github.com/rapidsai/cudf.git $CUDF_HOME cd $CUDF_HOME -git submodule update --init --remote --recursive ``` + +#### Building with a conda environment + +**Note:** Using a conda environment is the easiest way to satisfy the library's dependencies. +Instructions for a minimal build environment without conda are included below. 
+ - Create the conda development environment `cudf_dev`: + ```bash # create the conda environment (assuming in base `cudf` directory) -# note: RAPIDS currently doesn't support `channel_priority: strict`; use `channel_priority: flexible` instead +# note: RAPIDS currently doesn't support `channel_priority: strict`; +# use `channel_priority: flexible` instead conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda11.5.yml # activate the environment conda activate cudf_dev ``` -- For other CUDA versions, check the corresponding cudf_dev_cuda*.yml file in conda/environments + +- **Note**: the conda environment files are updated frequently, so the + development environment may also need to be updated if dependency versions or + pinnings are changed. + +- For other CUDA versions, check the corresponding `cudf_dev_cuda*.yml` file in + `conda/environments/`. + +#### Building without a conda environment + +- libcudf has the following minimal dependencies (in addition to those listed in the [General + requirements](#general-requirements)). The packages listed below use Ubuntu package names: + + - `build-essential` + - `libssl-dev` + - `libz-dev` + - `libpython3-dev` (required if building cudf) ### Build cuDF from source -- A `build.sh` script is provided in `$CUDF_HOME`. Running the script with no additional arguments will install the `libcudf`, `cudf` and `dask_cudf` libraries. By default, the libraries are installed to the `$CONDA_PREFIX` directory. To install into a different location, set the location in `$INSTALL_PREFIX`. Finally, note that the script depends on the `nvcc` executable being on your path, or defined in `$CUDACXX`. +- A `build.sh` script is provided in `$CUDF_HOME`. Running the script with no additional arguments + will install the `libcudf`, `cudf` and `dask_cudf` libraries. By default, the libraries are + installed to the `$CONDA_PREFIX` directory. To install into a different location, set the location + in `$INSTALL_PREFIX`. Finally, note that the script depends on the `nvcc` executable being on your + path, or defined in `$CUDACXX`. + ```bash cd $CUDF_HOME # Choose one of the following commands, depending on whether -# you want to build and install the libcudf C++ library only, +# you want to build and install the libcudf C++ library only, # or include the cudf and/or dask_cudf Python libraries: ./build.sh # libcudf, cudf and dask_cudf ./build.sh libcudf # libcudf only -./build.sh libcudf cudf # libcudf and cudf only +./build.sh libcudf cudf # libcudf and cudf only ``` -- Other libraries like `cudf-kafka` and `custreamz` can be installed with this script. For the complete list of libraries as well as details about the script usage, run the `help` command: + +- Other libraries like `cudf-kafka` and `custreamz` can be installed with this script. For the + complete list of libraries as well as details about the script usage, run the `help` command: + ```bash -./build.sh --help +./build.sh --help ``` ### Build, install and test cuDF libraries for contributors -The general workflow is provided below. Please, also see the last section about [code formatting](###code-formatting). +The general workflow is provided below. Please also see the last section about +[code formatting](#code-formatting). 
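Before diving into the per-library subsections below, it may help to see the whole contributor loop at a glance. The following is a condensed sketch assembled from the commands documented in the next subsections, assuming the `cudf_dev` conda environment is active:

```bash
# Build libcudf together with its unit tests, then run them
cd $CUDF_HOME
./build.sh libcudf tests
make test

# Build and install the cudf Python package in editable/develop mode
cd $CUDF_HOME/python/cudf
python setup.py build_ext --inplace
python setup.py develop

# Run the cudf Python tests
cd $CUDF_HOME/python
pytest -v cudf/cudf/tests
```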
#### `libcudf` (C++) -If you're only interested in building the library (and not the unit tests): - +- If you're only interested in building the library (and not the unit tests): + ```bash cd $CUDF_HOME ./build.sh libcudf ``` -If, in addition, you want to build tests: + +- If, in addition, you want to build tests: ```bash ./build.sh libcudf tests ``` -To run the tests: + +- To run the tests: ```bash -make test +make test ``` #### `cudf` (Python) - First, build the `libcudf` C++ library following the steps above -- To build and install in edit/develop `cudf` python package: +- To build and install in edit/develop `cudf` Python package: ```bash cd $CUDF_HOME/python/cudf python setup.py build_ext --inplace python setup.py develop ``` -- To run `cudf` tests : +- To run `cudf` tests: ```bash cd $CUDF_HOME/python -py.test -v cudf/cudf/tests +pytest -v cudf/cudf/tests ``` #### `dask-cudf` (Python) - First, build the `libcudf` C++ and `cudf` Python libraries following the steps above -- To install in edit/develop mode the `dask-cudf` python package: +- To install the `dask-cudf` Python package in editable/develop mode: ```bash cd $CUDF_HOME/python/dask_cudf python setup.py build_ext --inplace python setup.py develop ``` -- To run `dask_cudf` tests : +- To run `dask_cudf` tests: ```bash cd $CUDF_HOME/python -py.test -v dask_cudf +pytest -v dask_cudf ``` #### `libcudf_kafka` (C++) -If you're only interested in building the library (and not the unit tests): - +- If you're only interested in building the library (and not the unit tests): + ```bash cd $CUDF_HOME ./build.sh libcudf_kafka ``` -If, in addition, you want to build tests: + +- If, in addition, you want to build tests: ```bash ./build.sh libcudf_kafka tests ``` -To run the tests: + +- To run the tests: ```bash -make test +make test ``` #### `cudf-kafka` (Python) -- First, build the `libcudf` and `libcudf_kafka` following the steps above +- First, build the `libcudf` and `libcudf_kafka` libraries following the steps above + +- To install the `cudf-kafka` Python package in editable/develop mode: -- To install in edit/develop mode the `cudf-kafka` python package: ```bash cd $CUDF_HOME/python/cudf_kafka python setup.py build_ext --inplace @@ -202,7 +251,8 @@ python setup.py develop - First, build `libcudf`, `libcudf_kafka`, and `cudf_kafka` following the steps above -- To install in edit/develop mode the `custreamz` python package: +- To install the `custreamz` Python package in editable/develop mode: + ```bash cd $CUDF_HOME/python/custreamz python setup.py build_ext --inplace @@ -210,40 +260,45 @@ python setup.py develop ``` - To run `custreamz` tests : + ```bash cd $CUDF_HOME/python -py.test -v custreamz +pytest -v custreamz ``` #### `cudf` (Java): - First, build the `libcudf` C++ library following the steps above -- Then, refer to [Java README](https://github.com/rapidsai/cudf/blob/branch-21.10/java/README.md) - +- Then, refer to the [Java README](java/README.md) -Done! You are ready to develop for the cuDF OSS project. But please go to [code formatting](###code-formatting) to ensure that you contributing code follows the expected format. +Done! You are ready to develop for the cuDF project. Please review the project's +[code formatting guidelines](#code-formatting). ## Debugging cuDF -### Building Debug mode from source +### Building in debug mode from source -Follow the [above instructions](####build-cudf-from-source) to build from source and add `-g` to the `./build.sh` command. 
+Follow the instructions to [build from source](#build-cudf-from-source) and add `-g` to the +`./build.sh` command. For example: + ```bash ./build.sh libcudf -g ``` -This builds `libcudf` in Debug mode which enables some `assert` safety checks and includes symbols in the library for debugging. +This builds `libcudf` in debug mode which enables some `assert` safety checks and includes symbols +in the library for debugging. All other steps for installing `libcudf` into your environment are the same. ### Debugging with `cuda-gdb` and `cuda-memcheck` -When you have a debug build of `libcudf` installed, debugging with the `cuda-gdb` and `cuda-memcheck` is easy. +When you have a debug build of `libcudf` installed, debugging with the `cuda-gdb` and +`cuda-memcheck` is easy. -If you are debugging a Python script, simply run the following: +If you are debugging a Python script, run the following: ```bash cuda-gdb -ex r --args python <program_name>.py @@ -255,143 +310,71 @@ cuda-memcheck python <program_name>.py ### Device debug symbols -The device debug symbols are not automatically added with the cmake `Debug` -build type because it causes a runtime delay of several minutes when loading -the libcudf.so library. +The device debug symbols are not automatically added with the cmake `Debug` build type because it +causes a runtime delay of several minutes when loading the libcudf.so library. -Therefore, it is recommended to add device debug symbols only to specific files by -setting the `-G` compile option locally in your `cpp/CMakeLists.txt` for that file. -Here is an example of adding the `-G` option to the compile command for -`src/copying/copy.cu` source file: +Therefore, it is recommended to add device debug symbols only to specific files by setting the `-G` +compile option locally in your `cpp/CMakeLists.txt` for that file. Here is an example of adding the +`-G` option to the compile command for `src/copying/copy.cu` source file: -``` +```cmake set_source_files_properties(src/copying/copy.cu PROPERTIES COMPILE_OPTIONS "-G") ``` -This will add the device debug symbols for this object file in libcudf.so. -You can then use `cuda-dbg` to debug into the kernels in that source file. - -### Building and Testing on a gpuCI image locally - -Before submitting a pull request, you can do a local build and test on your machine that mimics our gpuCI environment using the `ci/local/build.sh` script. -For detailed information on usage of this script, see [here](ci/local/README.md). - +This will add the device debug symbols for this object file in `libcudf.so`. You can then use +`cuda-gdb` to debug into the kernels in that source file. -## Automated Build in Docker Container +## Code Formatting -A Dockerfile is provided with a preconfigured conda environment for building and installing cuDF from source based off of the main branch. +### C++/CUDA -### Prerequisites +cuDF uses [`clang-format`](https://clang.llvm.org/docs/ClangFormat.html). -* Install [nvidia-docker2](https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) for Docker + GPU support -* Verify NVIDIA driver is `450.80.02` or higher -* Ensure CUDA 11.0+ is installed - -### Usage +In order to format the C++/CUDA files, navigate to the root (`cudf`) directory and run: -From cudf project root run the following, to build with defaults: -```bash -docker build --tag cudf .
-``` -After the container is built run the container: ```bash -docker run --runtime=nvidia -it cudf bash -``` -Activate the conda environment `cudf` to use the newly built cuDF and libcudf libraries: -``` -root@3f689ba9c842:/# source activate cudf -(cudf) root@3f689ba9c842:/# python -c "import cudf" -(cudf) root@3f689ba9c842:/# +python3 ./cpp/scripts/run-clang-format.py -inplace ``` -### Customizing the Build - -Several build arguments are available to customize the build process of the -container. These are specified by using the Docker [build-arg](https://docs.docker.com/engine/reference/commandline/build/#set-build-time-variables---build-arg) -flag. Below is a list of the available arguments and their purpose: +Additionally, many editors have plugins or extensions that you can set up to automatically run +`clang-format` either manually or on file save. -| Build Argument | Default Value | Other Value(s) | Purpose | -| --- | --- | --- | --- | -| `CUDA_VERSION` | 11.0 | 11.2.2 | set CUDA version | -| `LINUX_VERSION` | ubuntu18.04 | ubuntu20.04 | set Ubuntu version | -| `CC` & `CXX` | 9 | 10 | set gcc/g++ version | -| `CUDF_REPO` | This repo | Forks of cuDF | set git URL to use for `git clone` | -| `CUDF_BRANCH` | main | Any branch name | set git branch to checkout of `CUDF_REPO` | -| `NUMBA_VERSION` | newest | >=0.40.0 | set numba version | -| `NUMPY_VERSION` | newest | >=1.14.3 | set numpy version | -| `PANDAS_VERSION` | newest | >=0.23.4 | set pandas version | -| `PYARROW_VERSION` | 1.0.1 | Not supported | set pyarrow version | -| `CMAKE_VERSION` | newest | >=3.18 | set cmake version | -| `CYTHON_VERSION` | 0.29 | Not supported | set Cython version | -| `PYTHON_VERSION` | 3.7 | 3.8 | set python version | +### Python / Pre-commit hooks +cuDF uses [pre-commit](https://pre-commit.com/) to execute code linters and formatters such as +[Black](https://black.readthedocs.io/en/stable/), [isort](https://pycqa.github.io/isort/), and +[flake8](https://flake8.pycqa.org/en/latest/). These tools ensure a consistent code format +throughout the project. Using pre-commit ensures that linter versions and options are aligned for +all developers. Additionally, there is a CI check in place to enforce that committed code follows +our standards. -### Code Formatting - - -#### Python - -cuDF uses [Black](https://black.readthedocs.io/en/stable/), -[isort](https://readthedocs.org/projects/isort/), and -[flake8](http://flake8.pycqa.org/en/latest/) to ensure a consistent code format -throughout the project. They have been installed during the `cudf_dev` environment creation. - -These tools are used to auto-format the Python code, as well as check the Cython -code in the repository. Additionally, there is a CI check in place to enforce -that committed code follows our standards. You can use the tools to -automatically format your python code by running: +To use `pre-commit`, install via `conda` or `pip`: ```bash -isort --atomic python/**/*.py -black python +conda install -c conda-forge pre-commit ``` -and then check the syntax of your Python and Cython code by running: - ```bash -flake8 python -flake8 --config=python/.flake8.cython -``` - -Additionally, many editors have plugins that will apply `isort` and `Black` as -you edit files, as well as use `flake8` to report any style / syntax issues. 
- -#### C++/CUDA - -cuDF uses [`clang-format`](https://clang.llvm.org/docs/ClangFormat.html) - -In order to format the C++/CUDA files, navigate to the root (`cudf`) directory and run: -``` -python3 ./cpp/scripts/run-clang-format.py -inplace +pip install pre-commit ``` -Additionally, many editors have plugins or extensions that you can set up to automatically run `clang-format` either manually or on file save. - -#### Pre-commit hooks - -Optionally, you may wish to setup [pre-commit hooks](https://pre-commit.com/) -to automatically run `isort`, `Black`, `flake8` and `clang-format` when you make a git commit. -This can be done by installing `pre-commit` via `conda` or `pip`: +Then run pre-commit hooks before committing code: ```bash -conda install -c conda-forge pre_commit +pre-commit run ``` -```bash -pip install pre-commit -``` - -and then running: +Optionally, you may set up the pre-commit hooks to run automatically when you make a git commit. This can be done by running: ```bash pre-commit install ``` -from the root of the cuDF repository. Now `isort`, `Black`, `flake8` and `clang-format` will be -run each time you commit changes. +Now code linters and formatters will be run each time you commit changes. ---- +You can skip these checks with `git commit --no-verify` or with the short version `git commit -n`. ## Attribution + Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md Portions adopted from https://github.com/dask/dask/blob/master/docs/source/develop.rst diff --git a/build.sh b/build.sh index e1d6df016dd..48182ca1a6f 100755 --- a/build.sh +++ b/build.sh @@ -148,10 +148,8 @@ function buildLibCudfJniInDocker { -DCUDF_CPP_BUILD_DIR=$workspaceRepoDir/java/target/libcudf-cmake-build \ -DCUDA_STATIC_RUNTIME=ON \ -DPER_THREAD_DEFAULT_STREAM=ON \ - -DRMM_LOGGING_LEVEL=OFF \ -DUSE_GDS=ON \ -DGPU_ARCHS=${CUDF_CMAKE_CUDA_ARCHITECTURES} \ - -DCUDF_JNI_ARROW_STATIC=ON \ -DCUDF_JNI_LIBCUDF_STATIC=ON \ -Dtest=*,!CuFileTest" } diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index 6bea7b2623b..15f4bff583e 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -12,7 +12,7 @@ dependencies: - clang-tools=11.1.0 - cupy>=9.5.0,<11.0.0a0 - rmm=22.06.* - - cmake>=3.20.1,<3.23 + - cmake>=3.20.1,!=3.23.0 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - numba>=0.54 @@ -54,6 +54,10 @@ dependencies: - hypothesis - sphinx-markdown-tables - sphinx-copybutton + - sphinx-autobuild + - myst-nb + - scipy + - dask-cuda - mimesis<4.1 - packaging - protobuf diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 9e77d44c15d..7d7b5d65cce 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -25,7 +25,7 @@ build: requirements: build: - - cmake >=3.20.1,<3.23 + - cmake >=3.20.1,!=3.23.0 host: - python - cython >=0.29,<0.30 diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index 397feab067e..b598a157196 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -1,5 +1,5 @@ cmake_version: - - ">=3.20.1,<3.23" + - ">=3.20.1,!=3.23.0" gtest_version: - "=1.10.0" diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 0806bb964cf..68008e13897 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -79,6 +79,7 @@ outputs: - test -f 
$PREFIX/include/cudf/detail/calendrical_month_sequence.cuh - test -f $PREFIX/include/cudf/detail/concatenate.hpp - test -f $PREFIX/include/cudf/detail/copy.hpp + - test -f $PREFIX/include/cudf/detail/copy.cuh - test -f $PREFIX/include/cudf/detail/datetime.hpp - test -f $PREFIX/include/cudf/detail/fill.hpp - test -f $PREFIX/include/cudf/detail/gather.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d9422edaa8f..cbe2811afe4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -154,21 +154,28 @@ include(cmake/thirdparty/get_gtest.cmake) # preprocess jitify-able kernels include(cmake/Modules/JitifyPreprocessKernels.cmake) # find cuFile -include(cmake/Modules/FindcuFile.cmake) +include(cmake/thirdparty/get_cufile.cmake) +# find KvikIO +include(cmake/thirdparty/get_kvikio.cmake) # Workaround until https://github.com/rapidsai/rapids-cmake/issues/176 is resolved if(NOT BUILD_SHARED_LIBS) include("${rapids-cmake-dir}/export/find_package_file.cmake") list(APPEND METADATA_KINDS BUILD INSTALL) + list(APPEND dependencies cuco KvikIO ZLIB nvcomp) + if(TARGET cufile::cuFile_interface) + list(APPEND dependencies cuFile) + endif() + foreach(METADATA_KIND IN LISTS METADATA_KINDS) - rapids_export_find_package_file( - ${METADATA_KIND} "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" cudf-exports - ) - rapids_export_package(${METADATA_KIND} cuco cudf-exports) - rapids_export_package(${METADATA_KIND} ZLIB cudf-exports) - rapids_export_package(${METADATA_KIND} cuFile cudf-exports) - rapids_export_package(${METADATA_KIND} nvcomp cudf-exports) + foreach(dep IN LISTS dependencies) + rapids_export_package(${METADATA_KIND} ${dep} cudf-exports) + endforeach() endforeach() + + if(TARGET conda_env) + install(TARGETS conda_env EXPORT cudf-exports) + endif() endif() # ################################################################################################## @@ -231,6 +238,7 @@ add_library( src/copying/gather.cu src/copying/get_element.cu src/copying/pack.cpp + src/copying/purge_nonempty_nulls.cu src/copying/reverse.cu src/copying/sample.cu src/copying/scatter.cu @@ -295,6 +303,8 @@ add_library( src/io/comp/cpu_unbz2.cpp src/io/comp/debrotli.cu src/io/comp/gpuinflate.cu + src/io/comp/nvcomp_adapter.cpp + src/io/comp/nvcomp_adapter.cu src/io/comp/snap.cu src/io/comp/uncomp.cpp src/io/comp/unsnap.cu @@ -344,10 +354,12 @@ add_library( src/join/join.cu src/join/join_utils.cu src/join/mixed_join.cu - src/join/mixed_join_kernels.cu + src/join/mixed_join_kernel.cu + src/join/mixed_join_kernel_nulls.cu src/join/mixed_join_kernels_semi.cu src/join/mixed_join_semi.cu - src/join/mixed_join_size_kernels.cu + src/join/mixed_join_size_kernel.cu + src/join/mixed_join_size_kernel_nulls.cu src/join/mixed_join_size_kernels_semi.cu src/join/semi_join.cu src/lists/contains.cu @@ -584,7 +596,8 @@ add_dependencies(cudf jitify_preprocess_run) target_link_libraries( cudf PUBLIC ${ARROW_LIBRARIES} libcudacxx::libcudacxx cudf::Thrust rmm::rmm - PRIVATE cuco::cuco ZLIB::ZLIB nvcomp::nvcomp + PRIVATE cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio + $ ) # Add Conda library, and include paths if specified @@ -592,11 +605,6 @@ if(TARGET conda_env) target_link_libraries(cudf PRIVATE conda_env) endif() -# Add cuFile interface if available -if(TARGET cuFile::cuFile_interface) - target_link_libraries(cudf PRIVATE cuFile::cuFile_interface) -endif() - if(CUDA_STATIC_RUNTIME) # Tell CMake what CUDA language runtime to use set_target_properties(cudf PROPERTIES CUDA_RUNTIME_LIBRARY Static) diff --git 
a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 26bb10da69f..e93b2bf4f25 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -194,10 +194,18 @@ ConfigureBench(FILL_BENCH filling/repeat.cpp) # ################################################################################################## # * groupby benchmark ----------------------------------------------------------------------------- ConfigureBench( - GROUPBY_BENCH groupby/group_sum.cu groupby/group_nth.cu groupby/group_shift.cu - groupby/group_struct.cu groupby/group_no_requests.cu groupby/group_scan.cu + GROUPBY_BENCH + groupby/group_sum.cu + groupby/group_nth.cu + groupby/group_shift.cu + groupby/group_struct.cu + groupby/group_no_requests.cu + groupby/group_scan.cu + groupby/group_rank_benchmark.cu ) +ConfigureNVBench(GROUPBY_NVBENCH groupby/group_rank_benchmark.cu) + # ################################################################################################## # * hashing benchmark ----------------------------------------------------------------------------- ConfigureBench(HASHING_BENCH hashing/hash.cpp hashing/partition.cpp) diff --git a/cpp/benchmarks/copying/contiguous_split.cu b/cpp/benchmarks/copying/contiguous_split.cu index 6b129a4a435..a61b18df8d1 100644 --- a/cpp/benchmarks/copying/contiguous_split.cu +++ b/cpp/benchmarks/copying/contiguous_split.cu @@ -17,6 +17,7 @@ #include #include #include + #include #include diff --git a/cpp/benchmarks/copying/copy_if_else.cpp b/cpp/benchmarks/copying/copy_if_else.cpp index 6f094aba680..6f355118f49 100644 --- a/cpp/benchmarks/copying/copy_if_else.cpp +++ b/cpp/benchmarks/copying/copy_if_else.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/groupby/group_rank_benchmark.cu b/cpp/benchmarks/groupby/group_rank_benchmark.cu new file mode 100644 index 00000000000..1eeb15debe9 --- /dev/null +++ b/cpp/benchmarks/groupby/group_rank_benchmark.cu @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include + +#include +#include +#include +#include + +#include + +template +static void nvbench_groupby_rank(nvbench::state& state, + nvbench::type_list>) +{ + using namespace cudf; + using type = int64_t; + constexpr auto dtype = type_to_id(); + cudf::rmm_pool_raii pool_raii; + + bool const is_sorted = state.get_int64("is_sorted"); + cudf::size_type const column_size = state.get_int64("data_size"); + constexpr int num_groups = 100; + + data_profile profile; + profile.set_null_frequency(std::nullopt); + profile.set_cardinality(0); + profile.set_distribution_params(dtype, distribution_id::UNIFORM, 0, num_groups); + + auto source_table = create_random_table({dtype, dtype}, row_count{column_size}, profile); + + // values to be pre-sorted too for groupby rank + if (is_sorted) source_table = cudf::sort(*source_table); + + table_view keys{{source_table->view().column(0)}}; + column_view order_by{source_table->view().column(1)}; + + auto agg = cudf::make_rank_aggregation(method); + std::vector requests; + requests.emplace_back(groupby::scan_request()); + requests[0].values = order_by; + requests[0].aggregations.push_back(std::move(agg)); + + groupby::groupby gb_obj(keys, null_policy::EXCLUDE, is_sorted ? sorted::YES : sorted::NO); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + // groupby scan uses sort implementation + auto result = gb_obj.scan(requests); + }); +} + +enum class rank_method : int32_t {}; + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::rank_method, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "FIRST"; + case cudf::rank_method::AVERAGE: return "AVERAGE"; + case cudf::rank_method::MIN: return "MIN"; + case cudf::rank_method::MAX: return "MAX"; + case cudf::rank_method::DENSE: return "DENSE"; + default: return "unknown"; + } + }, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "cudf::rank_method::FIRST"; + case cudf::rank_method::AVERAGE: return "cudf::rank_method::AVERAGE"; + case cudf::rank_method::MIN: return "cudf::rank_method::MIN"; + case cudf::rank_method::MAX: return "cudf::rank_method::MAX"; + case cudf::rank_method::DENSE: return "cudf::rank_method::DENSE"; + default: return "unknown"; + } + }) + +using methods = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(nvbench_groupby_rank, NVBENCH_TYPE_AXES(methods)) + .set_type_axes_names({"rank_method"}) + .set_name("groupby_rank") + .add_int64_axis("data_size", + { + 1000000, // 1M + 10000000, // 10M + 100000000, // 100M + }) + + .add_int64_axis("is_sorted", {0, 1}); diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp index 1110b6fe9ef..9c0ef5b528d 100644 --- a/cpp/benchmarks/hashing/hash.cpp +++ b/cpp/benchmarks/hashing/hash.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/io/csv/csv_reader.cpp b/cpp/benchmarks/io/csv/csv_reader.cpp index c50f5220200..b61ba75ce6e 100644 --- a/cpp/benchmarks/io/csv/csv_reader.cpp +++ b/cpp/benchmarks/io/csv/csv_reader.cpp @@ -14,8 +14,6 @@ * limitations under the License. 
*/ -#include - #include #include #include @@ -52,6 +50,7 @@ void BM_csv_read_varying_input(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_csv(read_options); } @@ -98,6 +97,7 @@ void BM_csv_read_varying_options(benchmark::State& state) cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 for (int32_t chunk = 0; chunk < num_chunks; ++chunk) { // only read the header in the first chunk diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp index 65aa31c68dc..079df45b1d8 100644 --- a/cpp/benchmarks/io/csv/csv_writer.cpp +++ b/cpp/benchmarks/io/csv/csv_writer.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include diff --git a/cpp/benchmarks/io/cuio_common.cpp index afe0cc77a4c..7d356263220 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -141,3 +141,31 @@ std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks, return selected_segments; } + +// Executes the command and returns stderr output +std::string exec_cmd(std::string_view cmd) +{ + // Switch stderr and stdout to only capture stderr + auto const redirected_cmd = std::string{"( "}.append(cmd).append(" 3>&2 2>&1 1>&3) 2>/dev/null"); + std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(redirected_cmd.c_str(), "r"), pclose); + CUDF_EXPECTS(pipe != nullptr, "popen() failed"); + + std::array<char, 128> buffer; + std::string error_out; + while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { + error_out += buffer.data(); + } + return error_out; +} + +void try_drop_l3_cache() +{ + static bool is_drop_cache_enabled = std::getenv("CUDF_BENCHMARK_DROP_CACHE") != nullptr; + if (not is_drop_cache_enabled) { return; } + + std::array drop_cache_cmds{"/sbin/sysctl vm.drop_caches=3", "sudo /sbin/sysctl vm.drop_caches=3"}; + CUDF_EXPECTS(std::any_of(drop_cache_cmds.cbegin(), + drop_cache_cmds.cend(), + [](auto& cmd) { return exec_cmd(cmd).empty(); }), + "Failed to execute the drop cache command"); +} diff --git a/cpp/benchmarks/io/cuio_common.hpp index 2ed534d5333..8ea29684aae 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -16,12 +16,12 @@ #pragma once +#include + #include #include #include -#include - using cudf::io::io_type; #define RD_BENCHMARK_DEFINE_ALL_SOURCES(benchmark, name, type_or_group) \ @@ -132,3 +132,13 @@ std::vector<std::string> select_column_names(std::vector<std::string> const& col * The segments could be Parquet row groups or ORC stripes. */ std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks, int chunk); + +/** + * @brief Drops L3 cache if `CUDF_BENCHMARK_DROP_CACHE` environment variable is set. + * + * Has no effect if the environment variable is not set. + * May require sudo access to run successfully. + * + * @throw cudf::logic_error if the environment variable is set and the command fails + */ +void try_drop_l3_cache(); diff --git a/cpp/benchmarks/io/orc/orc_reader.cpp index 0fc2238a272..7d6eb432b5b 100644 --- a/cpp/benchmarks/io/orc/orc_reader.cpp +++ b/cpp/benchmarks/io/orc/orc_reader.cpp @@ -14,8 +14,6 @@ * limitations under the License.
*/ -#include - #include #include #include @@ -60,6 +58,7 @@ void BM_orc_read_varying_input(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_orc(read_opts); } @@ -117,6 +116,7 @@ void BM_orc_read_varying_options(benchmark::State& state) cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf::size_type rows_read = 0; diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index 525c13af5c0..4e7781b402a 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -14,15 +14,13 @@ * limitations under the License. */ -#include "cudf/io/types.hpp" -#include - #include #include #include #include #include +#include // to enable, run cmake with -DBUILD_BENCHMARKS=ON diff --git a/cpp/benchmarks/io/parquet/parquet_reader.cpp b/cpp/benchmarks/io/parquet/parquet_reader.cpp index 8a97fd35c31..af7121d37dc 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include @@ -60,6 +58,7 @@ void BM_parq_read_varying_input(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer const raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_parquet(read_opts); } @@ -117,6 +116,7 @@ void BM_parq_read_varying_options(benchmark::State& state) cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf::size_type rows_read = 0; diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp index d25fae42d0e..776121028ef 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp index 30ed245ed9a..e22696b9c01 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp @@ -14,21 +14,14 @@ * limitations under the License. 
*/ -#include - -#include -#include - -#include -#include -#include - #include #include #include #include +#include #include +#include // to enable, run cmake with -DBUILD_BENCHMARKS=ON diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index ada8856e8e5..d274f79a77c 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -19,10 +19,9 @@ #include #include -#include - #include +#include #include #include #include @@ -38,8 +37,6 @@ #include #include -using cudf::test::fixed_width_column_wrapper; - temp_directory const temp_dir("cudf_gbench"); enum data_chunk_source_type { @@ -137,6 +134,7 @@ static void BM_multibyte_split(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); auto output = cudf::io::text::multibyte_split(*source, delim); } diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu index 595775ddf00..5eaaec23211 100644 --- a/cpp/benchmarks/iterator/iterator.cu +++ b/cpp/benchmarks/iterator/iterator.cu @@ -14,13 +14,14 @@ * limitations under the License. */ -#include "../fixture/benchmark_fixture.hpp" -#include "../synchronization/synchronization.hpp" +#include +#include + +#include #include #include #include -#include #include @@ -31,8 +32,6 @@ #include #include -#include - #include template diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index 6ff2543cf7d..a031b4e656d 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -21,10 +21,8 @@ #include #include -#include -#include - #include +#include #include #include #include diff --git a/cpp/benchmarks/merge/merge.cpp b/cpp/benchmarks/merge/merge.cpp index 88354bcc731..82d89233a33 100644 --- a/cpp/benchmarks/merge/merge.cpp +++ b/cpp/benchmarks/merge/merge.cpp @@ -14,18 +14,15 @@ * limitations under the License. */ -#include +#include +#include +#include #include +#include #include #include -#include - -#include -#include -#include - #include #include diff --git a/cpp/benchmarks/null_mask/set_null_mask.cpp b/cpp/benchmarks/null_mask/set_null_mask.cpp index 2057951ff8d..429a62a2bfa 100644 --- a/cpp/benchmarks/null_mask/set_null_mask.cpp +++ b/cpp/benchmarks/null_mask/set_null_mask.cpp @@ -19,8 +19,6 @@ #include -#include - class SetNullmask : public cudf::benchmark { }; diff --git a/cpp/benchmarks/reduction/scan.cpp b/cpp/benchmarks/reduction/scan.cpp index aef4960789a..8c434465795 100644 --- a/cpp/benchmarks/reduction/scan.cpp +++ b/cpp/benchmarks/reduction/scan.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/reduction/segment_reduce.cu b/cpp/benchmarks/reduction/segment_reduce.cu index 3723147d95c..08fc4622b43 100644 --- a/cpp/benchmarks/reduction/segment_reduce.cu +++ b/cpp/benchmarks/reduction/segment_reduce.cu @@ -82,7 +82,7 @@ std::pair, thrust::device_vector> make_test_d thrust::device_vector d_offsets(offset_it, offset_it + num_segments + 1); - return std::make_pair(std::move((input->release())[0]), d_offsets); + return std::pair(std::move((input->release())[0]), d_offsets); } template diff --git a/cpp/benchmarks/replace/clamp.cpp b/cpp/benchmarks/replace/clamp.cpp index d3a7415a478..e9a259d0c7b 100644 --- a/cpp/benchmarks/replace/clamp.cpp +++ b/cpp/benchmarks/replace/clamp.cpp @@ -14,7 +14,6 @@ * limitations under the License. 
*/ -#include #include #include #include diff --git a/cpp/benchmarks/replace/nans.cpp b/cpp/benchmarks/replace/nans.cpp index e1b05bbc337..28ca798ebf0 100644 --- a/cpp/benchmarks/replace/nans.cpp +++ b/cpp/benchmarks/replace/nans.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp index 22acb241f0b..0a5c1844c69 100644 --- a/cpp/benchmarks/sort/rank.cpp +++ b/cpp/benchmarks/sort/rank.cpp @@ -14,20 +14,13 @@ * limitations under the License. */ -#include "cudf/column/column_view.hpp" -#include - -#include -#include -#include -#include -#include - -#include #include #include #include +#include +#include + class Rank : public cudf::benchmark { }; diff --git a/cpp/benchmarks/sort/sort.cpp b/cpp/benchmarks/sort/sort.cpp index 1a42daa5bb0..d7c33e7170e 100644 --- a/cpp/benchmarks/sort/sort.cpp +++ b/cpp/benchmarks/sort/sort.cpp @@ -14,19 +14,12 @@ * limitations under the License. */ -#include - -#include -#include -#include -#include -#include - -#include #include #include #include +#include + template class Sort : public cudf::benchmark { }; diff --git a/cpp/benchmarks/sort/sort_strings.cpp b/cpp/benchmarks/sort/sort_strings.cpp index 30a7aee043b..a58b9a4f6da 100644 --- a/cpp/benchmarks/sort/sort_strings.cpp +++ b/cpp/benchmarks/sort/sort_strings.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp index 81f7ad8a4c1..9b6c32940f5 100644 --- a/cpp/benchmarks/sort/sort_structs.cpp +++ b/cpp/benchmarks/sort/sort_structs.cpp @@ -16,11 +16,10 @@ #include -#include - -#include #include +#include + #include #include diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp index 0d74d0a6b7c..daa22d25677 100644 --- a/cpp/benchmarks/string/case.cpp +++ b/cpp/benchmarks/string/case.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/string/combine.cpp b/cpp/benchmarks/string/combine.cpp index a0cfcd15fe8..85c48e18ce1 100644 --- a/cpp/benchmarks/string/combine.cpp +++ b/cpp/benchmarks/string/combine.cpp @@ -16,7 +16,6 @@ #include "string_bench_args.hpp" -#include #include #include #include @@ -24,7 +23,6 @@ #include #include #include -#include class StringCombine : public cudf::benchmark { }; diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index 3a89b5646d7..6689e3611d1 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/string/convert_datetime.cpp b/cpp/benchmarks/string/convert_datetime.cpp index 3782fea1e36..488ce95d397 100644 --- a/cpp/benchmarks/string/convert_datetime.cpp +++ b/cpp/benchmarks/string/convert_datetime.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/string/convert_durations.cpp b/cpp/benchmarks/string/convert_durations.cpp index dc9a1e991b2..6e3a9e8faa9 100644 --- a/cpp/benchmarks/string/convert_durations.cpp +++ b/cpp/benchmarks/string/convert_durations.cpp @@ -14,24 +14,19 @@ * limitations under the License. 
*/ -#include +#include +#include +#include + +#include #include #include - -#include -#include -#include -#include +#include #include #include -#include "../fixture/benchmark_fixture.hpp" -#include "../synchronization/synchronization.hpp" -#include "cudf/column/column_view.hpp" -#include "cudf/wrappers/durations.hpp" - class DurationsToString : public cudf::benchmark { }; template diff --git a/cpp/benchmarks/string/convert_fixed_point.cpp b/cpp/benchmarks/string/convert_fixed_point.cpp index 05b87906eca..88657c409cd 100644 --- a/cpp/benchmarks/string/convert_fixed_point.cpp +++ b/cpp/benchmarks/string/convert_fixed_point.cpp @@ -14,11 +14,9 @@ * limitations under the License. */ -#include -#include - -#include #include +#include +#include #include #include diff --git a/cpp/benchmarks/string/convert_numerics.cpp b/cpp/benchmarks/string/convert_numerics.cpp index 71a23c76829..3025c32b888 100644 --- a/cpp/benchmarks/string/convert_numerics.cpp +++ b/cpp/benchmarks/string/convert_numerics.cpp @@ -14,11 +14,9 @@ * limitations under the License. */ -#include -#include - -#include #include +#include +#include #include #include diff --git a/cpp/benchmarks/string/copy.cu b/cpp/benchmarks/string/copy.cu index a8f9eb111fc..0280322a3a1 100644 --- a/cpp/benchmarks/string/copy.cu +++ b/cpp/benchmarks/string/copy.cu @@ -20,9 +20,9 @@ #include #include +#include #include #include -#include #include #include diff --git a/cpp/benchmarks/string/extract.cpp b/cpp/benchmarks/string/extract.cpp index b8d206386f5..4ff29285482 100644 --- a/cpp/benchmarks/string/extract.cpp +++ b/cpp/benchmarks/string/extract.cpp @@ -20,9 +20,10 @@ #include #include +#include + #include #include -#include #include diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu index 2e0bf4afb36..dde0b7e4424 100644 --- a/cpp/benchmarks/string/factory.cu +++ b/cpp/benchmarks/string/factory.cu @@ -16,14 +16,14 @@ #include "string_bench_args.hpp" -#include #include #include #include +#include + #include #include -#include #include #include diff --git a/cpp/benchmarks/string/filter.cpp b/cpp/benchmarks/string/filter.cpp index b39cf25bc91..064b824619e 100644 --- a/cpp/benchmarks/string/filter.cpp +++ b/cpp/benchmarks/string/filter.cpp @@ -14,17 +14,17 @@ * limitations under the License. */ -#include #include #include #include +#include + #include #include #include #include #include -#include #include #include diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index 55eb52c9b30..aaa7bd29b31 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -14,16 +14,16 @@ * limitations under the License. */ -#include #include #include #include +#include + #include #include #include #include -#include #include diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp index 9044db18522..835a437e3b5 100644 --- a/cpp/benchmarks/string/repeat_strings.cpp +++ b/cpp/benchmarks/string/repeat_strings.cpp @@ -16,7 +16,6 @@ #include "string_bench_args.hpp" -#include #include #include #include diff --git a/cpp/benchmarks/string/replace.cpp b/cpp/benchmarks/string/replace.cpp index 0a3607c64f0..10f6e2a19ed 100644 --- a/cpp/benchmarks/string/replace.cpp +++ b/cpp/benchmarks/string/replace.cpp @@ -14,20 +14,20 @@ * limitations under the License. 
*/ -#include +#include "string_bench_args.hpp" + #include #include #include +#include + #include #include #include -#include #include -#include "string_bench_args.hpp" - class StringReplace : public cudf::benchmark { }; diff --git a/cpp/benchmarks/string/replace_re.cpp b/cpp/benchmarks/string/replace_re.cpp index b9d04630837..148cbe678bd 100644 --- a/cpp/benchmarks/string/replace_re.cpp +++ b/cpp/benchmarks/string/replace_re.cpp @@ -16,14 +16,14 @@ #include "string_bench_args.hpp" -#include #include #include #include +#include + #include #include -#include class StringReplace : public cudf::benchmark { }; diff --git a/cpp/benchmarks/string/split.cpp b/cpp/benchmarks/string/split.cpp index ad25cfe54de..97eb0ba6dbf 100644 --- a/cpp/benchmarks/string/split.cpp +++ b/cpp/benchmarks/string/split.cpp @@ -14,15 +14,15 @@ * limitations under the License. */ -#include #include #include #include +#include + #include #include #include -#include #include diff --git a/cpp/benchmarks/string/substring.cpp b/cpp/benchmarks/string/substring.cpp index 2195cc56515..a18462385fc 100644 --- a/cpp/benchmarks/string/substring.cpp +++ b/cpp/benchmarks/string/substring.cpp @@ -16,21 +16,20 @@ #include "string_bench_args.hpp" -#include #include #include #include +#include + #include #include #include -#include -#include - -#include #include +#include + class StringSubstring : public cudf::benchmark { }; diff --git a/cpp/benchmarks/string/translate.cpp b/cpp/benchmarks/string/translate.cpp index 38c6ff9c701..2ed0ccceba6 100644 --- a/cpp/benchmarks/string/translate.cpp +++ b/cpp/benchmarks/string/translate.cpp @@ -16,19 +16,19 @@ #include "string_bench_args.hpp" -#include #include #include #include -#include -#include #include -#include +#include +#include #include +#include + class StringTranslate : public cudf::benchmark { }; diff --git a/cpp/benchmarks/string/url_decode.cu b/cpp/benchmarks/string/url_decode.cu index 7971d44536d..40bf2b090d4 100644 --- a/cpp/benchmarks/string/url_decode.cu +++ b/cpp/benchmarks/string/url_decode.cu @@ -14,21 +14,17 @@ * limitations under the License. */ -#include #include #include +#include + #include #include #include #include #include -#include -#include -#include -#include - #include #include #include diff --git a/cpp/benchmarks/text/ngrams.cpp b/cpp/benchmarks/text/ngrams.cpp index 157c27ae48a..b1e70517aea 100644 --- a/cpp/benchmarks/text/ngrams.cpp +++ b/cpp/benchmarks/text/ngrams.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include @@ -22,7 +21,6 @@ #include #include -#include #include diff --git a/cpp/benchmarks/text/normalize.cpp b/cpp/benchmarks/text/normalize.cpp index 2cc083f4ae8..3b58a7dd187 100644 --- a/cpp/benchmarks/text/normalize.cpp +++ b/cpp/benchmarks/text/normalize.cpp @@ -14,15 +14,12 @@ * limitations under the License. */ -#include #include #include #include #include #include -#include -#include #include diff --git a/cpp/benchmarks/text/normalize_spaces.cpp b/cpp/benchmarks/text/normalize_spaces.cpp index 3bd636d4aa9..1fe912e5740 100644 --- a/cpp/benchmarks/text/normalize_spaces.cpp +++ b/cpp/benchmarks/text/normalize_spaces.cpp @@ -14,7 +14,6 @@ * limitations under the License. 
*/ -#include #include #include #include @@ -22,8 +21,6 @@ #include #include -#include -#include #include diff --git a/cpp/benchmarks/text/replace.cpp b/cpp/benchmarks/text/replace.cpp index 3fbb6054d5c..a093cd767b3 100644 --- a/cpp/benchmarks/text/replace.cpp +++ b/cpp/benchmarks/text/replace.cpp @@ -14,17 +14,18 @@ * limitations under the License. */ -#include #include #include #include -#include -#include #include +#include + #include +#include + class TextReplace : public cudf::benchmark { }; diff --git a/cpp/benchmarks/text/subword.cpp b/cpp/benchmarks/text/subword.cpp index 150f578a22a..d8357dcf92c 100644 --- a/cpp/benchmarks/text/subword.cpp +++ b/cpp/benchmarks/text/subword.cpp @@ -15,12 +15,13 @@ */ #include -#include -#include -#include #include +#include +#include + +#include #include #include #include @@ -29,7 +30,7 @@ static std::string create_hash_vocab_file() { - std::string dir_template("/tmp"); + std::string dir_template{std::filesystem::temp_directory_path().string()}; if (const char* env_p = std::getenv("WORKSPACE")) dir_template = env_p; std::string hash_file = dir_template + "/hash_vocab.txt"; // create a fake hashed vocab text file for this test diff --git a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp index 4cb9c9e5271..fea1973c026 100644 --- a/cpp/benchmarks/text/tokenize.cpp +++ b/cpp/benchmarks/text/tokenize.cpp @@ -14,16 +14,15 @@ * limitations under the License. */ -#include #include #include #include #include +#include + #include #include -#include -#include #include #include diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu index aba78dad3fe..53dac455b04 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu @@ -17,8 +17,6 @@ #include #include -#include - #include #include #include diff --git a/cpp/cmake/Modules/FindcuFile.cmake b/cpp/cmake/Modules/FindcuFile.cmake index e539a6604a8..3661d7d68d6 100644 --- a/cpp/cmake/Modules/FindcuFile.cmake +++ b/cpp/cmake/Modules/FindcuFile.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -20,9 +20,9 @@ Find cuFile headers and libraries. Imported Targets ^^^^^^^^^^^^^^^^ -``cuFile::cuFile`` +``cufile::cuFile`` The cuFile library, if found. -``cuFile::cuFileRDMA`` +``cufile::cuFileRDMA`` The cuFile RDMA library, if found. 
Result Variables @@ -80,29 +80,29 @@ find_package_handle_standard_args( VERSION_VAR cuFile_VERSION ) -if(cuFile_INCLUDE_DIR AND NOT TARGET cuFile::cuFile_interface) - add_library(cuFile::cuFile_interface IMPORTED INTERFACE) +if(cuFile_INCLUDE_DIR AND NOT TARGET cufile::cuFile_interface) + add_library(cufile::cuFile_interface INTERFACE IMPORTED GLOBAL) target_include_directories( - cuFile::cuFile_interface INTERFACE "$" + cufile::cuFile_interface INTERFACE "$" ) - target_compile_options(cuFile::cuFile_interface INTERFACE "${cuFile_COMPILE_OPTIONS}") - target_compile_definitions(cuFile::cuFile_interface INTERFACE CUFILE_FOUND) + target_compile_options(cufile::cuFile_interface INTERFACE "${cuFile_COMPILE_OPTIONS}") + target_compile_definitions(cufile::cuFile_interface INTERFACE CUFILE_FOUND) endif() -if(cuFile_FOUND AND NOT TARGET cuFile::cuFile) - add_library(cuFile::cuFile UNKNOWN IMPORTED) +if(cuFile_FOUND AND NOT TARGET cufile::cuFile) + add_library(cufile::cuFile UNKNOWN IMPORTED GLOBAL) set_target_properties( - cuFile::cuFile + cufile::cuFile PROPERTIES IMPORTED_LOCATION "${cuFile_LIBRARY}" INTERFACE_COMPILE_OPTIONS "${cuFile_COMPILE_OPTIONS}" INTERFACE_INCLUDE_DIRECTORIES "${cuFile_INCLUDE_DIR}" ) endif() -if(cuFile_FOUND AND NOT TARGET cuFile::cuFileRDMA) - add_library(cuFile::cuFileRDMA UNKNOWN IMPORTED) +if(cuFile_FOUND AND NOT TARGET cufile::cuFileRDMA) + add_library(cufile::cuFileRDMA UNKNOWN IMPORTED GLOBAL) set_target_properties( - cuFile::cuFileRDMA + cufile::cuFileRDMA PROPERTIES IMPORTED_LOCATION "${cuFileRDMA_LIBRARY}" INTERFACE_COMPILE_OPTIONS "${cuFile_COMPILE_OPTIONS}" INTERFACE_INCLUDE_DIRECTORIES "${cuFile_INCLUDE_DIR}" diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 2b08e9f2d6c..0b14b812a05 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -80,10 +80,17 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB list(APPEND ARROW_PYTHON_OPTIONS "ARROW_PYTHON ON") # Arrow's logic to build Boost from source is busted, so we have to get it from the system. list(APPEND ARROW_PYTHON_OPTIONS "BOOST_SOURCE SYSTEM") - list(APPEND ARROW_PYTHON_OPTIONS "Thrift_SOURCE BUNDLED") list(APPEND ARROW_PYTHON_OPTIONS "ARROW_DEPENDENCY_SOURCE AUTO") endif() + set(ARROW_PARQUET_OPTIONS "") + if(ENABLE_PARQUET) + # Arrow's logic to build Boost from source is busted, so we have to get it from the system. + list(APPEND ARROW_PARQUET_OPTIONS "BOOST_SOURCE SYSTEM") + list(APPEND ARROW_PARQUET_OPTIONS "Thrift_SOURCE BUNDLED") + list(APPEND ARROW_PARQUET_OPTIONS "ARROW_DEPENDENCY_SOURCE AUTO") + endif() + # Set this so Arrow correctly finds the CUDA toolkit when the build machine does not have the CUDA # driver installed. This must be an env var. set(ENV{CUDA_LIB_PATH} "${CUDAToolkit_LIBRARY_DIR}/stubs") @@ -106,6 +113,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB "ARROW_S3 ${ENABLE_S3}" "ARROW_ORC ${ENABLE_ORC}" # e.g. 
needed by blazingsql-io
+ ${ARROW_PARQUET_OPTIONS}
 "ARROW_PARQUET ${ENABLE_PARQUET}"
 ${ARROW_PYTHON_OPTIONS}
 # Arrow modifies CMake's GLOBAL RULE_LAUNCH_COMPILE unless this is off
diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 1639655d1e9..5232821d113 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -21,12 +21,14 @@ function(find_and_configure_cucollections)
 cuco 0.0.1
 GLOBAL_TARGETS cuco::cuco
 BUILD_EXPORT_SET cudf-exports
- INSTALL_EXPORT_SET cudf-exports
 CPM_ARGS
 GITHUB_REPOSITORY NVIDIA/cuCollections
 GIT_TAG fb58a38701f1c24ecfe07d8f1f208bbe80930da5
 EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS}
 OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF"
 )
+ if(NOT BUILD_SHARED_LIBS)
+ rapids_export_package(INSTALL cuco cudf-exports)
+ endif()
 endfunction()
diff --git a/cpp/cmake/thirdparty/get_cufile.cmake b/cpp/cmake/thirdparty/get_cufile.cmake new file mode 100644 index 00000000000..21088f4ec0f --- /dev/null +++ b/cpp/cmake/thirdparty/get_cufile.cmake @@ -0,0 +1,32 @@
+# =============================================================================
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+# This function finds cuFile and sets any additional necessary environment variables.
+function(find_and_configure_cufile)
+
+ list(APPEND CMAKE_MODULE_PATH ${CUDF_SOURCE_DIR}/cmake/Modules)
+ rapids_find_package(cuFile QUIET)
+
+ if(cuFile_FOUND AND NOT BUILD_SHARED_LIBS)
+ include("${rapids-cmake-dir}/export/find_package_file.cmake")
+ rapids_export_find_package_file(
+ BUILD "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" cudf-exports
+ )
+ rapids_export_find_package_file(
+ INSTALL "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" cudf-exports
+ )
+ endif()
+endfunction()
+
+find_and_configure_cufile()
diff --git a/cpp/cmake/thirdparty/get_kvikio.cmake b/cpp/cmake/thirdparty/get_kvikio.cmake new file mode 100644 index 00000000000..e94e024d6c9 --- /dev/null +++ b/cpp/cmake/thirdparty/get_kvikio.cmake @@ -0,0 +1,36 @@
+# =============================================================================
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+# This function finds KvikIO
+function(find_and_configure_kvikio VERSION)
+
+ rapids_cpm_find(
+ KvikIO ${VERSION}
+ GLOBAL_TARGETS kvikio::kvikio
+ CPM_ARGS
+ GIT_REPOSITORY https://github.com/rapidsai/kvikio.git
+ GIT_TAG branch-${VERSION}
+ GIT_SHALLOW TRUE SOURCE_SUBDIR cpp
+ OPTIONS "KvikIO_BUILD_EXAMPLES OFF"
+ )
+
+ if(KvikIO_BINARY_DIR)
+ include("${rapids-cmake-dir}/export/find_package_root.cmake")
+ rapids_export_find_package_root(BUILD KvikIO "${KvikIO_BINARY_DIR}" cudf-exports)
+ endif()
+
+endfunction()
+
+set(KVIKIO_MIN_VERSION_cudf "${CUDF_VERSION_MAJOR}.${CUDF_VERSION_MINOR}")
+find_and_configure_kvikio(${KVIKIO_MIN_VERSION_cudf})
diff --git a/cpp/cmake/thirdparty/get_nvcomp.cmake b/cpp/cmake/thirdparty/get_nvcomp.cmake index 0356725548b..d0007f93628 100644 --- a/cpp/cmake/thirdparty/get_nvcomp.cmake +++ b/cpp/cmake/thirdparty/get_nvcomp.cmake @@ -25,6 +25,11 @@ function(find_and_configure_nvcomp VERSION_MIN VERSION_MAX)
 OPTIONS "BUILD_STATIC ON" "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF"
 )
+ if(nvcomp_BINARY_DIR)
+ include("${rapids-cmake-dir}/export/find_package_root.cmake")
+ rapids_export_find_package_root(BUILD nvcomp "${nvcomp_BINARY_DIR}" cudf-exports)
+ endif()
+
 if(NOT TARGET nvcomp::nvcomp)
 add_library(nvcomp::nvcomp ALIAS nvcomp)
 endif()
diff --git a/cpp/docs/BENCHMARKING.md b/cpp/docs/BENCHMARKING.md index 8794c90d1db..270e7a87e85 100644 --- a/cpp/docs/BENCHMARKING.md +++ b/cpp/docs/BENCHMARKING.md @@ -35,6 +35,12 @@
provided in `cpp/benchmarks/synchronization/synchronization.hpp` to help with this. It
can also optionally clear the GPU L2 cache in order to ensure cache hits do not artificially inflate
performance in repeated iterations.

+## Data generation
+
+For generating benchmark input data, helper functions are available at
+[cpp/benchmarks/common/generate_input.hpp](/cpp/benchmarks/common/generate_input.hpp). The input
+data generation happens on the device, in contrast to the `column_wrapper` classes, where data
+generation happens on the host.
+* `create_sequence_table` can generate sequence columns starting with value 0 in the first row and
+ increasing by 1 in subsequent rows.
+* `create_random_table` can generate a table filled with random data. The random data parameters
+ are configurable.
+
 ## What should we benchmark?

 In general, we should benchmark all features over a range of data sizes and types, so that we can
diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index 1599c81cbe5..84f69f559a8 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -572,7 +572,7 @@ The preferred style for how inputs are passed in and outputs are returned is the

 Sometimes it is necessary for functions to have multiple outputs. There are a few ways this can be
 done in C++ (including creating a `struct` for the output). One convenient way to do this is
-using `std::tie` and `std::make_pair`. Note that objects passed to `std::make_pair` will invoke
+using `std::tie` and `std::pair`. Note that objects passed to `std::pair` will invoke
 either the copy constructor or the move constructor of the object, and it may be preferable to
 move non-trivially copyable objects (and required for types with deleted copy constructors, like
 `std::unique_ptr`).
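To make the preferred pattern concrete before the guide's own example below, here is a small, self-contained sketch (the `split_evens_odds` function is hypothetical, not part of libcudf) of returning two outputs with `std::pair` and unpacking them with `std::tie`:

```c++
#include <tuple>
#include <utility>
#include <vector>

// Hypothetical function with two outputs, bundled into one std::pair.
// Moving the locals avoids copies and is required for move-only members.
std::pair<std::vector<int>, std::vector<int>> split_evens_odds(std::vector<int> const& in)
{
  std::vector<int> evens;
  std::vector<int> odds;
  for (int v : in) {
    (v % 2 == 0 ? evens : odds).push_back(v);
  }
  // Class template argument deduction picks the member types; no make_pair needed.
  return std::pair(std::move(evens), std::move(odds));
}

int main()
{
  std::vector<int> evens;
  std::vector<int> odds;
  // std::tie unpacks the returned pair into existing variables.
  std::tie(evens, odds) = split_evens_odds({1, 2, 3, 4, 5});
  return (evens.size() == 2 && odds.size() == 3) ? 0 : 1;
}
```

The CTAD form `std::pair(...)` is exactly why this diff replaces `std::make_pair`: since C++17 the deduction guide makes the helper redundant.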
@@ -585,7 +585,7 @@ std::pair return_two_tables(void){
 // Do stuff with out0, out1
 // Return a std::pair of the two outputs
- return std::make_pair(std::move(out0), std::move(out1));
+ return std::pair(std::move(out0), std::move(out1));
 }

 cudf::table out0;
@@ -745,6 +745,30 @@ void isolated_helper_function(...);

[**Anonymous namespaces should *never* be used in a header file.**](https://wiki.sei.cmu.edu/confluence/display/cplusplus/DCL59-CPP.+Do+not+define+an+unnamed+namespace+in+a+header+file)

+# Deprecating and Removing Code
+
+libcudf is constantly evolving to improve performance and better meet our users' needs. As a
+result, we occasionally need to break or entirely remove APIs to respond to new and improved
+understanding of the functionality we provide. Remaining free to do this is essential to making
+libcudf an agile library that can rapidly accommodate our users' needs. As a result, we do not
+always provide a warning or any lead time prior to releasing breaking changes. On a best-effort
+basis, the libcudf team will notify users of changes that we expect to have significant or
+widespread effects.
+
+Where possible, indicate pending API removals using the
+[deprecated](https://en.cppreference.com/w/cpp/language/attributes/deprecated) attribute and
+document them using Doxygen's
+[deprecated](https://www.doxygen.nl/manual/commands.html#cmddeprecated) command prior to removal.
+When a replacement API is available for a deprecated API, mention the replacement in both the
+deprecation message and the deprecation documentation. Pull requests that introduce deprecations
+should be labeled "deprecation" to facilitate discovery and removal in the subsequent release.
+
+Advertise breaking changes by labeling any pull request that breaks or removes an existing API with
+the "breaking" tag. This ensures that the "Breaking" section of the release notes includes a
+description of what has broken from the past release. Label pull requests that contain deprecations
+with the "non-breaking" tag.
+
+
 # Error Handling

 libcudf follows conventions (and provides utilities) enforcing compile-time and run-time
diff --git a/cpp/docs/DOCUMENTATION.md b/cpp/docs/DOCUMENTATION.md index 2382a0eb022..ebb52836577 100644 --- a/cpp/docs/DOCUMENTATION.md +++ b/cpp/docs/DOCUMENTATION.md @@ -369,6 +369,20 @@ Although using 3 backtick marks `` ``` `` for example blocks will work too, they
Do not use the `@example` tag in the comments for a declaration, or doxygen will interpret the entire
source file as example source code. The source file is then published under a separate _Examples_
page in the output.

+### Deprecations
+
+Add a single [@deprecated](https://www.doxygen.nl/manual/commands.html#cmddeprecated) comment line
+to comment blocks for APIs that will be removed in future releases. Mention alternative /
+replacement APIs in the deprecation comment.
+
+```c++
+/**
+ * ...
+ *
+ * @deprecated This function is deprecated. Use another new function instead.
+ */
+```
+
 ## Namespaces

 Doxygen output includes a _Namespaces_ page that shows all the namespaces declared with comment
blocks in the processed files.
diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 539a7c04106..5c7513a6c99 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -43,6 +43,32 @@ namespace detail {
 class simple_aggregations_collector;
 class aggregation_finalizer;
 } // namespace detail
+
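Before the aggregation changes, one sketch tying together the two deprecation mechanisms described above: the compiler-visible `[[deprecated]]` attribute and the Doxygen `@deprecated` command. The `old_sum`/`new_sum` functions are hypothetical placeholders:

```c++
/**
 * @brief Sums `size` values starting at `data`.
 *
 * @deprecated Deprecated in favor of new_sum(); scheduled for removal in a
 * future release.
 */
[[deprecated("Use new_sum() instead.")]] int old_sum(int const* data, int size);

/**
 * @brief Sums `size` values starting at `data` (replacement for old_sum()).
 */
int new_sum(int const* data, int size);
```

Pairing the two means callers get a compile-time warning naming the replacement, and readers of the generated docs see the same guidance.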
+/**
+ * @brief Tie-breaker method to use for ranking the column.
+ *
+ * @see cudf::make_rank_aggregation for more details.
+ * @ingroup column_sort
+ */
+enum class rank_method : int32_t {
+ FIRST, ///< stable sort order ranking (no ties)
+ AVERAGE, ///< mean of the ranks of all tied rows
+ MIN, ///< lowest rank of all tied rows
+ MAX, ///< highest rank of all tied rows
+ DENSE ///< rank always increases by 1 between groups
+};
+
+/**
+ * @brief Whether returned ranks should be converted to percentages, and the
+ * type of percentage normalization to apply.
+ *
+ */
+enum class rank_percentage : int32_t {
+ NONE, ///< rank
+ ZERO_NORMALIZED, ///< rank / count
+ ONE_NORMALIZED ///< (rank - 1) / (count - 1)
+};
+
 /**
 * @brief Abstract base class for specifying the desired aggregation in an
 * `aggregation_request`.
@@ -77,9 +103,7 @@ class aggregation {
 NUNIQUE, ///< count number of unique elements
 NTH_ELEMENT, ///< get the nth element
 ROW_NUMBER, ///< get row-number of current index (relative to rolling window)
- RANK, ///< get rank of current index
- DENSE_RANK, ///< get dense rank of current index
- PERCENT_RANK, ///< get percent (i.e. fractional) rank of current index
+ RANK, ///< get rank of current index
 COLLECT_LIST, ///< collect values into a list
 COLLECT_SET, ///< collect values into a list without duplicate entries
 LEAD, ///< window function, accesses row at specified offset following current row
@@ -323,9 +347,11 @@ std::unique_ptr make_row_number_aggregation();
 /**
 * @brief Factory to create a RANK aggregation
 *
- * `RANK` returns a non-nullable column of size_type "ranks": the number of rows preceding or
- * equal to the current row plus one. As a result, ranks are not unique and gaps will appear in
- * the ranking sequence.
+ * `RANK` returns a column of size_type or double "ranks" (see note 3 below for how the
+ * data type is determined) for a given rank method and column order.
+ * If nulls are excluded, the rank will be null for those rows; otherwise a non-nullable column is
+ * returned. A double precision column is returned only when percentage != NONE or when the rank
+ * method is AVERAGE.
 *
 * This aggregation only works with "scan" algorithms. The input column into the group or
 * ungrouped scan is an orderby column that orders the rows that the aggregate function ranks.
 * If rows are ordered by more than one column, the orderby input column should be a struct
 * column containing the ordering columns.
 *
 * Note:
- * 1. This method requires that the rows are presorted by the group keys and order_by columns.
- * 2. `RANK` aggregations will return a fully valid column regardless of null_handling policy
- * specified in the scan.
- * 3. `RANK` aggregations are not compatible with exclusive scans.
+ * 1. This method can work faster when the rows are presorted by the group keys and order_by
+ * columns. Although the groupby object does not require the order_by column to be sorted, the
+ * groupby rank scan aggregation does require the order_by column to be sorted if the keys are sorted.
+ * 2. `RANK` aggregations are not compatible with exclusive scans.
+ * 3. All rank methods return a size_type column, except when the rank method is AVERAGE or
+ * percentage != NONE, in which case a double column is returned.
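For readers who want to see the consolidated factory in action before the pseudo-code example that follows, here is a sketch of a grouped MIN-rank scan. The factory signature comes from this diff; the surrounding groupby plumbing (`scan_request` and friends) follows cudf's public headers and should be treated as illustrative rather than authoritative:

```c++
#include <cudf/aggregation.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/groupby.hpp>
#include <cudf/table/table_view.hpp>

#include <memory>
#include <vector>

// Grouped MIN-rank scan: rank `time` within each `venue` group, as in the
// motor-racing example. Assumes both columns are presorted, per note 1.
std::unique_ptr<cudf::column> min_rank_by_venue(cudf::column_view const& venue,
                                                cudf::column_view const& time)
{
  cudf::groupby::groupby gb(cudf::table_view({venue}));

  std::vector<cudf::groupby::scan_request> requests(1);
  requests[0].values = time;
  requests[0].aggregations.push_back(
    cudf::make_rank_aggregation<cudf::groupby_scan_aggregation>(
      cudf::rank_method::MIN));  // remaining parameters keep their defaults

  auto [group_keys, results] = gb.scan(requests);
  return std::move(results[0].results[0]);
}
```

With the defaults (`ASCENDING`, `EXCLUDE`, `AFTER`, `NONE`), this should produce the "min" row of the example below.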
 *
 * @code{.pseudo}
 * Example: Consider a motor-racing statistics dataset, containing the following columns:
 * 1. venue: (STRING) Location of the race event
 * 2. driver: (STRING) Name of the car driver (abbreviated to 3 characters)
 * 3. time: (INT32) Time taken to complete the circuit
 *
 * For the following presorted data:
 *
 * [ // venue, driver, time
 * { "silverstone", "HAM" ("hamilton"), 15823},
 * { "silverstone", "LEC" ("leclerc"), 15827},
 * { "silverstone", "BOT" ("bottas"), 15834}, // <-- Tied for 3rd place.
 * { "silverstone", "NOR" ("norris"), 15834}, // <-- Tied for 3rd place.
 * { "silverstone", "RIC" ("ricciardo"), 15905},
 * { "monza", "RIC" ("ricciardo"), 12154},
 * { "monza", "NOR" ("norris"), 12156}, // <-- Tied for 2nd place.
 * { "monza", "BOT" ("bottas"), 12156}, // <-- Tied for 2nd place.
 * { "monza", "LEC" ("leclerc"), 12201},
 * { "monza", "PER" ("perez"), 12203}
 * ]
 *
 * A grouped rank aggregation scan with:
 * groupby column : venue
 * input orderby column: time
- * Produces the following rank column:
- * { 1, 2, 3, 3, 5, 1, 2, 2, 4, 5}
- * (This corresponds to the following grouping and `driver` rows:)
- * { "HAM", "LEC", "BOT", "NOR", "RIC", "RIC", "NOR", "BOT", "LEC", "PER" }
- * <----------silverstone----------->|<-------------monza-------------->
- * @endcode
- */
-template
-std::unique_ptr make_rank_aggregation();
-
-/**
- * @brief Factory to create a DENSE_RANK aggregation
- *
- * `DENSE_RANK` returns a non-nullable column of size_type "dense ranks": the preceding unique
- * value's rank plus one. As a result, ranks are not unique but there are no gaps in the ranking
- * sequence (unlike RANK aggregations).
- *
- * This aggregation only works with "scan" algorithms. The input column into the group or
- * ungrouped scan is an orderby column that orders the rows that the aggregate function ranks.
- * If rows are ordered by more than one column, the orderby input column should be a struct
- * column containing the ordering columns.
- *
- * Note:
- * 1. This method requires that the rows are presorted by the group keys and order_by columns.
- * 2. `DENSE_RANK` aggregations will return a fully valid column regardless of null_handling
- * policy specified in the scan.
- * 3. `DENSE_RANK` aggregations are not compatible with exclusive scans.
- *
- * @code{.pseudo}
- * Example: Consider a motor-racing statistics dataset, containing the following columns:
- * 1. venue: (STRING) Location of the race event
- * 2. driver: (STRING) Name of the car driver (abbreviated to 3 characters)
- * 3. time: (INT32) Time taken to complete the circuit
- *
- * For the following presorted data:
+ * Produces the following rank column for each method:
+ * first: { 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}
+ * average: { 1, 2, 3.5, 3.5, 5, 1, 2.5, 2.5, 4, 5}
+ * min: { 1, 2, 3, 3, 5, 1, 2, 2, 4, 5}
+ * max: { 1, 2, 4, 4, 5, 1, 3, 3, 4, 5}
+ * dense: { 1, 2, 3, 3, 4, 1, 2, 2, 3, 4}
+ * This corresponds to the following grouping and `driver` rows:
+ * { "HAM", "LEC", "BOT", "NOR", "RIC", "RIC", "NOR", "BOT", "LEC", "PER" }
+ * <----------silverstone----------->|<-------------monza-------------->
+ *
+ * min rank for each percentage type:
+ * NONE: { 1, 2, 3, 3, 5, 1, 2, 2, 4, 5 }
+ * ZERO_NORMALIZED : { 0.16, 0.33, 0.50, 0.50, 0.83, 0.16, 0.33, 0.33, 0.66, 0.83 }
+ * ONE_NORMALIZED: { 0.00, 0.25, 0.50, 0.50, 1.00, 0.00, 0.25, 0.25, 0.75, 1.00 }
+ * where count corresponds to the number of rows in the group. @see cudf::rank_percentage
- *
- * [ // venue, driver, time
- * { "silverstone", "HAM" ("hamilton"), 15823},
- * { "silverstone", "LEC" ("leclerc"), 15827},
- * { "silverstone", "BOT" ("bottas"), 15834}, // <-- Tied for 3rd place.
- * { "silverstone", "NOR" ("norris"), 15834}, // <-- Tied for 3rd place.
- * { "silverstone", "RIC" ("ricciardo"), 15905},
- * { "monza", "RIC" ("ricciardo"), 12154},
- * { "monza", "NOR" ("norris"), 12156}, // <-- Tied for 2nd place.
- * { "monza", "BOT" ("bottas"), 12156}, // <-- Tied for 2nd place.
- * { "monza", "LEC" ("leclerc"), 12201}, - * { "monza", "PER" ("perez"), 12203} - * ] - * - * A grouped dense rank aggregation scan with: - * groupby column : venue - * input orderby column: time - * Produces the following dense rank column: - * { 1, 2, 3, 3, 4, 1, 2, 2, 3, 4} - * (This corresponds to the following grouping and `driver` rows:) - * { "HAM", "LEC", "BOT", "NOR", "RIC", "RIC", "NOR", "BOT", "LEC", "PER" } - * <----------silverstone----------->|<-------------monza--------------> * @endcode - */ -template -std::unique_ptr make_dense_rank_aggregation(); - -/** - * @brief Factory to create a PERCENT_RANK aggregation * - * `PERCENT_RANK` returns a non-nullable column of double precision "fractional" ranks. - * For row index `i`, the percent rank of row `i` is defined as: - * percent_rank = (rank - 1) / (group_row_count - 1) - * where, - * 1. rank is the `RANK` of the row within the group - * 2. group_row_count is the number of rows in the group - * - * This aggregation only works with "scan" algorithms. The input to the grouped or - * ungrouped scan is an orderby column that orders the rows that the aggregate function ranks. - * If rows are ordered by more than one column, the orderby input column should be a struct - * column containing the ordering columns. - * - * Note: - * 1. This method requires that the rows are presorted by the group keys and order_by columns. - * 2. `PERCENT_RANK` aggregations will return a fully valid column regardless of null_handling - * policy specified in the scan. - * 3. `PERCENT_RANK` aggregations are not compatible with exclusive scans. - * - * @code{.pseudo} - * Example: Consider a motor-racing statistics dataset, containing the following columns: - * 1. venue: (STRING) Location of the race event - * 2. driver: (STRING) Name of the car driver (abbreviated to 3 characters) - * 3. time: (INT32) Time taken to complete the circuit - * - * For the following presorted data: - * - * [ // venue, driver, time - * { "silverstone", "HAM" ("hamilton"), 15823}, - * { "silverstone", "LEC" ("leclerc"), 15827}, - * { "silverstone", "BOT" ("bottas"), 15834}, // <-- Tied for 3rd place. - * { "silverstone", "NOR" ("norris"), 15834}, // <-- Tied for 3rd place. - * { "silverstone", "RIC" ("ricciardo"), 15905}, - * { "monza", "RIC" ("ricciardo"), 12154}, - * { "monza", "NOR" ("norris"), 12156}, // <-- Tied for 2nd place. - * { "monza", "BOT" ("bottas"), 12156}, // <-- Tied for 2nd place. - * { "monza", "LEC" ("leclerc"), 12201}, - * { "monza", "PER" ("perez"), 12203} - * ] - * - * A grouped percent rank aggregation scan with: - * groupby column : venue - * input orderby column: time - * Produces the following percent rank column: - * { 0.00, 0.25, 0.50, 0.50, 1.00, 0.00, 0.25, 0.25, 0.75, 1.00 } - * - * (This corresponds to the following grouping and `driver` rows:) - * { "HAM", "LEC", "BOT", "NOR", "RIC", "RIC", "NOR", "BOT", "LEC", "PER" } - * <----------silverstone----------->|<-------------monza--------------> - * @endcode + * @param method The ranking method used for tie breaking (same values). + * @param column_order The desired sort order for ranking + * @param null_handling flag to include nulls during ranking. If nulls are not included, + * the corresponding rank will be null. 
+ * @param null_precedence The desired order of null compared to other elements for column
+ * @param percentage enum to denote the type of conversion of ranks to percentage in range (0,1]
 */
 template
-std::unique_ptr make_percent_rank_aggregation();
+std::unique_ptr make_rank_aggregation(rank_method method,
+ order column_order = order::ASCENDING,
+ null_policy null_handling = null_policy::EXCLUDE,
+ null_order null_precedence = null_order::AFTER,
+ rank_percentage percentage = rank_percentage::NONE);

 /**
 * @brief Factory to create a COLLECT_LIST aggregation
diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp index eb98e0e0bee..96c99e054a5 100644 --- a/cpp/include/cudf/ast/expressions.hpp +++ b/cpp/include/cudf/ast/expressions.hpp @@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -21,6 +21,8 @@
 #include
 #include
+#include
+
 namespace cudf {
 namespace ast {
@@ -53,7 +55,7 @@ struct expression {
 /**
 * @brief Enum of supported operators.
 */
-enum class ast_operator {
+enum class ast_operator : int32_t {
 // Binary operators
 ADD, ///< operator +
 SUB, ///< operator -
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 2e559afef4f..8f1ad7da9b6 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -17,7 +17,10 @@
 #pragma once

 #include
+#include
 #include
+#include
+#include

 #include
 #include
@@ -939,5 +942,155 @@ std::unique_ptr sample(
 int64_t const seed = 0,
 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

+/**
+ * @brief Checks if a column or its descendants have non-empty null rows
+ *
+ * @note This function is exact. If it returns `true`, there exists one or more
+ * non-empty null elements.
+ *
+ * A LIST or STRING column might have non-empty rows that are marked as null.
+ * A STRUCT or LIST column might have child columns that have non-empty null rows.
+ * Other types of columns are deemed incapable of having non-empty null rows.
+ * E.g. Fixed width columns have no concept of an "empty" row.
+ *
+ * @param input The column which is (and whose descendants are) to be checked for
+ * non-empty null rows.
+ * @return true If either the column or its descendants have non-empty null rows.
+ * @return false If neither the column nor its descendants have non-empty null rows.
+ */
+bool has_nonempty_nulls(column_view const& input);
+
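The exact check above pairs naturally with the approximate check declared just below it; a sketch of the intended call pattern (the `needs_purge` helper is hypothetical):

```c++
#include <cudf/column/column_view.hpp>
#include <cudf/copying.hpp>

// Gate the exact check behind the cheap approximate one: a `false` from
// may_have_nonempty_nulls() is definitive (no false negatives), so only
// the "maybe" path pays for the exact verification.
bool needs_purge(cudf::column_view const& col)
{
  if (!cudf::may_have_nonempty_nulls(col)) { return false; }
  return cudf::has_nonempty_nulls(col);
}
```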
+/**
+ * @brief Approximates if a column or its descendants *may* have non-empty null elements
+ *
+ * @note This function is approximate.
+ * - `true`: Non-empty null elements could exist
+ * - `false`: Non-empty null elements definitely do not exist
+ *
+ * False positives are possible, but false negatives are not.
+ *
+ * Compared to the exact `has_nonempty_nulls()` function, this function is typically
+ * more efficient.
+ *
+ * Complexity:
+ * - Best case: `O(count_descendants(input))`
+ * - Worst case: `O(count_descendants(input)) * m`, where `m` is the number of rows in the largest
+ * descendant
+ *
+ * @param input The column which is (and whose descendants are) to be checked for
+ * non-empty null rows
+ * @return true If either the column or its descendants may have non-empty null rows
+ * @return false If neither the column nor its descendants have non-empty null rows
+ */
+bool may_have_nonempty_nulls(column_view const& input);
+
+/**
+ * @brief Copies `input`, purging any non-empty null rows in the column or its descendants
+ *
+ * LIST columns may have non-empty null rows.
+ * For example:
+ * @code{.pseudo}
+ *
+ * auto const lists = lists_column_wrapper{ {0,1}, {2,3}, {4,5} }.release();
+ * cudf::detail::set_null_mask(lists->null_mask(), 1, 2, false);
+ *
+ * lists[1] is now null, but the lists child column still stores `{2,3}`.
+ * The lists column contents will be:
+ * Validity: 101
+ * Offsets: [0, 2, 4, 6]
+ * Child: [0, 1, 2, 3, 4, 5]
+ *
+ * After purging the contents of the list's null rows, the column's contents
+ * will be:
+ * Validity: 101
+ * Offsets: [0, 2, 2, 4]
+ * Child: [0, 1, 4, 5]
+ * @endcode
+ *
+ * The purge operation only applies directly to LIST and STRING columns, but it
+ * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
+ * may have child/descendant columns that are LIST or STRING.
+ *
+ * @param input The column whose null rows are to be checked and purged
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return std::unique_ptr Column with equivalent contents to `input`, but with
+ * the contents of null rows purged
+ */
+std::unique_ptr purge_nonempty_nulls(
+ lists_column_view const& input,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Copies `input`, purging any non-empty null rows in the column or its descendants
+ *
+ * STRING columns may have non-empty null rows.
+ * For example:
+ * @code{.pseudo}
+ *
+ * auto const strings = strings_column_wrapper{ "AB", "CD", "EF" }.release();
+ * cudf::detail::set_null_mask(strings->null_mask(), 1, 2, false);
+ *
+ * strings[1] is now null, but the strings column still stores `"CD"`.
+ * The strings column contents will be:
+ * Validity: 101
+ * Offsets: [0, 2, 4, 6]
+ * Child: [A, B, C, D, E, F]
+ *
+ * After purging the contents of the strings column's null rows, the column's contents
+ * will be:
+ * Validity: 101
+ * Offsets: [0, 2, 2, 4]
+ * Child: [A, B, E, F]
+ * @endcode
+ *
+ * The purge operation only applies directly to LIST and STRING columns, but it
+ * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
+ * may have child/descendant columns that are LIST or STRING.
+ *
+ * @param input The column whose null rows are to be checked and purged
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return std::unique_ptr Column with equivalent contents to `input`, but with
+ * the contents of null rows purged
+ */
+std::unique_ptr purge_nonempty_nulls(
+ strings_column_view const& input,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
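As a concrete companion to these declarations, a sketch of purging a lists column, built with the `cudf_test` wrapper utilities used in the doc comments. This is illustrative only; a real test would also refresh the column's cached null count after mutating the mask directly:

```c++
#include <cudf/copying.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/null_mask.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <memory>

// Reproduce the doc-comment scenario: build {{0,1}, {2,3}, {4,5}}, null out
// row 1 (leaving its child data {2,3} in place), then purge the stale bytes.
std::unique_ptr<cudf::column> purge_sketch()
{
  auto lists =
    cudf::test::lists_column_wrapper<int32_t>{{0, 1}, {2, 3}, {4, 5}}.release();
  // Mark row 1 invalid; the child column still holds the now-orphaned {2,3}.
  cudf::set_null_mask(lists->mutable_view().null_mask(), 1, 2, false);

  // Returns a copy whose offsets and child data skip the null row's contents.
  return cudf::purge_nonempty_nulls(cudf::lists_column_view{lists->view()});
}
```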
+/**
+ * @brief Copies `input`, purging any non-empty null rows in the column or its descendants
+ *
+ * STRUCT columns may have null rows, with non-empty child rows.
+ * For example:
+ * @code{.pseudo}
+ *
+ * auto const lists = lists_column_wrapper{ {0,1}, {2,3}, {4,5} };
+ * auto const structs = structs_column_wrapper{ {lists}, null_at(1) };
+ *
+ * structs[1].child is now null, but the lists column still stores `{2,3}`.
+ * The lists column contents will be:
+ * Validity: 101
+ * Offsets: [0, 2, 4, 6]
+ * Child: [0, 1, 2, 3, 4, 5]
+ *
+ * After purging the contents of the list's null rows, the column's contents
+ * will be:
+ * Validity: 101
+ * Offsets: [0, 2, 2, 4]
+ * Child: [0, 1, 4, 5]
+ * @endcode
+ *
+ * The purge operation only applies directly to LIST and STRING columns, but it
+ * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
+ * may have child/descendant columns that are LIST or STRING.
+ *
+ * @param input The column whose null rows are to be checked and purged
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return std::unique_ptr Column with equivalent contents to `input`, but with
+ * the contents of null rows purged
+ */
+std::unique_ptr purge_nonempty_nulls(
+ structs_column_view const& input,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /** @} */
 } // namespace cudf
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 886151fb9d6..8ca49dd7d5f 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -75,10 +75,6 @@ class simple_aggregations_collector { // Declares the interface for the simple
 class row_number_aggregation const& agg);
 virtual std::vector> visit(data_type col_type,
 class rank_aggregation const& agg);
- virtual std::vector> visit(data_type col_type,
- class dense_rank_aggregation const& agg);
- virtual std::vector> visit(
- data_type col_type, class percent_rank_aggregation const& agg);
 virtual std::vector> visit(
 data_type col_type, class collect_list_aggregation const& agg);
 virtual std::vector> visit(data_type col_type,
@@ -127,8 +123,6 @@ class aggregation_finalizer { // Declares the interface for the finalizer
 virtual void visit(class nth_element_aggregation const& agg);
 virtual void visit(class row_number_aggregation const& agg);
 virtual void visit(class rank_aggregation const& agg);
- virtual void visit(class dense_rank_aggregation const& agg);
- virtual void visit(class percent_rank_aggregation const& agg);
 virtual void visit(class collect_list_aggregation const& agg);
 virtual void visit(class collect_set_aggregation const& agg);
 virtual void visit(class lead_lag_aggregation const& agg);
@@ -642,32 +636,42 @@ class rank_aggregation final : public rolling_aggregation,
 public groupby_scan_aggregation,
 public scan_aggregation {
 public:
- rank_aggregation() : aggregation{RANK} {}
-
- [[nodiscard]] std::unique_ptr clone() const override
+ rank_aggregation(rank_method method,
+ order column_order,
+ null_policy null_handling,
+ null_order null_precedence,
+ rank_percentage percentage)
+ : aggregation{RANK},
+ _method{method},
+ _column_order{column_order},
+ _null_handling{null_handling},
+ _null_precedence{null_precedence},
+ _percentage(percentage)
 {
- return std::make_unique(*this);
 }
- std::vector> get_simple_aggregations(
- data_type col_type, simple_aggregations_collector& collector) const override
+ rank_method const _method; ///< rank method
+ order const _column_order; ///< order of the column to rank
+ null_policy const _null_handling; ///< include or
exclude nulls in ranks
+ null_order const _null_precedence; ///< order of nulls in ranks
+ rank_percentage const _percentage; ///< whether to return percentage ranks
+
+ [[nodiscard]] bool is_equal(aggregation const& _other) const override
 {
- return collector.visit(col_type, *this);
+ if (!this->aggregation::is_equal(_other)) { return false; }
+ auto const& other = dynamic_cast(_other);
+ return _method == other._method and _null_handling == other._null_handling and
+ _column_order == other._column_order and _null_precedence == other._null_precedence and
+ _percentage == other._percentage;
 }
- void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
-};
-/**
- * @brief Derived class for specifying a dense rank aggregation
- */
-class dense_rank_aggregation final : public rolling_aggregation,
- public groupby_scan_aggregation,
- public scan_aggregation {
- public:
- dense_rank_aggregation() : aggregation{DENSE_RANK} {}
+ [[nodiscard]] size_t do_hash() const override
+ {
+ return this->aggregation::do_hash() ^ hash_impl();
+ }
 [[nodiscard]] std::unique_ptr clone() const override
 {
- return std::make_unique(*this);
+ return std::make_unique(*this);
 }
 std::vector> get_simple_aggregations(
 data_type col_type, simple_aggregations_collector& collector) const override
@@ -675,24 +679,16 @@ class dense_rank_aggregation final : public rolling_aggregation,
 return collector.visit(col_type, *this);
 }
 void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
-};
-
-class percent_rank_aggregation final : public rolling_aggregation,
- public groupby_scan_aggregation,
- public scan_aggregation {
- public:
- percent_rank_aggregation() : aggregation{PERCENT_RANK} {}
- [[nodiscard]] std::unique_ptr clone() const override
- {
- return std::make_unique(*this);
- }
- std::vector> get_simple_aggregations(
- data_type col_type, simple_aggregations_collector& collector) const override
+ private:
+ [[nodiscard]] size_t hash_impl() const
 {
- return collector.visit(col_type, *this);
+ return std::hash{}(static_cast(_method)) ^
+ std::hash{}(static_cast(_column_order)) ^
+ std::hash{}(static_cast(_null_handling)) ^
+ std::hash{}(static_cast(_null_precedence)) ^
+ std::hash{}(static_cast(_percentage));
 }
- void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
};

/**
@@ -1278,19 +1274,7 @@ struct target_type_impl {
// Always use size_type accumulator for RANK
template
struct target_type_impl {
- using type = size_type;
-};
-
-// Always use size_type accumulator for DENSE_RANK
-template
-struct target_type_impl {
- using type = size_type;
-};
-
-// Always use double for PERCENT_RANK
-template
-struct target_type_impl {
- using type = double;
+ using type = size_type; // double when the rank method is AVERAGE or percentage != NONE.
};

// Always use list for COLLECT_LIST
@@ -1453,10 +1437,6 @@ CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind
 return f.template operator()(std::forward(args)...);
 case aggregation::RANK:
 return f.template operator()(std::forward(args)...);
- case aggregation::DENSE_RANK:
- return f.template operator()(std::forward(args)...);
- case aggregation::PERCENT_RANK:
- return f.template operator()(std::forward(args)...);
 case aggregation::COLLECT_LIST:
 return f.template operator()(std::forward(args)...);
 case aggregation::COLLECT_SET:
diff --git a/cpp/include/cudf/detail/copy.cuh b/cpp/include/cudf/detail/copy.cuh new file mode 100644 index 00000000000..773bce7131f --- /dev/null +++ b/cpp/include/cudf/detail/copy.cuh @@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+namespace cudf::detail {
+
+/**
+ * @copydoc cudf::purge_nonempty_nulls(structs_column_view const&, rmm::mr::device_memory_resource*)
+ *
+ * @tparam ColumnViewT View type (lists_column_view, strings_column_view, or structs_column_view)
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ */
+template
+std::unique_ptr purge_nonempty_nulls(ColumnViewT const& input,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+{
+ // Implement via identity gather.
+ auto const input_column = input.parent();
+ auto const gather_begin = thrust::counting_iterator(0);
+ auto const gather_end = gather_begin + input_column.size();
+
+ auto gathered_table = cudf::detail::gather(table_view{{input_column}},
+ gather_begin,
+ gather_end,
+ out_of_bounds_policy::DONT_CHECK,
+ stream,
+ mr);
+ return std::move(gathered_table->release()[0]);
+}
+
+} // namespace cudf::detail
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp index 50157d16876..abd14fbda89 100644 --- a/cpp/include/cudf/detail/copy.hpp +++ b/cpp/include/cudf/detail/copy.hpp @@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -299,5 +299,22 @@ std::unique_ptr get_element(
 size_type index,
 rmm::cuda_stream_view stream = rmm::cuda_stream_default,
 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @copydoc cudf::has_nonempty_nulls
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+bool has_nonempty_nulls(column_view const& input,
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default);
+
+/**
+ * @copydoc cudf::may_have_nonempty_nulls
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */ +bool may_have_nonempty_nulls(column_view const& input, + rmm::cuda_stream_view stream = rmm::cuda_stream_default); + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp new file mode 100644 index 00000000000..12e4aaa03fd --- /dev/null +++ b/cpp/include/cudf/detail/join.hpp @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include + +// Forward declaration +template +class default_allocator; + +namespace cudf { +namespace detail { + +constexpr int DEFAULT_JOIN_CG_SIZE = 2; + +enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN }; + +/** + * @brief Hash join that builds hash table in creation and probes results in subsequent `*_join` + * member functions. + * + * User-defined hash function can be passed via the template parameter `Hasher` + * + * @tparam Hasher Unary callable type + */ +template +struct hash_join { + public: + using map_type = + cuco::static_multimap>, + cuco::double_hashing>; + + hash_join() = delete; + ~hash_join() = default; + hash_join(hash_join const&) = delete; + hash_join(hash_join&&) = delete; + hash_join& operator=(hash_join const&) = delete; + hash_join& operator=(hash_join&&) = delete; + + private: + bool const _is_empty; ///< true if `_hash_table` is empty + cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal + cudf::table_view _build; ///< input table to build the hash map + cudf::structs::detail::flattened_table + _flattened_build_table; ///< flattened data structures for `_build` + map_type _hash_table; ///< hash table built on `_build` + + public: + /** + * @brief Constructor that internally builds the hash table based on the given `build` table. + * + * @throw cudf::logic_error if the number of columns in `build` table is 0. + * @throw cudf::logic_error if the number of rows in `build` table exceeds MAX_JOIN_SIZE. + * + * @param build The build table, from which the hash table is built. + * @param compare_nulls Controls whether null join-key values should match or not. + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ */
+ hash_join(cudf::table_view const& build,
+ cudf::null_equality compare_nulls,
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default);
+
+ /**
+ * @copydoc cudf::hash_join::inner_join
+ */
+ std::pair>,
+ std::unique_ptr>>
+ inner_join(cudf::table_view const& probe,
+ std::optional output_size,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const;
+
+ /**
+ * @copydoc cudf::hash_join::left_join
+ */
+ std::pair>,
+ std::unique_ptr>>
+ left_join(cudf::table_view const& probe,
+ std::optional output_size,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const;
+
+ /**
+ * @copydoc cudf::hash_join::full_join
+ */
+ std::pair>,
+ std::unique_ptr>>
+ full_join(cudf::table_view const& probe,
+ std::optional output_size,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const;
+
+ /**
+ * @copydoc cudf::hash_join::inner_join_size
+ */
+ [[nodiscard]] std::size_t inner_join_size(cudf::table_view const& probe,
+ rmm::cuda_stream_view stream) const;
+
+ /**
+ * @copydoc cudf::hash_join::left_join_size
+ */
+ [[nodiscard]] std::size_t left_join_size(cudf::table_view const& probe,
+ rmm::cuda_stream_view stream) const;
+
+ /**
+ * @copydoc cudf::hash_join::full_join_size
+ */
+ std::size_t full_join_size(cudf::table_view const& probe,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const;
+
+ private:
+ /**
+ * @brief Probes the `_hash_table` built from `_build` for tuples in `probe_table`,
+ * and returns the output indices of `build_table` and `probe_table` as a combined table,
+ * i.e., if a full join is specified as the join type, then a left join is performed. Behavior
+ * is undefined if the provided `output_size` is smaller than the actual output size.
+ *
+ * @throw cudf::logic_error if build table is empty and `JoinKind == INNER_JOIN`.
+ *
+ * @tparam JoinKind The type of join to be performed.
+ *
+ * @param probe_table Table of probe side columns to join.
+ * @param output_size Optional value which allows users to specify the exact output size.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned vectors.
+ *
+ * @return Join output indices vector pair.
+ */
+ template
+ std::pair>,
+ std::unique_ptr>>
+ probe_join_indices(cudf::table_view const& probe_table,
+ std::optional output_size,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const;
+
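For orientation, the public `cudf::hash_join` wrapper that this detail class backs is typically driven as in the following sketch; building once and probing repeatedly is the point of the class. Treat the defaulted parameters and the ordering of the returned index vectors as illustrative:

```c++
#include <cudf/join.hpp>
#include <cudf/table/table_view.hpp>

// Build the hash table once over `build`, then probe it with several tables.
void probe_twice(cudf::table_view const& build,
                 cudf::table_view const& probe_a,
                 cudf::table_view const& probe_b)
{
  cudf::hash_join joiner(build, cudf::null_equality::EQUAL);

  // Each call yields a pair of device vectors of matching row indices,
  // one vector per side of the join; the build-side table is reused.
  auto [a_indices, a_build_indices] = joiner.inner_join(probe_a);
  auto [b_indices, b_build_indices] = joiner.inner_join(probe_b);
}
```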
+ /**
+ * @copydoc cudf::detail::hash_join::probe_join_indices
+ *
+ * @throw cudf::logic_error if probe table is empty.
+ * @throw cudf::logic_error if the size of probe table exceeds `MAX_JOIN_SIZE`.
+ * @throw cudf::logic_error if the number of columns in build table and probe table do not match.
+ * @throw cudf::logic_error if the column data types in build table and probe table do not match.
+ */
+ template
+ std::pair>,
+ std::unique_ptr>>
+ compute_hash_join(cudf::table_view const& probe,
+ std::optional output_size,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const;
+};
+} // namespace detail
+} // namespace cudf
diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 7aec56fdc51..6a6cdd43004 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -133,7 +133,7 @@ std::pair bitmask_binop(
 stream,
 mr);

- return std::make_pair(std::move(dest_mask), null_count);
+ return std::pair(std::move(dest_mask), null_count);
 }

 /**
diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp index 3a6113e66ce..317e4d0cf47 100644 --- a/cpp/include/cudf/detail/reduction_functions.hpp +++ b/cpp/include/cudf/detail/reduction_functions.hpp @@ -17,9 +17,9 @@
 #pragma once

 #include
+#include
 #include

-#include "cudf/lists/lists_column_view.hpp"
 #include

 namespace cudf {
diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp index fc829617c2d..13dddd3b0c8 100644 --- a/cpp/include/cudf/detail/scan.hpp +++ b/cpp/include/cudf/detail/scan.hpp @@ -103,16 +103,17 @@ std::unique_ptr inclusive_dense_rank_scan(column_view const& order_by,
 rmm::mr::device_memory_resource* mr);

 /**
- * @brief Generate row percent ranks for a column.
+ * @brief Generate row ONE_NORMALIZED percent ranks for a column.
+ * Also known as ANSI SQL PERCENT_RANK.
+ * Calculated as (rank - 1) / (count - 1).
 *
 * @param order_by Input column to generate ranks for.
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * @return rank values.
 */
-std::unique_ptr inclusive_percent_rank_scan(column_view const& order_by,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr);
+std::unique_ptr inclusive_one_normalized_percent_rank_scan(
+ column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr);

 } // namespace detail
 } // namespace cudf
diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 09d94d10e79..2c5434b63d2 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -31,11 +31,21 @@
 #include
 #include

-using hash_value_type = uint32_t;
-
 namespace cudf {
 namespace detail {

+/**
+ * Normalization of floating point NaNs, passthrough for all other values.
+ */
+template
+T __device__ inline normalize_nans(T const& key)
+{
+ if constexpr (cudf::is_floating_point()) {
+ if (std::isnan(key)) { return std::numeric_limits::quiet_NaN(); }
+ }
+ return key;
+}
+
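Why hoist NaN handling into `normalize_nans`? Different NaN bit patterns must hash to the same value. A standalone sketch of the idea (not the cudf implementation; payload behavior of `std::nan` is platform-dependent):

```c++
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>

// Map every NaN bit pattern to the canonical quiet NaN so all NaNs hash
// identically; non-NaN values pass through untouched.
inline double normalize_nans_sketch(double key)
{
  return std::isnan(key) ? std::numeric_limits<double>::quiet_NaN() : key;
}

// Hashers consume raw bytes, so compare bit patterns rather than values.
inline std::uint64_t bits_of(double v)
{
  std::uint64_t b;
  std::memcpy(&b, &v, sizeof(b));
  return b;
}

int main()
{
  // On typical platforms these two NaNs carry different payload bits,
  // yet normalization maps both to the same bit pattern.
  double const a = std::nan("1");
  double const b = std::nan("2");
  bool const same_after = bits_of(normalize_nans_sketch(a)) ==
                          bits_of(normalize_nans_sketch(b));
  return same_after ? 0 : 1;
}
```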
 /**
 * Normalization of floating point NaNs and zeros, passthrough for all other values.
 */
@@ -43,13 +53,9 @@ template
T __device__ inline normalize_nans_and_zeros(T const& key)
{
 if constexpr (cudf::is_floating_point()) {
- if (std::isnan(key)) {
- return std::numeric_limits::quiet_NaN();
- } else if (key == T{0.0}) {
- return T{0.0};
- }
+ if (key == T{0.0}) { return T{0.0}; }
 }
- return key;
+ return normalize_nans(key);
}

__device__ inline uint32_t rotate_bits_left(uint32_t x, uint32_t r)
@@ -176,9 +182,6 @@ void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destinatio
 std::memcpy(destination, reinterpret_cast(&x), 8);
}

-} // namespace detail
-} // namespace cudf
-
// MurmurHash3_32 implementation from
// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
//-----------------------------------------------------------------------------
@@ -192,7 +195,7 @@ template
struct MurmurHash3_32 {
 using result_type = hash_value_type;

- MurmurHash3_32() = default;
+ constexpr MurmurHash3_32() = default;
 constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {}

 [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
@@ -214,24 +217,9 @@
 return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
 }

- // TODO Do we need this operator() and/or compute? Probably not both.
 [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
 {
- return compute(key);
- }
-
- // compute wrapper for floating point types
- template >* = nullptr>
- hash_value_type __device__ inline compute_floating_point(T const& key) const
- {
- if (key == T{0.0}) {
- return compute(T{0.0});
- } else if (std::isnan(key)) {
- T nan = std::numeric_limits::quiet_NaN();
- return compute(nan);
- } else {
- return compute(key);
- }
+ return compute(detail::normalize_nans_and_zeros(key));
 }

 template
@@ -240,17 +228,32 @@
 return compute_bytes(reinterpret_cast(&key), sizeof(T));
 }

+ result_type __device__ inline compute_remaining_bytes(std::byte const* data,
+ cudf::size_type len,
+ cudf::size_type tail_offset,
+ result_type h) const
+ {
+ // Process remaining bytes that do not fill a four-byte chunk.
+ uint32_t k1 = 0;
+ switch (len % 4) {
+ case 3: k1 ^= std::to_integer(data[tail_offset + 2]) << 16; [[fallthrough]];
+ case 2: k1 ^= std::to_integer(data[tail_offset + 1]) << 8; [[fallthrough]];
+ case 1:
+ k1 ^= std::to_integer(data[tail_offset]);
+ k1 *= c1;
+ k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
+ k1 *= c2;
+ h ^= k1;
+ };
+ return h;
+ }
+
 result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
 {
 constexpr cudf::size_type BLOCK_SIZE = 4;
 cudf::size_type const nblocks = len / BLOCK_SIZE;
 cudf::size_type const tail_offset = nblocks * BLOCK_SIZE;
- result_type h1 = m_seed;
- constexpr uint32_t c1 = 0xcc9e2d51;
- constexpr uint32_t c2 = 0x1b873593;
- constexpr uint32_t c3 = 0xe6546b64;
- constexpr uint32_t rot_c1 = 15;
- constexpr uint32_t rot_c2 = 13;
+ result_type h = m_seed;

 // Process all four-byte chunks.
 for (cudf::size_type i = 0; i < nblocks; i++) {
@@ -258,50 +261,44 @@
 k1 *= c1;
 k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
 k1 *= c2;
- h1 ^= k1;
- h1 = cudf::detail::rotate_bits_left(h1, rot_c2);
- h1 = h1 * 5 + c3;
+ h ^= k1;
+ h = cudf::detail::rotate_bits_left(h, rot_c2);
+ h = h * 5 + c3;
 }

- // Process remaining bytes that do not fill a four-byte chunk.
-    uint32_t k1 = 0;
-    switch (len % 4) {
-      case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16;
-      case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8;
-      case 1:
-        k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
-        k1 *= c1;
-        k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
-        k1 *= c2;
-        h1 ^= k1;
-    };
+    h = compute_remaining_bytes(data, len, tail_offset, h);

     // Finalize hash.
-    h1 ^= len;
-    h1 = fmix32(h1);
-    return h1;
+    h ^= len;
+    h = fmix32(h);
+    return h;
   }

  private:
   uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
+  static constexpr uint32_t c1     = 0xcc9e2d51;
+  static constexpr uint32_t c2     = 0x1b873593;
+  static constexpr uint32_t c3     = 0xe6546b64;
+  static constexpr uint32_t rot_c1 = 15;
+  static constexpr uint32_t rot_c2 = 13;
 };

 template <>
 hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& key) const
 {
-  return this->compute(static_cast<uint8_t>(key));
+  return compute(static_cast<uint8_t>(key));
 }

 template <>
 hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
 {
-  return this->compute_floating_point(key);
+  return compute(detail::normalize_nans_and_zeros(key));
 }

 template <>
 hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
 {
-  return this->compute_floating_point(key);
+  return compute(detail::normalize_nans_and_zeros(key));
 }

 template <>
@@ -310,28 +307,28 @@ hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
 {
   auto const data = reinterpret_cast<std::byte const*>(key.data());
   auto const len  = key.size_bytes();
-  return this->compute_bytes(data, len);
+  return compute_bytes(data, len);
 }

 template <>
 hash_value_type __device__ inline MurmurHash3_32<numeric::decimal32>::operator()(
   numeric::decimal32 const& key) const
 {
-  return this->compute(key.value());
+  return compute(key.value());
 }

 template <>
 hash_value_type __device__ inline MurmurHash3_32<numeric::decimal64>::operator()(
   numeric::decimal64 const& key) const
 {
-  return this->compute(key.value());
+  return compute(key.value());
 }

 template <>
 hash_value_type __device__ inline MurmurHash3_32<numeric::decimal128>::operator()(
   numeric::decimal128 const& key) const
 {
-  return this->compute(key.value());
+  return compute(key.value());
 }

 template <>
@@ -352,10 +349,10 @@ template <typename Key>
 struct SparkMurmurHash3_32 {
   using result_type = hash_value_type;

-  SparkMurmurHash3_32() = default;
+  constexpr SparkMurmurHash3_32() = default;
   constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {}

-  __device__ inline uint32_t fmix32(uint32_t h) const
+  [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
   {
     h ^= h >> 16;
     h *= 0x85ebca6b;
@@ -365,18 +362,18 @@ struct SparkMurmurHash3_32 {
     return h;
   }

-  result_type __device__ inline operator()(Key const& key) const { return compute(key); }
+  [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
+                                                      cudf::size_type offset) const
+  {
+    // Read a 4-byte value from the data pointer as individual bytes for safe
+    // unaligned access (very likely for string types).
+    auto block = reinterpret_cast<uint8_t const*>(data + offset);
+    return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
+  }

-  // compute wrapper for floating point types
-  template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
-  hash_value_type __device__ inline compute_floating_point(T const& key) const
+  [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
   {
-    if (std::isnan(key)) {
-      T nan = std::numeric_limits<T>::quiet_NaN();
-      return compute(nan);
-    } else {
-      return compute(key);
-    }
+    return compute(key);
   }

   template <typename T>
@@ -385,24 +382,35 @@ struct SparkMurmurHash3_32 {
     return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
   }

-  [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
-                                                      cudf::size_type offset) const
+  result_type __device__ inline compute_remaining_bytes(std::byte const* data,
+                                                        cudf::size_type len,
+                                                        cudf::size_type tail_offset,
+                                                        result_type h) const
   {
-    // Individual byte reads for unaligned accesses (very likely for strings)
-    auto block = reinterpret_cast<uint8_t const*>(data + offset);
-    return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
+    // Process remaining bytes that do not fill a four-byte chunk using Spark's approach
+    // (does not conform to normal MurmurHash3).
+    for (auto i = tail_offset; i < len; i++) {
+      // We require a two-step cast to get the k1 value from the byte. First,
+      // we must cast to a signed int8_t. Then, the sign bit is preserved when
+      // casting to uint32_t under 2's complement. Java preserves the sign when
+      // casting byte-to-int, but C++ does not.
+      uint32_t k1 = static_cast<uint32_t>(std::to_integer<int8_t>(data[i]));
+      k1 *= c1;
+      k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
+      k1 *= c2;
+      h ^= k1;
+      h = cudf::detail::rotate_bits_left(h, rot_c2);
+      h = h * 5 + c3;
+    }
+    return h;
   }

   result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
   {
     constexpr cudf::size_type BLOCK_SIZE = 4;
     cudf::size_type const nblocks        = len / BLOCK_SIZE;
-    result_type h1                       = m_seed;
-    constexpr uint32_t c1                = 0xcc9e2d51;
-    constexpr uint32_t c2                = 0x1b873593;
-    constexpr uint32_t c3                = 0xe6546b64;
-    constexpr uint32_t rot_c1            = 15;
-    constexpr uint32_t rot_c2            = 13;
+    cudf::size_type const tail_offset    = nblocks * BLOCK_SIZE;
+    result_type h                        = m_seed;

     // Process all four-byte chunks.
     for (cudf::size_type i = 0; i < nblocks; i++) {
@@ -410,78 +418,69 @@ struct SparkMurmurHash3_32 {
       k1 *= c1;
       k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
       k1 *= c2;
-      h1 ^= k1;
-      h1 = cudf::detail::rotate_bits_left(h1, rot_c2);
-      h1 = h1 * 5 + c3;
+      h ^= k1;
+      h = cudf::detail::rotate_bits_left(h, rot_c2);
+      h = h * 5 + c3;
     }

-    // Process remaining bytes that do not fill a four-byte chunk using Spark's approach
-    // (does not conform to normal MurmurHash3).
-    for (cudf::size_type i = nblocks * 4; i < len; i++) {
-      // We require a two-step cast to get the k1 value from the byte. First,
-      // we must cast to a signed int8_t. Then, the sign bit is preserved when
-      // casting to uint32_t under 2's complement. Java preserves the
-      // signedness when casting byte-to-int, but C++ does not.
-      uint32_t k1 = static_cast<uint32_t>(std::to_integer<int8_t>(data[i]));
-      k1 *= c1;
-      k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
-      k1 *= c2;
-      h1 ^= k1;
-      h1 = cudf::detail::rotate_bits_left(h1, rot_c2);
-      h1 = h1 * 5 + c3;
-    }
+    h = compute_remaining_bytes(data, len, tail_offset, h);

     // Finalize hash.
-    h1 ^= len;
-    h1 = fmix32(h1);
-    return h1;
+    h ^= len;
+    h = fmix32(h);
+    return h;
   }

  private:
   uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
+  static constexpr uint32_t c1     = 0xcc9e2d51;
+  static constexpr uint32_t c2     = 0x1b873593;
+  static constexpr uint32_t c3     = 0xe6546b64;
+  static constexpr uint32_t rot_c1 = 15;
+  static constexpr uint32_t rot_c2 = 13;
 };

 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<bool>::operator()(bool const& key) const
 {
-  return this->compute(key);
+  return compute(key);
 }

 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<int8_t>::operator()(int8_t const& key) const
 {
-  return this->compute(key);
+  return compute(key);
 }

 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<uint8_t>::operator()(uint8_t const& key) const
 {
-  return this->compute(key);
+  return compute(key);
 }

 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<int16_t>::operator()(int16_t const& key) const
 {
-  return this->compute(key);
+  return compute(key);
 }

 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<uint16_t>::operator()(
   uint16_t const& key) const
 {
-  return this->compute(key);
+  return compute(key);
 }

 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(float const& key) const
 {
-  return this->compute_floating_point(key);
+  return compute(detail::normalize_nans(key));
 }

 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(double const& key) const
 {
-  return this->compute_floating_point(key);
+  return compute(detail::normalize_nans(key));
 }

 template <>
@@ -490,21 +489,21 @@ hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operat
 {
   auto const data = reinterpret_cast<std::byte const*>(key.data());
   auto const len  = key.size_bytes();
-  return this->compute_bytes(data, len);
+  return compute_bytes(data, len);
 }

 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal32>::operator()(
   numeric::decimal32 const& key) const
 {
-  return this->compute(key.value());
+  return compute(key.value());
 }

 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal64>::operator()(
   numeric::decimal64 const& key) const
 {
-  return this->compute(key.value());
+  return compute(key.value());
 }

 template <>
@@ -546,7 +545,7 @@ hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal128>::oper
   __int128_t big_endian_value = 0;
   auto big_endian_data        = reinterpret_cast<std::byte*>(&big_endian_value);
   thrust::reverse_copy(thrust::seq, data, data + length, big_endian_data);
-  return this->compute_bytes(big_endian_data, length);
+  return compute_bytes(big_endian_data, length);
 }

 template <>
@@ -593,3 +592,6 @@ struct IdentityHash {

 template <typename Key>
 using default_hash = MurmurHash3_32<Key>;
+
+}  // namespace detail
+}  // namespace cudf
diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh
index aa4421bb4ed..f91f51b2161 100644
--- a/cpp/include/cudf/detail/valid_if.cuh
+++ b/cpp/include/cudf/detail/valid_if.cuh
@@ -110,7 +110,7 @@ std::pair<rmm::device_buffer, size_type> valid_if(
     null_count = size - valid_count.value(stream);
   }

-  return std::make_pair(std::move(null_mask), null_count);
+  return std::pair(std::move(null_mask), null_count);
 }

 /**
diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp
index e973c585410..bbff304e547 100644
--- a/cpp/include/cudf/hashing.hpp
+++ b/cpp/include/cudf/hashing.hpp
@@ -19,6 +19,9 @@
 #include

 namespace cudf {
+
+using hash_value_type = uint32_t;
+
 /**
  * @addtogroup column_hash
  * @{
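
The Spark-compatible hash above processes tail bytes one at a time with a sign-extending two-step cast, unlike standard MurmurHash3. A minimal host-side sketch (not part of this patch; plain C++17, all names illustrative) of why the two-step cast matters:

// why_two_step_cast.cpp -- standalone illustration, not libcudf code.
// Matching Java's byte-to-int conversion requires sign extension, which a
// direct byte -> uint32_t cast does not provide.
#include <cstdint>
#include <cstdio>

int main()
{
  unsigned char byte = 0x9C;  // Java reads this byte as -100

  // Direct cast: zero-extends to 0x0000009C.
  uint32_t zero_extended = static_cast<uint32_t>(byte);

  // Two-step cast: int8_t first, so the value is sign-extended to
  // 0xFFFFFF9C under two's complement, matching Java's behavior.
  uint32_t sign_extended = static_cast<uint32_t>(static_cast<int8_t>(byte));

  std::printf("zero-extended: 0x%08X\n", (unsigned)zero_extended);
  std::printf("sign-extended: 0x%08X\n", (unsigned)sign_extended);
  return 0;
}

Feeding a different k1 into the multiply/rotate mixing steps for any "negative" byte would change every subsequent hash value, which is why this detail is load-bearing for Spark compatibility.
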
diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp
index 7e4ab5b8d9d..23ed0153f3f 100644
--- a/cpp/include/cudf/io/types.hpp
+++ b/cpp/include/cudf/io/types.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -244,6 +244,7 @@ class column_in_metadata {
   bool _use_int96_timestamp = false;
   // bool _output_as_binary = false;
   thrust::optional<uint8_t> _decimal_precision;
+  thrust::optional<int32_t> _parquet_field_id;
   std::vector<column_in_metadata> children;

  public:
@@ -324,6 +325,18 @@ class column_in_metadata {
     return *this;
   }

+  /**
+   * @brief Set the parquet field id of this column.
+   *
+   * @param field_id The parquet field id to set
+   * @return this for chaining
+   */
+  column_in_metadata& set_parquet_field_id(int32_t field_id)
+  {
+    _parquet_field_id = field_id;
+    return *this;
+  }
+
   /**
    * @brief Get reference to a child of this column
    *
@@ -379,6 +392,18 @@ class column_in_metadata {
    */
   [[nodiscard]] uint8_t get_decimal_precision() const { return _decimal_precision.value(); }

+  /**
+   * @brief Get whether parquet field id has been set for this column.
+   */
+  [[nodiscard]] bool is_parquet_field_id_set() const { return _parquet_field_id.has_value(); }
+
+  /**
+   * @brief Get the parquet field id that was set for this column.
+   * @throws If parquet field id was not set for this column.
+   *         Check using `is_parquet_field_id_set()` first.
+   */
+  [[nodiscard]] int32_t get_parquet_field_id() const { return _parquet_field_id.value(); }
+
   /**
    * @brief Get the number of children of this column
    */
diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index d56f8f0e904..f48f8a83e9a 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -17,6 +17,7 @@
 #pragma once

 #include
+#include
 #include
 #include
 #include
@@ -29,6 +30,16 @@
 #include

 namespace cudf {
+
+// forward declaration
+namespace detail {
+template <typename T>
+class MurmurHash3_32;
+
+template <typename T>
+class hash_join;
+}  // namespace detail
+
 /**
  * @addtogroup column_join
  * @{
@@ -503,6 +514,9 @@ std::unique_ptr<cudf::table> cross_join(
 */
 class hash_join {
  public:
+  using impl_type =
+    typename cudf::detail::hash_join<cudf::detail::MurmurHash3_32<cudf::hash_value_type>>;
+
   hash_join() = delete;
   ~hash_join();
   hash_join(hash_join const&) = delete;
@@ -634,8 +648,7 @@ class hash_join {
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;

  private:
-  struct hash_join_impl;
-  const std::unique_ptr<const hash_join_impl> impl;
+  const std::unique_ptr<const impl_type> _impl;
 };

 /**
diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh
index c637ad041ba..7df36be2385 100644
--- a/cpp/include/cudf/lists/detail/gather.cuh
+++ b/cpp/include/cudf/lists/detail/gather.cuh
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -82,6 +83,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
   auto dst_offsets_c = cudf::make_fixed_width_column(
     data_type{type_id::INT32}, offset_count, mask_state::UNALLOCATED, stream, mr);
   mutable_column_view dst_offsets_v = dst_offsets_c->mutable_view();
+  auto const source_column_nullmask = source_column.null_mask();

   // generate the compacted outgoing offsets.
   auto count_iter = thrust::make_counting_iterator<int32_t>(0);
@@ -90,12 +92,23 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
     count_iter,
     count_iter + offset_count,
     dst_offsets_v.begin<int32_t>(),
-    [gather_map, output_count, src_offsets, src_size] __device__(int32_t index) -> int32_t {
+    [source_column_nullmask,
+     source_column_offset = source_column.offset(),
+     gather_map,
+     output_count,
+     src_offsets,
+     src_size] __device__(int32_t index) -> int32_t {
       int32_t offset_index = index < output_count ? gather_map[index] : 0;

       // if this is an invalid index, this will be a NULL list
       if (NullifyOutOfBounds && ((offset_index < 0) || (offset_index >= src_size))) { return 0; }

+      // If the source row is null, the output row size must be 0.
+      if (source_column_nullmask != nullptr &&
+          not cudf::bit_is_set(source_column_nullmask, source_column_offset + offset_index)) {
+        return 0;
+      }
+
       // the length of this list
       return src_offsets[offset_index + 1] - src_offsets[offset_index];
     },
@@ -110,15 +123,27 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
   // generate the base offsets
   rmm::device_uvector<int32_t> base_offsets = rmm::device_uvector<int32_t>(output_count, stream);
-  thrust::transform(rmm::exec_policy(stream),
-                    gather_map,
-                    gather_map + output_count,
-                    base_offsets.data(),
-                    [src_offsets, src_size, shift] __device__(int32_t index) {
-                      // if this is an invalid index, this will be a NULL list
-                      if (NullifyOutOfBounds && ((index < 0) || (index >= src_size))) { return 0; }
-                      return src_offsets[index] - shift;
-                    });
+  thrust::transform(
+    rmm::exec_policy(stream),
+    gather_map,
+    gather_map + output_count,
+    base_offsets.data(),
+    [source_column_nullmask,
+     source_column_offset = source_column.offset(),
+     src_offsets,
+     src_size,
+     shift] __device__(int32_t index) {
+      // if this is an invalid index, this will be a NULL list
+      if (NullifyOutOfBounds && ((index < 0) || (index >= src_size))) { return 0; }
+
+      // If the source row is null, the output row size must be 0.
+      if (source_column_nullmask != nullptr &&
+          not cudf::bit_is_set(source_column_nullmask, source_column_offset + index)) {
+        return 0;
+      }
+
+      return src_offsets[index] - shift;
+    });

   // Retrieve size of the resulting gather map for level N+1 (the last offset)
   size_type child_gather_map_size =
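
Both lambdas above add the same rule: a gathered list row contributes size 0 when its index is out of bounds or the source row is null. A host-side sketch of that sizing rule (illustrative only; the bitmask check stands in for cudf::bit_is_set, plain C++17):

// gather_null_sizes.cpp -- standalone illustration, not libcudf code.
#include <cstdint>
#include <cstdio>
#include <vector>

bool bit_is_set(std::vector<uint32_t> const& mask, int i)
{
  return (mask[i / 32] >> (i % 32)) & 1;
}

int main()
{
  std::vector<int> offsets     = {0, 2, 5, 8, 9};  // 4 source list rows
  std::vector<uint32_t> nulls  = {0b1011};         // row 2 is null (bit unset)
  std::vector<int> gather_map  = {3, 2, 0, 7};     // index 7 is out of bounds

  for (int out = 0; out < (int)gather_map.size(); ++out) {
    int const idx = gather_map[out];
    int size      = 0;
    // out-of-bounds and null rows both produce a zero-length output row
    if (idx >= 0 && idx < 4 && bit_is_set(nulls, idx)) {
      size = offsets[idx + 1] - offsets[idx];
    }
    std::printf("output row %d -> size %d\n", out, size);
  }
  return 0;
}

Note that source row 2 spans three bytes of offsets (5..8) but is null, so without the new check it would have leaked a non-zero size into the output.
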
diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp
index ff334b9ee85..b7e915650dc 100644
--- a/cpp/include/cudf/sorting.hpp
+++ b/cpp/include/cudf/sorting.hpp
@@ -16,6 +16,7 @@

 #pragma once

+#include
 #include
 #include

@@ -23,19 +24,6 @@
 namespace cudf {

-/**
- * @brief Tie-breaker method to use for ranking the column.
- *
- * @ingroup column_sort
- */
-enum class rank_method {
-  FIRST,    ///< stable sort order ranking (no ties)
-  AVERAGE,  ///< mean of first in the group
-  MIN,      ///< min of first in the group
-  MAX,      ///< max of first in the group
-  DENSE     ///< rank always increases by 1 between groups
-};
-
 /**
  * @addtogroup column_sort
  * @{
@@ -198,7 +186,7 @@ std::unique_ptr<column> stable_sort_by_key(
 *            included, corresponding rank will be null.
 * @param null_precedence The desired order of null compared to other elements
 *            for column
- * @param percentage flag to convert ranks to percentage in range (0,1}
+ * @param percentage flag to convert ranks to percentage in range (0,1]
 * @param mr Device memory resource used to allocate the returned column's device memory
 * @return std::unique_ptr<column> A column of containing the rank of the each
 * element of the column of `input`. The output column type will be `size_type`
diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh
index 1b10c70d6d6..d46ab3a91a1 100644
--- a/cpp/include/cudf/strings/detail/gather.cuh
+++ b/cpp/include/cudf/strings/detail/gather.cuh
@@ -303,14 +303,17 @@ std::unique_ptr<cudf::column> gather(
     data_type{type_id::INT32}, output_count + 1, mask_state::UNALLOCATED, stream, mr);
   auto const d_out_offsets = out_offsets_column->mutable_view().template data<int32_t>();
   auto const d_in_offsets  = (strings_count > 0) ? strings.offsets_begin() : nullptr;
-  thrust::transform(rmm::exec_policy(stream),
-                    begin,
-                    end,
-                    d_out_offsets,
-                    [d_in_offsets, strings_count] __device__(size_type in_idx) {
-                      if (NullifyOutOfBounds && (in_idx < 0 || in_idx >= strings_count)) return 0;
-                      return d_in_offsets[in_idx + 1] - d_in_offsets[in_idx];
-                    });
+  auto const d_strings = column_device_view::create(strings.parent(), stream);
+  thrust::transform(
+    rmm::exec_policy(stream),
+    begin,
+    end,
+    d_out_offsets,
+    [d_strings = *d_strings, d_in_offsets, strings_count] __device__(size_type in_idx) {
+      if (NullifyOutOfBounds && (in_idx < 0 || in_idx >= strings_count)) return 0;
+      if (not d_strings.is_valid(in_idx)) return 0;
+      return d_in_offsets[in_idx + 1] - d_in_offsets[in_idx];
+    });

   // check total size is not too large
   size_t const total_bytes = thrust::transform_reduce(
@@ -329,7 +332,6 @@ std::unique_ptr<cudf::column> gather(

   // build chars column
   cudf::device_span<int32_t const> const d_out_offsets_span(d_out_offsets, output_count + 1);
-  auto const d_strings  = column_device_view::create(strings.parent(), stream);
   auto out_chars_column = gather_chars(d_strings->begin<string_view>(),
                                        begin,
                                        end,
diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh
index b6aa22cc316..cfede60c771 100644
--- a/cpp/include/cudf/strings/detail/scatter.cuh
+++ b/cpp/include/cudf/strings/detail/scatter.cuh
@@ -15,14 +15,13 @@
 */
 #pragma once

-#include
-#include
-#include
-#include
+#include
 #include
 #include
+#include
 #include
+#include
 #include
 #include
@@ -68,20 +67,17 @@ std::unique_ptr<column> scatter(
   // create vector of string_view's to scatter into
   rmm::device_uvector<string_view> target_vector = create_string_vector_from_column(target, stream);

-  // do the scatter
-  thrust::scatter(rmm::exec_policy(stream), begin, end, scatter_map, target_vector.begin());
+  // this ensures empty strings are not mapped to nulls in the make_strings_column function
+  auto const size = thrust::distance(begin, end);
+  auto itr        = thrust::make_transform_iterator(
+    begin, [] __device__(string_view const sv) { return sv.empty() ? string_view{} : sv; });

-  // build offsets column
-  auto offsets_column = child_offsets_from_string_vector(target_vector, stream, mr);
-  // build chars column
-  auto chars_column =
-    child_chars_from_string_vector(target_vector, offsets_column->view(), stream, mr);
+  // do the scatter
+  thrust::scatter(rmm::exec_policy(stream), itr, itr + size, scatter_map, target_vector.begin());

-  return make_strings_column(target.size(),
-                             std::move(offsets_column),
-                             std::move(chars_column),
-                             UNKNOWN_NULL_COUNT,
-                             cudf::detail::copy_bitmask(target.parent(), stream, mr));
+  // build the output column
+  auto sv_span = cudf::device_span<string_view const>(target_vector);
+  return make_strings_column(sv_span, string_view{nullptr, 0}, stream, mr);
 }

 }  // namespace detail
diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh
index b9ea2d9ecff..e6dba5147b5 100644
--- a/cpp/include/cudf/strings/detail/utilities.cuh
+++ b/cpp/include/cudf/strings/detail/utilities.cuh
@@ -71,28 +71,6 @@ std::unique_ptr<column> make_offsets_child_column(
   return offsets_column;
 }

-/**
- * @brief Creates an offsets column from a string_view iterator, and size.
- *
- * @tparam Iter Iterator type that returns string_view instances
- * @param strings_begin Iterator to the beginning of the string_view sequence
- * @param num_strings The number of string_view instances in the sequence
- * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return Child offsets column
- */
-template <typename Iter>
-std::unique_ptr<cudf::column> child_offsets_from_string_iterator(
-  Iter strings_begin,
-  cudf::size_type num_strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
-{
-  auto transformer = [] __device__(string_view v) { return v.size_bytes(); };
-  auto begin       = thrust::make_transform_iterator(strings_begin, transformer);
-  return make_offsets_child_column(begin, begin + num_strings, stream, mr);
-}
-
 /**
 * @brief Copies input string data into a buffer and increments the pointer by the number of bytes
 * copied.
@@ -178,7 +156,7 @@ auto make_strings_children(
     for_each_fn(size_and_exec_fn);
   }

-  return std::make_pair(std::move(offsets_column), std::move(chars_column));
+  return std::pair(std::move(offsets_column), std::move(chars_column));
 }

 /**
diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp
index 6424841ba86..c4f9e547148 100644
--- a/cpp/include/cudf/strings/detail/utilities.hpp
+++ b/cpp/include/cudf/strings/detail/utilities.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -45,36 +45,11 @@ std::unique_ptr<column> create_chars_child_column(
 *
 * @param strings Strings column instance.
 * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned vector's device memory.
 * @return Device vector of string_views
 */
 rmm::device_uvector<string_view> create_string_vector_from_column(
-  cudf::strings_column_view const strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default);
-
-/**
- * @brief Creates an offsets column from a string_view vector.
- *
- * @param strings Strings input data
- * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return Child offsets column
- */
-std::unique_ptr<cudf::column> child_offsets_from_string_vector(
-  cudf::device_span<string_view const> strings,
-  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Creates a chars column from a string_view vector.
- *
- * @param strings Strings input data
- * @param d_offsets Offsets vector for placing strings into column's memory.
- * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return Child chars column
- */
-std::unique_ptr<cudf::column> child_chars_from_string_vector(
-  cudf::device_span<string_view const> strings,
-  column_view const& offsets,
+  cudf::strings_column_view const strings,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
diff --git a/cpp/include/cudf/strings/string.cuh b/cpp/include/cudf/strings/string.cuh
index 0cfcaeb913e..d20080cc0e5 100644
--- a/cpp/include/cudf/strings/string.cuh
+++ b/cpp/include/cudf/strings/string.cuh
@@ -23,7 +23,6 @@
 namespace cudf {
 namespace strings {
-namespace string {
 /**
  * @addtogroup strings_classes
  * @{
@@ -150,6 +149,5 @@ inline __device__ bool is_float(string_view const& d_str)
 }

 /** @} */  // end of group
-}  // namespace string
 }  // namespace strings
 }  // namespace cudf
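
Several of the string utilities above build an offsets child column from per-row byte counts. A host-side sketch of that pattern (illustrative only; assumes int32 offsets and no nulls, plain C++17):

// offsets_from_sizes.cpp -- standalone illustration, not libcudf code.
// An offsets column is an inclusive scan of row sizes with a leading zero,
// so offsets[i+1] - offsets[i] recovers each row's length.
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <string>
#include <vector>

int main()
{
  std::vector<std::string> rows = {"cudf", "", "rapids"};

  std::vector<int> offsets(rows.size() + 1, 0);
  std::transform(rows.begin(), rows.end(), offsets.begin() + 1,
                 [](std::string const& s) { return (int)s.size(); });
  std::partial_sum(offsets.begin(), offsets.end(), offsets.begin());

  for (int o : offsets) std::printf("%d ", o);  // prints: 0 4 4 10
  std::printf("\n");
  return 0;
}
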
diff --git a/cpp/include/cudf/structs/structs_column_view.hpp b/cpp/include/cudf/structs/structs_column_view.hpp
index 329c24cfe0a..ca866d8555e 100644
--- a/cpp/include/cudf/structs/structs_column_view.hpp
+++ b/cpp/include/cudf/structs/structs_column_view.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -41,6 +41,11 @@ class structs_column_view : public column_view {

   explicit structs_column_view(column_view const& rhs);

+  /**
+   * @brief Returns the parent column.
+   */
+  [[nodiscard]] column_view parent() const;
+
   using column_view::child_begin;
   using column_view::child_end;
   using column_view::has_nulls;
diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh
index 88e31744fdf..32b71e660ac 100644
--- a/cpp/include/cudf/table/experimental/row_operators.cuh
+++ b/cpp/include/cudf/table/experimental/row_operators.cuh
@@ -165,14 +165,13 @@ class device_row_comparator {
       bool const rhs_is_null{_rhs.is_null(rhs_element_index)};

       if (lhs_is_null or rhs_is_null) {  // at least one is null
-        return cuda::std::make_pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence),
-                                    _depth);
+        return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth);
       }
     }

-    return cuda::std::make_pair(relational_compare(_lhs.element<Element>(lhs_element_index),
-                                                   _rhs.element<Element>(rhs_element_index)),
-                                std::numeric_limits<int>::max());
+    return cuda::std::pair(relational_compare(_lhs.element<Element>(lhs_element_index),
+                                              _rhs.element<Element>(rhs_element_index)),
+                           std::numeric_limits<int>::max());
   }

   template <typename Element,
   template <typename StringsIterator>
   strings_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{}
   {
-    std::vector<char> chars;
-    std::vector<size_type> offsets;
-    auto all_valid           = thrust::make_constant_iterator(true);
-    std::tie(chars, offsets) = detail::make_chars_and_offsets(begin, end, all_valid);
-    auto d_chars             = cudf::detail::make_device_uvector_sync(chars);
-    auto d_offsets           = cudf::detail::make_device_uvector_sync(offsets);
-    wrapped                  = cudf::make_strings_column(d_chars, d_offsets);
+    auto all_valid        = thrust::make_constant_iterator(true);
+    auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, all_valid);
+    auto d_chars          = cudf::detail::make_device_uvector_sync(chars);
+    auto d_offsets        = cudf::detail::make_device_uvector_sync(offsets);
+    wrapped               = cudf::make_strings_column(d_chars, d_offsets);
   }

 /**
@@ -744,14 +742,12 @@ class strings_column_wrapper : public detail::column_wrapper {
     : column_wrapper{}
   {
     size_type num_strings = std::distance(begin, end);
-    std::vector<char> chars;
-    std::vector<size_type> offsets;
-    std::tie(chars, offsets) = detail::make_chars_and_offsets(begin, end, v);
-    auto null_mask           = detail::make_null_mask_vector(v, v + num_strings);
-    auto d_chars             = cudf::detail::make_device_uvector_sync(chars);
-    auto d_offsets           = cudf::detail::make_device_uvector_sync(offsets);
-    auto d_bitmask           = cudf::detail::make_device_uvector_sync(null_mask);
-    wrapped = cudf::make_strings_column(d_chars, d_offsets, d_bitmask);
+    auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v);
+    auto null_mask        = detail::make_null_mask_vector(v, v + num_strings);
+    auto d_chars          = cudf::detail::make_device_uvector_sync(chars);
+    auto d_offsets        = cudf::detail::make_device_uvector_sync(offsets);
+    auto d_bitmask        = cudf::detail::make_device_uvector_sync(null_mask);
+    wrapped               = cudf::make_strings_column(d_chars, d_offsets, d_bitmask);
   }

 /**
@@ -1468,20 +1464,18 @@ class lists_column_wrapper : public detail::column_wrapper {
       0, [&v](auto i) { return v.empty() ? true : v[i]; });

     // compute the expected hierarchy and depth
-    auto const hierarchy_and_depth = std::accumulate(
-      elements.begin(),
-      elements.end(),
-      std::pair<column_view, int32_t>{{}, -1},
-      [](auto acc, lists_column_wrapper const& lcw) {
-        return lcw.depth > acc.second ? std::make_pair(lcw.get_view(), lcw.depth) : acc;
-      });
+    auto const hierarchy_and_depth =
+      std::accumulate(elements.begin(),
+                      elements.end(),
+                      std::pair<column_view, int32_t>{{}, -1},
+                      [](auto acc, lists_column_wrapper const& lcw) {
+                        return lcw.depth > acc.second ? std::pair(lcw.get_view(), lcw.depth) : acc;
+                      });
     column_view expected_hierarchy = hierarchy_and_depth.first;
     int32_t const expected_depth   = hierarchy_and_depth.second;

     // preprocess columns so that every column_view in 'cols' is an equivalent hierarchy
-    std::vector<std::unique_ptr<column>> stubs;
-    std::vector<column_view> cols;
-    std::tie(cols, stubs) = preprocess_columns(elements, expected_hierarchy, expected_depth);
+    auto [cols, stubs] = preprocess_columns(elements, expected_hierarchy, expected_depth);

     // generate offsets
     size_type count = 0;
diff --git a/cpp/include/cudf_test/cudf_gtest.hpp b/cpp/include/cudf_test/cudf_gtest.hpp
index d078bf90a8a..7bd704a288d 100644
--- a/cpp/include/cudf_test/cudf_gtest.hpp
+++ b/cpp/include/cudf_test/cudf_gtest.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -120,6 +120,9 @@ struct TypeList<Types<TYPES...>> {
 #define CUDA_EXPECT_THROW_MESSAGE(x, msg) \
   EXPECT_THROW_MESSAGE(x, cudf::cuda_error, "CUDA error encountered at:", msg)

+#define FATAL_CUDA_EXPECT_THROW_MESSAGE(x, msg) \
+  EXPECT_THROW_MESSAGE(x, cudf::fatal_cuda_error, "Fatal CUDA error encountered at:", msg)
+
 /**
 * @brief test macro to be expected as no exception.
 * The testing is same with EXPECT_NO_THROW() in gtest.
diff --git a/cpp/include/cudf_test/file_utilities.hpp b/cpp/include/cudf_test/file_utilities.hpp
index 4df7b6a69c8..d722b836674 100644
--- a/cpp/include/cudf_test/file_utilities.hpp
+++ b/cpp/include/cudf_test/file_utilities.hpp
@@ -18,6 +18,7 @@

 #include
 #include
+#include <filesystem>
 #include
 #include
@@ -34,17 +35,14 @@ class temp_directory {
  public:
  temp_directory(const std::string& base_name)
  {
-    std::string dir_template("/tmp");
-    if (const char* env_p = std::getenv("WORKSPACE")) dir_template = env_p;
+    std::string dir_template{std::filesystem::temp_directory_path().string()};
+    if (auto env_p = std::getenv("WORKSPACE")) dir_template = env_p;
+
     dir_template += "/" + base_name + ".XXXXXX";
     auto const tmpdirptr = mkdtemp(const_cast<char*>(dir_template.data()));
-    if (tmpdirptr == nullptr) CUDF_FAIL("Temporary directory creation failure: " + dir_template);
-    _path = dir_template + "/";
-  }
+    CUDF_EXPECTS(tmpdirptr != nullptr, "Temporary directory creation failure: " + dir_template);

-  static int rm_files(const char* pathname, const struct stat* sbuf, int type, struct FTW* ftwb)
-  {
-    return std::remove(pathname);
+    _path = dir_template + "/";
   }

   temp_directory& operator=(temp_directory const&) = delete;
@@ -52,11 +50,7 @@ class temp_directory {
   temp_directory& operator=(temp_directory&&) = default;
   temp_directory(temp_directory&&) = default;

-  ~temp_directory()
-  {
-    // TODO: should use std::filesystem instead, once C++17 support added
-    nftw(_path.c_str(), rm_files, 10, FTW_DEPTH | FTW_MOUNT | FTW_PHYS);
-  }
+  ~temp_directory() { std::filesystem::remove_all(std::filesystem::path{_path}); }

   /**
   * @brief Returns the path of the temporary directory
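
The change above replaces the hand-rolled nftw() cleanup with std::filesystem::remove_all. A minimal RAII sketch of the same pattern (not the libcudf class; POSIX + C++17, names illustrative):

// scoped_temp_dir.cpp -- standalone illustration, not libcudf code.
#include <stdlib.h>     // mkdtemp (POSIX)
#include <filesystem>
#include <stdexcept>
#include <string>

class scoped_temp_dir {
 public:
  scoped_temp_dir()
  {
    // mkdtemp mutates the template buffer in place, replacing the X's.
    std::string tmpl = std::filesystem::temp_directory_path().string() + "/demo.XXXXXX";
    if (mkdtemp(tmpl.data()) == nullptr) {
      throw std::runtime_error("mkdtemp failed: " + tmpl);
    }
    _path = tmpl;
  }
  // remove_all deletes the directory tree recursively, which is what the
  // nftw(FTW_DEPTH) callback used to do by hand.
  ~scoped_temp_dir() { std::filesystem::remove_all(_path); }

  std::string const& path() const { return _path; }

 private:
  std::string _path;
};
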
 cudf_kafka
-  PROPERTIES BUILD_RPATH "\$ORIGIN" INSTALL_RPATH "\$ORIGIN" # set target compile options
-             CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON
+  PROPERTIES BUILD_RPATH "\$ORIGIN"
+             INSTALL_RPATH "\$ORIGIN" # set target compile options
+             CXX_STANDARD 17
+             CXX_STANDARD_REQUIRED ON
 )

 # ##################################################################################################
diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp
index 6b98747c145..79a40640627 100644
--- a/cpp/libcudf_kafka/src/kafka_callback.cpp
+++ b/cpp/libcudf_kafka/src/kafka_callback.cpp
@@ -13,7 +13,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#include "cudf_kafka/kafka_callback.hpp"
+#include <cudf_kafka/kafka_callback.hpp>

 #include
diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp
index 49e89a56e60..2ddaa9892da 100644
--- a/cpp/libcudf_kafka/src/kafka_consumer.cpp
+++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp
@@ -13,7 +13,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#include "cudf_kafka/kafka_consumer.hpp"
+#include <cudf_kafka/kafka_consumer.hpp>

 #include
diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp
index 8fedf641c8f..27732b25401 100644
--- a/cpp/src/aggregation/aggregation.cpp
+++ b/cpp/src/aggregation/aggregation.cpp
@@ -154,18 +154,6 @@ std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
   return visit(col_type, static_cast<aggregation const&>(agg));
 }

-std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
-  data_type col_type, dense_rank_aggregation const& agg)
-{
-  return visit(col_type, static_cast<aggregation const&>(agg));
-}
-
-std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
-  data_type col_type, percent_rank_aggregation const& agg)
-{
-  return visit(col_type, static_cast<aggregation const&>(agg));
-}
-
 std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
   data_type col_type, collect_list_aggregation const& agg)
 {
@@ -334,16 +322,6 @@ void aggregation_finalizer::visit(rank_aggregation const& agg)
   visit(static_cast<aggregation const&>(agg));
 }

-void aggregation_finalizer::visit(dense_rank_aggregation const& agg)
-{
-  visit(static_cast<aggregation const&>(agg));
-}
-
-void aggregation_finalizer::visit(percent_rank_aggregation const& agg)
-{
-  visit(static_cast<aggregation const&>(agg));
-}
-
 void aggregation_finalizer::visit(collect_list_aggregation const& agg)
 {
   visit(static_cast<aggregation const&>(agg));
@@ -644,36 +622,33 @@ template std::unique_ptr<aggregation> make_row_number_aggregation
-template <typename Base>
-std::unique_ptr<Base> make_rank_aggregation()
-{
-  return std::make_unique<detail::rank_aggregation>();
-}
-template std::unique_ptr<aggregation> make_rank_aggregation<aggregation>();
-template std::unique_ptr<groupby_scan_aggregation>
-make_rank_aggregation<groupby_scan_aggregation>();
-template std::unique_ptr<scan_aggregation> make_rank_aggregation<scan_aggregation>();
-
-/// Factory to create a DENSE_RANK aggregation
-template <typename Base>
-std::unique_ptr<Base> make_dense_rank_aggregation()
-{
-  return std::make_unique<detail::dense_rank_aggregation>();
-}
-template std::unique_ptr<aggregation> make_dense_rank_aggregation<aggregation>();
-template std::unique_ptr<groupby_scan_aggregation>
-make_dense_rank_aggregation<groupby_scan_aggregation>();
-template std::unique_ptr<scan_aggregation> make_dense_rank_aggregation<scan_aggregation>();
-
-/// Factory to create a PERCENT_RANK aggregation
-template <typename Base>
-std::unique_ptr<Base> make_percent_rank_aggregation()
-{
-  return std::make_unique<detail::percent_rank_aggregation>();
-}
-template std::unique_ptr<aggregation> make_percent_rank_aggregation<aggregation>();
-template std::unique_ptr<groupby_scan_aggregation>
-make_percent_rank_aggregation<groupby_scan_aggregation>();
-template std::unique_ptr<scan_aggregation> make_percent_rank_aggregation<scan_aggregation>();
+template <typename Base>
+std::unique_ptr<Base> make_rank_aggregation(rank_method method,
+                                            order column_order,
+                                            null_policy null_handling,
+                                            null_order null_precedence,
+                                            rank_percentage percentage)
+{
+  return std::make_unique<detail::rank_aggregation>(
+    method, column_order, null_handling, null_precedence, percentage);
+}
+template std::unique_ptr<aggregation> make_rank_aggregation<aggregation>(
+  rank_method method,
+  order column_order,
+  null_policy null_handling,
+  null_order null_precedence,
+  rank_percentage percentage);
+template std::unique_ptr<groupby_scan_aggregation> make_rank_aggregation<groupby_scan_aggregation>(
+  rank_method method,
+  order column_order,
+  null_policy null_handling,
+  null_order null_precedence,
+  rank_percentage percentage);
+template std::unique_ptr<scan_aggregation> make_rank_aggregation<scan_aggregation>(
+  rank_method method,
+  order column_order,
+  null_policy null_handling,
+  null_order null_precedence,
+  rank_percentage percentage);

 /// Factory to create a COLLECT_LIST aggregation
 template
diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu
index 756cf3421c9..ec14f8e6ded 100644
--- a/cpp/src/bitmask/null_mask.cu
+++ b/cpp/src/bitmask/null_mask.cu
@@ -445,7 +445,7 @@ std::pair<rmm::device_buffer, size_type> bitmask_and(table_view const& view,
   CUDF_FUNC_RANGE();
   rmm::device_buffer null_mask{0, stream, mr};
   if (view.num_rows() == 0 or view.num_columns() == 0) {
-    return std::make_pair(std::move(null_mask), 0);
+    return std::pair(std::move(null_mask), 0);
   }

   std::vector<bitmask_type const*> masks;
@@ -467,7 +467,7 @@ std::pair<rmm::device_buffer, size_type> bitmask_and(table_view const& view,
       mr);
   }

-  return std::make_pair(std::move(null_mask), 0);
+  return std::pair(std::move(null_mask), 0);
 }

 // Returns the bitwise OR of the null masks of all columns in the table view
@@ -478,7 +478,7 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
   CUDF_FUNC_RANGE();
   rmm::device_buffer null_mask{0, stream, mr};
   if (view.num_rows() == 0 or view.num_columns() == 0) {
-    return std::make_pair(std::move(null_mask), 0);
+    return std::pair(std::move(null_mask), 0);
   }

   std::vector<bitmask_type const*> masks;
@@ -500,7 +500,7 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
       mr);
   }

-  return std::make_pair(std::move(null_mask), 0);
+  return std::pair(std::move(null_mask), 0);
 }

 }  // namespace detail
diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu
index 514374d450d..35e7eba974f 100644
--- a/cpp/src/copying/contiguous_split.cu
+++ b/cpp/src/copying/contiguous_split.cu
@@ -688,9 +688,9 @@ BufInfo build_output_columns(InputIter begin,
                      ? 0
                      : (current_info->num_rows - current_info->valid_count);
       ++current_info;
-      return std::make_pair(ptr, null_count);
+      return std::pair(ptr, null_count);
     }
-    return std::make_pair(static_cast<bitmask_type const*>(nullptr), 0);
+    return std::pair(static_cast<bitmask_type const*>(nullptr), 0);
   }();

   // size/data pointer for the column
diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu
new file mode 100644
index 00000000000..778d6c4df55
--- /dev/null
+++ b/cpp/src/copying/purge_nonempty_nulls.cu
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include
+#include
+
+#include
+
+namespace cudf {
+namespace detail {
+
+using cudf::type_id;
+
+namespace {
+
+/// Check if nonempty-null checks can be skipped for a given type.
+bool type_may_have_nonempty_nulls(cudf::type_id const& type)
+{
+  return type == type_id::STRING || type == type_id::LIST || type == type_id::STRUCT;
+}
+
+/// Check if the (STRING/LIST) column has any null rows with non-zero length.
+bool has_nonempty_null_rows(cudf::column_view const& input, rmm::cuda_stream_view stream)
+{
+  if (not input.has_nulls()) { return false; }  // No nulls => no dirty rows.
+
+  // Cross-reference nullmask and offsets.
+  auto const type    = input.type().id();
+  auto const offsets = (type == type_id::STRING) ? (strings_column_view{input}).offsets()
+                                                 : (lists_column_view{input}).offsets();
+  auto const d_input      = cudf::column_device_view::create(input);
+  auto const is_dirty_row = [d_input = *d_input, offsets = offsets.begin<size_type>()] __device__(
+                              size_type const& row_idx) {
+    return d_input.is_null_nocheck(row_idx) && (offsets[row_idx] != offsets[row_idx + 1]);
+  };
+
+  auto const row_begin = thrust::counting_iterator<cudf::size_type>(0);
+  auto const row_end   = row_begin + input.size();
+  return thrust::count_if(rmm::exec_policy(stream), row_begin, row_end, is_dirty_row) > 0;
+}
+
+}  // namespace
+
+/**
+ * @copydoc cudf::detail::has_nonempty_nulls
+ */
+bool has_nonempty_nulls(cudf::column_view const& input, rmm::cuda_stream_view stream)
+{
+  auto const type = input.type().id();
+
+  if (not type_may_have_nonempty_nulls(type)) { return false; }
+
+  // For types with variable-length rows, check if any rows are "dirty".
+  // A dirty row is a null row with non-zero length.
+  if ((type == type_id::STRING || type == type_id::LIST) && has_nonempty_null_rows(input, stream)) {
+    return true;
+  }
+
+  // For complex types, check if child columns need purging.
+  if ((type == type_id::STRUCT || type == type_id::LIST) &&
+      std::any_of(input.child_begin(), input.child_end(), [stream](auto const& child) {
+        return cudf::detail::has_nonempty_nulls(child, stream);
+      })) {
+    return true;
+  }
+
+  return false;
+}
+}  // namespace detail
+
+/**
+ * @copydoc cudf::may_have_nonempty_nulls
+ */
+bool may_have_nonempty_nulls(column_view const& input)
+{
+  auto const type = input.type().id();
+
+  if (not detail::type_may_have_nonempty_nulls(type)) { return false; }
+
+  if ((type == type_id::STRING || type == type_id::LIST) && input.has_nulls()) { return true; }
+
+  if ((type == type_id::STRUCT || type == type_id::LIST) &&
+      std::any_of(input.child_begin(), input.child_end(), may_have_nonempty_nulls)) {
+    return true;
+  }
+
+  return false;
+}
+
+/**
+ * @copydoc cudf::has_nonempty_nulls
+ */
+bool has_nonempty_nulls(column_view const& input) { return detail::has_nonempty_nulls(input); }
+
+/**
+ * @copydoc cudf::purge_nonempty_nulls(lists_column_view const&, rmm::mr::device_memory_resource*)
+ */
+std::unique_ptr<cudf::column> purge_nonempty_nulls(lists_column_view const& input,
+                                                   rmm::mr::device_memory_resource* mr)
+{
+  return detail::purge_nonempty_nulls(input, rmm::cuda_stream_default, mr);
+}
+
+/**
+ * @copydoc cudf::purge_nonempty_nulls(structs_column_view const&, rmm::mr::device_memory_resource*)
+ */
+std::unique_ptr<cudf::column> purge_nonempty_nulls(structs_column_view const& input,
+                                                   rmm::mr::device_memory_resource* mr)
+{
+  return detail::purge_nonempty_nulls(input, rmm::cuda_stream_default, mr);
+}
+
+/**
+ * @copydoc cudf::purge_nonempty_nulls(strings_column_view const&, rmm::mr::device_memory_resource*)
+ */
+std::unique_ptr<cudf::column> purge_nonempty_nulls(strings_column_view const& input,
+                                                   rmm::mr::device_memory_resource* mr)
+{
+  return detail::purge_nonempty_nulls(input, rmm::cuda_stream_default, mr);
+}
+
+}  // namespace cudf
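
The "dirty row" concept in the new file above is easy to see with concrete data: a row can be marked null in the validity mask while its offsets still span a non-zero byte range. A host-side sketch of the same test (illustrative only; plain C++17 stand-ins for the offsets child and validity bitmask):

// dirty_rows.cpp -- standalone illustration, not libcudf code.
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  // STRING-like column: rows "ab", "cd" (nulled later without sanitizing
  // offsets), "e". Row 1 is null but still owns bytes 2..4.
  std::vector<int> offsets      = {0, 2, 4, 5};
  std::vector<uint32_t> validity = {0b101};  // bit per row; row 1 unset

  for (int row = 0; row < 3; ++row) {
    bool const is_null  = ((validity[row / 32] >> (row % 32)) & 1) == 0;
    bool const nonempty = offsets[row + 1] != offsets[row];
    if (is_null && nonempty) {
      std::printf("row %d is dirty: null but %d bytes long\n",
                  row, offsets[row + 1] - offsets[row]);
    }
  }
  return 0;
}
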
diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu
index 57bb222aaa0..a002b0bb744 100644
--- a/cpp/src/groupby/groupby.cu
+++ b/cpp/src/groupby/groupby.cu
@@ -83,8 +83,7 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::disp
                "Unsupported groupby key type does not support equality comparison");
   auto [grouped_keys, results] =
     detail::hash::groupby(flattened_keys, requests, _include_null_keys, stream, mr);
-  return std::make_pair(unflatten_nested_columns(std::move(grouped_keys), _keys),
-                        std::move(results));
+  return std::pair(unflatten_nested_columns(std::move(grouped_keys), _keys), std::move(results));
 } else {
   return sort_aggregate(requests, stream, mr);
 }
@@ -102,9 +101,12 @@ namespace {
 * Adds special handling for COLLECT_LIST/COLLECT_SET, because:
 * 1. `make_empty_column()` does not support construction of nested columns.
 * 2. Empty lists need empty child columns, to persist type information.
+ * Adds special handling for RANK, because it needs to return a double-typed column when
+ * rank_method is AVERAGE or percentage is not NONE.
 */
 struct empty_column_constructor {
   column_view values;
+  aggregation const& agg;

   template <aggregation::Kind k>
   std::unique_ptr<cudf::column> operator()() const
   {
@@ -117,6 +119,14 @@
       0, make_empty_column(type_to_id<offset_type>()), empty_like(values), 0, {});
   }

+  if constexpr (k == aggregation::Kind::RANK) {
+    auto const& rank_agg = dynamic_cast<cudf::detail::rank_aggregation const&>(agg);
+    if (rank_agg._method == cudf::rank_method::AVERAGE or
+        rank_agg._percentage != rank_percentage::NONE)
+      return make_empty_column(type_to_id<double>());
+    return make_empty_column(target_type(values.type(), k));
+  }
+
   // If `values` is LIST typed, and the aggregation results match the type,
   // construct empty results based on `values`.
   // Most generally, this applies if input type matches output type.
@@ -149,7 +159,7 @@ auto empty_results(host_span<aggregation_request const> requests)
                  std::back_inserter(results),
                  [&request](auto const& agg) {
                    return cudf::detail::dispatch_type_and_aggregation(
-                     request.values.type(), agg->kind, empty_column_constructor{request.values});
+                     request.values.type(), agg->kind, empty_column_constructor{request.values, *agg});
                  });

   return aggregation_result{std::move(results)};
@@ -193,7 +203,7 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::aggr

   verify_valid_requests(requests);

-  if (_keys.num_rows() == 0) { return std::make_pair(empty_like(_keys), empty_results(requests)); }
+  if (_keys.num_rows() == 0) { return std::pair(empty_like(_keys), empty_results(requests)); }

   return dispatch_aggregation(requests, rmm::cuda_stream_default, mr);
 }
@@ -211,7 +221,7 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::scan

   verify_valid_requests(requests);

-  if (_keys.num_rows() == 0) { return std::make_pair(empty_like(_keys), empty_results(requests)); }
+  if (_keys.num_rows() == 0) { return std::pair(empty_like(_keys), empty_results(requests)); }

   return sort_scan(requests, rmm::cuda_stream_default, mr);
 }
@@ -250,7 +260,7 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> groupby::replace_nulls
   CUDF_EXPECTS(static_cast<cudf::size_type>(replace_policies.size()) == values.num_columns(),
                "Size mismatch between num_columns and replace_policies.");

-  if (values.is_empty()) { return std::make_pair(empty_like(_keys), empty_like(values)); }
+  if (values.is_empty()) { return std::pair(empty_like(_keys), empty_like(values)); }

   auto const stream = rmm::cuda_stream_default;
   auto const& group_labels = helper().group_labels(stream);
@@ -269,8 +279,8 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> groupby::replace_nulls
                    : std::move(grouped_values);
   });

-  return std::make_pair(std::move(helper().sorted_keys(stream, mr)),
-                        std::make_unique<table>(std::move(results)));
+  return std::pair(std::move(helper().sorted_keys(stream, mr)),
+                   std::make_unique<table>(std::move(results)));
 }

 // Get the sort helper object
@@ -310,8 +320,8 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> groupby::shift(
     grouped_values->view(), group_offsets, offsets[i], fill_values[i].get(), stream, mr);
   });

-  return std::make_pair(helper().sorted_keys(stream, mr),
-                        std::make_unique<table>(std::move(results)));
+  return std::pair(helper().sorted_keys(stream, mr),
+                   std::make_unique<table>(std::move(results)));
 }
 }  // namespace groupby
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 44df981f5bf..e22b3a4f3a4 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -429,17 +429,19 @@ auto create_hash_map(table_device_view const& d_keys,
   size_type constexpr unused_key{std::numeric_limits<size_type>::max()};
   size_type constexpr unused_value{std::numeric_limits<size_type>::max()};

-  using map_type = concurrent_unordered_map<cudf::size_type,
-                                            cudf::size_type,
-                                            row_hasher<default_hash, nullate::DYNAMIC>,
-                                            row_equality_comparator<nullate::DYNAMIC>>;
+  using map_type =
+    concurrent_unordered_map<cudf::size_type,
+                             cudf::size_type,
+                             row_hasher<cudf::detail::default_hash, nullate::DYNAMIC>,
+                             row_equality_comparator<nullate::DYNAMIC>>;

   using allocator_type = typename map_type::allocator_type;

   auto const null_keys_are_equal =
     include_null_keys == null_policy::INCLUDE ? null_equality::EQUAL : null_equality::UNEQUAL;

-  row_hasher hasher{nullate::DYNAMIC{keys_have_nulls}, d_keys};
+  row_hasher<cudf::detail::default_hash, nullate::DYNAMIC> hasher{nullate::DYNAMIC{keys_have_nulls},
+                                                                  d_keys};
   row_equality_comparator rows_equal{
     nullate::DYNAMIC{keys_have_nulls}, d_keys, d_keys, null_keys_are_equal};
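
The map created above drives the hash-based groupby: each key row is hashed (MurmurHash3-based row hashing) into a concurrent map slot, and aggregations accumulate per slot. A host-side analogue of the idea (illustrative only; std::unordered_map stands in for the GPU concurrent map):

// hash_groupby_analogue.cpp -- standalone illustration, not libcudf code.
#include <cstdio>
#include <unordered_map>
#include <vector>

int main()
{
  std::vector<int> keys   = {1, 2, 1, 3, 2};
  std::vector<int> values = {10, 20, 30, 40, 50};

  // key -> running SUM aggregation; on the GPU this insert/accumulate step
  // happens concurrently across threads.
  std::unordered_map<int, int> sums;
  for (size_t i = 0; i < keys.size(); ++i) sums[keys[i]] += values[i];

  for (auto const& [k, s] : sums) std::printf("key %d -> sum %d\n", k, s);
  return 0;
}
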
@@ -670,7 +672,7 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby(
   std::unique_ptr<table> unique_keys =
     groupby(keys, requests, &cache, has_nulls(keys), include_null_keys, stream, mr);

-  return std::make_pair(std::move(unique_keys), extract_results(requests, cache, stream, mr));
+  return std::pair(std::move(unique_keys), extract_results(requests, cache, stream, mr));
 }
 }  // namespace hash
 }  // namespace detail
diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 4904aa42723..02036ff0bbf 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -778,7 +778,7 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::sort
   auto results = detail::extract_results(requests, cache, stream, mr);

-  return std::make_pair(helper().unique_keys(stream, mr), std::move(results));
+  return std::pair(helper().unique_keys(stream, mr), std::move(results));
 }
 }  // namespace groupby
 }  // namespace cudf
diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp
index fa3d19bdcfd..748e34a583d 100644
--- a/cpp/src/groupby/sort/functors.hpp
+++ b/cpp/src/groupby/sort/functors.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -87,7 +87,6 @@ struct store_result_functor {
  */
 column_view get_sorted_values()
 {
-  if (is_presorted()) { return values; }
   return sorted_values ? sorted_values->view()
                        : (sorted_values = helper.sorted_values(values, stream))->view();
 };
diff --git a/cpp/src/groupby/sort/group_collect.cu b/cpp/src/groupby/sort/group_collect.cu
index 8b8a03f35a5..000a595ea2f 100644
--- a/cpp/src/groupby/sort/group_collect.cu
+++ b/cpp/src/groupby/sort/group_collect.cu
@@ -82,8 +82,7 @@ std::pair<std::unique_ptr<column>, std::unique_ptr<column>> purge_null_entries(
   auto null_purged_offsets = strings::detail::make_offsets_child_column(
     null_purged_sizes.cbegin(), null_purged_sizes.cend(), stream, mr);

-  return std::make_pair<std::unique_ptr<column>, std::unique_ptr<column>>(
-    std::move(null_purged_values), std::move(null_purged_offsets));
+  return std::pair(std::move(null_purged_values), std::move(null_purged_offsets));
 }

 std::unique_ptr<column> group_collect(column_view const& values,
@@ -109,8 +108,8 @@ std::unique_ptr<column> group_collect(column_view const& values,
       return cudf::groupby::detail::purge_null_entries(
         values, offsets_column->view(), num_groups, stream, mr);
     } else {
-      return std::make_pair(std::make_unique<column>(values, stream, mr),
-                            std::move(offsets_column));
+      return std::pair(std::make_unique<column>(values, stream, mr),
+                       std::move(offsets_column));
     }
   }();
diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu
index 77d68edaa3a..0b25ab9a33d 100644
--- a/cpp/src/groupby/sort/group_rank_scan.cu
+++ b/cpp/src/groupby/sort/group_rank_scan.cu
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -27,6 +28,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -35,23 +37,59 @@ namespace cudf {
 namespace groupby {
 namespace detail {
 namespace {
+
+/**
+ * @brief Functor to compare two rows of a table in a given permutation order.
+ *
+ * This is useful for identifying unique elements in a sorted-order table when the permutation
+ * order is the sorted order of the table.
+ */
+template <typename Iterator>
+struct permuted_comparator {
+  /**
+   * @brief Comparator object which compares two rows of the table in a given permutation order
+   *
+   * @param device_table Device table to compare
+   * @param permutation The permutation order, an integer-typed column
+   * @param has_nulls whether the table has nulls
+   */
+  permuted_comparator(table_device_view device_table, Iterator const permutation, bool has_nulls)
+    : comparator(nullate::DYNAMIC{has_nulls}, device_table, device_table, null_equality::EQUAL),
+      permutation(permutation)
+  {
+  }
+  __device__ bool operator()(size_type index1, size_type index2) const
+  {
+    return comparator(permutation[index1], permutation[index2]);
+  };
+
+ private:
+  row_equality_comparator<nullate::DYNAMIC> comparator;
+  Iterator const permutation;
+};
+
 /**
 * @brief Generate grouped row ranks or dense ranks using a row comparison, then scan the results
 *
+ * @tparam forward true if the rank scan computation should use forward iterator traversal
+ *         (default), else reverse iterator traversal
 * @tparam value_resolver flag value resolver function with boolean first and row number arguments
 * @tparam scan_operator scan function ran on the flag values
- * @param order_by input column to generate ranks for
+ * @param grouped_values input column to generate ranks for
+ * @param value_order column of type INT32 that contains the order of the values in the
+ *        grouped_values column
 * @param group_labels ID of group that the corresponding value belongs to
 * @param group_offsets group index offsets with group ID indices
 * @param resolver flag value resolver
 * @param scan_op scan operation ran on the flag results
- * @param has_nulls true if nulls are included in the `order_by` column
+ * @param has_nulls true if nulls are included in the `grouped_values` column
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned column's device memory
 * @return std::unique_ptr<column> rank values
 */
-template <typename value_resolver, typename scan_operator>
-std::unique_ptr<column> rank_generator(column_view const& order_by,
+template <bool forward, typename value_resolver, typename scan_operator>
+std::unique_ptr<column> rank_generator(column_view const& grouped_values,
+                                       column_view const& value_order,
                                        device_span<size_type const> group_labels,
                                        device_span<size_type const> group_offsets,
                                        value_resolver resolver,
                                        scan_operator scan_op,
                                        bool has_nulls,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr)
 {
   auto const flattened = cudf::structs::detail::flatten_nested_columns(
-    table_view{{order_by}}, {}, {}, structs::detail::column_nullability::MATCH_INCOMING);
+    table_view{{grouped_values}}, {}, {}, structs::detail::column_nullability::MATCH_INCOMING);
   auto const d_flat_order = table_device_view::create(flattened, stream);
-  row_equality_comparator comparator(
-    nullate::DYNAMIC{has_nulls}, *d_flat_order, *d_flat_order, null_equality::EQUAL);
+  auto sorted_index_order = value_order.begin<size_type>();
+  auto comparator         = permuted_comparator(*d_flat_order, sorted_index_order, has_nulls);
+
   auto ranks = make_fixed_width_column(data_type{type_to_id<size_type>()},
                                        flattened.flattened_columns().num_rows(),
                                        mask_state::UNALLOCATED,
                                        stream,
                                        mr);
   auto mutable_ranks = ranks->mutable_view();

-  thrust::tabulate(
-    rmm::exec_policy(stream),
-    mutable_ranks.begin<size_type>(),
-    mutable_ranks.end<size_type>(),
-    [comparator, resolver, labels = group_labels.data(), offsets = group_offsets.data()] __device__(
-      size_type row_index) {
-      auto group_start = offsets[labels[row_index]];
+  auto unique_identifier = [labels  = group_labels.begin(),
+                            offsets = group_offsets.begin(),
+                            comparator,
+                            resolver] __device__(size_type row_index) {
+    auto const group_start = offsets[labels[row_index]];
+    if constexpr (forward) {
+      // First value of equal values is 1.
       return resolver(row_index == group_start || !comparator(row_index, row_index - 1),
                       row_index - group_start);
-    });
+    } else {
+      auto const group_end = offsets[labels[row_index] + 1];
+      // Last value of equal values is 1.
+      return resolver(row_index + 1 == group_end || !comparator(row_index, row_index + 1),
+                      row_index - group_start);
+    }
+  };
+  thrust::tabulate(rmm::exec_policy(stream),
+                   mutable_ranks.begin<size_type>(),
+                   mutable_ranks.end<size_type>(),
+                   unique_identifier);
+
+  auto [group_labels_begin, mutable_rank_begin] = [&]() {
+    if constexpr (forward) {
+      return thrust::pair{group_labels.begin(), mutable_ranks.begin<size_type>()};
+    } else {
+      return thrust::pair{thrust::reverse_iterator(group_labels.end()),
+                          thrust::reverse_iterator(mutable_ranks.end<size_type>())};
+    }
+  }();
   thrust::inclusive_scan_by_key(rmm::exec_policy(stream),
-                                group_labels.begin(),
-                                group_labels.end(),
-                                mutable_ranks.begin<size_type>(),
-                                mutable_ranks.begin<size_type>(),
+                                group_labels_begin,
+                                group_labels_begin + group_labels.size(),
+                                mutable_rank_begin,
+                                mutable_rank_begin,
                                 thrust::equal_to{},
                                 scan_op);
   return ranks;
 }
 }  // namespace

-std::unique_ptr<column> rank_scan(column_view const& order_by,
-                                  device_span<size_type const> group_labels,
-                                  device_span<size_type const> group_offsets,
-                                  rmm::cuda_stream_view stream,
-                                  rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> min_rank_scan(column_view const& grouped_values,
+                                      column_view const& value_order,
+                                      device_span<size_type const> group_labels,
+                                      device_span<size_type const> group_offsets,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr)
 {
-  return rank_generator(
-    order_by,
+  return rank_generator<true>(
+    grouped_values,
+    value_order,
     group_labels,
     group_offsets,
     [] __device__(bool unequal, auto row_index_in_group) {
       return unequal ? row_index_in_group + 1 : 0;
     },
     DeviceMax{},
-    has_nested_nulls(table_view{{order_by}}),
+    has_nested_nulls(table_view{{grouped_values}}),
     stream,
     mr);
 }

-std::unique_ptr<column> dense_rank_scan(column_view const& order_by,
-                                        device_span<size_type const> group_labels,
-                                        device_span<size_type const> group_offsets,
-                                        rmm::cuda_stream_view stream,
-                                        rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> max_rank_scan(column_view const& grouped_values,
+                                      column_view const& value_order,
+                                      device_span<size_type const> group_labels,
+                                      device_span<size_type const> group_offsets,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr)
 {
-  return rank_generator(
-    order_by,
+  return rank_generator<false>(
+    grouped_values,
+    value_order,
     group_labels,
     group_offsets,
-    [] __device__(bool const unequal, size_type const) { return unequal ? 1 : 0; },
-    DeviceSum{},
-    has_nested_nulls(table_view{{order_by}}),
+    [] __device__(bool unequal, auto row_index_in_group) {
+      return unequal ? row_index_in_group + 1 : std::numeric_limits<size_type>::max();
+    },
+    DeviceMin{},
+    has_nested_nulls(table_view{{grouped_values}}),
     stream,
     mr);
 }

-std::unique_ptr<column> percent_rank_scan(column_view const& order_by,
+std::unique_ptr<column> first_rank_scan(column_view const& grouped_values,
+                                        column_view const&,
+                                        device_span<size_type const> group_labels,
+                                        device_span<size_type const> group_offsets,
+                                        rmm::cuda_stream_view stream,
+                                        rmm::mr::device_memory_resource* mr)
+{
+  auto ranks = make_fixed_width_column(
+    data_type{type_to_id<size_type>()}, group_labels.size(), mask_state::UNALLOCATED, stream, mr);
+  auto mutable_ranks = ranks->mutable_view();
+  thrust::tabulate(rmm::exec_policy(stream),
+                   mutable_ranks.begin<size_type>(),
+                   mutable_ranks.end<size_type>(),
+                   [labels  = group_labels.begin(),
+                    offsets = group_offsets.begin()] __device__(size_type row_index) {
+                     auto group_start = offsets[labels[row_index]];
+                     return row_index - group_start + 1;
+                   });
+  return ranks;
+}
+
+std::unique_ptr<column> average_rank_scan(column_view const& grouped_values,
+                                          column_view const& value_order,
                                           device_span<size_type const> group_labels,
                                           device_span<size_type const> group_offsets,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr)
 {
-  auto const rank_column = rank_scan(
-    order_by, group_labels, group_offsets, stream, rmm::mr::get_current_device_resource());
-  auto const rank_view = rank_column->view();
-  auto const group_size_iter = cudf::detail::make_counting_transform_iterator(
-    0,
-    [labels  = group_labels.begin(),
-     offsets = group_offsets.begin()] __device__(size_type row_index) {
-      auto const group_label = labels[row_index];
-      auto const group_start = offsets[group_label];
-      auto const group_end   = offsets[group_label + 1];
-      return group_end - group_start;
-    });
-
-  // Result type for PERCENT_RANK is independent of input type.
-  using result_type = cudf::detail::target_type_t<int32_t, aggregation::Kind::PERCENT_RANK>;
-
-  auto percent_rank_result = cudf::make_fixed_width_column(
-    data_type{type_to_id<result_type>()}, rank_view.size(), mask_state::UNALLOCATED, stream, mr);
-
+  auto max_rank = max_rank_scan(grouped_values,
+                                value_order,
+                                group_labels,
+                                group_offsets,
+                                stream,
+                                rmm::mr::get_current_device_resource());
+  auto min_rank = min_rank_scan(grouped_values,
+                                value_order,
+                                group_labels,
+                                group_offsets,
+                                stream,
+                                rmm::mr::get_current_device_resource());
+  auto ranks = make_fixed_width_column(
+    data_type{type_to_id<double>()}, group_labels.size(), mask_state::UNALLOCATED, stream, mr);
+  auto mutable_ranks = ranks->mutable_view();
   thrust::transform(rmm::exec_policy(stream),
-                    rank_view.begin<size_type>(),
-                    rank_view.end<size_type>(),
-                    group_size_iter,
-                    percent_rank_result->mutable_view().begin<result_type>(),
-                    [] __device__(auto const rank, auto const group_size) {
-                      return group_size == 1 ? 0.0 : ((rank - 1.0) / (group_size - 1));
+                    max_rank->view().begin<size_type>(),
+                    max_rank->view().end<size_type>(),
+                    min_rank->view().begin<size_type>(),
+                    mutable_ranks.begin<double>(),
+                    [] __device__(auto max_rank, auto min_rank) -> double {
+                      return min_rank + (max_rank - min_rank) / 2.0;
                     });
+  return ranks;
+}

-  return percent_rank_result;
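
A worked reference for the rank variants implemented above, for one already-sorted group (host-side sketch, illustrative only; group semantics as documented in this file):

// rank_reference.cpp -- standalone illustration, not libcudf code.
// For input {30, 30, 50, 60, 60, 60}:
//   FIRST:   1 2 3 4 5 6      (row number within the group)
//   MIN:     1 1 3 4 4 4      (first row of each tie run)
//   MAX:     2 2 3 6 6 6      (last row of each tie run)
//   DENSE:   1 1 2 3 3 3      (increments by 1 per distinct value)
//   AVERAGE: 1.5 1.5 3 5 5 5  ((MIN + MAX) / 2, hence the double result type)
#include <cstdio>
#include <vector>

int main()
{
  std::vector<int> v = {30, 30, 50, 60, 60, 60};
  int const n        = (int)v.size();
  for (int i = 0; i < n; ++i) {
    int lo = i, hi = i;
    while (lo > 0 && v[lo - 1] == v[i]) --lo;      // start of tie run
    while (hi + 1 < n && v[hi + 1] == v[i]) ++hi;  // end of tie run
    double const avg = ((lo + 1) + (hi + 1)) / 2.0;
    std::printf("value %d: min %d max %d avg %.1f\n", v[i], lo + 1, hi + 1, avg);
  }
  return 0;
}

This also makes the forward/reverse scan split concrete: MIN ranks flag the first row of a tie run and propagate forward (DeviceMax), while MAX ranks flag the last row and propagate backward (DeviceMin over reverse iterators).
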
1 : 0; }, + DeviceSum{}, + has_nested_nulls(table_view{{grouped_values}}), + stream, + mr); +} + +std::unique_ptr group_rank_to_percentage(rank_method const method, + rank_percentage const percentage, + column_view const& rank, + column_view const& count, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(percentage != rank_percentage::NONE, "Percentage cannot be NONE"); + auto ranks = make_fixed_width_column( + data_type{type_to_id()}, group_labels.size(), mask_state::UNALLOCATED, stream, mr); + ranks->set_null_mask(copy_bitmask(rank, stream, mr)); + auto mutable_ranks = ranks->mutable_view(); + + auto one_normalized = [] __device__(auto const rank, auto const group_size) { + return group_size == 1 ? 0.0 : ((rank - 1.0) / (group_size - 1)); + }; + if (method == rank_method::DENSE) { + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [percentage, + one_normalized, + is_double = rank.type().id() == type_id::FLOAT64, + dcount = count.begin(), + labels = group_labels.begin(), + offsets = group_offsets.begin(), + d_rank = rank.begin(), + s_rank = rank.begin()] __device__(size_type row_index) -> double { + double const r = is_double ? d_rank[row_index] : s_rank[row_index]; + auto const count = dcount[labels[row_index]]; + size_type const last_rank_index = offsets[labels[row_index]] + count - 1; + auto const last_rank = s_rank[last_rank_index]; + return percentage == rank_percentage::ZERO_NORMALIZED + ? r / last_rank + : one_normalized(r, last_rank); + }); + } else { + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [percentage, + one_normalized, + is_double = rank.type().id() == type_id::FLOAT64, + dcount = count.begin(), + labels = group_labels.begin(), + d_rank = rank.begin(), + s_rank = rank.begin()] __device__(size_type row_index) -> double { + double const r = is_double ? d_rank[row_index] : s_rank[row_index]; + auto const count = dcount[labels[row_index]]; + return percentage == rank_percentage::ZERO_NORMALIZED + ? 
r / count + : one_normalized(r, count); + }); + } + return ranks; } } // namespace detail diff --git a/cpp/src/groupby/sort/group_scan.hpp b/cpp/src/groupby/sort/group_scan.hpp index 76a7f3f73c7..dc0eb691748 100644 --- a/cpp/src/groupby/sort/group_scan.hpp +++ b/cpp/src/groupby/sort/group_scan.hpp @@ -85,52 +85,115 @@ std::unique_ptr count_scan(device_span group_labels, rmm::mr::device_memory_resource* mr); /** - * @brief Internal API to calculate groupwise rank value + * @brief Internal API to calculate groupwise min rank value * - * @param order_by column or struct column that rows within a group are sorted by + * @param grouped_values column or struct column that rows within a group are sorted by + * @param value_order column of type INT32 that contains the order of the values in the + * grouped_values column * @param group_labels ID of group that the corresponding value belongs to * @param group_offsets group index offsets with group ID indices * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of type size_type of rank values */ -std::unique_ptr rank_scan(column_view const& order_by, - device_span group_labels, - device_span group_offsets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr min_rank_scan(column_view const& grouped_values, + column_view const& value_order, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief Internal API to calculate groupwise max rank value + * + * @details @copydetails min_rank_scan(column_view const& grouped_values, + * column_view const& value_order, + * device_span group_labels, + * device_span group_offsets, + * rmm::cuda_stream_view stream, + * rmm::mr::device_memory_resource* mr) + */ +std::unique_ptr max_rank_scan(column_view const& grouped_values, + column_view const& value_order, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief Internal API to calculate groupwise first rank value + * + * @details @copydetails min_rank_scan(column_view const& grouped_values, + * column_view const& value_order, + * device_span group_labels, + * device_span group_offsets, + * rmm::cuda_stream_view stream, + * rmm::mr::device_memory_resource* mr) + */ +std::unique_ptr first_rank_scan(column_view const& grouped_values, + column_view const& value_order, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief Internal API to calculate groupwise average rank value + * + * @details @copydetails min_rank_scan(column_view const& grouped_values, + * column_view const& value_order, + * device_span group_labels, + * device_span group_offsets, + * rmm::cuda_stream_view stream, + * rmm::mr::device_memory_resource* mr) + */ +std::unique_ptr average_rank_scan(column_view const& grouped_values, + column_view const& value_order, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Internal API to calculate groupwise dense rank value * - * @param order_by column or struct column that rows within a group are sorted by + * @param grouped_values column or struct column that rows within a group are sorted by * @param group_labels ID of group that 
 * @param group_offsets group index offsets with group ID indices
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned column's device memory
 * @return Column of type size_type of dense rank values
 */
-std::unique_ptr<column> dense_rank_scan(column_view const& order_by,
+std::unique_ptr<column> dense_rank_scan(column_view const& grouped_values,
+                                        column_view const& value_order,
                                         device_span<size_type const> group_labels,
                                         device_span<size_type const> group_offsets,
                                         rmm::cuda_stream_view stream,
                                         rmm::mr::device_memory_resource* mr);

 /**
- * @brief Internal API to calculate groupwise percent rank value
+ * @brief Convert groupwise rank to groupwise percentage rank
  *
- * @param order_by column or struct column by which the rows within a group are sorted
- * @param group_labels ID of group to which the row belongs
+ * @param method rank method
+ * @param percentage enum denoting how ranks should be converted to percentages in range (0,1]
+ * @param rank Groupwise rank column
+ * @param count Groupwise count column
+ * @param group_labels ID of group that the corresponding value belongs to
  * @param group_offsets group index offsets with group ID indices
  * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource used to allocate the returned column's device memory
- * @return Column of type `double` of percent rank values
+ * @return Column of type double of percentage rank values
  */
-std::unique_ptr<column> percent_rank_scan(column_view const& order_by,
-                                          device_span<size_type const> group_labels,
-                                          device_span<size_type const> group_offsets,
-                                          rmm::cuda_stream_view stream,
-                                          rmm::mr::device_memory_resource* mr);
+std::unique_ptr<column> group_rank_to_percentage(rank_method const method,
+                                                 rank_percentage const percentage,
+                                                 column_view const& rank,
+                                                 column_view const& count,
+                                                 device_span<size_type const> group_labels,
+                                                 device_span<size_type const> group_offsets,
+                                                 rmm::cuda_stream_view stream,
+                                                 rmm::mr::device_memory_resource* mr);
+
 }  // namespace detail
 }  // namespace groupby
 }  // namespace cudf
diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp
index 8c4959da35b..5d345273782 100644
--- a/cpp/src/groupby/sort/scan.cpp
+++ b/cpp/src/groupby/sort/scan.cpp
@@ -16,14 +16,20 @@

 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
+#include
+#include
+#include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -115,51 +121,70 @@ template <>
 void scan_result_functor::operator()<aggregation::RANK>(aggregation const& agg)
 {
   if (cache.has_result(values, agg)) return;
-  CUDF_EXPECTS(helper.is_presorted(),
-               "Rank aggregate in groupby scan requires the keys to be presorted");
-  auto const order_by = get_grouped_values();
-  CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(order_by),
-               "Unsupported list type in grouped rank scan.");
-
-  cache.add_result(
-    values,
-    agg,
-    detail::rank_scan(
-      order_by, helper.group_labels(stream), helper.group_offsets(stream), stream, mr));
-}
-
-template <>
-void scan_result_functor::operator()<aggregation::DENSE_RANK>(aggregation const& agg)
-{
-  if (cache.has_result(values, agg)) return;
-  CUDF_EXPECTS(helper.is_presorted(),
-               "Dense rank aggregate in groupby scan requires the keys to be presorted");
-  auto const order_by = get_grouped_values();
-  CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(order_by),
-               "Unsupported list type in grouped dense_rank scan.");
-  cache.add_result(
-    values,
-    agg,
-    detail::dense_rank_scan(
-      order_by, helper.group_labels(stream), helper.group_offsets(stream), stream, mr));
-}
-
-template <>
-void scan_result_functor::operator()<aggregation::PERCENT_RANK>(aggregation const& agg)
-{
-  if (cache.has_result(values, agg)) return;
-  CUDF_EXPECTS(helper.is_presorted(),
-               "Percent rank aggregate in groupby scan requires the keys to be presorted");
-  auto const order_by = get_grouped_values();
-  CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(order_by),
-               "Unsupported list type in grouped percent_rank scan.");
-
-  cache.add_result(
-    values,
-    agg,
-    detail::percent_rank_scan(
-      order_by, helper.group_labels(stream), helper.group_offsets(stream), stream, mr));
+  CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(values),
+               "Unsupported list type in grouped rank scan.");
+  auto const& rank_agg         = dynamic_cast<cudf::detail::rank_aggregation const&>(agg);
+  auto const& group_labels     = helper.group_labels(stream);
+  auto const group_labels_view = column_view(cudf::device_span<size_type const>(group_labels));
+  auto const gather_map        = [&]() {
+    if (is_presorted()) {  // assumes both keys and values are sorted, Spark does this.
+      return cudf::detail::sequence(
+        group_labels.size(), *cudf::make_fixed_width_scalar(size_type{0}, stream), stream);
+    } else {
+      auto sort_order = (rank_agg._method == rank_method::FIRST ? cudf::detail::stable_sorted_order
+                                                                : cudf::detail::sorted_order);
+      return sort_order(table_view({group_labels_view, get_grouped_values()}),
+                        {order::ASCENDING, rank_agg._column_order},
+                        {null_order::AFTER, rank_agg._null_precedence},
+                        stream,
+                        rmm::mr::get_current_device_resource());
+    }
+  }();
+
+  auto rank_scan = [&]() {
+    switch (rank_agg._method) {
+      case rank_method::FIRST: return detail::first_rank_scan;
+      case rank_method::AVERAGE: return detail::average_rank_scan;
+      case rank_method::DENSE: return detail::dense_rank_scan;
+      case rank_method::MIN: return detail::min_rank_scan;
+      case rank_method::MAX: return detail::max_rank_scan;
+      default: CUDF_FAIL("Unsupported rank method in groupby scan");
+    }
+  }();
+  auto result = rank_scan(get_grouped_values(),
+                          *gather_map,
+                          helper.group_labels(stream),
+                          helper.group_offsets(stream),
+                          stream,
+                          rmm::mr::get_current_device_resource());
+  if (rank_agg._percentage != rank_percentage::NONE) {
+    auto count = get_grouped_values().nullable() and rank_agg._null_handling == null_policy::EXCLUDE
+                   ? detail::group_count_valid(get_grouped_values(),
+                                               helper.group_labels(stream),
+                                               helper.num_groups(stream),
+                                               stream,
+                                               rmm::mr::get_current_device_resource())
+                   : detail::group_count_all(helper.group_offsets(stream),
+                                             helper.num_groups(stream),
+                                             stream,
+                                             rmm::mr::get_current_device_resource());
+    result = detail::group_rank_to_percentage(rank_agg._method,
+                                              rank_agg._percentage,
+                                              *result,
+                                              *count,
+                                              helper.group_labels(stream),
+                                              helper.group_offsets(stream),
+                                              stream,
+                                              mr);
+  }
+  result = std::move(cudf::detail::scatter(
+                       table_view{{*result}}, *gather_map, table_view{{*result}}, false, stream, mr)
+                       ->release()[0]);
+  if (rank_agg._null_handling == null_policy::EXCLUDE) {
+    result->set_null_mask(cudf::detail::copy_bitmask(get_grouped_values(), stream, mr));
+  }
+  cache.add_result(values, agg, std::move(result));
 }

 }  // namespace detail
@@ -185,7 +210,7 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::sort
   auto results = detail::extract_results(requests, cache, stream, mr);

-  return std::make_pair(helper().sorted_keys(stream, mr), std::move(results));
+  return std::pair(helper().sorted_keys(stream, mr), std::move(results));
 }
 }  // namespace groupby
 }  // namespace cudf
diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh
index 76f3fba4689..9136410a03d 100644
--- a/cpp/src/hash/concurrent_unordered_map.cuh
+++ b/cpp/src/hash/concurrent_unordered_map.cuh
@@ -113,7 +113,7 @@ union pair_packer<pair_type, std::enable_if_t<is_packable<pair_type>()>> {
  */
 template <typename Key,
           typename Element,
-          typename Hasher    = default_hash<Key>,
+          typename Hasher    = cudf::detail::default_hash<Key>,
           typename Equality  = equal_to<Key>,
           typename Allocator = default_allocator<thrust::pair<Key, Element>>>
 class concurrent_unordered_map {
diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu
index 5885b61b35b..556ca6b9d80 100644
--- a/cpp/src/io/avro/reader_impl.cu
+++ b/cpp/src/io/avro/reader_impl.cu
@@ -162,62 +162,66 @@ rmm::device_buffer decompress_data(datasource& source,
                                    rmm::cuda_stream_view stream)
 {
   if (meta.codec == "deflate") {
-    size_t uncompressed_data_size = 0;
+    auto inflate_in    = hostdevice_vector<device_span<uint8_t const>>(meta.block_list.size(), stream);
+    auto inflate_out   = hostdevice_vector<device_span<uint8_t>>(meta.block_list.size(), stream);
+    auto inflate_stats = hostdevice_vector<decompress_status>(meta.block_list.size(), stream);

-    auto inflate_in  = hostdevice_vector<gpu_inflate_input_s>(meta.block_list.size(), stream);
-    auto inflate_out = hostdevice_vector<gpu_inflate_status_s>(meta.block_list.size(), stream);
+    // Guess an initial maximum uncompressed block size. We estimate the compression factor to be
+    // two and round up to the next multiple of 4096 bytes.
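For reference, the removed line below used the classic power-of-two round-up idiom `(x + 0xfff) & ~0xfff`. A minimal standalone sketch (not cuDF code) of why that expression rounds up to the next 4096-byte boundary, next to the equivalent divide-and-multiply form:

```cpp
#include <cstdint>

// (x + 0xfff) & ~0xfff: adding 4095 carries x past the next 4 KiB boundary,
// and masking off the low 12 bits snaps the result back down to that boundary.
constexpr uint32_t round_up_bitmask(uint32_t x) { return (x + 0xfffu) & ~0xfffu; }
constexpr uint32_t round_up_div(uint32_t x) { return ((x + 4095u) / 4096u) * 4096u; }

static_assert(round_up_bitmask(1) == 4096u);
static_assert(round_up_bitmask(4096u) == 4096u);
static_assert(round_up_bitmask(4097u) == round_up_div(4097u));
```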
+ uint32_t const initial_blk_len = meta.max_block_size * 2 + (meta.max_block_size * 2) % 4096; + size_t const uncomp_size = initial_blk_len * meta.block_list.size(); - // Guess an initial maximum uncompressed block size - uint32_t initial_blk_len = (meta.max_block_size * 2 + 0xfff) & ~0xfff; - uncompressed_data_size = initial_blk_len * meta.block_list.size(); - for (size_t i = 0; i < inflate_in.size(); ++i) { - inflate_in[i].dstSize = initial_blk_len; - } - - rmm::device_buffer decomp_block_data(uncompressed_data_size, stream); + rmm::device_buffer decomp_block_data(uncomp_size, stream); auto const base_offset = meta.block_list[0].offset; for (size_t i = 0, dst_pos = 0; i < meta.block_list.size(); i++) { auto const src_pos = meta.block_list[i].offset - base_offset; - inflate_in[i].srcDevice = static_cast(comp_block_data.data()) + src_pos; - inflate_in[i].srcSize = meta.block_list[i].size; - inflate_in[i].dstDevice = static_cast(decomp_block_data.data()) + dst_pos; + inflate_in[i] = {static_cast(comp_block_data.data()) + src_pos, + meta.block_list[i].size}; + inflate_out[i] = {static_cast(decomp_block_data.data()) + dst_pos, initial_blk_len}; // Update blocks offsets & sizes to refer to uncompressed data meta.block_list[i].offset = dst_pos; - meta.block_list[i].size = static_cast(inflate_in[i].dstSize); + meta.block_list[i].size = static_cast(inflate_out[i].size()); dst_pos += meta.block_list[i].size; } + inflate_in.host_to_device(stream); for (int loop_cnt = 0; loop_cnt < 2; loop_cnt++) { - inflate_in.host_to_device(stream); - CUDF_CUDA_TRY( - cudaMemsetAsync(inflate_out.device_ptr(), 0, inflate_out.memory_size(), stream.value())); - CUDF_CUDA_TRY(gpuinflate( - inflate_in.device_ptr(), inflate_out.device_ptr(), inflate_in.size(), 0, stream)); - inflate_out.device_to_host(stream, true); + inflate_out.host_to_device(stream); + CUDF_CUDA_TRY(cudaMemsetAsync( + inflate_stats.device_ptr(), 0, inflate_stats.memory_size(), stream.value())); + gpuinflate(inflate_in, inflate_out, inflate_stats, gzip_header_included::NO, stream); + inflate_stats.device_to_host(stream, true); // Check if larger output is required, as it's not known ahead of time if (loop_cnt == 0) { - size_t actual_uncompressed_size = 0; - for (size_t i = 0; i < meta.block_list.size(); i++) { - // If error status is 1 (buffer too small), the `bytes_written` field - // is actually contains the uncompressed data size - if (inflate_out[i].status == 1 && inflate_out[i].bytes_written > inflate_in[i].dstSize) { - inflate_in[i].dstSize = inflate_out[i].bytes_written; - } - actual_uncompressed_size += inflate_in[i].dstSize; - } - if (actual_uncompressed_size > uncompressed_data_size) { - decomp_block_data.resize(actual_uncompressed_size, stream); - for (size_t i = 0, dst_pos = 0; i < meta.block_list.size(); i++) { - auto dst_base = static_cast(decomp_block_data.data()); - inflate_in[i].dstDevice = dst_base + dst_pos; - - meta.block_list[i].offset = dst_pos; - meta.block_list[i].size = static_cast(inflate_in[i].dstSize); - dst_pos += meta.block_list[i].size; + std::vector actual_uncomp_sizes; + actual_uncomp_sizes.reserve(inflate_out.size()); + std::transform(inflate_out.begin(), + inflate_out.end(), + inflate_stats.begin(), + std::back_inserter(actual_uncomp_sizes), + [](auto const& inf_out, auto const& inf_stats) { + // If error status is 1 (buffer too small), the `bytes_written` field + // actually contains the uncompressed data size + return inf_stats.status == 1 + ? 
std::max(inf_out.size(), inf_stats.bytes_written) + : inf_out.size(); + }); + auto const total_actual_uncomp_size = + std::accumulate(actual_uncomp_sizes.cbegin(), actual_uncomp_sizes.cend(), 0ul); + if (total_actual_uncomp_size > uncomp_size) { + decomp_block_data.resize(total_actual_uncomp_size, stream); + for (size_t i = 0; i < meta.block_list.size(); ++i) { + meta.block_list[i].offset = + i > 0 ? (meta.block_list[i - 1].size + meta.block_list[i - 1].offset) : 0; + meta.block_list[i].size = static_cast(actual_uncomp_sizes[i]); + + inflate_out[i] = { + static_cast(decomp_block_data.data()) + meta.block_list[i].offset, + meta.block_list[i].size}; } } else { break; diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu index 631cf19b2aa..cf4d1b0e0f4 100644 --- a/cpp/src/io/comp/debrotli.cu +++ b/cpp/src/io/comp/debrotli.cu @@ -1904,41 +1904,42 @@ static __device__ void ProcessCommands(debrotli_state_s* s, const brotli_diction * * blockDim = {block_size,1,1} * - * @param[in] inputs Source/Destination buffer information per block - * @param[out] outputs Decompressor status per block + * @param[in] inputs Source buffer per block + * @param[out] outputs Destination buffer per block + * @param[out] statuses Decompressor status per block * @param scratch Intermediate device memory heap space (will be dynamically shared between blocks) * @param scratch_size Size of scratch heap space (smaller sizes may result in serialization between - *blocks) - * @param count Number of blocks to decompress + * blocks) */ -extern "C" __global__ void __launch_bounds__(block_size, 2) - gpu_debrotli_kernel(gpu_inflate_input_s* inputs, - gpu_inflate_status_s* outputs, +__global__ void __launch_bounds__(block_size, 2) + gpu_debrotli_kernel(device_span const> inputs, + device_span const> outputs, + device_span statuses, uint8_t* scratch, - uint32_t scratch_size, - uint32_t count) + uint32_t scratch_size) { __shared__ __align__(16) debrotli_state_s state_g; int t = threadIdx.x; - int z = blockIdx.x; + auto const block_id = blockIdx.x; debrotli_state_s* const s = &state_g; - if (z >= count) { return; } + if (block_id >= inputs.size()) { return; } // Thread0: initializes shared state and decode stream header if (!t) { - auto const* src = static_cast(inputs[z].srcDevice); - size_t src_size = inputs[z].srcSize; + auto const src = inputs[block_id].data(); + auto const src_size = inputs[block_id].size(); if (src && src_size >= 8) { - s->error = 0; - s->out = s->outbase = static_cast(inputs[z].dstDevice); - s->bytes_left = inputs[z].dstSize; - s->mtf_upper_bound = 63; - s->dist_rb[0] = 16; - s->dist_rb[1] = 15; - s->dist_rb[2] = 11; - s->dist_rb[3] = 4; - s->dist_rb_idx = 0; + s->error = 0; + s->out = outputs[block_id].data(); + s->outbase = s->out; + s->bytes_left = outputs[block_id].size(); + s->mtf_upper_bound = 63; + s->dist_rb[0] = 16; + s->dist_rb[1] = 15; + s->dist_rb[2] = 11; + s->dist_rb[3] = 4; + s->dist_rb_idx = 0; s->p1 = s->p2 = 0; initbits(s, src, src_size); DecodeStreamHeader(s); @@ -2015,9 +2016,10 @@ extern "C" __global__ void __launch_bounds__(block_size, 2) __syncthreads(); // Output decompression status if (!t) { - outputs[z].bytes_written = s->out - s->outbase; - outputs[z].status = s->error; - outputs[z].reserved = s->fb_size; // Return ext heap used by last block (statistics) + statuses[block_id].bytes_written = s->out - s->outbase; + statuses[block_id].status = s->error; + // Return ext heap used by last block (statistics) + statuses[block_id].reserved = s->fb_size; } } @@ -2075,20 
+2077,21 @@ size_t __host__ get_gpu_debrotli_scratch_size(int max_num_inputs) #include #endif -cudaError_t __host__ gpu_debrotli(gpu_inflate_input_s* inputs, - gpu_inflate_status_s* outputs, - void* scratch, - size_t scratch_size, - int count, - rmm::cuda_stream_view stream) +void gpu_debrotli(device_span const> inputs, + device_span const> outputs, + device_span statuses, + void* scratch, + size_t scratch_size, + rmm::cuda_stream_view stream) { - uint32_t count32 = (count > 0) ? count : 0; + auto const count = inputs.size(); uint32_t fb_heap_size; auto* scratch_u8 = static_cast(scratch); dim3 dim_block(block_size, 1); - dim3 dim_grid(count32, 1); // TODO: Check max grid dimensions vs max expected count + dim3 dim_grid(count, 1); // TODO: Check max grid dimensions vs max expected count - if (scratch_size < sizeof(brotli_dictionary_s)) { return cudaErrorLaunchOutOfResources; } + CUDF_EXPECTS(scratch_size >= sizeof(brotli_dictionary_s), + "Insufficient scratch space for debrotli"); scratch_size = min(scratch_size, (size_t)0xffffffffu); fb_heap_size = (uint32_t)((scratch_size - sizeof(brotli_dictionary_s)) & ~0xf); @@ -2101,7 +2104,7 @@ cudaError_t __host__ gpu_debrotli(gpu_inflate_input_s* inputs, cudaMemcpyHostToDevice, stream.value())); gpu_debrotli_kernel<<>>( - inputs, outputs, scratch_u8, fb_heap_size, count32); + inputs, outputs, statuses, scratch_u8, fb_heap_size); #if DUMP_FB_HEAP uint32_t dump[2]; uint32_t cur = 0; @@ -2114,8 +2117,6 @@ cudaError_t __host__ gpu_debrotli(gpu_inflate_input_s* inputs, cur = (dump[0] > cur) ? dump[0] : 0xffffffffu; } #endif - - return cudaSuccess; } } // namespace io diff --git a/cpp/src/io/comp/gpuinflate.cu b/cpp/src/io/comp/gpuinflate.cu index 508e960430d..0d33158da2b 100644 --- a/cpp/src/io/comp/gpuinflate.cu +++ b/cpp/src/io/comp/gpuinflate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -124,8 +124,8 @@ struct inflate_state_s { uint8_t* outbase; ///< start of output buffer uint8_t* outend; ///< end of output buffer // Input state - uint8_t* cur; ///< input buffer - uint8_t* end; ///< end of input buffer + uint8_t const* cur; ///< input buffer + uint8_t const* end; ///< end of input buffer uint2 bitbuf; ///< bit buffer (64-bit) uint32_t bitpos; ///< position in bit buffer @@ -180,10 +180,10 @@ inline __device__ void skipbits(inflate_state_s* s, uint32_t n) { uint32_t bitpos = s->bitpos + n; if (bitpos >= 32) { - uint8_t* cur = s->cur + 8; - s->bitbuf.x = s->bitbuf.y; - s->bitbuf.y = (cur < s->end) ? *reinterpret_cast(cur) : 0; - s->cur = cur - 4; + auto cur = s->cur + 8; + s->bitbuf.x = s->bitbuf.y; + s->bitbuf.y = (cur < s->end) ? 
*reinterpret_cast(cur) : 0; + s->cur = cur - 4; bitpos &= 0x1f; } s->bitpos = bitpos; @@ -510,8 +510,8 @@ __device__ void decode_symbols(inflate_state_s* s) { uint32_t bitpos = s->bitpos; uint2 bitbuf = s->bitbuf; - uint8_t* cur = s->cur; - uint8_t* end = s->end; + auto cur = s->cur; + auto end = s->end; int32_t batch = 0; int32_t sym, batch_len; @@ -871,13 +871,11 @@ __device__ int init_stored(inflate_state_s* s) /// Copy bytes from stored block to destination __device__ void copy_stored(inflate_state_s* s, int t) { - int len = s->stored_blk_len; - uint8_t* cur = s->cur + (s->bitpos >> 3); - uint8_t* out = s->out; - uint8_t* outend = s->outend; - uint8_t* cur4; - int slow_bytes = min(len, (int)((16 - (size_t)out) & 0xf)); - int fast_bytes, bitpos; + auto len = s->stored_blk_len; + auto cur = s->cur + s->bitpos / 8; + auto out = s->out; + auto outend = s->outend; + auto const slow_bytes = min(len, (int)((16 - reinterpret_cast(out)) % 16)); // Slow copy until output is 16B aligned if (slow_bytes) { @@ -890,11 +888,11 @@ __device__ void copy_stored(inflate_state_s* s, int t) out += slow_bytes; len -= slow_bytes; } - fast_bytes = len; + auto fast_bytes = len; if (out < outend) { fast_bytes = (int)min((size_t)fast_bytes, (outend - out)); } fast_bytes &= ~0xf; - bitpos = ((int)(3 & (size_t)cur)) << 3; - cur4 = cur - (bitpos >> 3); + auto bitpos = ((int)((size_t)cur % 4)) * 8; + auto cur4 = cur - (bitpos / 8); if (out < outend) { // Fast copy 16 bytes at a time for (int i = t * 16; i < fast_bytes; i += blockDim.x * 16) { @@ -926,13 +924,13 @@ __device__ void copy_stored(inflate_state_s* s, int t) __syncthreads(); if (t == 0) { // Reset bitstream to end of block - uint8_t* p = cur + len; + auto p = cur + len; auto prefix_bytes = (uint32_t)(((size_t)p) & 3); p -= prefix_bytes; s->cur = p; - s->bitbuf.x = (p < s->end) ? *reinterpret_cast(p) : 0; + s->bitbuf.x = (p < s->end) ? *reinterpret_cast(p) : 0; p += 4; - s->bitbuf.y = (p < s->end) ? *reinterpret_cast(p) : 0; + s->bitbuf.y = (p < s->end) ? *reinterpret_cast(p) : 0; s->bitpos = prefix_bytes * 8; s->out = out; } @@ -1021,12 +1019,16 @@ __device__ int parse_gzip_header(const uint8_t* src, size_t src_size) * * @tparam block_size Thread block dimension for this call * @param inputs Source and destination buffer information per block - * @param outputs Decompression status buffer per block + * @param outputs Destination buffer information per block + * @param statuses Decompression status buffer per block * @param parse_hdr If nonzero, indicates that the compressed bitstream includes a GZIP header */ template __global__ void __launch_bounds__(block_size) - inflate_kernel(gpu_inflate_input_s* inputs, gpu_inflate_status_s* outputs, int parse_hdr) + inflate_kernel(device_span const> inputs, + device_span const> outputs, + device_span statuses, + gzip_header_included parse_hdr) { __shared__ __align__(16) inflate_state_s state_g; @@ -1035,12 +1037,11 @@ __global__ void __launch_bounds__(block_size) inflate_state_s* state = &state_g; if (!t) { - auto* p = const_cast(static_cast(inputs[z].srcDevice)); - size_t src_size = inputs[z].srcSize; - uint32_t prefix_bytes; + auto p = inputs[z].data(); + auto src_size = inputs[z].size(); // Parse header if needed state->err = 0; - if (parse_hdr) { + if (parse_hdr == gzip_header_included::YES) { int hdr_len = parse_gzip_header(p, src_size); src_size = (src_size >= 8) ? 
src_size - 8 : 0; // ignore footer if (hdr_len >= 0) { @@ -1051,16 +1052,16 @@ __global__ void __launch_bounds__(block_size) } } // Initialize shared state - state->out = const_cast(static_cast(inputs[z].dstDevice)); - state->outbase = state->out; - state->outend = state->out + inputs[z].dstSize; - state->end = p + src_size; - prefix_bytes = (uint32_t)(((size_t)p) & 3); + state->out = outputs[z].data(); + state->outbase = state->out; + state->outend = state->out + outputs[z].size(); + state->end = p + src_size; + auto const prefix_bytes = (uint32_t)(((size_t)p) & 3); p -= prefix_bytes; state->cur = p; - state->bitbuf.x = (p < state->end) ? *reinterpret_cast(p) : 0; + state->bitbuf.x = (p < state->end) ? *reinterpret_cast(p) : 0; p += 4; - state->bitbuf.y = (p < state->end) ? *reinterpret_cast(p) : 0; + state->bitbuf.y = (p < state->end) ? *reinterpret_cast(p) : 0; state->bitpos = prefix_bytes * 8; } __syncthreads(); @@ -1132,9 +1133,9 @@ __global__ void __launch_bounds__(block_size) // Output buffer too small state->err = 1; } - outputs[z].bytes_written = state->out - state->outbase; - outputs[z].status = state->err; - outputs[z].reserved = (int)(state->end - state->cur); // Here mainly for debug purposes + statuses[z].bytes_written = state->out - state->outbase; + statuses[z].status = state->err; + statuses[z].reserved = (int)(state->end - state->cur); // Here mainly for debug purposes } } @@ -1145,7 +1146,9 @@ __global__ void __launch_bounds__(block_size) * * @param inputs Source and destination information per block */ -__global__ void __launch_bounds__(1024) copy_uncompressed_kernel(gpu_inflate_input_s* inputs) +__global__ void __launch_bounds__(1024) + copy_uncompressed_kernel(device_span const> inputs, + device_span const> outputs) { __shared__ const uint8_t* volatile src_g; __shared__ uint8_t* volatile dst_g; @@ -1158,9 +1161,9 @@ __global__ void __launch_bounds__(1024) copy_uncompressed_kernel(gpu_inflate_inp uint32_t len, src_align_bytes, src_align_bits, dst_align_bytes; if (!t) { - src = static_cast(inputs[z].srcDevice); - dst = static_cast(inputs[z].dstDevice); - len = min((uint32_t)inputs[z].srcSize, (uint32_t)inputs[z].dstSize); + src = inputs[z].data(); + dst = outputs[z].data(); + len = static_cast(min(inputs[z].size(), outputs[z].size())); src_g = src; dst_g = dst; copy_len_g = len; @@ -1195,26 +1198,26 @@ __global__ void __launch_bounds__(1024) copy_uncompressed_kernel(gpu_inflate_inp if (t < len) { dst[t] = src[t]; } } -cudaError_t __host__ gpuinflate(gpu_inflate_input_s* inputs, - gpu_inflate_status_s* outputs, - int count, - int parse_hdr, - rmm::cuda_stream_view stream) +void gpuinflate(device_span const> inputs, + device_span const> outputs, + device_span statuses, + gzip_header_included parse_hdr, + rmm::cuda_stream_view stream) { constexpr int block_size = 128; // Threads per block - if (count > 0) { + if (inputs.size() > 0) { inflate_kernel - <<>>(inputs, outputs, parse_hdr); + <<>>(inputs, outputs, statuses, parse_hdr); } - return cudaSuccess; } -cudaError_t __host__ gpu_copy_uncompressed_blocks(gpu_inflate_input_s* inputs, - int count, - rmm::cuda_stream_view stream) +void gpu_copy_uncompressed_blocks(device_span const> inputs, + device_span const> outputs, + rmm::cuda_stream_view stream) { - if (count > 0) { copy_uncompressed_kernel<<>>(inputs); } - return cudaSuccess; + if (inputs.size() > 0) { + copy_uncompressed_kernel<<>>(inputs, outputs); + } } } // namespace io diff --git a/cpp/src/io/comp/gpuinflate.h b/cpp/src/io/comp/gpuinflate.h index 
29856bcd3f3..3870b2ac3b3 100644 --- a/cpp/src/io/comp/gpuinflate.h +++ b/cpp/src/io/comp/gpuinflate.h @@ -16,75 +16,70 @@ #pragma once -#include +#include #include +#include + namespace cudf { namespace io { -/** - * @brief Input parameters for the decompression interface - */ -struct gpu_inflate_input_s { - const void* srcDevice; - uint64_t srcSize; - void* dstDevice; - uint64_t dstSize; -}; /** * @brief Output parameters for the decompression interface */ -struct gpu_inflate_status_s { +struct decompress_status { uint64_t bytes_written; uint32_t status; uint32_t reserved; }; +enum class gzip_header_included { NO, YES }; + /** * @brief Interface for decompressing GZIP-compressed data * * Multiple, independent chunks of compressed data can be decompressed by using - * separate gpu_inflate_input_s/gpu_inflate_status_s pairs for each chunk. + * separate input/output/status for each chunk. * - * @param[in] inputs List of input argument structures - * @param[out] outputs List of output status structures - * @param[in] count Number of input/output structures + * @param[in] inputs List of input buffers + * @param[out] outputs List of output buffers + * @param[out] statuses List of output status structures * @param[in] parse_hdr Whether or not to parse GZIP header * @param[in] stream CUDA stream to use */ -cudaError_t gpuinflate(gpu_inflate_input_s* inputs, - gpu_inflate_status_s* outputs, - int count, - int parse_hdr, - rmm::cuda_stream_view stream); +void gpuinflate(device_span const> inputs, + device_span const> outputs, + device_span statuses, + gzip_header_included parse_hdr, + rmm::cuda_stream_view stream); /** * @brief Interface for copying uncompressed byte blocks * - * @param[in] inputs List of input argument structures - * @param[in] count Number of input structures + * @param[in] inputs List of input buffers + * @param[out] outputs List of output buffers * @param[in] stream CUDA stream to use */ -cudaError_t gpu_copy_uncompressed_blocks(gpu_inflate_input_s* inputs, - int count, - rmm::cuda_stream_view stream); +void gpu_copy_uncompressed_blocks(device_span const> inputs, + device_span const> outputs, + rmm::cuda_stream_view stream); /** * @brief Interface for decompressing Snappy-compressed data * * Multiple, independent chunks of compressed data can be decompressed by using - * separate gpu_inflate_input_s/gpu_inflate_status_s pairs for each chunk. + * separate input/output/status for each chunk. * - * @param[in] inputs List of input argument structures - * @param[out] outputs List of output status structures - * @param[in] count Number of input/output structures + * @param[in] inputs List of input buffers + * @param[out] outputs List of output buffers + * @param[out] statuses List of output status structures * @param[in] stream CUDA stream to use */ -cudaError_t gpu_unsnap(gpu_inflate_input_s* inputs, - gpu_inflate_status_s* outputs, - int count, - rmm::cuda_stream_view stream); +void gpu_unsnap(device_span const> inputs, + device_span const> outputs, + device_span statuses, + rmm::cuda_stream_view stream); /** * @brief Computes the size of temporary memory for Brotli decompression @@ -99,37 +94,37 @@ size_t get_gpu_debrotli_scratch_size(int max_num_inputs = 0); * @brief Interface for decompressing Brotli-compressed data * * Multiple, independent chunks of compressed data can be decompressed by using - * separate gpu_inflate_input_s/gpu_inflate_status_s pairs for each chunk. + * separate input/output/status pairs for each chunk. 
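All of these batched interfaces report per-chunk results through `decompress_status`. As the avro change earlier in this diff notes, a `status` of 1 means the output buffer was too small, and `bytes_written` then carries the size the block actually needed. A hedged host-side sketch of that retry contract; `needs_retry` and the host vectors are illustrative, not cuIO API:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

struct decompress_status {  // mirrors the struct declared above
  uint64_t bytes_written;
  uint32_t status;  // 0 = success, 1 = output buffer too small
  uint32_t reserved;
};

// Returns true if any block needs a second pass with a larger output buffer.
bool needs_retry(std::vector<decompress_status> const& stats,
                 std::vector<size_t>& required_sizes)
{
  bool retry = false;
  for (size_t i = 0; i < stats.size(); ++i) {
    if (stats[i].status == 1) {
      // On "buffer too small", bytes_written holds the required output size
      required_sizes[i] = std::max<size_t>(required_sizes[i], stats[i].bytes_written);
      retry             = true;  // caller reallocates and runs a second pass
    }
  }
  return retry;
}
```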
* - * @param[in] inputs List of input argument structures - * @param[out] outputs List of output status structures + * @param[in] inputs List of input buffers + * @param[out] outputs List of output buffers + * @param[out] statuses List of output status structures * @param[in] scratch Temporary memory for intermediate work * @param[in] scratch_size Size in bytes of the temporary memory - * @param[in] count Number of input/output structures * @param[in] stream CUDA stream to use */ -cudaError_t gpu_debrotli(gpu_inflate_input_s* inputs, - gpu_inflate_status_s* outputs, - void* scratch, - size_t scratch_size, - int count, - rmm::cuda_stream_view stream); +void gpu_debrotli(device_span const> inputs, + device_span const> outputs, + device_span statuses, + void* scratch, + size_t scratch_size, + rmm::cuda_stream_view stream); /** * @brief Interface for compressing data with Snappy * * Multiple, independent chunks of compressed data can be compressed by using - * separate gpu_inflate_input_s/gpu_inflate_status_s pairs for each chunk. + * separate input/output/status for each chunk. * - * @param[in] inputs List of input argument structures - * @param[out] outputs List of output status structures - * @param[in] count Number of input/output structures + * @param[in] inputs List of input buffers + * @param[out] outputs List of output buffers + * @param[out] statuses List of output status structures * @param[in] stream CUDA stream to use */ -cudaError_t gpu_snap(gpu_inflate_input_s* inputs, - gpu_inflate_status_s* outputs, - int count, - rmm::cuda_stream_view stream); +void gpu_snap(device_span const> inputs, + device_span const> outputs, + device_span statuses, + rmm::cuda_stream_view stream); } // namespace io } // namespace cudf diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp new file mode 100644 index 00000000000..b2e6f07b80b --- /dev/null +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "nvcomp_adapter.hpp" +#include "nvcomp_adapter.cuh" + +#include + +#include + +namespace cudf::io::nvcomp { + +template +auto batched_decompress_get_temp_size(compression_type type, Args&&... args) +{ + switch (type) { + case compression_type::SNAPPY: + return nvcompBatchedSnappyDecompressGetTempSize(std::forward(args)...); + default: CUDF_FAIL("Unsupported compression type"); + } +}; + +template +auto batched_decompress_async(compression_type type, Args&&... 
args) +{ + switch (type) { + case compression_type::SNAPPY: + return nvcompBatchedSnappyDecompressAsync(std::forward(args)...); + default: CUDF_FAIL("Unsupported compression type"); + } +}; + +size_t get_temp_size(compression_type type, size_t num_chunks, size_t max_uncomp_chunk_size) +{ + size_t temp_size = 0; + nvcompStatus_t nvcomp_status = + batched_decompress_get_temp_size(type, num_chunks, max_uncomp_chunk_size, &temp_size); + CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, + "Unable to get scratch size for decompression"); + + return temp_size; +} + +void batched_decompress(compression_type type, + device_span const> inputs, + device_span const> outputs, + device_span statuses, + size_t max_uncomp_chunk_size, + rmm::cuda_stream_view stream) +{ + auto const num_chunks = inputs.size(); + + // cuDF inflate inputs converted to nvcomp inputs + auto const nvcomp_args = create_batched_nvcomp_args(inputs, outputs, stream); + rmm::device_uvector actual_uncompressed_data_sizes(num_chunks, stream); + rmm::device_uvector nvcomp_statuses(num_chunks, stream); + // Temporary space required for decompression + rmm::device_buffer scratch(get_temp_size(type, num_chunks, max_uncomp_chunk_size), stream); + auto const nvcomp_status = batched_decompress_async(type, + nvcomp_args.compressed_data_ptrs.data(), + nvcomp_args.compressed_data_sizes.data(), + nvcomp_args.uncompressed_data_sizes.data(), + actual_uncompressed_data_sizes.data(), + num_chunks, + scratch.data(), + scratch.size(), + nvcomp_args.uncompressed_data_ptrs.data(), + nvcomp_statuses.data(), + stream.value()); + CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, "unable to perform decompression"); + + convert_status(nvcomp_statuses, actual_uncompressed_data_sizes, statuses, stream); +} +} // namespace cudf::io::nvcomp diff --git a/cpp/src/io/comp/nvcomp_adapter.cu b/cpp/src/io/comp/nvcomp_adapter.cu new file mode 100644 index 00000000000..ce294cc9b00 --- /dev/null +++ b/cpp/src/io/comp/nvcomp_adapter.cu @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
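Taken together, `get_temp_size` and `batched_decompress` above hide the nvcomp call sequence (query scratch size, allocate, launch the async decompress, convert statuses) behind a single call. A hedged sketch of a call site, assuming the chunk spans are already populated on the device; `decompress_snappy_chunks` is a hypothetical wrapper, not cuDF API:

```cpp
#include "nvcomp_adapter.hpp"

#include <rmm/cuda_stream_view.hpp>

#include <cstddef>
#include <cstdint>

void decompress_snappy_chunks(
  cudf::device_span<cudf::device_span<uint8_t const> const> inputs,
  cudf::device_span<cudf::device_span<uint8_t> const> outputs,
  cudf::device_span<cudf::io::decompress_status> statuses,
  size_t max_uncomp_chunk_size,
  rmm::cuda_stream_view stream)
{
  // One call performs scratch sizing/allocation, the async batched decompress,
  // and the nvcompStatus_t -> decompress_status conversion.
  cudf::io::nvcomp::batched_decompress(cudf::io::nvcomp::compression_type::SNAPPY,
                                       inputs,
                                       outputs,
                                       statuses,
                                       max_uncomp_chunk_size,
                                       stream);
}
```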
+ */ +#include "nvcomp_adapter.cuh" + +#include + +#include + +namespace cudf::io::nvcomp { + +batched_args create_batched_nvcomp_args(device_span const> inputs, + device_span const> outputs, + rmm::cuda_stream_view stream) +{ + size_t num_comp_pages = inputs.size(); + rmm::device_uvector compressed_data_ptrs(num_comp_pages, stream); + rmm::device_uvector compressed_data_sizes(num_comp_pages, stream); + rmm::device_uvector uncompressed_data_ptrs(num_comp_pages, stream); + rmm::device_uvector uncompressed_data_sizes(num_comp_pages, stream); + + // Prepare the input vectors + auto ins_it = + thrust::make_zip_iterator(compressed_data_ptrs.begin(), compressed_data_sizes.begin()); + thrust::transform( + rmm::exec_policy(stream), inputs.begin(), inputs.end(), ins_it, [] __device__(auto const& in) { + return thrust::make_tuple(in.data(), in.size()); + }); + + // Prepare the output vectors + auto outs_it = + thrust::make_zip_iterator(uncompressed_data_ptrs.begin(), uncompressed_data_sizes.begin()); + thrust::transform( + rmm::exec_policy(stream), + outputs.begin(), + outputs.end(), + outs_it, + [] __device__(auto const& out) { return thrust::make_tuple(out.data(), out.size()); }); + + return {std::move(compressed_data_ptrs), + std::move(compressed_data_sizes), + std::move(uncompressed_data_ptrs), + std::move(uncompressed_data_sizes)}; +} + +void convert_status(device_span nvcomp_stats, + device_span actual_uncompressed_sizes, + device_span cudf_stats, + rmm::cuda_stream_view stream) +{ + thrust::transform( + rmm::exec_policy(stream), + nvcomp_stats.begin(), + nvcomp_stats.end(), + actual_uncompressed_sizes.begin(), + cudf_stats.begin(), + [] __device__(auto const& status, auto const& size) { + return decompress_status{size, status == nvcompStatus_t::nvcompSuccess ? 0u : 1u}; + }); +} +} // namespace cudf::io::nvcomp diff --git a/cpp/src/io/comp/nvcomp_adapter.cuh b/cpp/src/io/comp/nvcomp_adapter.cuh new file mode 100644 index 00000000000..a76ddcf6813 --- /dev/null +++ b/cpp/src/io/comp/nvcomp_adapter.cuh @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "gpuinflate.h" + +#include + +#include + +#include +#include + +namespace cudf::io::nvcomp { + +struct batched_args { + rmm::device_uvector compressed_data_ptrs; + rmm::device_uvector compressed_data_sizes; + rmm::device_uvector uncompressed_data_ptrs; + rmm::device_uvector uncompressed_data_sizes; +}; + +/** + * @brief Split lists of src/dst device spans into lists of pointers/sizes. + * + * @param[in] inputs List of input buffers + * @param[in] outputs List of output buffers + * @param[in] stream CUDA stream to use + */ +batched_args create_batched_nvcomp_args(device_span const> inputs, + device_span const> outputs, + rmm::cuda_stream_view stream); + +/** + * @brief Convert nvcomp statuses into cuIO compression statuses. 
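`create_batched_nvcomp_args` above is an array-of-structs to struct-of-arrays split: a single `thrust::transform` writes through a zip iterator into two parallel device vectors. A generic, self-contained sketch of that pattern (`span_like` stands in for `device_span`; the device lambda requires nvcc's extended-lambda mode):

```cpp
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>

#include <cstddef>

struct span_like {  // stand-in for a device span: pointer plus length
  unsigned char const* ptr;
  size_t len;
};

void split_to_soa(thrust::device_vector<span_like> const& in,
                  thrust::device_vector<unsigned char const*>& ptrs,
                  thrust::device_vector<size_t>& sizes)
{
  // Zipping the two outputs lets one pass fill both parallel arrays.
  auto out = thrust::make_zip_iterator(thrust::make_tuple(ptrs.begin(), sizes.begin()));
  thrust::transform(
    thrust::device, in.begin(), in.end(), out, [] __device__(span_like const& s) {
      return thrust::make_tuple(s.ptr, s.len);
    });
}
```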
+ */ +void convert_status(device_span nvcomp_stats, + device_span actual_uncompressed_sizes, + device_span cudf_stats, + rmm::cuda_stream_view stream); +} // namespace cudf::io::nvcomp diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp new file mode 100644 index 00000000000..a0eb6bc4fbf --- /dev/null +++ b/cpp/src/io/comp/nvcomp_adapter.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "gpuinflate.h" + +#include + +#include + +namespace cudf::io::nvcomp { + +enum class compression_type { SNAPPY }; + +/** + * @brief Device batch decompression of given type. + * + * @param[in] type Compression type + * @param[in] inputs List of input buffers + * @param[out] outputs List of output buffers + * @param[out] statuses List of output status structures + * @param[in] max_uncomp_page_size maximum size of uncompressed block + * @param[in] stream CUDA stream to use + */ +void batched_decompress(compression_type type, + device_span const> inputs, + device_span const> outputs, + device_span statuses, + size_t max_uncomp_page_size, + rmm::cuda_stream_view stream); +} // namespace cudf::io::nvcomp diff --git a/cpp/src/io/comp/snap.cu b/cpp/src/io/comp/snap.cu index 9f0a610f8f7..d64eea06631 100644 --- a/cpp/src/io/comp/snap.cu +++ b/cpp/src/io/comp/snap.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
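One small pattern recurs across these signatures: the bare `int parse_hdr` flag became the `gzip_header_included` enum class, so call sites are self-describing. A minimal sketch of the before/after, assuming nothing beyond the enum itself:

```cpp
enum class gzip_header_included { NO, YES };

// Before: inflate(..., 0);  // is 0 "no header" or "don't validate"?
// After: the intent is visible at the call site.
void inflate_example(gzip_header_included parse_hdr)
{
  if (parse_hdr == gzip_header_included::YES) {
    // skip/validate the GZIP header before the DEFLATE stream
  }
}
```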
@@ -258,7 +258,9 @@ static __device__ uint32_t Match60(const uint8_t* src1, * @param[in] count Number of blocks to compress */ __global__ void __launch_bounds__(128) - snap_kernel(gpu_inflate_input_s* inputs, gpu_inflate_status_s* outputs, int count) + snap_kernel(device_span const> inputs, + device_span const> outputs, + device_span statuses) { __shared__ __align__(16) snap_state_s state_g; @@ -268,15 +270,15 @@ __global__ void __launch_bounds__(128) const uint8_t* src; if (!t) { - const auto* src = static_cast(inputs[blockIdx.x].srcDevice); - auto src_len = static_cast(inputs[blockIdx.x].srcSize); - auto* dst = static_cast(inputs[blockIdx.x].dstDevice); - auto dst_len = static_cast(inputs[blockIdx.x].dstSize); - uint8_t* end = dst + dst_len; - s->src = src; - s->src_len = src_len; - s->dst_base = dst; - s->end = end; + auto const src = inputs[blockIdx.x].data(); + auto src_len = static_cast(inputs[blockIdx.x].size()); + auto dst = outputs[blockIdx.x].data(); + auto const dst_len = static_cast(outputs[blockIdx.x].size()); + auto const end = dst + dst_len; + s->src = src; + s->src_len = src_len; + s->dst_base = dst; + s->end = end; while (src_len > 0x7f) { if (dst < end) { dst[0] = src_len | 0x80; } dst++; @@ -335,23 +337,22 @@ __global__ void __launch_bounds__(128) } __syncthreads(); if (!t) { - outputs[blockIdx.x].bytes_written = s->dst - s->dst_base; - outputs[blockIdx.x].status = (s->dst > s->end) ? 1 : 0; - outputs[blockIdx.x].reserved = 0; + statuses[blockIdx.x].bytes_written = s->dst - s->dst_base; + statuses[blockIdx.x].status = (s->dst > s->end) ? 1 : 0; + statuses[blockIdx.x].reserved = 0; } } -cudaError_t __host__ gpu_snap(gpu_inflate_input_s* inputs, - gpu_inflate_status_s* outputs, - int count, - rmm::cuda_stream_view stream) +void gpu_snap(device_span const> inputs, + device_span const> outputs, + device_span statuses, + rmm::cuda_stream_view stream) { dim3 dim_block(128, 1); // 4 warps per stream, 1 stream per block - dim3 dim_grid(count, 1); - if (count > 0) { - snap_kernel<<>>(inputs, outputs, count); + dim3 dim_grid(inputs.size(), 1); + if (inputs.size() > 0) { + snap_kernel<<>>(inputs, outputs, statuses); } - return cudaSuccess; } } // namespace io diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index 791a16bc912..dc44b9fcd59 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
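The `gpu_snap`/`gpu_unsnap` rewrites in this diff all follow one recipe: drop the `count` parameter, size the grid from `inputs.size()`, and guard inside the kernel with the span's own size. A hedged sketch of that shape with a placeholder body; this is an illustration of the pattern, not the actual snappy kernels:

```cpp
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <cstdint>

using cudf::device_span;

__global__ void batch_kernel(device_span<device_span<uint8_t const> const> inputs,
                             device_span<device_span<uint8_t> const> outputs)
{
  auto const block_id = blockIdx.x;
  if (block_id >= inputs.size()) { return; }  // the span carries the batch count
  // ... one block processes inputs[block_id] into outputs[block_id] ...
}

void launch_batch(device_span<device_span<uint8_t const> const> inputs,
                  device_span<device_span<uint8_t> const> outputs,
                  rmm::cuda_stream_view stream)
{
  if (inputs.size() > 0) {
    batch_kernel<<<inputs.size(), 128, 0, stream.value()>>>(inputs, outputs);
  }
}
```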
@@ -64,14 +64,15 @@ struct unsnap_queue_s { * @brief snappy decompression state */ struct unsnap_state_s { - const uint8_t* base; ///< base ptr of compressed stream - const uint8_t* end; ///< end of compressed stream - uint32_t uncompressed_size; ///< uncompressed stream size - uint32_t bytes_left; ///< bytes to uncompressed remaining - int32_t error; ///< current error status - uint32_t tstart; ///< start time for perf logging - volatile unsnap_queue_s q; ///< queue for cross-warp communication - gpu_inflate_input_s in; ///< input parameters for current block + const uint8_t* base; ///< base ptr of compressed stream + const uint8_t* end; ///< end of compressed stream + uint32_t uncompressed_size; ///< uncompressed stream size + uint32_t bytes_left; ///< remaining bytes to decompress + int32_t error; ///< current error status + uint32_t tstart; ///< start time for perf logging + volatile unsnap_queue_s q; ///< queue for cross-warp communication + device_span src; ///< input for current block + device_span dst; ///< output for current block }; inline __device__ volatile uint8_t& byte_access(unsnap_state_s* s, uint32_t pos) @@ -497,9 +498,9 @@ __device__ void snappy_decode_symbols(unsnap_state_s* s, uint32_t t) template __device__ void snappy_process_symbols(unsnap_state_s* s, int t, Storage& temp_storage) { - const uint8_t* literal_base = s->base; - auto* out = static_cast(s->in.dstDevice); - int batch = 0; + auto const literal_base = s->base; + auto out = s->dst.data(); + int batch = 0; do { volatile unsnap_batch_s* b = &s->q.batch[batch * batch_size]; @@ -624,7 +625,9 @@ __device__ void snappy_process_symbols(unsnap_state_s* s, int t, Storage& temp_s */ template __global__ void __launch_bounds__(block_size) - unsnap_kernel(gpu_inflate_input_s* inputs, gpu_inflate_status_s* outputs) + unsnap_kernel(device_span const> inputs, + device_span const> outputs, + device_span statuses) { __shared__ __align__(16) unsnap_state_s state_g; __shared__ cub::WarpReduce::TempStorage temp_storage; @@ -632,16 +635,14 @@ __global__ void __launch_bounds__(block_size) unsnap_state_s* s = &state_g; int strm_id = blockIdx.x; - if (t < sizeof(gpu_inflate_input_s) / sizeof(uint32_t)) { - reinterpret_cast(&s->in)[t] = reinterpret_cast(&inputs[strm_id])[t]; - __threadfence_block(); - } if (t < batch_count) { s->q.batch_len[t] = 0; } __syncthreads(); if (!t) { - const auto* cur = static_cast(s->in.srcDevice); - const uint8_t* end = cur + s->in.srcSize; - s->error = 0; + s->src = inputs[strm_id]; + s->dst = outputs[strm_id]; + auto cur = s->src.begin(); + auto const end = s->src.end(); + s->error = 0; if (log_cyclecount) { s->tstart = clock(); } if (cur < end) { // Read uncompressed size (varint), limited to 32-bit @@ -672,7 +673,7 @@ __global__ void __launch_bounds__(block_size) s->bytes_left = uncompressed_size; s->base = cur; s->end = end; - if ((cur >= end && uncompressed_size != 0) || (uncompressed_size > s->in.dstSize)) { + if ((cur >= end && uncompressed_size != 0) || (uncompressed_size > s->dst.size())) { s->error = -1; } } else { @@ -697,28 +698,25 @@ __global__ void __launch_bounds__(block_size) __syncthreads(); } if (!t) { - outputs[strm_id].bytes_written = s->uncompressed_size - s->bytes_left; - outputs[strm_id].status = s->error; + statuses[strm_id].bytes_written = s->uncompressed_size - s->bytes_left; + statuses[strm_id].status = s->error; if (log_cyclecount) { - outputs[strm_id].reserved = clock() - s->tstart; + statuses[strm_id].reserved = clock() - s->tstart; } else { - outputs[strm_id].reserved = 0; 
+ statuses[strm_id].reserved = 0; } } } -cudaError_t __host__ gpu_unsnap(gpu_inflate_input_s* inputs, - gpu_inflate_status_s* outputs, - int count, - rmm::cuda_stream_view stream) +void gpu_unsnap(device_span const> inputs, + device_span const> outputs, + device_span statuses, + rmm::cuda_stream_view stream) { - uint32_t count32 = (count > 0) ? count : 0; - dim3 dim_block(128, 1); // 4 warps per stream, 1 stream per block - dim3 dim_grid(count32, 1); // TODO: Check max grid dimensions vs max expected count - - unsnap_kernel<128><<>>(inputs, outputs); + dim3 dim_block(128, 1); // 4 warps per stream, 1 stream per block + dim3 dim_grid(inputs.size(), 1); // TODO: Check max grid dimensions vs max expected count - return cudaSuccess; + unsnap_kernel<128><<>>(inputs, outputs, statuses); } } // namespace io diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 56a00191ae4..43411157319 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -356,7 +356,7 @@ __device__ field_descriptor next_field_descriptor(const char* begin, ? field_descriptor{field_idx, begin, cudf::io::gpu::seek_field_end(begin, end, opts, true)} : [&]() { auto const key_range = get_next_key(begin, end, opts.quotechar); - auto const key_hash = MurmurHash3_32{}( + auto const key_hash = cudf::detail::MurmurHash3_32{}( cudf::string_view(key_range.first, key_range.second - key_range.first)); auto const hash_col = col_map.find(key_hash); // Fall back to field index if not found (parsing error) @@ -667,7 +667,8 @@ __global__ void collect_keys_info_kernel(parse_options_view const options, keys_info->column(0).element(idx) = field_range.key_begin - data.begin(); keys_info->column(1).element(idx) = len; keys_info->column(2).element(idx) = - MurmurHash3_32{}(cudf::string_view(field_range.key_begin, len)); + cudf::detail::MurmurHash3_32{}( + cudf::string_view(field_range.key_begin, len)); } } } diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index a4ae9999a19..47244279599 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -177,7 +177,7 @@ std::vector aggregate_orc_metadata::select_stri per_file_metadata[src_file_idx].ff.stripes.size()), "Invalid stripe index"); stripe_infos.push_back( - std::make_pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr)); + std::pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr)); row_count += per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; } selected_stripes_mapping.push_back({static_cast(src_file_idx), stripe_infos}); @@ -206,7 +206,7 @@ std::vector aggregate_orc_metadata::select_stri count += per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; if (count > row_start || count == 0) { stripe_infos.push_back( - std::make_pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr)); + std::pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr)); } else { stripe_skip_rows = count; } diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index d94aa00c7b9..837fd03a112 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
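Another mechanical change running through this diff is `std::make_pair` giving way to plain `std::pair`, which relies on C++17 class template argument deduction. A one-line comparison:

```cpp
#include <utility>

auto a = std::make_pair(1, 2.0);  // pre-C++17: a helper function deduces the types
auto b = std::pair(1, 2.0);       // C++17 CTAD deduces std::pair<int, double> directly
```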
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,9 +43,10 @@ struct CompressedStreamInfo { : compressed_data(compressed_data_), uncompressed_data(nullptr), compressed_data_size(compressed_size_), - decctl(nullptr), - decstatus(nullptr), - copyctl(nullptr), + dec_in_ctl(nullptr), + dec_out_ctl(nullptr), + copy_in_ctl(nullptr), + copy_out_ctl(nullptr), num_compressed_blocks(0), num_uncompressed_blocks(0), max_uncompressed_size(0), @@ -54,14 +55,15 @@ struct CompressedStreamInfo { } const uint8_t* compressed_data; // [in] base ptr to compressed stream data uint8_t* uncompressed_data; // [in] base ptr to uncompressed stream data or NULL if not known yet - size_t compressed_data_size; // [in] compressed data size for this stream - gpu_inflate_input_s* decctl; // [in] base ptr to decompression structure to be filled - gpu_inflate_status_s* decstatus; // [in] results of decompression - gpu_inflate_input_s* - copyctl; // [in] base ptr to copy structure to be filled for uncompressed blocks + size_t compressed_data_size; // [in] compressed data size for this stream + device_span* dec_in_ctl; // [in] input buffer to decompress + device_span* dec_out_ctl; // [in] output buffer to decompress into + device_span decstatus; // [in] results of decompression + device_span* copy_in_ctl; // [out] input buffer to copy + device_span* copy_out_ctl; // [out] output buffer to copy to uint32_t num_compressed_blocks; // [in,out] number of entries in decctl(in), number of compressed // blocks(out) - uint32_t num_uncompressed_blocks; // [in,out] number of entries in copyctl(in), number of + uint32_t num_uncompressed_blocks; // [in,out] number of entries in dec_in_ctl(in), number of // uncompressed blocks(out) uint64_t max_uncompressed_size; // [out] maximum uncompressed data size of stream uint32_t max_uncompressed_block_size; // [out] maximum uncompressed size of any block in stream @@ -345,8 +347,9 @@ void CompactOrcDataStreams(device_2dspan strm_desc, * @param[in] max_comp_blk_size Max size of any block after compression * @param[in,out] strm_desc StripeStream device array [stripe][stream] * @param[in,out] enc_streams chunk streams device array [column][rowgroup] - * @param[out] comp_in Per-block compression input parameters - * @param[out] comp_out Per-block compression status + * @param[out] comp_in Per-block compression input buffers + * @param[out] comp_out Per-block compression output buffers + * @param[out] comp_stat Per-block compression status * @param[in] stream CUDA stream used for device memory operations and kernel launches */ void CompressOrcDataStreams(uint8_t* compressed_data, @@ -356,8 +359,9 @@ void CompressOrcDataStreams(uint8_t* compressed_data, uint32_t max_comp_blk_size, device_2dspan strm_desc, device_2dspan enc_streams, - device_span comp_in, - device_span comp_out, + device_span> comp_in, + device_span> comp_out, + device_span comp_stat, rmm::cuda_stream_view stream); /** diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 83c23774362..139eb28d1a1 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -25,6 +25,7 @@ #include "timezone.cuh" #include +#include #include #include @@ -40,8 +41,6 @@ #include #include -#include - #include #include #include @@ -108,20 +107,20 @@ constexpr std::pair get_index_type_and_pos( case orc::DATA: skip_count += 1; skip_count |= (skip_count & 0xff) << 8; - return 
std::make_pair(gpu::CI_DATA, skip_count); + return std::pair(gpu::CI_DATA, skip_count); case orc::LENGTH: case orc::SECONDARY: skip_count += 1; skip_count |= (skip_count & 0xff) << 16; - return std::make_pair(gpu::CI_DATA2, skip_count); - case orc::DICTIONARY_DATA: return std::make_pair(gpu::CI_DICTIONARY, skip_count); + return std::pair(gpu::CI_DATA2, skip_count); + case orc::DICTIONARY_DATA: return std::pair(gpu::CI_DICTIONARY, skip_count); case orc::PRESENT: skip_count += (non_child ? 1 : 0); - return std::make_pair(gpu::CI_PRESENT, skip_count); - case orc::ROW_INDEX: return std::make_pair(gpu::CI_INDEX, skip_count); + return std::pair(gpu::CI_PRESENT, skip_count); + case orc::ROW_INDEX: return std::pair(gpu::CI_INDEX, skip_count); default: // Skip this stream as it's not strictly required - return std::make_pair(gpu::CI_NUM_STREAMS, 0); + return std::pair(gpu::CI_NUM_STREAMS, 0); } } @@ -262,7 +261,7 @@ auto decimal_column_type(std::vector const& decimal128_columns, } // namespace -__global__ void decompress_check_kernel(device_span stats, +__global__ void decompress_check_kernel(device_span stats, bool* any_block_failure) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -273,7 +272,7 @@ __global__ void decompress_check_kernel(device_span } } -void decompress_check(device_span stats, +void decompress_check(device_span stats, bool* any_block_failure, rmm::cuda_stream_view stream) { @@ -284,74 +283,6 @@ void decompress_check(device_span stats, decompress_check_kernel<<>>(stats, any_block_failure); } -__global__ void convert_nvcomp_status(device_span nvcomp_stats, - device_span actual_uncompressed_sizes, - device_span stats) -{ - auto tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid < stats.size()) { - stats[tid].status = nvcomp_stats[tid] == nvcompStatus_t::nvcompSuccess ? 
0 : 1; - stats[tid].bytes_written = actual_uncompressed_sizes[tid]; - } -} - -void snappy_decompress(device_span comp_in, - device_span comp_stat, - size_t max_uncomp_page_size, - rmm::cuda_stream_view stream) -{ - size_t num_blocks = comp_in.size(); - size_t temp_size; - - auto status = - nvcompBatchedSnappyDecompressGetTempSize(num_blocks, max_uncomp_page_size, &temp_size); - CUDF_EXPECTS(nvcompStatus_t::nvcompSuccess == status, - "Unable to get scratch size for snappy decompression"); - - rmm::device_buffer scratch(temp_size, stream); - rmm::device_uvector compressed_data_ptrs(num_blocks, stream); - rmm::device_uvector compressed_data_sizes(num_blocks, stream); - rmm::device_uvector uncompressed_data_ptrs(num_blocks, stream); - rmm::device_uvector uncompressed_data_sizes(num_blocks, stream); - - rmm::device_uvector actual_uncompressed_data_sizes(num_blocks, stream); - rmm::device_uvector statuses(num_blocks, stream); - - device_span actual_uncompressed_sizes_span(actual_uncompressed_data_sizes.data(), - actual_uncompressed_data_sizes.size()); - device_span statuses_span(statuses.data(), statuses.size()); - - // Prepare the vectors - auto comp_it = thrust::make_zip_iterator(compressed_data_ptrs.begin(), - compressed_data_sizes.begin(), - uncompressed_data_ptrs.begin(), - uncompressed_data_sizes.data()); - thrust::transform(rmm::exec_policy(stream), - comp_in.begin(), - comp_in.end(), - comp_it, - [] __device__(gpu_inflate_input_s in) { - return thrust::make_tuple(in.srcDevice, in.srcSize, in.dstDevice, in.dstSize); - }); - - status = nvcompBatchedSnappyDecompressAsync(compressed_data_ptrs.data(), - compressed_data_sizes.data(), - uncompressed_data_sizes.data(), - actual_uncompressed_data_sizes.data(), - num_blocks, - scratch.data(), - scratch.size(), - uncompressed_data_ptrs.data(), - statuses.data(), - stream.value()); - CUDF_EXPECTS(nvcompStatus_t::nvcompSuccess == status, "unable to perform snappy decompression"); - - dim3 block(128); - dim3 grid(cudf::util::div_rounding_up_safe(num_blocks, static_cast(block.x))); - convert_nvcomp_status<<>>( - statuses_span, actual_uncompressed_sizes_span, comp_stat); -} - rmm::device_buffer reader::impl::decompress_stripe_data( cudf::detail::hostdevice_2dvector& chunks, const std::vector& stripe_data, @@ -396,9 +327,11 @@ rmm::device_buffer reader::impl::decompress_stripe_data( CUDF_EXPECTS(total_decomp_size > 0, "No decompressible data found"); rmm::device_buffer decomp_data(total_decomp_size, stream); - rmm::device_uvector inflate_in( + rmm::device_uvector> inflate_in( + num_compressed_blocks + num_uncompressed_blocks, stream); + rmm::device_uvector> inflate_out( num_compressed_blocks + num_uncompressed_blocks, stream); - rmm::device_uvector inflate_out(num_compressed_blocks, stream); + rmm::device_uvector inflate_stats(num_compressed_blocks, stream); // Parse again to populate the decompression input/output buffers size_t decomp_offset = 0; @@ -408,9 +341,11 @@ rmm::device_buffer reader::impl::decompress_stripe_data( for (size_t i = 0; i < compinfo.size(); ++i) { auto dst_base = static_cast(decomp_data.data()); compinfo[i].uncompressed_data = dst_base + decomp_offset; - compinfo[i].decctl = inflate_in.data() + start_pos; - compinfo[i].decstatus = inflate_out.data() + start_pos; - compinfo[i].copyctl = inflate_in.data() + start_pos_uncomp; + compinfo[i].dec_in_ctl = inflate_in.data() + start_pos; + compinfo[i].dec_out_ctl = inflate_out.data() + start_pos; + compinfo[i].decstatus = {inflate_stats.data() + start_pos, 
compinfo[i].num_compressed_blocks}; + compinfo[i].copy_in_ctl = inflate_in.data() + start_pos_uncomp; + compinfo[i].copy_out_ctl = inflate_out.data() + start_pos_uncomp; stream_info[i].dst_pos = decomp_offset; decomp_offset += compinfo[i].max_uncompressed_size; @@ -428,29 +363,36 @@ rmm::device_buffer reader::impl::decompress_stripe_data( // Dispatch batches of blocks to decompress if (num_compressed_blocks > 0) { - device_span inflate_out_view(inflate_out.data(), num_compressed_blocks); + device_span> inflate_in_view{inflate_in.data(), + num_compressed_blocks}; + device_span> inflate_out_view{inflate_out.data(), num_compressed_blocks}; switch (decompressor->GetKind()) { case orc::ZLIB: - CUDF_CUDA_TRY( - gpuinflate(inflate_in.data(), inflate_out.data(), num_compressed_blocks, 0, stream)); + gpuinflate( + inflate_in_view, inflate_out_view, inflate_stats, gzip_header_included::NO, stream); break; case orc::SNAPPY: if (nvcomp_integration::is_stable_enabled()) { - device_span inflate_in_view{inflate_in.data(), - num_compressed_blocks}; - snappy_decompress(inflate_in_view, inflate_out_view, max_uncomp_block_size, stream); + nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY, + inflate_in_view, + inflate_out_view, + inflate_stats, + max_uncomp_block_size, + stream); } else { - CUDF_CUDA_TRY( - gpu_unsnap(inflate_in.data(), inflate_out.data(), num_compressed_blocks, stream)); + gpu_unsnap(inflate_in_view, inflate_out_view, inflate_stats, stream); } break; default: CUDF_FAIL("Unexpected decompression dispatch"); break; } - decompress_check(inflate_out_view, any_block_failure.device_ptr(), stream); + decompress_check(inflate_stats, any_block_failure.device_ptr(), stream); } if (num_uncompressed_blocks > 0) { - CUDF_CUDA_TRY(gpu_copy_uncompressed_blocks( - inflate_in.data() + num_compressed_blocks, num_uncompressed_blocks, stream)); + device_span> copy_in_view{inflate_in.data() + num_compressed_blocks, + num_uncompressed_blocks}; + device_span> copy_out_view{inflate_out.data() + num_compressed_blocks, + num_uncompressed_blocks}; + gpu_copy_uncompressed_blocks(copy_in_view, copy_out_view, stream); } gpu::PostDecompressionReassemble(compinfo.device_ptr(), compinfo.size(), stream); @@ -1120,9 +1062,9 @@ table_with_metadata reader::impl::read(size_type skip_rows, if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] .source->is_device_read_preferred(len)) { read_tasks.push_back( - std::make_pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] - .source->device_read_async(offset, len, d_dst, stream), - len)); + std::pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] + .source->device_read_async(offset, len, d_dst, stream), + len)); } else { const auto buffer = diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu index b377a2e7076..3ddfebfbb24 100644 --- a/cpp/src/io/orc/stats_enc.cu +++ b/cpp/src/io/orc/stats_enc.cu @@ -81,8 +81,7 @@ __global__ void __launch_bounds__(block_size, 1) uint32_t stats_len = 0, stats_pos; uint32_t idx = start + t; if (idx < statistics_count) { - const stats_column_desc* col = groups[idx].col; - statistics_dtype dtype = col->stats_dtype; + statistics_dtype const dtype = groups[idx].stats_dtype; switch (dtype) { case dtype_bool: stats_len = pb_fldlen_common + pb_fld_hdrlen + pb_fldlen_bucket1; break; case dtype_int8: @@ -126,7 +125,7 @@ struct stats_state_s { uint8_t* end; ///< Output buffer end statistics_chunk chunk; statistics_merge_group group; - stats_column_desc col; + statistics_dtype stats_dtype; //!< 
Statistics data type for this column // ORC stats uint64_t numberOfValues; uint8_t hasNull; @@ -231,12 +230,12 @@ __global__ void __launch_bounds__(encode_threads_per_block) if (idx < statistics_count && t == 0) { s->chunk = chunks[idx]; s->group = groups[idx]; - s->col = *(s->group.col); + s->stats_dtype = s->group.stats_dtype; s->base = blob_bfr + s->group.start_chunk; s->end = blob_bfr + s->group.start_chunk + s->group.num_chunks; uint8_t* cur = pb_put_uint(s->base, 1, s->chunk.non_nulls); uint8_t* fld_start = cur; - switch (s->col.stats_dtype) { + switch (s->stats_dtype) { case dtype_int8: case dtype_int16: case dtype_int32: diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index f1d524058d2..3fe623be5b1 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -1141,8 +1141,9 @@ __global__ void __launch_bounds__(1024) * * @param[in] strm_desc StripeStream device array [stripe][stream] * @param[in] chunks EncChunk device array [rowgroup][column] - * @param[out] comp_in Per-block compression input parameters - * @param[out] comp_out Per-block compression status + * @param[out] inputs Per-block compression input buffers + * @param[out] outputs Per-block compression output buffers + * @param[out] statuses Per-block compression status * @param[in] compressed_bfr Compression output buffer * @param[in] comp_blk_size Compression block size * @param[in] max_comp_blk_size Max size of any block after compression @@ -1151,8 +1152,9 @@ __global__ void __launch_bounds__(1024) __global__ void __launch_bounds__(256) gpuInitCompressionBlocks(device_2dspan strm_desc, device_2dspan streams, // const? - device_span comp_in, - device_span comp_out, + device_span> inputs, + device_span> outputs, + device_span statuses, uint8_t* compressed_bfr, uint32_t comp_blk_size, uint32_t max_comp_blk_size) @@ -1175,16 +1177,11 @@ __global__ void __launch_bounds__(256) dst = compressed_bfr + ss.bfr_offset; num_blocks = (ss.stream_size > 0) ? 
(ss.stream_size - 1) / comp_blk_size + 1 : 1;
   for (uint32_t b = t; b < num_blocks; b += 256) {
-    gpu_inflate_input_s* blk_in   = &comp_in[ss.first_block + b];
-    gpu_inflate_status_s* blk_out = &comp_out[ss.first_block + b];
     uint32_t blk_size = min(comp_blk_size, ss.stream_size - min(b * comp_blk_size, ss.stream_size));
-    blk_in->srcDevice = src + b * comp_blk_size;
-    blk_in->srcSize   = blk_size;
-    blk_in->dstDevice = dst + b * (BLOCK_HEADER_SIZE + max_comp_blk_size) + BLOCK_HEADER_SIZE;
-    blk_in->dstSize   = max_comp_blk_size;
-    blk_out->bytes_written = blk_size;
-    blk_out->status        = 1;
-    blk_out->reserved      = 0;
+    inputs[ss.first_block + b]  = {src + b * comp_blk_size, blk_size};
+    outputs[ss.first_block + b] = {
+      dst + b * (BLOCK_HEADER_SIZE + max_comp_blk_size) + BLOCK_HEADER_SIZE, max_comp_blk_size};
+    statuses[ss.first_block + b] = {blk_size, 1, 0};
   }
 }

@@ -1194,8 +1191,9 @@
  *
  * @param[in,out] strm_desc StripeStream device array [stripe][stream]
  * @param[in] chunks EncChunk device array [rowgroup][column]
- * @param[in] comp_in Per-block compression input parameters
- * @param[in] comp_out Per-block compression status
+ * @param[in] inputs Per-block compression input buffers
+ * @param[in] outputs Per-block compression output buffers
+ * @param[in,out] statuses Per-block compression status
  * @param[in] compressed_bfr Compression output buffer
  * @param[in] comp_blk_size Compression block size
  * @param[in] max_comp_blk_size Max size of any block after compression
@@ -1203,8 +1201,9 @@
 // blockDim {1024,1,1}
 __global__ void __launch_bounds__(1024)
   gpuCompactCompressedBlocks(device_2dspan<StripeStream> strm_desc,
-                             device_span<gpu_inflate_input_s> comp_in,
-                             device_span<gpu_inflate_status_s> comp_out,
+                             device_span<device_span<uint8_t const> const> inputs,
+                             device_span<device_span<uint8_t> const> outputs,
+                             device_span<decompress_status> statuses,
                              uint8_t* compressed_bfr,
                              uint32_t comp_blk_size,
                              uint32_t max_comp_blk_size)
@@ -1228,21 +1227,21 @@
   b = 0;
   do {
     if (t == 0) {
-      gpu_inflate_input_s* blk_in   = &comp_in[ss.first_block + b];
-      gpu_inflate_status_s* blk_out = &comp_out[ss.first_block + b];
-      uint32_t src_len =
+      auto const src_len =
         min(comp_blk_size, ss.stream_size - min(b * comp_blk_size, ss.stream_size));
-      uint32_t dst_len = (blk_out->status == 0) ? blk_out->bytes_written : src_len;
-      uint32_t blk_size24;
+      auto dst_len = (statuses[ss.first_block + b].status == 0)
+                       ?
statuses[ss.first_block + b].bytes_written + : src_len; + uint32_t blk_size24{}; if (dst_len >= src_len) { // Copy from uncompressed source - src = static_cast(blk_in->srcDevice); - blk_out->bytes_written = src_len; - dst_len = src_len; - blk_size24 = dst_len * 2 + 1; + src = inputs[ss.first_block + b].data(); + statuses[ss.first_block + b].bytes_written = src_len; + dst_len = src_len; + blk_size24 = dst_len * 2 + 1; } else { // Compressed block - src = static_cast(blk_in->dstDevice); + src = outputs[ss.first_block + b].data(); blk_size24 = dst_len * 2 + 0; } dst[0] = static_cast(blk_size24 >> 0); @@ -1311,14 +1310,21 @@ void CompressOrcDataStreams(uint8_t* compressed_data, uint32_t max_comp_blk_size, device_2dspan strm_desc, device_2dspan enc_streams, - device_span comp_in, - device_span comp_out, + device_span> comp_in, + device_span> comp_out, + device_span comp_stat, rmm::cuda_stream_view stream) { dim3 dim_block_init(256, 1); dim3 dim_grid(strm_desc.size().first, strm_desc.size().second); - gpuInitCompressionBlocks<<>>( - strm_desc, enc_streams, comp_in, comp_out, compressed_data, comp_blk_size, max_comp_blk_size); + gpuInitCompressionBlocks<<>>(strm_desc, + enc_streams, + comp_in, + comp_out, + comp_stat, + compressed_data, + comp_blk_size, + max_comp_blk_size); if (compression == SNAPPY) { if (detail::nvcomp_integration::is_stable_enabled()) { try { @@ -1336,15 +1342,18 @@ void CompressOrcDataStreams(uint8_t* compressed_data, rmm::device_uvector compressed_bytes_written(num_compressed_blocks, stream); auto comp_it = thrust::make_zip_iterator(uncompressed_data_ptrs.begin(), - uncompressed_data_sizes.begin(), - compressed_data_ptrs.begin()); + uncompressed_data_sizes.begin()); + thrust::transform( + rmm::exec_policy(stream), + comp_in.begin(), + comp_in.end(), + comp_it, + [] __device__(auto const& in) { return thrust::make_tuple(in.data(), in.size()); }); thrust::transform(rmm::exec_policy(stream), - comp_in.begin(), - comp_in.end(), - comp_it, - [] __device__(gpu_inflate_input_s in) { - return thrust::make_tuple(in.srcDevice, in.srcSize, in.dstDevice); - }); + comp_out.begin(), + comp_out.end(), + compressed_data_ptrs.begin(), + [] __device__(auto const& out) { return out.data(); }); nvcomp_status = nvcompBatchedSnappyCompressAsync(uncompressed_data_ptrs.data(), uncompressed_data_sizes.data(), max_comp_blk_size, @@ -1361,9 +1370,9 @@ void CompressOrcDataStreams(uint8_t* compressed_data, thrust::transform(rmm::exec_policy(stream), compressed_bytes_written.begin(), compressed_bytes_written.end(), - comp_out.begin(), + comp_stat.begin(), [] __device__(size_t size) { - gpu_inflate_status_s status{}; + decompress_status status{}; status.bytes_written = size; return status; }); @@ -1371,18 +1380,18 @@ void CompressOrcDataStreams(uint8_t* compressed_data, // If we reach this then there was an error in compressing so set an error status for each // block thrust::for_each(rmm::exec_policy(stream), - comp_out.begin(), - comp_out.end(), - [] __device__(gpu_inflate_status_s & stat) { stat.status = 1; }); + comp_stat.begin(), + comp_stat.end(), + [] __device__(decompress_status & stat) { stat.status = 1; }); }; } else { - gpu_snap(comp_in.data(), comp_out.data(), num_compressed_blocks, stream); + gpu_snap(comp_in, comp_out, comp_stat, stream); } } dim3 dim_block_compact(1024, 1); gpuCompactCompressedBlocks<<>>( - strm_desc, comp_in, comp_out, compressed_data, comp_blk_size, max_comp_blk_size); + strm_desc, comp_in, comp_out, comp_stat, compressed_data, comp_blk_size, max_comp_blk_size); } } // 
namespace gpu diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index 276a1f49abf..e44ca10922f 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -26,9 +26,16 @@ namespace cudf { namespace io { namespace orc { namespace gpu { + +struct comp_in_out { + uint8_t const* in_ptr; + size_t in_size; + uint8_t* out_ptr; + size_t out_size; +}; struct compressed_stream_s { CompressedStreamInfo info; - gpu_inflate_input_s ctl; + comp_in_out ctl; }; // blockDim {128,1,1} @@ -57,7 +64,8 @@ extern "C" __global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeDat uint32_t block_len = shuffle((lane_id == 0) ? cur[0] | (cur[1] << 8) | (cur[2] << 16) : 0); uint32_t is_uncompressed = block_len & 1; uint32_t uncompressed_size; - gpu_inflate_input_s* init_ctl = nullptr; + device_span* init_in_ctl = nullptr; + device_span* init_out_ctl = nullptr; block_len >>= 1; cur += BLOCK_HEADER_SIZE; if (block_len > block_size || cur + block_len > end) { @@ -82,27 +90,34 @@ extern "C" __global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeDat uncompressed[max_uncompressed_size + lane_id] = cur[lane_id]; } } else { - init_ctl = s->info.copyctl; - init_ctl = (init_ctl && num_uncompressed_blocks < s->info.num_uncompressed_blocks) - ? &init_ctl[num_uncompressed_blocks] - : nullptr; + init_in_ctl = + (s->info.copy_in_ctl && num_uncompressed_blocks < s->info.num_uncompressed_blocks) + ? &s->info.copy_in_ctl[num_uncompressed_blocks] + : nullptr; + init_out_ctl = + (s->info.copy_out_ctl && num_uncompressed_blocks < s->info.num_uncompressed_blocks) + ? &s->info.copy_out_ctl[num_uncompressed_blocks] + : nullptr; num_uncompressed_blocks++; } } else { - init_ctl = s->info.decctl; - init_ctl = (init_ctl && num_compressed_blocks < s->info.num_compressed_blocks) - ? &init_ctl[num_compressed_blocks] - : nullptr; + init_in_ctl = (s->info.dec_in_ctl && num_compressed_blocks < s->info.num_compressed_blocks) + ? &s->info.dec_in_ctl[num_compressed_blocks] + : nullptr; + init_out_ctl = + (s->info.dec_out_ctl && num_compressed_blocks < s->info.num_compressed_blocks) + ? 
&s->info.dec_out_ctl[num_compressed_blocks] + : nullptr; num_compressed_blocks++; } - if (!lane_id && init_ctl) { - s->ctl.srcDevice = const_cast(cur); - s->ctl.srcSize = block_len; - s->ctl.dstDevice = uncompressed + max_uncompressed_size; - s->ctl.dstSize = uncompressed_size; + if (!lane_id && init_in_ctl) { + s->ctl = {cur, block_len, uncompressed + max_uncompressed_size, uncompressed_size}; } __syncwarp(); - if (init_ctl && lane_id == 0) *init_ctl = s->ctl; + if (init_in_ctl && lane_id == 0) { + *init_in_ctl = {s->ctl.in_ptr, s->ctl.in_size}; + *init_out_ctl = {s->ctl.out_ptr, s->ctl.out_size}; + } cur += block_len; max_uncompressed_size += uncompressed_size; max_uncompressed_block_size = max(max_uncompressed_block_size, uncompressed_size); @@ -137,14 +152,14 @@ extern "C" __global__ void __launch_bounds__(128, 8) s->info.num_compressed_blocks + s->info.num_uncompressed_blocks > 0 && s->info.max_uncompressed_size > 0) { // Walk through the compressed blocks - const uint8_t* cur = s->info.compressed_data; - const uint8_t* end = cur + s->info.compressed_data_size; - const gpu_inflate_input_s* dec_in = s->info.decctl; - const gpu_inflate_status_s* dec_out = s->info.decstatus; - uint8_t* uncompressed_actual = s->info.uncompressed_data; - uint8_t* uncompressed_estimated = uncompressed_actual; - uint32_t num_compressed_blocks = 0; - uint32_t max_compressed_blocks = s->info.num_compressed_blocks; + const uint8_t* cur = s->info.compressed_data; + const uint8_t* end = cur + s->info.compressed_data_size; + auto dec_out = s->info.dec_out_ctl; + auto dec_status = s->info.decstatus; + uint8_t* uncompressed_actual = s->info.uncompressed_data; + uint8_t* uncompressed_estimated = uncompressed_actual; + uint32_t num_compressed_blocks = 0; + uint32_t max_compressed_blocks = s->info.num_compressed_blocks; while (cur + BLOCK_HEADER_SIZE < end) { uint32_t block_len = shuffle((lane_id == 0) ? cur[0] | (cur[1] << 8) | (cur[2] << 16) : 0); @@ -158,14 +173,14 @@ extern "C" __global__ void __launch_bounds__(128, 8) uncompressed_size_actual = block_len; } else { if (num_compressed_blocks > max_compressed_blocks) { break; } - if (shuffle((lane_id == 0) ? dec_out[num_compressed_blocks].status : 0) != 0) { + if (shuffle((lane_id == 0) ? dec_status[num_compressed_blocks].status : 0) != 0) { // Decompression failed, not much point in doing anything else break; } - uncompressed_size_est = - shuffle((lane_id == 0) ? *(const uint32_t*)&dec_in[num_compressed_blocks].dstSize : 0); - uncompressed_size_actual = shuffle( - (lane_id == 0) ? *(const uint32_t*)&dec_out[num_compressed_blocks].bytes_written : 0); + uint32_t const dst_size = dec_out[num_compressed_blocks].size(); + uncompressed_size_est = shuffle((lane_id == 0) ? dst_size : 0); + uint32_t const bytes_written = dec_status[num_compressed_blocks].bytes_written; + uncompressed_size_actual = shuffle((lane_id == 0) ? bytes_written : 0); } // In practice, this should never happen with a well-behaved writer, as we would expect the // uncompressed size to always be equal to the compression block size except for the last @@ -360,11 +375,11 @@ static __device__ void gpuMapRowIndexToUncompressed(rowindex_state_s* s, if (strm_len > 0) { int32_t compressed_offset = (t < num_rowgroups) ? 
s->compressed_offset[t][ci_id] : 0; if (compressed_offset > 0) { - const uint8_t* start = s->strm_info[ci_id].compressed_data; - const uint8_t* cur = start; - const uint8_t* end = cur + s->strm_info[ci_id].compressed_data_size; - gpu_inflate_status_s* decstatus = s->strm_info[ci_id].decstatus; - uint32_t uncomp_offset = 0; + const uint8_t* start = s->strm_info[ci_id].compressed_data; + const uint8_t* cur = start; + const uint8_t* end = cur + s->strm_info[ci_id].compressed_data_size; + auto decstatus = s->strm_info[ci_id].decstatus.data(); + uint32_t uncomp_offset = 0; for (;;) { uint32_t block_len, is_uncompressed; diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index d0c1cea97a8..ecd2d6f6ec0 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -190,6 +190,7 @@ class orc_column_view { void add_child(uint32_t child_idx) { children.emplace_back(child_idx); } + auto type() const noexcept { return cudf_column.type(); } auto is_string() const noexcept { return cudf_column.type().id() == type_id::STRING; } void set_dict_stride(size_t stride) noexcept { _dict_stride = stride; } [[nodiscard]] auto dict_stride() const noexcept { return _dict_stride; } @@ -1073,23 +1074,47 @@ void set_stat_desc_leaf_cols(device_span columns, [=] __device__(auto idx) { stat_desc[idx].leaf_column = &columns[idx]; }); } -writer::impl::encoded_statistics writer::impl::gather_statistic_blobs( - statistics_freq stats_freq, +hostdevice_vector allocate_and_encode_blobs( + hostdevice_vector& stats_merge_groups, + rmm::device_uvector& stat_chunks, + int num_stat_blobs, + rmm::cuda_stream_view stream) +{ + // figure out the buffer size needed for protobuf format + gpu::orc_init_statistics_buffersize( + stats_merge_groups.device_ptr(), stat_chunks.data(), num_stat_blobs, stream); + auto max_blobs = stats_merge_groups.element(num_stat_blobs - 1, stream); + + hostdevice_vector blobs(max_blobs.start_chunk + max_blobs.num_chunks, stream); + gpu::orc_encode_statistics(blobs.device_ptr(), + stats_merge_groups.device_ptr(), + stat_chunks.data(), + num_stat_blobs, + stream); + stats_merge_groups.device_to_host(stream); + blobs.device_to_host(stream, true); + return blobs; +} + +writer::impl::intermediate_statistics writer::impl::gather_statistic_blobs( + statistics_freq const stats_freq, orc_table_view const& orc_table, file_segmentation const& segmentation) { auto const num_rowgroup_blobs = segmentation.rowgroups.count(); auto const num_stripe_blobs = segmentation.num_stripes() * orc_table.num_columns(); - auto const num_file_blobs = orc_table.num_columns(); - auto const num_stat_blobs = num_rowgroup_blobs + num_stripe_blobs + num_file_blobs; auto const are_statistics_enabled = stats_freq != statistics_freq::STATISTICS_NONE; - if (not are_statistics_enabled or num_stat_blobs == 0) { return {}; } + if (not are_statistics_enabled or num_rowgroup_blobs + num_stripe_blobs == 0) { + return writer::impl::intermediate_statistics{stream}; + } hostdevice_vector stat_desc(orc_table.num_columns(), stream); - hostdevice_vector stat_merge(num_stat_blobs, stream); - auto rowgroup_stat_merge = stat_merge.host_ptr(); - auto stripe_stat_merge = rowgroup_stat_merge + num_rowgroup_blobs; - auto file_stat_merge = stripe_stat_merge + num_stripe_blobs; + hostdevice_vector rowgroup_merge(num_rowgroup_blobs, stream); + hostdevice_vector stripe_merge(num_stripe_blobs, stream); + std::vector col_stats_dtypes; + std::vector col_types; + auto rowgroup_stat_merge = rowgroup_merge.host_ptr(); + auto 
stripe_stat_merge = stripe_merge.host_ptr(); for (auto const& column : orc_table.columns) { stats_column_desc* desc = &stat_desc[column.index()]; @@ -1121,82 +1146,148 @@ writer::impl::encoded_statistics writer::impl::gather_statistic_blobs( } else { desc->ts_scale = 0; } + col_stats_dtypes.push_back(desc->stats_dtype); + col_types.push_back(column.type()); for (auto const& stripe : segmentation.stripes) { - auto& grp = stripe_stat_merge[column.index() * segmentation.num_stripes() + stripe.id]; - grp.col = stat_desc.device_ptr(column.index()); + auto& grp = stripe_stat_merge[column.index() * segmentation.num_stripes() + stripe.id]; + grp.col_dtype = column.type(); + grp.stats_dtype = desc->stats_dtype; grp.start_chunk = static_cast(column.index() * segmentation.num_rowgroups() + stripe.first); grp.num_chunks = stripe.size; - for (auto rg_idx_it = stripe.cbegin(); rg_idx_it < stripe.cend(); ++rg_idx_it) { + for (auto rg_idx_it = stripe.cbegin(); rg_idx_it != stripe.cend(); ++rg_idx_it) { auto& rg_grp = rowgroup_stat_merge[column.index() * segmentation.num_rowgroups() + *rg_idx_it]; - rg_grp.col = stat_desc.device_ptr(column.index()); + rg_grp.col_dtype = column.type(); + rg_grp.stats_dtype = desc->stats_dtype; rg_grp.start_chunk = *rg_idx_it; rg_grp.num_chunks = 1; } } - auto col_stats = &file_stat_merge[column.index()]; - col_stats->col = stat_desc.device_ptr(column.index()); - col_stats->start_chunk = static_cast(column.index() * segmentation.num_stripes()); - col_stats->num_chunks = static_cast(segmentation.num_stripes()); } stat_desc.host_to_device(stream); - stat_merge.host_to_device(stream); + rowgroup_merge.host_to_device(stream); + stripe_merge.host_to_device(stream); set_stat_desc_leaf_cols(orc_table.d_columns, stat_desc, stream); - rmm::device_uvector stat_chunks(num_stat_blobs, stream); - auto rowgroup_stat_chunks = stat_chunks.data(); - auto stripe_stat_chunks = rowgroup_stat_chunks + num_rowgroup_blobs; - auto file_stat_chunks = stripe_stat_chunks + num_stripe_blobs; + // The rowgroup stat chunks are written out in each stripe. The stripe and file-level chunks are + // written in the footer. To prevent persisting the rowgroup stat chunks across multiple write + // calls in a chunked write situation, these allocations are split up so stripe data can persist + // until the footer is written and rowgroup data can be freed after being written to the stripe. 
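A minimal sketch of the allocation split described in the comment above, assuming simplified stand-in types (plain std::vector in place of rmm::device_uvector and hostdevice_vector, and a dummy stat_chunk struct); this is illustrative, not the writer's actual code:

    #include <utility>
    #include <vector>

    struct stat_chunk {  // stand-in for statistics_chunk
      int min, max, count;
    };

    // Products of one write() call: rowgroup stats are encoded into the stripe
    // immediately; stripe stats must outlive the call.
    struct per_write_stats {
      std::vector<stat_chunk> rowgroup_chunks;  // freed once the stripe is written
      std::vector<stat_chunk> stripe_chunks;    // persisted until the footer is written
    };

    // Accumulated across chunked write() calls, consumed at close().
    struct persisted_stats {
      std::vector<std::vector<stat_chunk>> stripe_chunks;
    };

    int main()
    {
      persisted_stats persisted;
      for (int write_call = 0; write_call < 3; ++write_call) {
        per_write_stats stats{std::vector<stat_chunk>(8), std::vector<stat_chunk>(2)};
        // ... rowgroup_chunks are encoded into the stripe here ...
        persisted.stripe_chunks.emplace_back(std::move(stats.stripe_chunks));
      }  // stats.rowgroup_chunks is freed here, at the end of each write
      // at close(): merge persisted.stripe_chunks into the file-level statistics
      return 0;
    }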
+ rmm::device_uvector rowgroup_chunks(num_rowgroup_blobs, stream); + rmm::device_uvector stripe_chunks(num_stripe_blobs, stream); + auto rowgroup_stat_chunks = rowgroup_chunks.data(); + auto stripe_stat_chunks = stripe_chunks.data(); - rmm::device_uvector stat_groups(num_rowgroup_blobs, stream); + rmm::device_uvector rowgroup_groups(num_rowgroup_blobs, stream); gpu::orc_init_statistics_groups( - stat_groups.data(), stat_desc.device_ptr(), segmentation.rowgroups, stream); + rowgroup_groups.data(), stat_desc.device_ptr(), segmentation.rowgroups, stream); detail::calculate_group_statistics( - stat_chunks.data(), stat_groups.data(), num_rowgroup_blobs, stream); + rowgroup_chunks.data(), rowgroup_groups.data(), num_rowgroup_blobs, stream); detail::merge_group_statistics( - stripe_stat_chunks, - rowgroup_stat_chunks, - stat_merge.device_ptr(num_rowgroup_blobs), - num_stripe_blobs, - stream); + stripe_stat_chunks, rowgroup_stat_chunks, stripe_merge.device_ptr(), num_stripe_blobs, stream); - detail::merge_group_statistics( - file_stat_chunks, - stripe_stat_chunks, - stat_merge.device_ptr(num_rowgroup_blobs + num_stripe_blobs), - num_file_blobs, - stream); - gpu::orc_init_statistics_buffersize( - stat_merge.device_ptr(), stat_chunks.data(), num_stat_blobs, stream); - stat_merge.device_to_host(stream, true); + // With chunked writes, the orc table can be deallocated between write calls. + // This forces our hand to encode row groups and stripes only in this stage and further + // we have to persist any data from the table that we need later. The + // minimum and maximum string inside the `str_val` structure inside `statistics_val` in + // `statistic_chunk` that are copies of the largest and smallest strings in the row group, + // or stripe need to be persisted between write calls. We write rowgroup data with each + // stripe and then save each stripe's stats until the end where we merge those all together + // to get the file-level stats. - hostdevice_vector blobs( - stat_merge[num_stat_blobs - 1].start_chunk + stat_merge[num_stat_blobs - 1].num_chunks, stream); // Skip rowgroup blobs when encoding, if chosen granularity is coarser than "ROW_GROUP". auto const is_granularity_rowgroup = stats_freq == ORC_STATISTICS_ROW_GROUP; - auto const num_skip = is_granularity_rowgroup ? 
0 : num_rowgroup_blobs; - gpu::orc_encode_statistics(blobs.device_ptr(), - stat_merge.device_ptr(num_skip), - stat_chunks.data() + num_skip, - num_stat_blobs - num_skip, - stream); - stat_merge.device_to_host(stream); - blobs.device_to_host(stream, true); - + // we have to encode the row groups now IF they are being written out auto rowgroup_blobs = [&]() -> std::vector { if (not is_granularity_rowgroup) { return {}; } + + hostdevice_vector blobs = + allocate_and_encode_blobs(rowgroup_merge, rowgroup_chunks, num_rowgroup_blobs, stream); + std::vector rowgroup_blobs(num_rowgroup_blobs); for (size_t i = 0; i < num_rowgroup_blobs; i++) { - auto const stat_begin = blobs.host_ptr(rowgroup_stat_merge[i].start_chunk); - auto const stat_end = stat_begin + rowgroup_stat_merge[i].num_chunks; + auto const stat_begin = blobs.host_ptr(rowgroup_merge[i].start_chunk); + auto const stat_end = stat_begin + rowgroup_merge[i].num_chunks; rowgroup_blobs[i].assign(stat_begin, stat_end); } return rowgroup_blobs; }(); + return {std::move(rowgroup_blobs), + std::move(stripe_chunks), + std::move(stripe_merge), + std::move(col_stats_dtypes), + std::move(col_types)}; +} + +writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs( + int num_stripes, writer::impl::persisted_statistics& per_chunk_stats) +{ + auto stripe_size_iter = thrust::make_transform_iterator(per_chunk_stats.stripe_stat_merge.begin(), + [](auto const& i) { return i.size(); }); + + auto const num_columns = per_chunk_stats.col_types.size(); + auto const num_stripe_blobs = + thrust::reduce(stripe_size_iter, stripe_size_iter + per_chunk_stats.stripe_stat_merge.size()); + auto const num_file_blobs = num_columns; + auto const num_blobs = single_write_mode ? static_cast(num_stripe_blobs + num_file_blobs) + : static_cast(num_stripe_blobs); + + if (num_stripe_blobs == 0) { return {}; } + + // merge the stripe persisted data and add file data + rmm::device_uvector stat_chunks(num_blobs, stream); + hostdevice_vector stats_merge(num_blobs, stream); + + size_t chunk_offset = 0; + size_t merge_offset = 0; + for (size_t i = 0; i < per_chunk_stats.stripe_stat_chunks.size(); ++i) { + auto chunk_bytes = per_chunk_stats.stripe_stat_chunks[i].size() * sizeof(statistics_chunk); + auto merge_bytes = per_chunk_stats.stripe_stat_merge[i].size() * sizeof(statistics_merge_group); + cudaMemcpyAsync(stat_chunks.data() + chunk_offset, + per_chunk_stats.stripe_stat_chunks[i].data(), + chunk_bytes, + cudaMemcpyDeviceToDevice, + stream); + cudaMemcpyAsync(stats_merge.device_ptr() + merge_offset, + per_chunk_stats.stripe_stat_merge[i].device_ptr(), + merge_bytes, + cudaMemcpyDeviceToDevice, + stream); + chunk_offset += per_chunk_stats.stripe_stat_chunks[i].size(); + merge_offset += per_chunk_stats.stripe_stat_merge[i].size(); + } + + if (single_write_mode) { + std::vector file_stats_merge(num_file_blobs); + for (auto i = 0u; i < num_file_blobs; ++i) { + auto col_stats = &file_stats_merge[i]; + col_stats->col_dtype = per_chunk_stats.col_types[i]; + col_stats->stats_dtype = per_chunk_stats.stats_dtypes[i]; + col_stats->start_chunk = static_cast(i * num_stripes); + col_stats->num_chunks = static_cast(num_stripes); + } + + auto d_file_stats_merge = stats_merge.device_ptr(num_stripe_blobs); + cudaMemcpyAsync(d_file_stats_merge, + file_stats_merge.data(), + num_file_blobs * sizeof(statistics_merge_group), + cudaMemcpyHostToDevice, + stream); + + auto file_stat_chunks = stat_chunks.data() + num_stripe_blobs; + detail::merge_group_statistics( + file_stat_chunks, 
stat_chunks.data(), d_file_stats_merge, num_file_blobs, stream); + } + + hostdevice_vector blobs = + allocate_and_encode_blobs(stats_merge, stat_chunks, num_blobs, stream); + + auto stripe_stat_merge = stats_merge.host_ptr(); + std::vector stripe_blobs(num_stripe_blobs); for (size_t i = 0; i < num_stripe_blobs; i++) { auto const stat_begin = blobs.host_ptr(stripe_stat_merge[i].start_chunk); @@ -1204,13 +1295,17 @@ writer::impl::encoded_statistics writer::impl::gather_statistic_blobs( stripe_blobs[i].assign(stat_begin, stat_end); } - std::vector file_blobs(num_file_blobs); - for (size_t i = 0; i < num_file_blobs; i++) { - auto const stat_begin = blobs.host_ptr(file_stat_merge[i].start_chunk); - auto const stat_end = stat_begin + file_stat_merge[i].num_chunks; - file_blobs[i].assign(stat_begin, stat_end); + std::vector file_blobs(single_write_mode ? num_file_blobs : 0); + if (single_write_mode) { + auto file_stat_merge = stats_merge.host_ptr(num_stripe_blobs); + for (auto i = 0u; i < num_file_blobs; i++) { + auto const stat_begin = blobs.host_ptr(file_stat_merge[i].start_chunk); + auto const stat_end = stat_begin + file_stat_merge[i].num_chunks; + file_blobs[i].assign(stat_begin, stat_end); + } } - return {std::move(rowgroup_blobs), std::move(stripe_blobs), std::move(file_blobs)}; + + return {std::move(stripe_blobs), std::move(file_blobs)}; } void writer::impl::write_index_stream(int32_t stripe_id, @@ -1219,7 +1314,7 @@ void writer::impl::write_index_stream(int32_t stripe_id, file_segmentation const& segmentation, host_2dspan enc_streams, host_2dspan strm_desc, - host_span comp_out, + host_span comp_out, std::vector const& rg_stats, StripeInformation* stripe, orc_streams* streams, @@ -1955,8 +2050,9 @@ void writer::impl::write(table_view const& table) // Compress the data streams rmm::device_buffer compressed_data(compressed_bfr_size, stream); - hostdevice_vector comp_out(num_compressed_blocks, stream); - hostdevice_vector comp_in(num_compressed_blocks, stream); + hostdevice_vector> comp_in(num_compressed_blocks, stream); + hostdevice_vector> comp_out(num_compressed_blocks, stream); + hostdevice_vector comp_stats(num_compressed_blocks, stream); if (compression_kind_ != NONE) { strm_descs.host_to_device(stream); gpu::CompressOrcDataStreams(static_cast(compressed_data.data()), @@ -1968,14 +2064,25 @@ void writer::impl::write(table_view const& table) enc_data.streams, comp_in, comp_out, + comp_stats, stream); strm_descs.device_to_host(stream); - comp_out.device_to_host(stream, true); + comp_stats.device_to_host(stream, true); } ProtobufWriter pbw_(&buffer_); - auto const statistics = gather_statistic_blobs(stats_freq_, orc_table, segmentation); + auto intermediate_stats = gather_statistic_blobs(stats_freq_, orc_table, segmentation); + + if (intermediate_stats.stripe_stat_chunks.size() > 0) { + persisted_stripe_statistics.stripe_stat_chunks.emplace_back( + std::move(intermediate_stats.stripe_stat_chunks)); + persisted_stripe_statistics.stripe_stat_merge.emplace_back( + std::move(intermediate_stats.stripe_stat_merge)); + persisted_stripe_statistics.stats_dtypes = std::move(intermediate_stats.stats_dtypes); + persisted_stripe_statistics.col_types = std::move(intermediate_stats.col_types); + persisted_stripe_statistics.num_rows = orc_table.num_rows(); + } // Write stripes std::vector> write_tasks; @@ -1992,8 +2099,8 @@ void writer::impl::write(table_view const& table) segmentation, enc_data.streams, strm_descs, - comp_out, - statistics.rowgroup_level, + comp_stats, + 
intermediate_stats.rowgroup_blobs, &stripe, &streams, &pbw_); @@ -2034,41 +2141,10 @@ void writer::impl::write(table_view const& table) } out_sink_->host_write(buffer_.data(), buffer_.size()); } + for (auto const& task : write_tasks) { task.wait(); } - - // File-level statistics - // NOTE: Excluded from chunked write mode to avoid the need for merging stats across calls - if (single_write_mode and not statistics.file_level.empty()) { - // First entry contains total number of rows - buffer_.resize(0); - pbw_.put_uint(encode_field_number(1)); - pbw_.put_uint(num_rows); - ff.statistics.reserve(1 + orc_table.num_columns()); - ff.statistics.emplace_back(std::move(buffer_)); - // Add file stats, stored after stripe stats in `column_stats` - ff.statistics.insert(ff.statistics.end(), - std::make_move_iterator(statistics.file_level.begin()), - std::make_move_iterator(statistics.file_level.end())); - } - // Stripe-level statistics - if (not statistics.stripe_level.empty()) { - size_t first_stripe = md.stripeStats.size(); - md.stripeStats.resize(first_stripe + stripes.size()); - for (size_t stripe_id = 0; stripe_id < stripes.size(); stripe_id++) { - md.stripeStats[first_stripe + stripe_id].colStats.resize(1 + orc_table.num_columns()); - buffer_.resize(0); - pbw_.put_uint(encode_field_number(1)); - pbw_.put_uint(stripes[stripe_id].numberOfRows); - md.stripeStats[first_stripe + stripe_id].colStats[0] = std::move(buffer_); - for (size_t col_idx = 0; col_idx < orc_table.num_columns(); col_idx++) { - size_t idx = stripes.size() * col_idx + stripe_id; - md.stripeStats[first_stripe + stripe_id].colStats[1 + col_idx] = - std::move(statistics.stripe_level[idx]); - } - } - } } if (ff.headerLength == 0) { // First call @@ -2125,6 +2201,40 @@ void writer::impl::close() ProtobufWriter pbw_(&buffer_); PostScript ps; + auto const statistics = finish_statistic_blobs(ff.stripes.size(), persisted_stripe_statistics); + + // File-level statistics + if (single_write_mode and not statistics.file_level.empty()) { + buffer_.resize(0); + pbw_.put_uint(encode_field_number(1)); + pbw_.put_uint(persisted_stripe_statistics.num_rows); + // First entry contains total number of rows + ff.statistics.reserve(ff.types.size()); + ff.statistics.emplace_back(std::move(buffer_)); + // Add file stats, stored after stripe stats in `column_stats` + ff.statistics.insert(ff.statistics.end(), + std::make_move_iterator(statistics.file_level.begin()), + std::make_move_iterator(statistics.file_level.end())); + } + + // Stripe-level statistics + if (not statistics.stripe_level.empty()) { + md.stripeStats.resize(ff.stripes.size()); + for (size_t stripe_id = 0; stripe_id < ff.stripes.size(); stripe_id++) { + md.stripeStats[stripe_id].colStats.resize(ff.types.size()); + buffer_.resize(0); + pbw_.put_uint(encode_field_number(1)); + pbw_.put_uint(ff.stripes[stripe_id].numberOfRows); + md.stripeStats[stripe_id].colStats[0] = std::move(buffer_); + for (size_t col_idx = 0; col_idx < ff.types.size() - 1; col_idx++) { + size_t idx = ff.stripes.size() * col_idx + stripe_id; + md.stripeStats[stripe_id].colStats[1 + col_idx] = std::move(statistics.stripe_level[idx]); + } + } + } + + persisted_stripe_statistics.clear(); + ff.contentLength = out_sink_->bytes_written(); std::transform( kv_meta.begin(), kv_meta.end(), std::back_inserter(ff.metadata), [&](auto const& udata) { diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index b3662bf309f..d823c73007f 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ 
-286,24 +286,84 @@ class writer::impl { hostdevice_2dvector* enc_streams, hostdevice_2dvector* strm_desc); - struct encoded_statistics { - std::vector rowgroup_level; + /** + * @brief Statistics data stored between calls to write for chunked writes + * + */ + struct intermediate_statistics { + explicit intermediate_statistics(rmm::cuda_stream_view stream) + : stripe_stat_chunks(0, stream){}; + intermediate_statistics(std::vector rb, + rmm::device_uvector sc, + hostdevice_vector smg, + std::vector sdt, + std::vector sct) + : rowgroup_blobs(std::move(rb)), + stripe_stat_chunks(std::move(sc)), + stripe_stat_merge(std::move(smg)), + stats_dtypes(std::move(sdt)), + col_types(std::move(sct)){}; + + // blobs for the rowgroups and stripes. Not persisted + std::vector rowgroup_blobs; + + rmm::device_uvector stripe_stat_chunks; + hostdevice_vector stripe_stat_merge; + std::vector stats_dtypes; + std::vector col_types; + }; + + /** + * @brief used for chunked writes to persist data between calls to write. + * + */ + struct persisted_statistics { + void clear() + { + stripe_stat_chunks.clear(); + stripe_stat_merge.clear(); + stats_dtypes.clear(); + col_types.clear(); + num_rows = 0; + } + + std::vector> stripe_stat_chunks; + std::vector> stripe_stat_merge; + std::vector stats_dtypes; + std::vector col_types; + int num_rows = 0; + }; + + /** + * @brief Protobuf encoded statistics created at file close + * + */ + struct encoded_footer_statistics { std::vector stripe_level; std::vector file_level; }; /** - * @brief Returns column statistics encoded in ORC protobuf format. + * @brief Returns column statistics in an intermediate format. * * @param statistics_freq Frequency of statistics to be included in the output file * @param orc_table Table information to be written - * @param columns List of columns * @param segmentation stripe and rowgroup ranges - * @return The statistic blobs + * @return The statistic information + */ + intermediate_statistics gather_statistic_blobs(statistics_freq const statistics_freq, + orc_table_view const& orc_table, + file_segmentation const& segmentation); + + /** + * @brief Returns column statistics encoded in ORC protobuf format stored in the footer. + * + * @param num_stripes number of stripes in the data + * @param incoming_stats intermediate statistics returned from `gather_statistic_blobs` + * @return The encoded statistic blobs */ - encoded_statistics gather_statistic_blobs(statistics_freq statistics_freq, - orc_table_view const& orc_table, - file_segmentation const& segmentation); + encoded_footer_statistics finish_statistic_blobs( + int num_stripes, writer::impl::persisted_statistics& incoming_stats); /** * @brief Writes the specified column's row index stream. 
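Taken together, gather_statistic_blobs and finish_statistic_blobs give chunked writes the call shape sketched below (a schematic, self-contained model; the string "blobs" and the gather/finish helper names are illustrative stand-ins for the real device-side chunks and protobuf encoding):

    #include <string>
    #include <utility>
    #include <vector>

    struct intermediate_stats {  // modeled on the result of gather_statistic_blobs()
      std::vector<std::string> rowgroup_blobs;  // written out with the current stripes
      std::vector<std::string> stripe_state;    // saved for the footer
    };

    struct footer_stats {  // modeled on the result of finish_statistic_blobs()
      std::vector<std::string> stripe_level, file_level;
    };

    intermediate_stats gather(int num_rowgroups, int num_stripes)
    {
      return {std::vector<std::string>(num_rowgroups, "rg-blob"),
              std::vector<std::string>(num_stripes, "stripe-state")};
    }

    footer_stats finish(std::vector<std::vector<std::string>> const& persisted)
    {
      footer_stats out;
      for (auto const& write : persisted)
        out.stripe_level.insert(out.stripe_level.end(), write.begin(), write.end());
      out.file_level.push_back("file stats merged from all stripe states");
      return out;
    }

    int main()
    {
      std::vector<std::vector<std::string>> persisted_stripe_statistics;
      for (int i = 0; i < 2; ++i) {  // each chunked write() call
        auto stats = gather(/*num_rowgroups=*/4, /*num_stripes=*/1);
        // rowgroup_blobs are emitted into the stripes here ...
        persisted_stripe_statistics.emplace_back(std::move(stats.stripe_state));
      }
      auto footer = finish(persisted_stripe_statistics);  // at close()
      return 0;
    }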
@@ -326,7 +386,7 @@ class writer::impl { file_segmentation const& segmentation, host_2dspan enc_streams, host_2dspan strm_desc, - host_span comp_out, + host_span comp_out, std::vector const& rg_stats, StripeInformation* stripe, orc_streams* streams, @@ -384,6 +444,8 @@ class writer::impl { std::map kv_meta; // to track if the output has been written to sink bool closed = false; + // statistics data saved between calls to write before a close writes out the statistics + persisted_statistics persisted_stripe_statistics; std::vector buffer_; std::unique_ptr out_sink_; diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu index f61cfa83579..93e76a6ac23 100644 --- a/cpp/src/io/parquet/chunk_dict.cu +++ b/cpp/src/io/parquet/chunk_dict.cu @@ -22,19 +22,24 @@ #include +#include + namespace cudf { namespace io { namespace parquet { namespace gpu { +namespace { +constexpr int DEFAULT_BLOCK_SIZE = 256; +} template -__global__ void __launch_bounds__(block_size, 1) +__global__ void __launch_bounds__(block_size) initialize_chunk_hash_maps_kernel(device_span chunks) { auto chunk = chunks[blockIdx.x]; auto t = threadIdx.x; // fut: Now that per-chunk dict is same size as ck.num_values, try to not use one block per chunk - for (size_t i = 0; i < chunk.dict_map_size; i += block_size) { + for (size_type i = 0; i < chunk.dict_map_size; i += block_size) { if (t + i < chunk.dict_map_size) { new (&chunk.dict_map_slots[t + i].first) map_type::atomic_key_type{KEY_SENTINEL}; new (&chunk.dict_map_slots[t + i].second) map_type::atomic_mapped_type{VALUE_SENTINEL}; @@ -55,7 +60,10 @@ struct equality_functor { template struct hash_functor { column_device_view const& col; - __device__ auto operator()(size_type idx) { return MurmurHash3_32{}(col.element(idx)); } + __device__ auto operator()(size_type idx) const + { + return cudf::detail::MurmurHash3_32{}(col.element(idx)); + } }; struct map_insert_fn { @@ -67,7 +75,7 @@ struct map_insert_fn { if constexpr (column_device_view::has_element_accessor()) { auto hash_fn = hash_functor{col}; auto equality_fn = equality_functor{col}; - return map.insert(std::make_pair(i, i), hash_fn, equality_fn); + return map.insert(std::pair(i, i), hash_fn, equality_fn); } else { CUDF_UNREACHABLE("Unsupported type to insert in map"); } @@ -91,9 +99,8 @@ struct map_find_fn { }; template -__global__ void __launch_bounds__(block_size, 1) - populate_chunk_hash_maps_kernel(cudf::detail::device_2dspan chunks, - cudf::detail::device_2dspan frags) +__global__ void __launch_bounds__(block_size) + populate_chunk_hash_maps_kernel(cudf::detail::device_2dspan frags) { auto col_idx = blockIdx.y; auto block_x = blockIdx.x; @@ -102,70 +109,57 @@ __global__ void __launch_bounds__(block_size, 1) auto chunk = frag.chunk; auto col = chunk->col_desc; - size_type start_row = frag.start_row; - size_type end_row = frag.start_row + frag.num_rows; + if (not chunk->use_dictionary) { return; } - __shared__ size_type s_start_value_idx; - __shared__ size_type s_num_values; + using block_reduce = cub::BlockReduce; + __shared__ typename block_reduce::TempStorage reduce_storage; - if (not chunk->use_dictionary) { return; } + size_type start_row = frag.start_row; + size_type end_row = frag.start_row + frag.num_rows; - if (t == 0) { - // Find the bounds of values in leaf column to be inserted into the map for current chunk - auto cudf_col = *(col->parent_column); - s_start_value_idx = row_to_value_idx(start_row, cudf_col); - auto end_value_idx = row_to_value_idx(end_row, cudf_col); - s_num_values = 
end_value_idx - s_start_value_idx; - } - __syncthreads(); + // Find the bounds of values in leaf column to be inserted into the map for current chunk + auto const cudf_col = *(col->parent_column); + size_type const s_start_value_idx = row_to_value_idx(start_row, cudf_col); + size_type const end_value_idx = row_to_value_idx(end_row, cudf_col); column_device_view const& data_col = *col->leaf_column; - using block_reduce = cub::BlockReduce; - __shared__ typename block_reduce::TempStorage reduce_storage; // Make a view of the hash map auto hash_map_mutable = map_type::device_mutable_view( chunk->dict_map_slots, chunk->dict_map_size, KEY_SENTINEL, VALUE_SENTINEL); - auto hash_map = map_type::device_view( - chunk->dict_map_slots, chunk->dict_map_size, KEY_SENTINEL, VALUE_SENTINEL); - __shared__ int total_num_dict_entries; - for (size_type i = 0; i < s_num_values; i += block_size) { - // add the value to hash map - size_type val_idx = i + t + s_start_value_idx; - bool is_valid = - (i + t < s_num_values && val_idx < data_col.size()) and data_col.is_valid(val_idx); + __shared__ size_type total_num_dict_entries; + size_type val_idx = s_start_value_idx + t; + while (val_idx - block_size < end_value_idx) { + auto const is_valid = + val_idx < end_value_idx and val_idx < data_col.size() and data_col.is_valid(val_idx); // insert element at val_idx to hash map and count successful insertions size_type is_unique = 0; size_type uniq_elem_size = 0; if (is_valid) { - auto found_slot = type_dispatcher(data_col.type(), map_find_fn{hash_map}, data_col, val_idx); - if (found_slot == hash_map.end()) { - is_unique = - type_dispatcher(data_col.type(), map_insert_fn{hash_map_mutable}, data_col, val_idx); - uniq_elem_size = [&]() -> size_type { - if (not is_unique) { return 0; } - switch (col->physical_type) { - case Type::INT32: return 4; - case Type::INT64: return 8; - case Type::INT96: return 12; - case Type::FLOAT: return 4; - case Type::DOUBLE: return 8; - case Type::BYTE_ARRAY: - if (data_col.type().id() == type_id::STRING) { - // Strings are stored as 4 byte length + string bytes - return 4 + data_col.element(val_idx).size_bytes(); - } - case Type::FIXED_LEN_BYTE_ARRAY: - if (data_col.type().id() == type_id::DECIMAL128) { return sizeof(__int128_t); } - default: CUDF_UNREACHABLE("Unsupported type for dictionary encoding"); - } - }(); - } + is_unique = + type_dispatcher(data_col.type(), map_insert_fn{hash_map_mutable}, data_col, val_idx); + uniq_elem_size = [&]() -> size_type { + if (not is_unique) { return 0; } + switch (col->physical_type) { + case Type::INT32: return 4; + case Type::INT64: return 8; + case Type::INT96: return 12; + case Type::FLOAT: return 4; + case Type::DOUBLE: return 8; + case Type::BYTE_ARRAY: + if (data_col.type().id() == type_id::STRING) { + // Strings are stored as 4 byte length + string bytes + return 4 + data_col.element(val_idx).size_bytes(); + } + case Type::FIXED_LEN_BYTE_ARRAY: + if (data_col.type().id() == type_id::DECIMAL128) { return sizeof(__int128_t); } + default: CUDF_UNREACHABLE("Unsupported type for dictionary encoding"); + } + }(); } - __syncthreads(); auto num_unique = block_reduce(reduce_storage).Sum(is_unique); __syncthreads(); auto uniq_data_size = block_reduce(reduce_storage).Sum(uniq_elem_size); @@ -178,11 +172,13 @@ __global__ void __launch_bounds__(block_size, 1) // Check if the num unique values in chunk has already exceeded max dict size and early exit if (total_num_dict_entries > MAX_DICT_SIZE) { return; } - } + + val_idx += block_size; + } // while } template 
-__global__ void __launch_bounds__(block_size, 1) +__global__ void __launch_bounds__(block_size) collect_map_entries_kernel(device_span chunks) { auto& chunk = chunks[blockIdx.x]; @@ -192,31 +188,30 @@ __global__ void __launch_bounds__(block_size, 1) auto map = map_type::device_view(chunk.dict_map_slots, chunk.dict_map_size, KEY_SENTINEL, VALUE_SENTINEL); - __shared__ size_type counter; - if (t == 0) counter = 0; + __shared__ cuda::atomic counter; + using cuda::std::memory_order_relaxed; + if (t == 0) { new (&counter) cuda::atomic{0}; } __syncthreads(); - for (size_t i = 0; i < chunk.dict_map_size; i += block_size) { + for (size_type i = 0; i < chunk.dict_map_size; i += block_size) { if (t + i < chunk.dict_map_size) { - auto slot = map.begin_slot() + t + i; - auto key = static_cast(slot->first); + auto* slot = reinterpret_cast(map.begin_slot() + t + i); + auto key = slot->first; if (key != KEY_SENTINEL) { - auto loc = atomicAdd(&counter, 1); + auto loc = counter.fetch_add(1, memory_order_relaxed); cudf_assert(loc < MAX_DICT_SIZE && "Number of filled slots exceeds max dict size"); chunk.dict_data[loc] = key; // If sorting dict page ever becomes a hard requirement, enable the following statement and // add a dict sorting step before storing into the slot's second field. // chunk.dict_data_idx[loc] = t + i; - slot->second.store(loc); - // TODO: ^ This doesn't need to be atomic. Try casting to value_type ptr and just writing. + slot->second = loc; } } } } template -__global__ void __launch_bounds__(block_size, 1) - get_dictionary_indices_kernel(cudf::detail::device_2dspan chunks, - cudf::detail::device_2dspan frags) +__global__ void __launch_bounds__(block_size) + get_dictionary_indices_kernel(cudf::detail::device_2dspan frags) { auto col_idx = blockIdx.y; auto block_x = blockIdx.x; @@ -225,47 +220,38 @@ __global__ void __launch_bounds__(block_size, 1) auto chunk = frag.chunk; auto col = chunk->col_desc; + if (not chunk->use_dictionary) { return; } + size_type start_row = frag.start_row; size_type end_row = frag.start_row + frag.num_rows; - __shared__ size_type s_start_value_idx; - __shared__ size_type s_ck_start_val_idx; - __shared__ size_type s_num_values; - - if (t == 0) { - // Find the bounds of values in leaf column to be searched in the map for current chunk - auto cudf_col = *(col->parent_column); - s_start_value_idx = row_to_value_idx(start_row, cudf_col); - s_ck_start_val_idx = row_to_value_idx(chunk->start_row, cudf_col); - auto end_value_idx = row_to_value_idx(end_row, cudf_col); - s_num_values = end_value_idx - s_start_value_idx; - } - __syncthreads(); - - if (not chunk->use_dictionary) { return; } + // Find the bounds of values in leaf column to be searched in the map for current chunk + auto const cudf_col = *(col->parent_column); + auto const s_start_value_idx = row_to_value_idx(start_row, cudf_col); + auto const s_ck_start_val_idx = row_to_value_idx(chunk->start_row, cudf_col); + auto const end_value_idx = row_to_value_idx(end_row, cudf_col); column_device_view const& data_col = *col->leaf_column; auto map = map_type::device_view( chunk->dict_map_slots, chunk->dict_map_size, KEY_SENTINEL, VALUE_SENTINEL); - for (size_t i = 0; i < s_num_values; i += block_size) { - if (t + i < s_num_values) { - auto val_idx = s_start_value_idx + t + i; - bool is_valid = - (i + t < s_num_values && val_idx < data_col.size()) ? 
data_col.is_valid(val_idx) : false; - - if (is_valid) { - auto found_slot = type_dispatcher(data_col.type(), map_find_fn{map}, data_col, val_idx); - cudf_assert(found_slot != map.end() && - "Unable to find value in map in dictionary index construction"); - if (found_slot != map.end()) { - // No need for atomic as this is not going to be modified by any other thread - auto* val_ptr = reinterpret_cast(&found_slot->second); - chunk->dict_index[val_idx - s_ck_start_val_idx] = *val_ptr; - } + auto val_idx = s_start_value_idx + t; + while (val_idx < end_value_idx) { + auto const is_valid = val_idx < data_col.size() and data_col.is_valid(val_idx); + + if (is_valid) { + auto found_slot = type_dispatcher(data_col.type(), map_find_fn{map}, data_col, val_idx); + cudf_assert(found_slot != map.end() && + "Unable to find value in map in dictionary index construction"); + if (found_slot != map.end()) { + // No need for atomic as this is not going to be modified by any other thread + auto* val_ptr = reinterpret_cast(&found_slot->second); + chunk->dict_index[val_idx - s_ck_start_val_idx] = *val_ptr; } } + + val_idx += block_size; } } @@ -276,15 +262,12 @@ void initialize_chunk_hash_maps(device_span chunks, rmm::cuda_st <<>>(chunks); } -void populate_chunk_hash_maps(cudf::detail::device_2dspan chunks, - cudf::detail::device_2dspan frags, +void populate_chunk_hash_maps(cudf::detail::device_2dspan frags, rmm::cuda_stream_view stream) { - constexpr int block_size = 256; dim3 const dim_grid(frags.size().second, frags.size().first); - - populate_chunk_hash_maps_kernel - <<>>(chunks, frags); + populate_chunk_hash_maps_kernel + <<>>(frags); } void collect_map_entries(device_span chunks, rmm::cuda_stream_view stream) @@ -293,15 +276,12 @@ void collect_map_entries(device_span chunks, rmm::cuda_stream_vi collect_map_entries_kernel<<>>(chunks); } -void get_dictionary_indices(cudf::detail::device_2dspan chunks, - cudf::detail::device_2dspan frags, +void get_dictionary_indices(cudf::detail::device_2dspan frags, rmm::cuda_stream_view stream) { - constexpr int block_size = 256; dim3 const dim_grid(frags.size().second, frags.size().first); - - get_dictionary_indices_kernel - <<>>(chunks, frags); + get_dictionary_indices_kernel + <<>>(frags); } } // namespace gpu } // namespace parquet diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index 7feaa8e61b4..a1fc2edb0bb 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -156,6 +156,7 @@ bool CompactProtocolReader::read(SchemaElement* s) ParquetFieldEnum(6, s->converted_type), ParquetFieldInt32(7, s->decimal_scale), ParquetFieldInt32(8, s->decimal_precision), + ParquetFieldOptionalInt32(9, s->field_id), ParquetFieldStruct(10, s->logical_type)); return function_builder(this, op); } diff --git a/cpp/src/io/parquet/compact_protocol_reader.hpp b/cpp/src/io/parquet/compact_protocol_reader.hpp index ba48f7b127f..ddca6c37e08 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.hpp +++ b/cpp/src/io/parquet/compact_protocol_reader.hpp @@ -18,6 +18,8 @@ #include "parquet.hpp" +#include + #include #include #include @@ -137,6 +139,7 @@ class CompactProtocolReader { friend class ParquetFieldBool; friend class ParquetFieldInt8; friend class ParquetFieldInt32; + friend class ParquetFieldOptionalInt32; friend class ParquetFieldInt64; template friend class ParquetFieldStructListFunctor; @@ -216,6 +219,27 @@ class ParquetFieldInt32 { int field() { return field_val; } }; 
+/**
+ * @brief Functor to set value to optional 32 bit integer read from CompactProtocolReader
+ *
+ * @return True if field type is not int32
+ */
+class ParquetFieldOptionalInt32 {
+  int field_val;
+  thrust::optional<int32_t>& val;
+
+ public:
+  ParquetFieldOptionalInt32(int f, thrust::optional<int32_t>& v) : field_val(f), val(v) {}
+
+  inline bool operator()(CompactProtocolReader* cpr, int field_type)
+  {
+    val = cpr->get_i32();
+    return (field_type != ST_FLD_I32);
+  }
+
+  int field() { return field_val; }
+};
+
 /**
  * @brief Functor to set value to 64 bit integer read from CompactProtocolReader
  *
diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp index 927844cb1c2..176ecb6a572 100644
--- a/cpp/src/io/parquet/compact_protocol_writer.cpp
+++ b/cpp/src/io/parquet/compact_protocol_writer.cpp
@@ -144,6 +144,7 @@ size_t CompactProtocolWriter::write(const SchemaElement& s)
       c.field_int(8, s.decimal_precision);
     }
   }
+  if (s.field_id) { c.field_int(9, s.field_id.value()); }
   auto const isset = s.logical_type.isset;
   // TODO: add handling for all logical types
   // if (isset.STRING or isset.MAP or isset.LIST or isset.ENUM or isset.DECIMAL or isset.DATE or
diff --git a/cpp/src/io/parquet/page_enc.cu index da671d4c665..f05f0af2a79 100644
--- a/cpp/src/io/parquet/page_enc.cu
+++ b/cpp/src/io/parquet/page_enc.cu
@@ -81,8 +81,6 @@ struct page_enc_state_s {
   EncPage page;
   EncColumnChunk ck;
   parquet_column_device_view col;
-  gpu_inflate_input_s comp_in;
-  gpu_inflate_status_s comp_stat;
   uint16_t vals[rle_buffer_size];
 };

@@ -278,7 +276,8 @@ __global__ void __launch_bounds__(128)
   uint32_t max_stats_len = 0;

   if (!t) {
-    pagestats_g.col = &col_desc[blockIdx.x];
+    pagestats_g.col_dtype   = col_g.leaf_column->type();
+    pagestats_g.stats_dtype = col_g.stats_dtype;
     pagestats_g.start_chunk = ck_g.first_fragment;
     pagestats_g.num_chunks  = 0;
   }
@@ -749,8 +748,9 @@ static __device__ std::pair convert_nanoseconds(timesta
 template <int block_size>
 __global__ void __launch_bounds__(128, 8)
   gpuEncodePages(device_span<gpu::EncPage> pages,
-                 device_span<gpu_inflate_input_s> comp_in,
-                 device_span<gpu_inflate_status_s> comp_stat)
+                 device_span<device_span<uint8_t const>> comp_in,
+                 device_span<device_span<uint8_t>> comp_out,
+                 device_span<decompress_status> comp_stats)
 {
   __shared__ __align__(8) page_enc_state_s state_g;
   using block_scan = cub::BlockScan<uint32_t, block_size>;
@@ -760,6 +760,7 @@
   uint32_t t = threadIdx.x;

   if (t == 0) {
+    state_g = page_enc_state_s{};
     s->page = pages[blockIdx.x];
     s->ck   = *s->page.chunk;
     s->col  = *s->ck.col_desc;
@@ -1084,21 +1085,14 @@
     auto actual_data_size        = static_cast<uint32_t>(s->cur - base);
     uint32_t compressed_bfr_size = GetMaxCompressedBfrSize(actual_data_size);
     s->page.max_data_size        = actual_data_size;
-    s->comp_in.srcDevice         = base;
-    s->comp_in.srcSize           = actual_data_size;
-    s->comp_in.dstDevice         = s->page.compressed_data + s->page.max_hdr_size;
-    s->comp_in.dstSize           = compressed_bfr_size;
-    s->comp_stat.bytes_written   = 0;
-    s->comp_stat.status          = ~0;
-    s->comp_stat.reserved        = 0;
-  }
-  __syncthreads();
-  if (t == 0) {
+    if (not comp_in.empty()) {
+      comp_in[blockIdx.x]  = {base, actual_data_size};
+      comp_out[blockIdx.x] = {s->page.compressed_data + s->page.max_hdr_size, compressed_bfr_size};
+    }
     pages[blockIdx.x] = s->page;
-    if (not comp_in.empty()) comp_in[blockIdx.x] = s->comp_in;
-    if (not comp_stat.empty()) {
-      comp_stat[blockIdx.x]       = s->comp_stat;
-      pages[blockIdx.x].comp_stat = &comp_stat[blockIdx.x];
+    if (not comp_stats.empty()) {
+      comp_stats[blockIdx.x]      = {0, ~0u};
+      pages[blockIdx.x].comp_stat =
&comp_stats[blockIdx.x];
     }
   }
 }

@@ -1316,7 +1310,7 @@ __device__ uint8_t* EncodeStatistics(uint8_t* start,
 // blockDim(128, 1, 1)
 __global__ void __launch_bounds__(128)
   gpuEncodePageHeaders(device_span<EncPage> pages,
-                       device_span<gpu_inflate_status_s const> comp_stat,
+                       device_span<decompress_status const> comp_stat,
                        device_span<statistics_chunk const> page_stats,
                        const statistics_chunk* chunk_stats)
 {
@@ -1662,9 +1656,7 @@ dremel_data get_dremel_data(column_view h_col,
     }
   }

-  std::unique_ptr<rmm::device_buffer> device_view_owners;
-  column_device_view* d_nesting_levels;
-  std::tie(device_view_owners, d_nesting_levels) =
+  auto [device_view_owners, d_nesting_levels] =
     contiguous_copy_column_device_views<column_device_view>(nesting_levels, stream);

   thrust::exclusive_scan(
@@ -1734,10 +1726,7 @@ dremel_data get_dremel_data(column_view h_col,
     auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1;

     // Get empties at this level
-    rmm::device_uvector<size_type> empties(0, stream);
-    rmm::device_uvector<size_type> empties_idx(0, stream);
-    size_t empties_size;
-    std::tie(empties, empties_idx, empties_size) =
+    auto [empties, empties_idx, empties_size] =
       get_empties(nesting_levels[level], column_offsets[level], column_ends[level]);

     // Merge empty at deepest parent level with the rep, def level vals at leaf level
@@ -1818,10 +1807,7 @@ dremel_data get_dremel_data(column_view h_col,
     auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1;

     // Get empties at this level
-    rmm::device_uvector<size_type> empties(0, stream);
-    rmm::device_uvector<size_type> empties_idx(0, stream);
-    size_t empties_size;
-    std::tie(empties, empties_idx, empties_size) =
+    auto [empties, empties_idx, empties_size] =
       get_empties(nesting_levels[level], column_offsets[level], column_ends[level]);

     auto offset_transformer = [new_child_offsets = new_offsets.data(),
@@ -1953,14 +1939,15 @@ void InitEncoderPages(device_2dspan<EncColumnChunk> chunks,
 }

 void EncodePages(device_span<gpu::EncPage> pages,
-                 device_span<gpu_inflate_input_s> comp_in,
-                 device_span<gpu_inflate_status_s> comp_stat,
+                 device_span<device_span<uint8_t const>> comp_in,
+                 device_span<device_span<uint8_t>> comp_out,
+                 device_span<decompress_status> comp_stats,
                  rmm::cuda_stream_view stream)
 {
   auto num_pages = pages.size();
   // A page is part of one column. This is launching 1 block per page. 1 block will exclusively
   // deal with one datatype.
-  gpuEncodePages<128><<<num_pages, 128, 0, stream.value()>>>(pages, comp_in, comp_stat);
+  gpuEncodePages<128><<<num_pages, 128, 0, stream.value()>>>(pages, comp_in, comp_out, comp_stats);
 }

 void DecideCompression(device_span<EncColumnChunk> chunks, rmm::cuda_stream_view stream)
 {
@@ -1969,7 +1956,7 @@
 }

 void EncodePageHeaders(device_span<EncPage> pages,
-                       device_span<gpu_inflate_status_s> comp_stat,
+                       device_span<decompress_status> comp_stats,
                        device_span<statistics_chunk const> page_stats,
                        const statistics_chunk* chunk_stats,
                        rmm::cuda_stream_view stream)
 {
@@ -1977,7 +1964,7 @@
   // TODO: single thread task. No need for 128 threads/block.
Earlier it used to employ the rest of the // threads to coop load structs gpuEncodePageHeaders<<>>( - pages, comp_stat, page_stats, chunk_stats); + pages, comp_stats, page_stats, chunk_stats); } void GatherPages(device_span chunks, diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index b1800640c91..ccaf3485bdf 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -18,6 +18,8 @@ #include "parquet_common.hpp" +#include + #include #include #include @@ -145,6 +147,7 @@ struct SchemaElement { int32_t num_children = 0; int32_t decimal_scale = 0; int32_t decimal_precision = 0; + thrust::optional field_id = thrust::nullopt; // The following fields are filled in later during schema initialization int max_definition_level = 0; @@ -157,7 +160,8 @@ struct SchemaElement { return type == other.type && converted_type == other.converted_type && type_length == other.type_length && repetition_type == other.repetition_type && name == other.name && num_children == other.num_children && - decimal_scale == other.decimal_scale && decimal_precision == other.decimal_precision; + decimal_scale == other.decimal_scale && decimal_precision == other.decimal_precision && + field_id == other.field_id; } // the parquet format is a little squishy when it comes to interpreting diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 8d0aa8881c3..057b9a87214 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -378,7 +378,7 @@ struct EncPage { uint32_t num_leaf_values; //!< Values in page. Different from num_rows in case of nested types uint32_t num_values; //!< Number of def/rep level values in page. Includes null/empty elements in //!< non-leaf levels - gpu_inflate_status_s* comp_stat; //!< Ptr to compression status + decompress_status* comp_stat; //!< Ptr to compression status }; /** @@ -529,12 +529,10 @@ void initialize_chunk_hash_maps(device_span chunks, rmm::cuda_st /** * @brief Insert chunk values into their respective hash maps * - * @param chunks Column chunks [rowgroup][column] * @param frags Column fragments * @param stream CUDA stream to use */ -void populate_chunk_hash_maps(cudf::detail::device_2dspan chunks, - cudf::detail::device_2dspan frags, +void populate_chunk_hash_maps(cudf::detail::device_2dspan frags, rmm::cuda_stream_view stream); /** @@ -554,12 +552,10 @@ void collect_map_entries(device_span chunks, rmm::cuda_stream_vi * Since dict_data itself contains indices into the original cudf column, this means that * col[row] == col[dict_data[dict_index[row - chunk.start_row]]] * - * @param chunks Column chunks [rowgroup][column] * @param frags Column fragments * @param stream CUDA stream to use */ -void get_dictionary_indices(cudf::detail::device_2dspan chunks, - cudf::detail::device_2dspan frags, +void get_dictionary_indices(cudf::detail::device_2dspan frags, rmm::cuda_stream_view stream); /** @@ -588,13 +584,15 @@ void InitEncoderPages(cudf::detail::device_2dspan chunks, * @brief Launches kernel for packing column data into parquet pages * * @param[in,out] pages Device array of EncPages (unordered) - * @param[out] comp_in Optionally initializes compressor input params - * @param[out] comp_out Optionally initializes compressor output params + * @param[out] comp_in Compressor input buffers + * @param[out] comp_out Compressor output buffers + * @param[out] comp_stats Compressor statuses + * @param[in] stream CUDA stream to use, default 0 */ void EncodePages(device_span pages, -
device_span comp_in, - device_span comp_out, + device_span> comp_in, + device_span> comp_out, + device_span comp_stats, rmm::cuda_stream_view stream); /** @@ -609,13 +607,13 @@ void DecideCompression(device_span chunks, rmm::cuda_stream_view * @brief Launches kernel to encode page headers * * @param[in,out] pages Device array of EncPages - * @param[in] comp_out Compressor status or nullptr if no compression + * @param[in] comp_stats Compressor status * @param[in] page_stats Optional page-level statistics to be included in page header * @param[in] chunk_stats Optional chunk-level statistics to be encoded * @param[in] stream CUDA stream to use, default 0 */ void EncodePageHeaders(device_span pages, - device_span comp_out, + device_span comp_stats, device_span page_stats, const statistics_chunk* chunk_stats, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 46b3206f731..a40993ee2dd 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -24,6 +24,7 @@ #include "compact_protocol_reader.hpp" #include +#include #include #include @@ -38,10 +39,9 @@ #include #include -#include - #include #include +#include #include #include @@ -1050,96 +1050,13 @@ void reader::impl::decode_page_headers(hostdevice_vector& pages.device_to_host(stream, true); } -__global__ void decompress_check_kernel(device_span stats, - bool* any_block_failure) -{ - auto tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid < stats.size()) { - if (stats[tid].status != 0) { - *any_block_failure = true; // Doesn't need to be atomic - } - } -} - -void decompress_check(device_span stats, - bool* any_block_failure, - rmm::cuda_stream_view stream) -{ - if (stats.empty()) { return; } // early exit for empty stats - - dim3 block(128); - dim3 grid(cudf::util::div_rounding_up_safe(stats.size(), static_cast(block.x))); - decompress_check_kernel<<>>(stats, any_block_failure); -} - -__global__ void convert_nvcomp_status(device_span nvcomp_stats, - device_span stats) -{ - auto tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid < stats.size()) { - stats[tid].status = nvcomp_stats[tid] == nvcompStatus_t::nvcompSuccess ? 
0 : 1; - } -} - -void snappy_decompress(device_span comp_in, - device_span comp_stat, - size_t max_uncomp_page_size, - rmm::cuda_stream_view stream) +void decompress_check(device_span stats, rmm::cuda_stream_view stream) { - size_t num_comp_pages = comp_in.size(); - size_t temp_size; - - nvcompStatus_t nvcomp_status = - nvcompBatchedSnappyDecompressGetTempSize(num_comp_pages, max_uncomp_page_size, &temp_size); - CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, - "Unable to get scratch size for snappy decompression"); - - // Not needed now but nvcomp API makes no promises about future - rmm::device_buffer scratch(temp_size, stream); - // Analogous to comp_in.srcDevice - rmm::device_uvector compressed_data_ptrs(num_comp_pages, stream); - // Analogous to comp_in.srcSize - rmm::device_uvector compressed_data_sizes(num_comp_pages, stream); - // Analogous to comp_in.dstDevice - rmm::device_uvector uncompressed_data_ptrs(num_comp_pages, stream); - // Analogous to comp_in.dstSize - rmm::device_uvector uncompressed_data_sizes(num_comp_pages, stream); - - // Analogous to comp_stat.bytes_written - rmm::device_uvector actual_uncompressed_data_sizes(num_comp_pages, stream); - // Convertible to comp_stat.status - rmm::device_uvector statuses(num_comp_pages, stream); - device_span statuses_span(statuses.data(), statuses.size()); - - // Prepare the vectors - auto comp_it = thrust::make_zip_iterator(compressed_data_ptrs.begin(), - compressed_data_sizes.begin(), - uncompressed_data_ptrs.begin(), - uncompressed_data_sizes.data()); - thrust::transform(rmm::exec_policy(stream), - comp_in.begin(), - comp_in.end(), - comp_it, - [] __device__(gpu_inflate_input_s in) { - return thrust::make_tuple(in.srcDevice, in.srcSize, in.dstDevice, in.dstSize); - }); - - nvcomp_status = nvcompBatchedSnappyDecompressAsync(compressed_data_ptrs.data(), - compressed_data_sizes.data(), - uncompressed_data_sizes.data(), - actual_uncompressed_data_sizes.data(), - num_comp_pages, - scratch.data(), - scratch.size(), - uncompressed_data_ptrs.data(), - statuses.data(), - stream.value()); - CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, - "unable to perform snappy decompression"); - - dim3 block(128); - dim3 grid(cudf::util::div_rounding_up_safe(num_comp_pages, static_cast(block.x))); - convert_nvcomp_status<<>>(statuses_span, comp_stat); + CUDF_EXPECTS(thrust::all_of(rmm::exec_policy(stream), + stats.begin(), + stats.end(), + [] __device__(auto const& stat) { return stat.status == 0; }), + "Error during decompression"); } /** @@ -1175,9 +1092,9 @@ rmm::device_buffer reader::impl::decompress_page_data( int32_t max_decompressed_size; }; - std::array codecs{codec_stats{parquet::GZIP, 0, 0}, - codec_stats{parquet::SNAPPY, 0, 0}, - codec_stats{parquet::BROTLI, 0, 0}}; + std::array codecs{codec_stats{parquet::GZIP, 0, 0}, + codec_stats{parquet::SNAPPY, 0, 0}, + codec_stats{parquet::BROTLI, 0, 0}}; auto is_codec_supported = [&codecs](int8_t codec) { if (codec == parquet::UNCOMPRESSED) return true; @@ -1207,91 +1124,73 @@ rmm::device_buffer reader::impl::decompress_page_data( // Dispatch batches of pages to decompress for each codec rmm::device_buffer decomp_pages(total_decomp_size, stream); - hostdevice_vector inflate_in(0, num_comp_pages, stream); - hostdevice_vector inflate_out(0, num_comp_pages, stream); - hostdevice_vector any_block_failure(1, stream); - any_block_failure[0] = false; - any_block_failure.host_to_device(stream); + std::vector> comp_in; + comp_in.reserve(num_comp_pages); + std::vector> comp_out; + 
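The thrust::all_of reduction in the new decompress_check above collapses the old two-step check (a custom kernel writing a device-side failure flag, then a copy back to the host) into a single device-side predicate scan. A self-contained sketch of the same pattern, with a plain int status array standing in for the decompress_status structs (illustration only, not part of the patch):

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/logical.h>

// Returns true only if every page reports status == 0 (success). Compiling the
// device lambda requires nvcc's --extended-lambda flag.
bool all_pages_decompressed(thrust::device_vector<int> const& status_codes)
{
  return thrust::all_of(thrust::device,
                        status_codes.begin(),
                        status_codes.end(),
                        [] __device__(int s) { return s == 0; });
}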
comp_out.reserve(num_comp_pages); - device_span inflate_in_view(inflate_in.device_ptr(), inflate_in.size()); - device_span inflate_out_view(inflate_out.device_ptr(), inflate_out.size()); + rmm::device_uvector comp_stats(num_comp_pages, stream); + thrust::fill(rmm::exec_policy(stream), + comp_stats.begin(), + comp_stats.end(), + decompress_status{0, static_cast(-1000), 0}); size_t decomp_offset = 0; - int32_t argc = 0; + int32_t start_pos = 0; for (const auto& codec : codecs) { - if (codec.num_pages > 0) { - int32_t start_pos = argc; - - for_each_codec_page(codec.compression_type, [&](size_t page) { - auto dst_base = static_cast(decomp_pages.data()); - inflate_in[argc].srcDevice = pages[page].page_data; - inflate_in[argc].srcSize = pages[page].compressed_page_size; - inflate_in[argc].dstDevice = dst_base + decomp_offset; - inflate_in[argc].dstSize = pages[page].uncompressed_page_size; - - inflate_out[argc].bytes_written = 0; - inflate_out[argc].status = static_cast(-1000); - inflate_out[argc].reserved = 0; - - pages[page].page_data = static_cast(inflate_in[argc].dstDevice); - decomp_offset += inflate_in[argc].dstSize; - argc++; - }); + if (codec.num_pages == 0) { continue; } - CUDF_CUDA_TRY(cudaMemcpyAsync(inflate_in.device_ptr(start_pos), - inflate_in.host_ptr(start_pos), - sizeof(decltype(inflate_in)::value_type) * (argc - start_pos), - cudaMemcpyHostToDevice, - stream.value())); - CUDF_CUDA_TRY(cudaMemcpyAsync(inflate_out.device_ptr(start_pos), - inflate_out.host_ptr(start_pos), - sizeof(decltype(inflate_out)::value_type) * (argc - start_pos), - cudaMemcpyHostToDevice, - stream.value())); - - switch (codec.compression_type) { - case parquet::GZIP: - CUDF_CUDA_TRY(gpuinflate(inflate_in.device_ptr(start_pos), - inflate_out.device_ptr(start_pos), - argc - start_pos, - 1, - stream)) - break; - case parquet::SNAPPY: - if (nvcomp_integration::is_stable_enabled()) { - snappy_decompress(inflate_in_view.subspan(start_pos, argc - start_pos), - inflate_out_view.subspan(start_pos, argc - start_pos), - codec.max_decompressed_size, - stream); - } else { - CUDF_CUDA_TRY(gpu_unsnap(inflate_in.device_ptr(start_pos), - inflate_out.device_ptr(start_pos), - argc - start_pos, - stream)); - } - break; - case parquet::BROTLI: - CUDF_CUDA_TRY(gpu_debrotli(inflate_in.device_ptr(start_pos), - inflate_out.device_ptr(start_pos), - debrotli_scratch.data(), - debrotli_scratch.size(), - argc - start_pos, - stream)); - break; - default: CUDF_FAIL("Unexpected decompression dispatch"); break; - } - CUDF_CUDA_TRY(cudaMemcpyAsync(inflate_out.host_ptr(start_pos), - inflate_out.device_ptr(start_pos), - sizeof(decltype(inflate_out)::value_type) * (argc - start_pos), - cudaMemcpyDeviceToHost, - stream.value())); + for_each_codec_page(codec.compression_type, [&](size_t page) { + auto dst_base = static_cast(decomp_pages.data()); + comp_in.emplace_back(pages[page].page_data, + static_cast(pages[page].compressed_page_size)); + comp_out.emplace_back(dst_base + decomp_offset, + static_cast(pages[page].uncompressed_page_size)); + + pages[page].page_data = static_cast(comp_out.back().data()); + decomp_offset += comp_out.back().size(); + }); + + host_span const> comp_in_view{comp_in.data() + start_pos, + codec.num_pages}; + auto const d_comp_in = cudf::detail::make_device_uvector_async(comp_in_view, stream); + host_span const> comp_out_view(comp_out.data() + start_pos, + codec.num_pages); + auto const d_comp_out = cudf::detail::make_device_uvector_async(comp_out_view, stream); + device_span d_comp_stats_view(comp_stats.data() + 
start_pos, + codec.num_pages); + + switch (codec.compression_type) { + case parquet::GZIP: + gpuinflate(d_comp_in, d_comp_out, d_comp_stats_view, gzip_header_included::YES, stream); + break; + case parquet::SNAPPY: + if (nvcomp_integration::is_stable_enabled()) { + nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY, + d_comp_in, + d_comp_out, + d_comp_stats_view, + codec.max_decompressed_size, + stream); + } else { + gpu_unsnap(d_comp_in, d_comp_out, d_comp_stats_view, stream); + } + break; + case parquet::BROTLI: + gpu_debrotli(d_comp_in, + d_comp_out, + d_comp_stats_view, + debrotli_scratch.data(), + debrotli_scratch.size(), + stream); + break; + default: CUDF_FAIL("Unexpected decompression dispatch"); break; } + start_pos += codec.num_pages; } - decompress_check(inflate_out_view, any_block_failure.device_ptr(), stream); - any_block_failure.device_to_host(stream, true); // synchronizes stream - CUDF_EXPECTS(not any_block_failure[0], "Error during decompression"); + decompress_check(comp_stats, stream); // Update the page information in device memory with the updated value of // page_data; it now points to the uncompressed data buffer @@ -1729,11 +1628,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, continue; } - int32_t type_width; - int32_t clock_rate; - int8_t converted_type; - - std::tie(type_width, clock_rate, converted_type) = + auto [type_width, clock_rate, converted_type] = conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), _timestamp_type.id(), schema.type, diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index cb1acb4d9ec..dbbd39fb508 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -491,6 +491,13 @@ std::vector construct_schema_tree( [&](cudf::detail::LinkedColPtr const& col, column_in_metadata& col_meta, size_t parent_idx) { bool col_nullable = is_col_nullable(col, col_meta, single_write_mode); + auto set_field_id = [&schema, parent_idx](schema_tree_node& s, + column_in_metadata const& col_meta) { + if (schema[parent_idx].name != "list" and col_meta.is_parquet_field_id_set()) { + s.field_id = col_meta.get_parquet_field_id(); + } + }; + if (col->type().id() == type_id::STRUCT) { // if struct, add current and recursively call for all children schema_tree_node struct_schema{}; @@ -500,6 +507,7 @@ std::vector construct_schema_tree( struct_schema.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); struct_schema.num_children = col->children.size(); struct_schema.parent_idx = parent_idx; + set_field_id(struct_schema, col_meta); schema.push_back(std::move(struct_schema)); auto struct_node_index = schema.size() - 1; @@ -524,6 +532,7 @@ std::vector construct_schema_tree( list_schema_1.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); list_schema_1.num_children = 1; list_schema_1.parent_idx = parent_idx; + set_field_id(list_schema_1, col_meta); schema.push_back(std::move(list_schema_1)); schema_tree_node list_schema_2{}; @@ -555,7 +564,10 @@ std::vector construct_schema_tree( map_schema.converted_type = ConvertedType::MAP; map_schema.repetition_type = col_nullable ? 
FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED; - map_schema.name = col_meta.get_name(); + map_schema.name = col_meta.get_name(); + if (col_meta.is_parquet_field_id_set()) { + map_schema.field_id = col_meta.get_parquet_field_id(); + } map_schema.num_children = 1; map_schema.parent_idx = parent_idx; schema.push_back(std::move(map_schema)); @@ -612,6 +624,7 @@ std::vector construct_schema_tree( col_schema.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); col_schema.parent_idx = parent_idx; col_schema.leaf_column = col; + set_field_id(col_schema, col_meta); schema.push_back(col_schema); } }; @@ -863,7 +876,7 @@ auto build_chunk_dictionaries(hostdevice_2dvector& chunks, std::vector> dict_data; std::vector> dict_index; - if (h_chunks.size() == 0) { return std::make_pair(std::move(dict_data), std::move(dict_index)); } + if (h_chunks.size() == 0) { return std::pair(std::move(dict_data), std::move(dict_index)); } // Allocate slots for each chunk std::vector> hash_maps_storage; @@ -882,7 +895,7 @@ auto build_chunk_dictionaries(hostdevice_2dvector& chunks, chunks.host_to_device(stream); gpu::initialize_chunk_hash_maps(chunks.device_view().flat_view(), stream); - gpu::populate_chunk_hash_maps(chunks, frags, stream); + gpu::populate_chunk_hash_maps(frags, stream); chunks.device_to_host(stream, true); @@ -899,7 +912,7 @@ auto build_chunk_dictionaries(hostdevice_2dvector& chunks, // We don't use dictionary if the indices are > 16 bits because that's the maximum bitpacking // bitsize we efficiently support - if (nbits > 16) { return std::make_pair(false, 0); } + if (nbits > 16) { return std::pair(false, 0); } // Only these bit sizes are allowed for RLE encoding because it's compute optimized constexpr auto allowed_bitsizes = std::array{1, 2, 4, 8, 12, 16}; @@ -912,7 +925,7 @@ auto build_chunk_dictionaries(hostdevice_2dvector& chunks, bool use_dict = (ck.plain_data_size > dict_enc_size); if (not use_dict) { rle_bits = 0; } - return std::make_pair(use_dict, rle_bits); + return std::pair(use_dict, rle_bits); }(); } @@ -931,9 +944,9 @@ auto build_chunk_dictionaries(hostdevice_2dvector& chunks, } chunks.host_to_device(stream); gpu::collect_map_entries(chunks.device_view().flat_view(), stream); - gpu::get_dictionary_indices(chunks.device_view(), frags, stream); + gpu::get_dictionary_indices(frags, stream); - return std::make_pair(std::move(dict_data), std::move(dict_index)); + return std::pair(std::move(dict_data), std::move(dict_index)); } void writer::impl::init_encoder_pages(hostdevice_2dvector& chunks, @@ -971,8 +984,9 @@ void writer::impl::init_encoder_pages(hostdevice_2dvector& stream.synchronize(); } -void snappy_compress(device_span comp_in, - device_span comp_stat, +void snappy_compress(device_span const> comp_in, + device_span const> comp_out, + device_span comp_stats, size_t max_page_uncomp_data_size, rmm::cuda_stream_view stream) { @@ -999,16 +1013,20 @@ void snappy_compress(device_span comp_in, // the space allocated unless one uses the API nvcompBatchedSnappyCompressGetOutputSize() // Prepare the vectors - auto comp_it = thrust::make_zip_iterator(uncompressed_data_ptrs.begin(), - uncompressed_data_sizes.begin(), - compressed_data_ptrs.begin()); + auto comp_it = + thrust::make_zip_iterator(uncompressed_data_ptrs.begin(), uncompressed_data_sizes.begin()); + thrust::transform( + rmm::exec_policy(stream), + comp_in.begin(), + comp_in.end(), + comp_it, + [] __device__(auto const& in) { return thrust::make_tuple(in.data(), in.size()); }); + 
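// The pair of transforms in snappy_compress adapts the span-based arguments to nvcomp's
// structure-of-arrays interface: the zip-iterator transform above unpacks each input
// device_span into parallel pointer and size arrays, while the transform just below keeps
// only the destination pointers, since the batched snappy API reports compressed sizes
// through a separate device array (compressed_bytes_written).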
thrust::transform(rmm::exec_policy(stream), - comp_in.begin(), - comp_in.end(), - comp_it, - [] __device__(gpu_inflate_input_s in) { - return thrust::make_tuple(in.srcDevice, in.srcSize, in.dstDevice); - }); + comp_out.begin(), + comp_out.end(), + compressed_data_ptrs.begin(), + [] __device__(auto const& out) { return out.data(); }); nvcomp_status = nvcompBatchedSnappyCompressAsync(uncompressed_data_ptrs.data(), uncompressed_data_sizes.data(), max_page_uncomp_data_size, @@ -1028,9 +1046,9 @@ void snappy_compress(device_span comp_in, thrust::transform(rmm::exec_policy(stream), compressed_bytes_written.begin(), compressed_bytes_written.end(), - comp_stat.begin(), + comp_stats.begin(), [] __device__(size_t size) { - gpu_inflate_status_s status{}; + decompress_status status{}; status.bytes_written = size; return status; }); @@ -1038,9 +1056,9 @@ void snappy_compress(device_span comp_in, } catch (...) { // If we reach this then there was an error in compressing so set an error status for each page thrust::for_each(rmm::exec_policy(stream), - comp_stat.begin(), - comp_stat.end(), - [] __device__(gpu_inflate_status_s & stat) { stat.status = 1; }); + comp_stats.begin(), + comp_stats.end(), + [] __device__(decompress_status & stat) { stat.status = 1; }); }; } @@ -1064,19 +1082,17 @@ void writer::impl::encode_pages(hostdevice_2dvector& chunks uint32_t max_comp_pages = (compression_ != parquet::Compression::UNCOMPRESSED) ? pages_in_batch : 0; - rmm::device_uvector compression_input(max_comp_pages, stream); - rmm::device_uvector compression_status(max_comp_pages, stream); - - device_span comp_in{compression_input.data(), compression_input.size()}; - device_span comp_stat{compression_status.data(), compression_status.size()}; + rmm::device_uvector> comp_in(max_comp_pages, stream); + rmm::device_uvector> comp_out(max_comp_pages, stream); + rmm::device_uvector comp_stats(max_comp_pages, stream); - gpu::EncodePages(batch_pages, comp_in, comp_stat, stream); + gpu::EncodePages(batch_pages, comp_in, comp_out, comp_stats, stream); switch (compression_) { case parquet::Compression::SNAPPY: if (nvcomp_integration::is_stable_enabled()) { - snappy_compress(comp_in, comp_stat, max_page_uncomp_data_size, stream); + snappy_compress(comp_in, comp_out, comp_stats, max_page_uncomp_data_size, stream); } else { - CUDF_CUDA_TRY(gpu_snap(comp_in.data(), comp_stat.data(), pages_in_batch, stream)); + gpu_snap(comp_in, comp_out, comp_stats, stream); } break; default: break; @@ -1085,7 +1101,7 @@ void writer::impl::encode_pages(hostdevice_2dvector& chunks // chunk-level auto d_chunks_in_batch = chunks.device_view().subspan(first_rowgroup, rowgroups_in_batch); DecideCompression(d_chunks_in_batch.flat_view(), stream); - EncodePageHeaders(batch_pages, comp_stat, batch_pages_stats, chunk_stats, stream); + EncodePageHeaders(batch_pages, comp_stats, batch_pages_stats, chunk_stats, stream); GatherPages(d_chunks_in_batch.flat_view(), pages, stream); auto h_chunks_in_batch = chunks.host_view().subspan(first_rowgroup, rowgroups_in_batch); diff --git a/cpp/src/io/statistics/column_statistics.cuh b/cpp/src/io/statistics/column_statistics.cuh index 9be19979c50..9ba54ec550c 100644 --- a/cpp/src/io/statistics/column_statistics.cuh +++ b/cpp/src/io/statistics/column_statistics.cuh @@ -332,10 +332,8 @@ __global__ void __launch_bounds__(block_size, 1) cooperative_load(state.group, &groups[blockIdx.x]); __syncthreads(); - cooperative_load(state.col, state.group.col); - __syncthreads(); - type_dispatcher(state.col.leaf_column->type(), + 
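// Storing col_dtype and stats_dtype directly in statistics_merge_group (see the
// statistics.cuh hunk below) lets the merge kernel dispatch on the column's type without
// the second cooperative_load of the full stats_column_desc, and without dereferencing
// leaf_column, as was needed before.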
type_dispatcher(state.group.col_dtype, merge_group_statistics_functor(storage), state, chunks_in + state.group.start_chunk, diff --git a/cpp/src/io/statistics/statistics.cuh b/cpp/src/io/statistics/statistics.cuh index 87d92beb595..bb3c3ee152c 100644 --- a/cpp/src/io/statistics/statistics.cuh +++ b/cpp/src/io/statistics/statistics.cuh @@ -107,7 +107,8 @@ struct statistics_group { }; struct statistics_merge_group { - const stats_column_desc* col; //!< Column information + data_type col_dtype; //!< Column data type + statistics_dtype stats_dtype; //!< Statistics data type for this column uint32_t start_chunk; //!< Start chunk of this group uint32_t num_chunks; //!< Number of chunks in group }; diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index ed8c3d6e1e3..08b5914cb19 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -35,7 +35,7 @@ namespace { /** * @brief Defines which cuFile usage to enable. */ -enum class usage_policy : uint8_t { OFF, GDS, ALWAYS }; +enum class usage_policy : uint8_t { OFF, GDS, ALWAYS, KVIKIO }; /** * @brief Get the current usage policy. @@ -46,6 +46,7 @@ usage_policy get_env_policy() if (env_val == "OFF") return usage_policy::OFF; if (env_val == "GDS") return usage_policy::GDS; if (env_val == "ALWAYS") return usage_policy::ALWAYS; + if (env_val == "KVIKIO") return usage_policy::KVIKIO; CUDF_FAIL("Invalid LIBCUDF_CUFILE_POLICY value: " + env_val); } } // namespace @@ -54,6 +55,8 @@ bool is_always_enabled() { return get_env_policy() == usage_policy::ALWAYS; } bool is_gds_enabled() { return is_always_enabled() or get_env_policy() == usage_policy::GDS; } +bool is_kvikio_enabled() { return get_env_policy() == usage_policy::KVIKIO; } + } // namespace cufile_integration namespace nvcomp_integration { diff --git a/cpp/src/io/utilities/config_utils.hpp b/cpp/src/io/utilities/config_utils.hpp index 80c20529687..4f6a14091cf 100644 --- a/cpp/src/io/utilities/config_utils.hpp +++ b/cpp/src/io/utilities/config_utils.hpp @@ -48,6 +48,11 @@ bool is_always_enabled(); */ bool is_gds_enabled(); +/** + * @brief Returns true if KvikIO is enabled. + */ +bool is_kvikio_enabled(); + } // namespace cufile_integration namespace nvcomp_integration { diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index 63d0103ddec..042afc01253 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
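For reference, the KVIKIO policy added above is selected purely through the LIBCUDF_CUFILE_POLICY environment variable. A simplified restatement of the parsing logic (the real code lives in get_env_policy() shown in the config_utils.cpp hunk; the default value here is an assumption, and CUDF_FAIL is replaced by a plain exception to keep the sketch self-contained):

#include <cstdint>
#include <cstdlib>
#include <stdexcept>
#include <string>

enum class usage_policy : uint8_t { OFF, GDS, ALWAYS, KVIKIO };

usage_policy policy_from_env()
{
  char const* raw = std::getenv("LIBCUDF_CUFILE_POLICY");
  std::string const env_val = raw != nullptr ? raw : "GDS";  // assumed default
  if (env_val == "OFF") return usage_policy::OFF;
  if (env_val == "GDS") return usage_policy::GDS;
  if (env_val == "ALWAYS") return usage_policy::ALWAYS;
  if (env_val == "KVIKIO") return usage_policy::KVIKIO;
  throw std::invalid_argument("Invalid LIBCUDF_CUFILE_POLICY value: " + env_val);
}

With this in place, running an application as LIBCUDF_CUFILE_POLICY=KVIKIO ./app routes device reads and writes through kvikio::FileHandle, while OFF/GDS/ALWAYS keep the existing cuFile paths.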
@@ -19,7 +19,9 @@ #include "file_io_utilities.hpp" #include #include +#include +#include #include namespace cudf { namespace io { class file_sink : public data_sink { public: explicit file_sink(std::string const& filepath) - : _cufile_out(detail::make_cufile_output(filepath)) { _output_stream.open(filepath, std::ios::out | std::ios::binary | std::ios::trunc); CUDF_EXPECTS(_output_stream.is_open(), "Cannot open output file"); + + if (detail::cufile_integration::is_kvikio_enabled()) { + _kvikio_file = kvikio::FileHandle(filepath, "w"); + } else { + _cufile_out = detail::make_cufile_output(filepath); + } } virtual ~file_sink() { flush(); } @@ -49,19 +56,15 @@ class file_sink : public data_sink { size_t bytes_written() override { return _bytes_written; } - [[nodiscard]] bool supports_device_write() const override { return _cufile_out != nullptr; } - - [[nodiscard]] bool is_device_write_preferred(size_t size) const override + [[nodiscard]] bool supports_device_write() const override { - return _cufile_out != nullptr && _cufile_out->is_cufile_io_preferred(size); + return !_kvikio_file.closed() || _cufile_out != nullptr; } - void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override + [[nodiscard]] bool is_device_write_preferred(size_t size) const override { - if (!supports_device_write()) CUDF_FAIL("Device writes are not supported for this file."); - - _cufile_out->write(gpu_data, _bytes_written, size); - _bytes_written += size; + return !_kvikio_file.closed() || + (_cufile_out != nullptr && _cufile_out->is_cufile_io_preferred(size)); } std::future device_write_async(void const* gpu_data, @@ -70,15 +73,30 @@ { if (!supports_device_write()) CUDF_FAIL("Device writes are not supported for this file."); - auto result = _cufile_out->write_async(gpu_data, _bytes_written, size); + size_t offset = _bytes_written; _bytes_written += size; - return result; + + if (!_kvikio_file.closed()) { + // KvikIO's `pwrite()` returns a `std::future<std::size_t>` so we convert it + // to `std::future<void>` + return std::async(std::launch::deferred, [this, gpu_data, size, offset] { + _kvikio_file.pwrite(gpu_data, size, offset).get(); + }); + } + return _cufile_out->write_async(gpu_data, offset, size); + } + + void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override + { + if (!supports_device_write()) CUDF_FAIL("Device writes are not supported for this file."); + return device_write_async(gpu_data, size, stream).get(); + } private: std::ofstream _output_stream; size_t _bytes_written = 0; std::unique_ptr _cufile_out; + kvikio::FileHandle _kvikio_file; }; /** diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 6f864ab509f..80e07f31dd9 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
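The std::async wrapper in file_sink::device_write_async above exists only to adapt future types; the same trick in isolation (function name hypothetical):

#include <cstddef>
#include <future>

// kvikio::FileHandle::pwrite() yields a std::future carrying the byte count, but the
// data_sink interface promises a std::future<void>. A deferred task that consumes the
// value adapts one to the other without blocking at call time; the write is driven to
// completion when the caller eventually calls .get() or .wait() on the returned future.
std::future<void> discard_result(std::future<std::size_t> bytes_written)
{
  return std::async(std::launch::deferred,
                    [f = std::move(bytes_written)]() mutable { f.get(); });
}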
@@ -20,6 +20,9 @@ #include #include +#include +#include + #include #include #include @@ -33,28 +36,38 @@ namespace { */ class file_source : public datasource { public: - explicit file_source(const char* filepath) - : _file(filepath, O_RDONLY), _cufile_in(detail::make_cufile_input(filepath)) + explicit file_source(const char* filepath) : _file(filepath, O_RDONLY) { + if (detail::cufile_integration::is_kvikio_enabled()) { + _kvikio_file = kvikio::FileHandle(filepath); + } else { + _cufile_in = detail::make_cufile_input(filepath); + } } virtual ~file_source() = default; - [[nodiscard]] bool supports_device_read() const override { return _cufile_in != nullptr; } + [[nodiscard]] bool supports_device_read() const override + { + return !_kvikio_file.closed() || _cufile_in != nullptr; + } [[nodiscard]] bool is_device_read_preferred(size_t size) const override { - return _cufile_in != nullptr && _cufile_in->is_cufile_io_preferred(size); + return !_kvikio_file.closed() || + (_cufile_in != nullptr && _cufile_in->is_cufile_io_preferred(size)); } - std::unique_ptr device_read(size_t offset, - size_t size, - rmm::cuda_stream_view stream) override + std::future device_read_async(size_t offset, + size_t size, + uint8_t* dst, + rmm::cuda_stream_view stream) override { CUDF_EXPECTS(supports_device_read(), "Device reads are not supported for this file."); auto const read_size = std::min(size, _file.size() - offset); - return _cufile_in->read(offset, read_size, stream); + if (!_kvikio_file.closed()) { return _kvikio_file.pread(dst, read_size, offset); } + return _cufile_in->read_async(offset, read_size, dst, stream); } size_t device_read(size_t offset, @@ -62,21 +75,17 @@ class file_source : public datasource { uint8_t* dst, rmm::cuda_stream_view stream) override { - CUDF_EXPECTS(supports_device_read(), "Device reads are not supported for this file."); - - auto const read_size = std::min(size, _file.size() - offset); - return _cufile_in->read(offset, read_size, dst, stream); + return device_read_async(offset, size, dst, stream).get(); } - std::future device_read_async(size_t offset, - size_t size, - uint8_t* dst, - rmm::cuda_stream_view stream) override + std::unique_ptr device_read(size_t offset, + size_t size, + rmm::cuda_stream_view stream) override { - CUDF_EXPECTS(supports_device_read(), "Device reads are not supported for this file."); - - auto const read_size = std::min(size, _file.size() - offset); - return _cufile_in->read_async(offset, read_size, dst, stream); + rmm::device_buffer out_data(size, stream); + size_t read = device_read(offset, size, reinterpret_cast(out_data.data()), stream); + out_data.resize(read, stream); + return datasource::buffer::create(std::move(out_data)); } [[nodiscard]] size_t size() const override { return _file.size(); } @@ -86,6 +95,7 @@ class file_source : public datasource { private: std::unique_ptr _cufile_in; + kvikio::FileHandle _kvikio_file; }; /** diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index f7e250f1d3f..c0dd85702e2 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -176,16 +176,6 @@ cufile_input_impl::cufile_input_impl(std::string const& filepath) pool.sleep_duration = 10; } -std::unique_ptr cufile_input_impl::read(size_t offset, - size_t size, - rmm::cuda_stream_view stream) -{ - rmm::device_buffer out_data(size, stream); - auto read_size = read(offset, size, reinterpret_cast(out_data.data()), stream); - out_data.resize(read_size, stream); - 
return datasource::buffer::create(std::move(out_data)); -} - namespace { template cufile_input_impl::read_async(size_t offset, return std::async(std::launch::deferred, waiter, std::move(slice_tasks)); } -size_t cufile_input_impl::read(size_t offset, - size_t size, - uint8_t* dst, - rmm::cuda_stream_view stream) -{ - auto result = read_async(offset, size, dst, stream); - return result.get(); -} - cufile_output_impl::cufile_output_impl(std::string const& filepath) : shim{cufile_shim::instance()}, cf_file(shim, filepath, O_CREAT | O_RDWR | O_DIRECT, 0664), @@ -250,11 +231,6 @@ cufile_output_impl::cufile_output_impl(std::string const& filepath) { } -void cufile_output_impl::write(void const* data, size_t offset, size_t size) -{ - write_async(data, offset, size).wait(); -} - std::future cufile_output_impl::write_async(void const* data, size_t offset, size_t size) { int device; diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp index be3ecc49ab0..704ee77de8a 100644 --- a/cpp/src/io/utilities/file_io_utilities.hpp +++ b/cpp/src/io/utilities/file_io_utilities.hpp @@ -80,35 +80,6 @@ class cufile_io_base { */ class cufile_input : public cufile_io_base { public: - /** - * @brief Reads into a new device buffer. - * - * @throws cudf::logic_error on cuFile error - * - * @param offset Number of bytes from the start - * @param size Number of bytes to read - * @param stream CUDA stream to use - * - * @return The data buffer in the device memory - */ - virtual std::unique_ptr read(size_t offset, - size_t size, - rmm::cuda_stream_view stream) = 0; - - /** - * @brief Reads into existing device memory. - * - * @throws cudf::logic_error on cuFile error - * - * @param offset Number of bytes from the start - * @param size Number of bytes to read - * @param dst Address of the existing device memory - * @param stream CUDA stream to use - * - * @return The number of bytes read - */ - virtual size_t read(size_t offset, size_t size, uint8_t* dst, rmm::cuda_stream_view stream) = 0; - /** * @brief Asynchronously reads into existing device memory. * @@ -132,17 +103,6 @@ class cufile_input : public cufile_io_base { */ class cufile_output : public cufile_io_base { public: - /** - * @brief Writes the data from a device buffer into a file. - * - * @throws cudf::logic_error on cuFile error - * - * @param data Pointer to the buffer to be written into the output file - * @param offset Number of bytes from the start - * @param size Number of bytes to write - */ - virtual void write(void const* data, size_t offset, size_t size) = 0; - /** * @brief Asynchronously writes the data from a device buffer into a file. 
* @@ -203,12 +163,6 @@ class cufile_input_impl final : public cufile_input { public: cufile_input_impl(std::string const& filepath); - std::unique_ptr read(size_t offset, - size_t size, - rmm::cuda_stream_view stream) override; - - size_t read(size_t offset, size_t size, uint8_t* dst, rmm::cuda_stream_view stream) override; - std::future read_async(size_t offset, size_t size, uint8_t* dst, @@ -229,7 +183,6 @@ class cufile_output_impl final : public cufile_output { public: cufile_output_impl(std::string const& filepath); - void write(void const* data, size_t offset, size_t size) override; std::future write_async(void const* data, size_t offset, size_t size) override; private: @@ -241,18 +194,6 @@ class cufile_output_impl final : public cufile_output { class cufile_input_impl final : public cufile_input { public: - std::unique_ptr read(size_t offset, - size_t size, - rmm::cuda_stream_view stream) override - { - CUDF_FAIL("Only used to compile without cufile library, should not be called"); - } - - size_t read(size_t offset, size_t size, uint8_t* dst, rmm::cuda_stream_view stream) override - { - CUDF_FAIL("Only used to compile without cufile library, should not be called"); - } - std::future read_async(size_t offset, size_t size, uint8_t* dst, @@ -264,10 +205,6 @@ class cufile_input_impl final : public cufile_input { class cufile_output_impl final : public cufile_output { public: - void write(void const* data, size_t offset, size_t size) override - { - CUDF_FAIL("Only used to compile without cufile library, should not be called"); - } std::future write_async(void const* data, size_t offset, size_t size) override { CUDF_FAIL("Only used to compile without cufile library, should not be called"); diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index 5c73cf31428..30c7b6ec326 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -51,10 +51,10 @@ class hostdevice_vector { } explicit hostdevice_vector(size_t initial_size, size_t max_size, rmm::cuda_stream_view stream) - : num_elements(initial_size), max_elements(max_size) + : max_elements(max_size), num_elements(initial_size) { if (max_elements != 0) { - CUDF_CUDA_TRY(cudaMallocHost(&h_data, sizeof(T) * max_elements)); + CUDF_CUDA_TRY(cudaMallocHost(reinterpret_cast(&h_data), sizeof(T) * max_elements)); d_data.resize(sizeof(T) * max_elements, stream); } } @@ -62,7 +62,7 @@ class hostdevice_vector { ~hostdevice_vector() { if (max_elements != 0) { - auto const free_result = cudaFreeHost(h_data); + [[maybe_unused]] auto const free_result = cudaFreeHost(h_data); assert(free_result == cudaSuccess); } } @@ -93,6 +93,31 @@ class hostdevice_vector { return reinterpret_cast(d_data.data()) + offset; } + /** + * @brief Returns the specified element from device memory + * + * @note This function incurs a device to host memcpy and should be used sparingly. + * @note This function synchronizes `stream`. 
+ * + * @throws rmm::out_of_range exception if `element_index >= size()` + * + * @param element_index Index of the desired element + * @param stream The stream on which to perform the copy + * @return The value of the specified element + */ + [[nodiscard]] T element(std::size_t element_index, rmm::cuda_stream_view stream) const + { + CUDF_EXPECTS(element_index < size(), "Attempt to access out of bounds element."); + T value; + CUDF_CUDA_TRY(cudaMemcpyAsync(&value, + reinterpret_cast(d_data.data()) + element_index, + sizeof(value), + cudaMemcpyDefault, + stream.value())); + stream.synchronize(); + return value; + } + operator cudf::device_span() { return {device_ptr(), max_elements}; } operator cudf::device_span() const { return {device_ptr(), max_elements}; } diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index 9bf7e6a7a43..ae1561b422b 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -59,8 +59,8 @@ conditional_join(table_view const& left, // Inner and left semi joins return empty output because no matches can exist. case join_kind::INNER_JOIN: case join_kind::LEFT_SEMI_JOIN: - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); default: CUDF_FAIL("Invalid join kind."); break; } } else if (left_num_rows == 0) { @@ -70,12 +70,12 @@ conditional_join(table_view const& left, case join_kind::LEFT_ANTI_JOIN: case join_kind::INNER_JOIN: case join_kind::LEFT_SEMI_JOIN: - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); // Full joins need to return the trivial complement. case join_kind::FULL_JOIN: { auto ret_flipped = get_trivial_left_join_indices(right, stream); - return std::make_pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); + return std::pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); } default: CUDF_FAIL("Invalid join kind."); break; } @@ -139,8 +139,8 @@ conditional_join(table_view const& left, // all other cases (inner, left semi, and left anti joins) if we reach this // point we can safely return an empty result. if (join_size == 0) { - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); } rmm::device_scalar write_index(0, stream); @@ -176,7 +176,7 @@ conditional_join(table_view const& left, swap_tables); } - auto join_indices = std::make_pair(std::move(left_indices), std::move(right_indices)); + auto join_indices = std::pair(std::move(left_indices), std::move(right_indices)); // For full joins, get the indices in the right table that were not joined to // by any row in the left table. diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 086e1e49986..3e0e76de708 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -13,11 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include +#include "join_common_utils.cuh" #include #include +#include +#include #include +#include #include #include @@ -38,13 +41,67 @@ namespace cudf { namespace detail { - -std::pair, std::unique_ptr
> get_empty_joined_table( - table_view const& probe, table_view const& build) +namespace { +/** + * @brief Calculates the exact size of the join output produced when + * joining two tables together. + * + * @throw cudf::logic_error if JoinKind is not INNER_JOIN or LEFT_JOIN + * + * @tparam JoinKind The type of join to be performed + * + * @param build_table The right hand table + * @param probe_table The left hand table + * @param hash_table A hash table built on the build table that maps the index + * of every row to the hash value of that row. + * @param nulls_equal Flag to denote nulls are equal or not. + * @param stream CUDA stream used for device memory operations and kernel launches + * + * @return The exact size of the output of the join operation + */ +template +std::size_t compute_join_output_size(table_device_view build_table, + table_device_view probe_table, + cudf::detail::multimap_type const& hash_table, + bool const has_nulls, + cudf::null_equality const nulls_equal, + rmm::cuda_stream_view stream) { - std::unique_ptr
empty_probe = empty_like(probe); - std::unique_ptr
empty_build = empty_like(build); - return std::make_pair(std::move(empty_probe), std::move(empty_build)); + const size_type build_table_num_rows{build_table.num_rows()}; + const size_type probe_table_num_rows{probe_table.num_rows()}; + + // If the build table is empty, we know exactly how large the output + // will be for the different types of joins and can return immediately + if (0 == build_table_num_rows) { + switch (JoinKind) { + // Inner join with an empty table will have no output + case join_kind::INNER_JOIN: return 0; + + // Left join with an empty table will have an output of NULL rows + // equal to the number of rows in the probe table + case join_kind::LEFT_JOIN: return probe_table_num_rows; + + default: CUDF_FAIL("Unsupported join type"); + } + } + + auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; + pair_equality equality{probe_table, build_table, probe_nulls, nulls_equal}; + + row_hash hash_probe{probe_nulls, probe_table}; + auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); + make_pair_function pair_func{hash_probe, empty_key_sentinel}; + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + + std::size_t size; + if constexpr (JoinKind == join_kind::LEFT_JOIN) { + size = hash_table.pair_count_outer(iter, iter + probe_table_num_rows, equality, stream.value()); + } else { + size = hash_table.pair_count(iter, iter + probe_table_num_rows, equality, stream.value()); + } + + return size; } /** @@ -69,7 +126,7 @@ std::pair>, std::unique_ptr>> probe_join_hash_table(cudf::table_device_view build_table, cudf::table_device_view probe_table, - multimap_type const& hash_table, + cudf::detail::multimap_type const& hash_table, bool has_nulls, null_equality compare_nulls, std::optional output_size, @@ -88,8 +145,8 @@ probe_join_hash_table(cudf::table_device_view build_table, // If output size is zero, return immediately if (join_size == 0) { - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); } auto left_indices = std::make_unique>(join_size, stream, mr); @@ -125,7 +182,7 @@ probe_join_hash_table(cudf::table_device_view build_table, hash_table.pair_retrieve( iter, iter + probe_table_num_rows, out1_zip_begin, out2_zip_begin, equality, stream.value()); } - return std::make_pair(std::move(left_indices), std::move(right_indices)); + return std::pair(std::move(left_indices), std::move(right_indices)); } /** @@ -145,7 +202,7 @@ probe_join_hash_table(cudf::table_device_view build_table, */ std::size_t get_full_join_size(cudf::table_device_view build_table, cudf::table_device_view probe_table, - multimap_type const& hash_table, + cudf::detail::multimap_type const& hash_table, bool const has_nulls, null_equality const compare_nulls, rmm::cuda_stream_view stream, @@ -157,8 +214,6 @@ std::size_t get_full_join_size(cudf::table_device_view build_table, // If output size is zero, return immediately if (join_size == 0) { return join_size; } - rmm::device_scalar write_index(0, stream); - auto left_indices = std::make_unique>(join_size, stream, mr); auto right_indices = std::make_unique>(join_size, stream, mr); @@ -221,25 +276,12 @@ std::size_t get_full_join_size(cudf::table_device_view build_table, } return join_size + left_join_complement_size; } +} // namespace -std::unique_ptr combine_table_pair(std::unique_ptr&& left, - std::unique_ptr&& right) -{ - auto joined_cols = left->release(); - auto right_cols = 
right->release(); - joined_cols.insert(joined_cols.end(), - std::make_move_iterator(right_cols.begin()), - std::make_move_iterator(right_cols.end())); - return std::make_unique(std::move(joined_cols)); -} - -} // namespace detail - -hash_join::hash_join_impl::~hash_join_impl() = default; - -hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, - null_equality compare_nulls, - rmm::cuda_stream_view stream) +template +hash_join::hash_join(cudf::table_view const& build, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) : _is_empty{build.num_rows() == 0}, _nulls_equal{compare_nulls}, _hash_table{compute_hash_table_size(build.num_rows()), @@ -263,41 +305,45 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, cudf::detail::build_join_hash_table(_build, _hash_table, _nulls_equal, stream); } +template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::inner_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +hash_join::inner_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); return compute_hash_join(probe, output_size, stream, mr); } +template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::left_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +hash_join::left_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); return compute_hash_join(probe, output_size, stream, mr); } +template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::full_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +hash_join::full_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); return compute_hash_join(probe, output_size, stream, mr); } -std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const +template +std::size_t hash_join::inner_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream) const { CUDF_FUNC_RANGE(); @@ -320,8 +366,9 @@ std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const& p stream); } -std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const +template +std::size_t hash_join::left_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream) const { CUDF_FUNC_RANGE(); @@ -344,9 +391,10 @@ std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const& pr stream); } -std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +template +std::size_t hash_join::full_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); @@ -370,13 +418,51 @@ std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const& pr mr); } +template +template +std::pair>, + std::unique_ptr>> +hash_join::probe_join_indices(cudf::table_view const& probe_table, + 
std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + // Trivial left join case - exit early + if (_is_empty and JoinKind != cudf::detail::join_kind::INNER_JOIN) { + return get_trivial_left_join_indices(probe_table, stream, mr); + } + + CUDF_EXPECTS(!_is_empty, "Hash table of hash join is null."); + + auto build_table_ptr = cudf::table_device_view::create(_build, stream); + auto probe_table_ptr = cudf::table_device_view::create(probe_table, stream); + + auto join_indices = cudf::detail::probe_join_hash_table( + *build_table_ptr, + *probe_table_ptr, + _hash_table, + cudf::has_nulls(probe_table) | cudf::has_nulls(_build), + _nulls_equal, + output_size, + stream, + mr); + + if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN) { + auto complement_indices = detail::get_left_join_indices_complement( + join_indices.second, probe_table.num_rows(), _build.num_rows(), stream, mr); + join_indices = detail::concatenate_vector_pairs(join_indices, complement_indices, stream); + } + return join_indices; +} + +template template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::compute_hash_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +hash_join::compute_hash_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty"); CUDF_EXPECTS(probe.num_rows() < cudf::detail::MAX_JOIN_SIZE, @@ -390,8 +476,8 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const& probe, "Mismatch in number of columns to be joined on"); if (is_trivial_join(flattened_probe_table, _build, JoinKind)) { - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); } CUDF_EXPECTS(std::equal(std::cbegin(_build), @@ -403,41 +489,64 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const& probe, return probe_join_indices(flattened_probe_table, output_size, stream, mr); } +} // namespace detail + +hash_join::~hash_join() = default; + +hash_join::hash_join(cudf::table_view const& build, + null_equality compare_nulls, + rmm::cuda_stream_view stream) + : _impl{std::make_unique(build, compare_nulls, stream)} +{ +} -template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::probe_join_indices(cudf::table_view const& probe_table, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +hash_join::inner_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { - // Trivial left join case - exit early - if (_is_empty and JoinKind != cudf::detail::join_kind::INNER_JOIN) { - return get_trivial_left_join_indices(probe_table, stream, mr); - } + return _impl->inner_join(probe, output_size, stream, mr); +} - CUDF_EXPECTS(!_is_empty, "Hash table of hash join is null."); +std::pair>, + std::unique_ptr>> +hash_join::left_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + return _impl->left_join(probe, output_size, stream, mr); +} - auto build_table_ptr = cudf::table_device_view::create(_build, stream); - auto probe_table_ptr = 
cudf::table_device_view::create(probe_table, stream); +std::pair>, + std::unique_ptr>> +hash_join::full_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + return _impl->full_join(probe, output_size, stream, mr); +} - auto join_indices = cudf::detail::probe_join_hash_table( - *build_table_ptr, - *probe_table_ptr, - _hash_table, - cudf::has_nulls(probe_table) | cudf::has_nulls(_build), - _nulls_equal, - output_size, - stream, - mr); +std::size_t hash_join::inner_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream) const +{ + return _impl->inner_join_size(probe, stream); +} - if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN) { - auto complement_indices = detail::get_left_join_indices_complement( - join_indices.second, probe_table.num_rows(), _build.num_rows(), stream, mr); - join_indices = detail::concatenate_vector_pairs(join_indices, complement_indices, stream); - } - return join_indices; +std::size_t hash_join::left_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream) const +{ + return _impl->left_join_size(probe, stream); +} + +std::size_t hash_join::full_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + return _impl->full_join_size(probe, stream, mr); } } // namespace cudf diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh deleted file mode 100644 index e55de043372..00000000000 --- a/cpp/src/join/hash_join.cuh +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include - -namespace cudf { -namespace detail { - -/** - * @brief Remaps a hash value to a new value if it is equal to the specified sentinel value. - * - * @param hash The hash value to potentially remap - * @param sentinel The reserved value - */ -template -constexpr auto remap_sentinel_hash(H hash, S sentinel) -{ - // Arbitrarily choose hash - 1 - return (hash == sentinel) ? (hash - 1) : hash; -} - -/** - * @brief Device functor to create a pair of hash value and index for a given row. 
- */ -class make_pair_function { - public: - CUDF_HOST_DEVICE make_pair_function(row_hash const& hash, - hash_value_type const empty_key_sentinel) - : _hash{hash}, _empty_key_sentinel{empty_key_sentinel} - { - } - - __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept - { - // Compute the hash value of row `i` - auto row_hash_value = remap_sentinel_hash(_hash(i), _empty_key_sentinel); - return cuco::make_pair(row_hash_value, i); - } - - private: - row_hash _hash; - hash_value_type const _empty_key_sentinel; -}; - -/** - * @brief Calculates the exact size of the join output produced when - * joining two tables together. - * - * @throw cudf::logic_error if JoinKind is not INNER_JOIN or LEFT_JOIN - * - * @tparam JoinKind The type of join to be performed - * @tparam multimap_type The type of the hash table - * - * @param build_table The right hand table - * @param probe_table The left hand table - * @param hash_table A hash table built on the build table that maps the index - * of every row to the hash value of that row. - * @param nulls_equal Flag to denote nulls are equal or not. - * @param stream CUDA stream used for device memory operations and kernel launches - * - * @return The exact size of the output of the join operation - */ -template -std::size_t compute_join_output_size(table_device_view build_table, - table_device_view probe_table, - multimap_type const& hash_table, - bool const has_nulls, - cudf::null_equality const nulls_equal, - rmm::cuda_stream_view stream) -{ - const size_type build_table_num_rows{build_table.num_rows()}; - const size_type probe_table_num_rows{probe_table.num_rows()}; - - // If the build table is empty, we know exactly how large the output - // will be for the different types of joins and can return immediately - if (0 == build_table_num_rows) { - switch (JoinKind) { - // Inner join with an empty table will have no output - case join_kind::INNER_JOIN: return 0; - - // Left join with an empty table will have an output of NULL rows - // equal to the number of rows in the probe table - case join_kind::LEFT_JOIN: return probe_table_num_rows; - - default: CUDF_FAIL("Unsupported join type"); - } - } - - auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - pair_equality equality{probe_table, build_table, probe_nulls, nulls_equal}; - - row_hash hash_probe{probe_nulls, probe_table}; - auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); - make_pair_function pair_func{hash_probe, empty_key_sentinel}; - - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - - std::size_t size; - if constexpr (JoinKind == join_kind::LEFT_JOIN) { - size = hash_table.pair_count_outer(iter, iter + probe_table_num_rows, equality, stream.value()); - } else { - size = hash_table.pair_count(iter, iter + probe_table_num_rows, equality, stream.value()); - } - - return size; -} - -std::pair, std::unique_ptr
> get_empty_joined_table( - table_view const& probe, table_view const& build); - -std::unique_ptr combine_table_pair(std::unique_ptr&& left, - std::unique_ptr&& right); - -/** - * @brief Builds the hash table based on the given `build_table`. - * - * @tparam MultimapType The type of the hash table - * - * @param build Table of columns used to build join hash. - * @param hash_table Build hash table. - * @param nulls_equal Flag to denote nulls are equal or not. - * @param stream CUDA stream used for device memory operations and kernel launches. - * - */ -template -void build_join_hash_table(cudf::table_view const& build, - MultimapType& hash_table, - null_equality const nulls_equal, - rmm::cuda_stream_view stream) -{ - auto build_table_ptr = cudf::table_device_view::create(build, stream); - - CUDF_EXPECTS(0 != build_table_ptr->num_columns(), "Selected build dataset is empty"); - CUDF_EXPECTS(0 != build_table_ptr->num_rows(), "Build side table has no rows"); - - row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; - auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); - make_pair_function pair_func{hash_build, empty_key_sentinel}; - - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - - size_type const build_table_num_rows{build_table_ptr->num_rows()}; - if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { - hash_table.insert(iter, iter + build_table_num_rows, stream.value()); - } else { - thrust::counting_iterator stencil(0); - auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; - row_is_valid pred{static_cast(row_bitmask.data())}; - - // insert valid rows - hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); - } -} -} // namespace detail - -struct hash_join::hash_join_impl { - public: - hash_join_impl() = delete; - ~hash_join_impl(); - hash_join_impl(hash_join_impl const&) = delete; - hash_join_impl(hash_join_impl&&) = delete; - hash_join_impl& operator=(hash_join_impl const&) = delete; - hash_join_impl& operator=(hash_join_impl&&) = delete; - - private: - bool const _is_empty; - cudf::null_equality const _nulls_equal; - cudf::table_view _build; - std::vector> _created_null_columns; - cudf::structs::detail::flattened_table _flattened_build_table; - cudf::detail::multimap_type _hash_table; - - public: - /** - * @brief Constructor that internally builds the hash table based on the given `build` table - * - * @throw cudf::logic_error if the number of columns in `build` table is 0. - * @throw cudf::logic_error if the number of rows in `build` table exceeds MAX_JOIN_SIZE. - * - * @param build The build table, from which the hash table is built. - * @param compare_nulls Controls whether null join-key values should match or not. - * @param stream CUDA stream used for device memory operations and kernel launches. 
- */ - hash_join_impl(cudf::table_view const& build, - null_equality compare_nulls, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); - - std::pair>, - std::unique_ptr>> - inner_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - std::pair>, - std::unique_ptr>> - left_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - std::pair>, - std::unique_ptr>> - full_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - [[nodiscard]] std::size_t inner_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const; - - [[nodiscard]] std::size_t left_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const; - - std::size_t full_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - private: - template - std::pair>, - std::unique_ptr>> - compute_hash_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - /** - * @brief Probes the `_hash_table` built from `_build` for tuples in `probe_table`, - * and returns the output indices of `build_table` and `probe_table` as a combined table, - * i.e. if full join is specified as the join type then left join is called. Behavior - * is undefined if the provided `output_size` is smaller than the actual output size. - * - * @throw cudf::logic_error if hash table is null. - * - * @tparam JoinKind The type of join to be performed. - * - * @param probe_table Table of probe side columns to join. - * @param output_size Optional value which allows users to specify the exact output size. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned vectors. - * - * @return Join output indices vector pair. - */ - template - std::pair>, - std::unique_ptr>> - probe_join_indices(cudf::table_view const& probe_table, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; -}; - -} // namespace cudf diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu index 7a478ca2eb3..5c529c88d9d 100644 --- a/cpp/src/join/join.cu +++ b/cpp/src/join/join.cu @@ -13,8 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "join/hash_join.cuh" -#include "join/join_common_utils.hpp" +#include "join_common_utils.hpp" #include #include @@ -26,6 +25,26 @@ namespace cudf { namespace detail { +namespace { +std::pair, std::unique_ptr
<table>> get_empty_joined_table(
+  table_view const& probe, table_view const& build)
+{
+  std::unique_ptr<table> empty_probe = empty_like(probe);
+  std::unique_ptr<table> empty_build = empty_like(build);
+  return std::pair(std::move(empty_probe), std::move(empty_build));
+}
+
+std::unique_ptr<table> combine_table_pair(std::unique_ptr<table>&& left,
+                                          std::unique_ptr<table>&& right)
+{
+  auto joined_cols = left->release();
+  auto right_cols  = right->release();
+  joined_cols.insert(joined_cols.end(),
+                     std::make_move_iterator(right_cols.begin()),
+                     std::make_move_iterator(right_cols.end()));
+  return std::make_unique<table>(std::move(joined_cols));
+}
+}  // namespace

 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
           std::unique_ptr<rmm::device_uvector<size_type>>>
@@ -52,7 +71,7 @@ inner_join(table_view const& left_input,
   if (right.num_rows() > left.num_rows()) {
     cudf::hash_join hj_obj(left, compare_nulls, stream);
     auto [right_result, left_result] = hj_obj.inner_join(right, std::nullopt, stream, mr);
-    return std::make_pair(std::move(left_result), std::move(right_result));
+    return std::pair(std::move(left_result), std::move(right_result));
   } else {
     cudf::hash_join hj_obj(right, compare_nulls, stream);
     return hj_obj.inner_join(left, std::nullopt, stream, mr);
@@ -222,69 +241,8 @@ std::unique_ptr<table>
full_join(table_view const& left_input, mr); return combine_table_pair(std::move(left_result), std::move(right_result)); } - } // namespace detail -hash_join::~hash_join() = default; - -hash_join::hash_join(cudf::table_view const& build, - null_equality compare_nulls, - rmm::cuda_stream_view stream) - : impl{std::make_unique(build, compare_nulls, stream)} -{ -} - -std::pair>, - std::unique_ptr>> -hash_join::inner_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - return impl->inner_join(probe, output_size, stream, mr); -} - -std::pair>, - std::unique_ptr>> -hash_join::left_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - return impl->left_join(probe, output_size, stream, mr); -} - -std::pair>, - std::unique_ptr>> -hash_join::full_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - return impl->full_join(probe, output_size, stream, mr); -} - -std::size_t hash_join::inner_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const -{ - return impl->inner_join_size(probe, stream); -} - -std::size_t hash_join::left_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const -{ - return impl->left_join_size(probe, stream); -} - -std::size_t hash_join::full_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - return impl->full_join_size(probe, stream, mr); -} - -// external APIs - std::pair>, std::unique_ptr>> inner_join(table_view const& left, @@ -353,5 +311,4 @@ std::unique_ptr
full_join(table_view const& left, return detail::full_join( left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr); } - } // namespace cudf diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index b778f13b5e1..fdb63419c84 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,10 @@ */ #pragma once -#include +#include "join_common_utils.hpp" +#include +#include #include #include @@ -26,6 +28,41 @@ namespace cudf { namespace detail { +/** + * @brief Remaps a hash value to a new value if it is equal to the specified sentinel value. + * + * @param hash The hash value to potentially remap + * @param sentinel The reserved value + */ +template +constexpr auto remap_sentinel_hash(H hash, S sentinel) +{ + // Arbitrarily choose hash - 1 + return (hash == sentinel) ? (hash - 1) : hash; +} + +/** + * @brief Device functor to create a pair of hash value and index for a given row. + */ +class make_pair_function { + public: + CUDF_HOST_DEVICE make_pair_function(row_hash const& hash, + hash_value_type const empty_key_sentinel) + : _hash{hash}, _empty_key_sentinel{empty_key_sentinel} + { + } + + __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept + { + // Compute the hash value of row `i` + auto row_hash_value = remap_sentinel_hash(_hash(i), _empty_key_sentinel); + return cuco::make_pair(row_hash_value, i); + } + + private: + row_hash _hash; + hash_value_type const _empty_key_sentinel; +}; /** * @brief Device functor to determine if a row is valid. @@ -98,6 +135,47 @@ get_trivial_left_join_indices( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Builds the hash table based on the given `build_table`. + * + * @tparam MultimapType The type of the hash table + * + * @param build Table of columns used to build join hash. + * @param hash_table Build hash table. + * @param nulls_equal Flag to denote nulls are equal or not. + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * + */ +template +void build_join_hash_table(cudf::table_view const& build, + MultimapType& hash_table, + null_equality const nulls_equal, + rmm::cuda_stream_view stream) +{ + auto build_table_ptr = cudf::table_device_view::create(build, stream); + + CUDF_EXPECTS(0 != build_table_ptr->num_columns(), "Selected build dataset is empty"); + CUDF_EXPECTS(0 != build_table_ptr->num_rows(), "Build side table has no rows"); + + row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; + auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); + make_pair_function pair_func{hash_build, empty_key_sentinel}; + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + + size_type const build_table_num_rows{build_table_ptr->num_rows()}; + if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { + hash_table.insert(iter, iter + build_table_num_rows, stream.value()); + } else { + thrust::counting_iterator stencil(0); + auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; + row_is_valid pred{static_cast(row_bitmask.data())}; + + // insert valid rows + hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); + } +} + // Convenient alias for a pair of unique pointers to device uvectors. using VectorPair = std::pair>, std::unique_ptr>>; diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 526c22d1d5c..060e8bff6f8 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -15,8 +15,10 @@ */ #pragma once +#include #include #include +#include #include #include @@ -34,7 +36,6 @@ namespace cudf { namespace detail { constexpr size_type MAX_JOIN_SIZE{std::numeric_limits::max()}; -constexpr int DEFAULT_JOIN_CG_SIZE = 2; constexpr int DEFAULT_JOIN_BLOCK_SIZE = 128; constexpr int DEFAULT_JOIN_CACHE_SIZE = 128; constexpr size_type JoinNoneValue = std::numeric_limits::min(); @@ -45,12 +46,7 @@ using hash_type = cuco::detail::MurmurHash3_32; using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; -using multimap_type = - cuco::static_multimap>; +using multimap_type = cudf::hash_join::impl_type::map_type; // Multimap type used for mixed joins. TODO: This is a temporary alias used // until the mixed joins are converted to using CGs properly. Right now it's @@ -68,9 +64,6 @@ using row_hash = cudf::row_hasher; using row_equality = cudf::row_equality_comparator; -enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN }; - bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type); - } // namespace detail } // namespace cudf diff --git a/cpp/src/join/join_utils.cu b/cpp/src/join/join_utils.cu index 151db830962..7fa6642b19f 100644 --- a/cpp/src/join/join_utils.cu +++ b/cpp/src/join/join_utils.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include +#include "join_common_utils.cuh" #include @@ -61,7 +61,7 @@ get_trivial_left_join_indices(table_view const& left, std::make_unique>(left.num_rows(), stream, mr); thrust::uninitialized_fill( rmm::exec_policy(stream), right_indices->begin(), right_indices->end(), JoinNoneValue); - return std::make_pair(std::move(left_indices), std::move(right_indices)); + return std::pair(std::move(left_indices), std::move(right_indices)); } VectorPair concatenate_vector_pairs(VectorPair& a, VectorPair& b, rmm::cuda_stream_view stream) @@ -151,7 +151,7 @@ get_left_join_indices_complement(std::unique_ptr> left_invalid_indices->end(), JoinNoneValue); - return std::make_pair(std::move(left_invalid_indices), std::move(right_indices_complement)); + return std::pair(std::move(left_invalid_indices), std::move(right_indices_complement)); } } // namespace detail diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index f9cbb2b5441..27ee77e3edd 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -14,6 +14,10 @@ * limitations under the License. */ +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_kernels.cuh" + #include #include #include @@ -23,12 +27,9 @@ #include #include #include -#include -#include -#include -#include #include +#include #include #include @@ -81,8 +82,8 @@ mixed_join( case join_kind::FULL_JOIN: return get_trivial_left_join_indices(left_conditional, stream); // Inner joins return empty output because no matches can exist. case join_kind::INNER_JOIN: - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); default: CUDF_FAIL("Invalid join kind."); break; } } else if (left_num_rows == 0) { @@ -90,12 +91,12 @@ mixed_join( // Left and inner joins all return empty sets. case join_kind::LEFT_JOIN: case join_kind::INNER_JOIN: - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); // Full joins need to return the trivial complement. case join_kind::FULL_JOIN: { auto ret_flipped = get_trivial_left_join_indices(right_conditional, stream); - return std::make_pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); + return std::pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); } default: CUDF_FAIL("Invalid join kind."); break; } @@ -208,8 +209,8 @@ mixed_join( // all other cases (inner, left semi, and left anti joins) if we reach this // point we can safely return an empty result. if (join_size == 0) { - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); } // Given the number of matches per row, we need to compute the offsets for insertion. @@ -258,7 +259,7 @@ mixed_join( swap_tables); } - auto join_indices = std::make_pair(std::move(left_indices), std::move(right_indices)); + auto join_indices = std::pair(std::move(left_indices), std::move(right_indices)); // For full joins, get the indices in the right table that were not joined to // by any row in the left table. 
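To make the hash-table build flow above concrete, here is a minimal host-side sketch (not part of this patch; everything except remap_sentinel_hash is a hypothetical name for illustration) of the sentinel remapping that build_join_hash_table relies on: each row's hash is nudged away from the hash table's reserved empty-key sentinel before being paired with its row index, so a valid row can never be mistaken for an empty slot.

// Compilable illustration of the sentinel remapping used when building the
// join hash table. The sentinel value and hash width are assumptions for
// this sketch; cuDF takes them from the cuco static_multimap.
#include <cassert>
#include <cstdint>
#include <utility>

template <typename H, typename S>
constexpr auto remap_sentinel_hash(H hash, S sentinel)
{
  // Arbitrarily choose hash - 1 (same convention as the helper above)
  return (hash == sentinel) ? (hash - 1) : hash;
}

int main()
{
  constexpr std::uint32_t empty_key_sentinel = 0xFFFFFFFFu;

  // A colliding hash is remapped; every other hash passes through unchanged.
  assert(remap_sentinel_hash(empty_key_sentinel, empty_key_sentinel) == 0xFFFFFFFEu);
  assert(remap_sentinel_hash(42u, empty_key_sentinel) == 42u);

  // The (hash, row index) pair that a make_pair_function-style functor emits:
  auto pair = std::pair{remap_sentinel_hash(42u, empty_key_sentinel), 7};
  assert(pair.first == 42u && pair.second == 7);
  return 0;
}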
diff --git a/cpp/src/join/mixed_join_kernel.cu b/cpp/src/join/mixed_join_kernel.cu new file mode 100644 index 00000000000..f8912f0c7bd --- /dev/null +++ b/cpp/src/join/mixed_join_kernel.cu @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mixed_join_kernel.cuh" + +namespace cudf { +namespace detail { + +template __global__ void mixed_join( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels.cu b/cpp/src/join/mixed_join_kernel.cuh similarity index 80% rename from cpp/src/join/mixed_join_kernels.cu rename to cpp/src/join/mixed_join_kernel.cuh index efaea841e45..38955ef4667 100644 --- a/cpp/src/join/mixed_join_kernels.cu +++ b/cpp/src/join/mixed_join_kernel.cuh @@ -14,10 +14,11 @@ * limitations under the License. */ -#include -#include -#include -#include +#pragma once + +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_common_utils.cuh" #include #include @@ -32,6 +33,7 @@ namespace cudf { namespace detail { + namespace cg = cooperative_groups; template @@ -107,34 +109,6 @@ __launch_bounds__(block_size) __global__ } } -template __global__ void mixed_join( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - size_type* join_output_l, - size_type* join_output_r, - cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables); - -template __global__ void mixed_join( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - size_type* join_output_l, - size_type* join_output_r, - cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables); - } // namespace detail } // namespace cudf diff --git a/cpp/src/join/mixed_join_kernel_nulls.cu b/cpp/src/join/mixed_join_kernel_nulls.cu new file mode 100644 index 00000000000..a911c62b349 --- /dev/null +++ b/cpp/src/join/mixed_join_kernel_nulls.cu @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mixed_join_kernel.cuh" + +namespace cudf { +namespace detail { + +template __global__ void mixed_join( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index 60cc74991ef..13a1f1a0ce2 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -14,8 +14,14 @@ * limitations under the License. */ +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_kernels_semi.cuh" + #include #include +#include +#include #include #include #include @@ -23,12 +29,9 @@ #include #include #include -#include -#include -#include -#include #include +#include #include #include diff --git a/cpp/src/join/mixed_join_size_kernel.cu b/cpp/src/join/mixed_join_size_kernel.cu new file mode 100644 index 00000000000..cf8236e2be2 --- /dev/null +++ b/cpp/src/join/mixed_join_size_kernel.cu @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mixed_join_size_kernel.cuh" + +namespace cudf { +namespace detail { + +template __global__ void compute_mixed_join_output_size( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/join/mixed_join_size_kernels.cu b/cpp/src/join/mixed_join_size_kernel.cuh similarity index 77% rename from cpp/src/join/mixed_join_size_kernels.cu rename to cpp/src/join/mixed_join_size_kernel.cuh index 22c71bfc33a..ce70f7f18ee 100644 --- a/cpp/src/join/mixed_join_size_kernels.cu +++ b/cpp/src/join/mixed_join_size_kernel.cuh @@ -14,10 +14,9 @@ * limitations under the License. */ -#include -#include -#include -#include +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_common_utils.cuh" #include #include @@ -99,32 +98,5 @@ __launch_bounds__(block_size) __global__ void compute_mixed_join_output_size( if (threadIdx.x == 0) atomicAdd(output_size, block_counter); } -template __global__ void compute_mixed_join_output_size( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - std::size_t* output_size, - cudf::device_span matches_per_row); - -template __global__ void compute_mixed_join_output_size( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - std::size_t* output_size, - cudf::device_span matches_per_row); - } // namespace detail - } // namespace cudf diff --git a/cpp/src/join/mixed_join_size_kernel_nulls.cu b/cpp/src/join/mixed_join_size_kernel_nulls.cu new file mode 100644 index 00000000000..f05d674b3b5 --- /dev/null +++ b/cpp/src/join/mixed_join_size_kernel_nulls.cu @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mixed_join_size_kernel.cuh" + +namespace cudf { +namespace detail { + +template __global__ void compute_mixed_join_output_size( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index 9e1aa27a4e7..687e553fefd 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -45,7 +45,7 @@ namespace { /** * @brief Device functor to create a pair of hash value and index for a given row. */ -struct make_pair_function { +struct make_pair_fn { __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept { // The value is irrelevant since we only ever use the hash map to check for @@ -101,7 +101,7 @@ std::unique_ptr> left_semi_anti_join( auto const right_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(right_flattened_keys)}; row_hash const hash_build{right_nulls, *right_rows_d}; row_equality equality_build{right_nulls, *right_rows_d, *right_rows_d, compare_nulls}; - make_pair_function pair_func_build{}; + make_pair_fn pair_func_build{}; auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func_build); diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu index fecdec0b1b2..f4d8e7678b1 100644 --- a/cpp/src/lists/combine/concatenate_list_elements.cu +++ b/cpp/src/lists/combine/concatenate_list_elements.cu @@ -81,7 +81,7 @@ std::unique_ptr concatenate_lists_ignore_null(column_view const& input, auto [null_mask, null_count] = [&] { if (!build_null_mask) - return std::make_pair(cudf::detail::copy_bitmask(input, stream, mr), input.null_count()); + return std::pair(cudf::detail::copy_bitmask(input, stream, mr), input.null_count()); // The output row will be null only if all lists on the input row are null. auto const lists_dv_ptr = column_device_view::create(lists_column_view(input).child(), stream); diff --git a/cpp/src/lists/copying/scatter_helper.cu b/cpp/src/lists/copying/scatter_helper.cu index adc1b95a9e6..38f738b4035 100644 --- a/cpp/src/lists/copying/scatter_helper.cu +++ b/cpp/src/lists/copying/scatter_helper.cu @@ -21,8 +21,7 @@ #include #include #include -#include -#include +#include #include #include @@ -176,7 +175,7 @@ struct list_child_constructor { source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() ? 
construct_child_nullmask( list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); + : std::pair(rmm::device_buffer{}, 0); auto child_column = cudf::make_fixed_width_column(source_lists_column_view.child().type(), num_child_rows, @@ -233,6 +232,8 @@ struct list_child_constructor { auto string_views = rmm::device_uvector(num_child_rows, stream); + auto const null_string_view = string_view{nullptr, 0}; // placeholder for factory function + thrust::transform( rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -242,7 +243,8 @@ struct list_child_constructor { offset_size = list_offsets.size(), d_list_vector = list_vector.begin(), source_lists, - target_lists] __device__(auto index) { + target_lists, + null_string_view] __device__(auto index) { auto const list_index_iter = thrust::upper_bound(thrust::seq, offset_begin, offset_begin + offset_size, index); auto const list_index = @@ -253,39 +255,18 @@ struct list_child_constructor { auto lists_column = actual_list_row.get_column(); auto lists_offsets_ptr = lists_column.offsets().template data(); auto child_strings_column = lists_column.child(); - auto string_offsets_ptr = - child_strings_column.child(cudf::strings_column_view::offsets_column_index) - .template data(); - auto string_chars_ptr = - child_strings_column.child(cudf::strings_column_view::chars_column_index) - .template data(); - - auto strings_offset = lists_offsets_ptr[row_index] + intra_index; - auto char_offset = string_offsets_ptr[strings_offset]; - auto char_ptr = string_chars_ptr + char_offset; - auto string_size = - string_offsets_ptr[strings_offset + 1] - string_offsets_ptr[strings_offset]; - return string_view{char_ptr, string_size}; + auto strings_offset = lists_offsets_ptr[row_index] + intra_index; + + if (child_strings_column.is_null(strings_offset)) { return null_string_view; } + auto const d_str = child_strings_column.template element(strings_offset); + // ensure a string from an all-empty column is not mapped to the null placeholder + auto const empty_string_view = string_view{}; + return d_str.empty() ? empty_string_view : d_str; }); // string_views should now have been populated with source and target references. - - auto string_offsets = cudf::strings::detail::child_offsets_from_string_iterator( - string_views.begin(), string_views.size(), stream, mr); - - auto string_chars = cudf::strings::detail::child_chars_from_string_vector( - string_views, string_offsets->view(), stream, mr); - auto child_null_mask = - source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() - ? construct_child_nullmask( - list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); - - return cudf::make_strings_column(num_child_rows, - std::move(string_offsets), - std::move(string_chars), - child_null_mask.second, // Null count. - std::move(child_null_mask.first)); + auto sv_span = cudf::device_span(string_views); + return cudf::make_strings_column(sv_span, null_string_view, stream, mr); } /** @@ -372,7 +353,7 @@ struct list_child_constructor { source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() ? 
construct_child_nullmask( list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); + : std::pair(rmm::device_buffer{}, 0); return cudf::make_lists_column(num_child_rows, std::move(child_offsets), @@ -468,7 +449,7 @@ struct list_child_constructor { source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() ? construct_child_nullmask( list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); + : std::pair(rmm::device_buffer{}, 0); return cudf::make_structs_column(num_child_rows, std::move(child_columns), diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 01a94457b69..9c94a6220d6 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,6 @@ #include #include -#include "cudf/utilities/traits.hpp" #include #include diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 43686b7d257..0371065a2e5 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -595,8 +595,7 @@ std::pair, std::vector> hash_partition_table( } stream.synchronize(); // Async D2H copy must finish before returning host vec - return std::make_pair(std::make_unique
(std::move(output_cols)),
-                        std::move(partition_offsets));
+  return std::pair(std::make_unique<table>
(std::move(output_cols)), std::move(partition_offsets)); } else { // Compute a scatter map from input to output such that the output rows are // sorted by partition number @@ -613,7 +612,7 @@ std::pair, std::vector> hash_partition_table( input, row_partition_numbers.begin(), row_partition_numbers.end(), input, false, stream, mr); stream.synchronize(); // Async D2H copy must finish before returning host vec - return std::make_pair(std::move(output), std::move(partition_offsets)); + return std::pair(std::move(output), std::move(partition_offsets)); } } @@ -700,7 +699,7 @@ struct dispatch_map_type { auto scattered = cudf::detail::scatter(t, scatter_map.begin(), scatter_map.end(), t, false, stream, mr); - return std::make_pair(std::move(scattered), std::move(partition_offsets)); + return std::pair(std::move(scattered), std::move(partition_offsets)); } template @@ -728,7 +727,7 @@ std::pair, std::vector> hash_partition( // Return empty result if there are no partitions or nothing to hash if (num_partitions <= 0 || input.num_rows() == 0 || table_to_hash.num_columns() == 0) { - return std::make_pair(empty_like(input), std::vector{}); + return std::pair(empty_like(input), std::vector{}); } if (has_nulls(table_to_hash)) { @@ -753,7 +752,7 @@ std::pair, std::vector> partition( CUDF_EXPECTS(not partition_map.has_nulls(), "Unexpected null values in partition_map."); if (num_partitions == 0 or t.num_rows() == 0) { - return std::make_pair(empty_like(t), std::vector{}); + return std::pair(empty_like(t), std::vector{}); } return cudf::type_dispatcher( @@ -779,10 +778,10 @@ std::pair, std::vector> hash_partition( if (!is_numeric(input.column(column_id).type())) CUDF_FAIL("IdentityHash does not support this data type"); } - return detail::local::hash_partition( + return detail::local::hash_partition( input, columns_to_hash, num_partitions, seed, stream, mr); case (hash_id::HASH_MURMUR3): - return detail::local::hash_partition( + return detail::local::hash_partition( input, columns_to_hash, num_partitions, seed, stream, mr); default: CUDF_FAIL("Unsupported hash function in hash_partition"); } diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu index 193bb5a4353..9cfad602db0 100644 --- a/cpp/src/partitioning/round_robin.cu +++ b/cpp/src/partitioning/round_robin.cu @@ -104,8 +104,8 @@ std::pair, std::vector> degenerate stream, mr); - return std::make_pair(std::move(uniq_tbl), - cudf::detail::make_std_vector_sync(partition_offsets, stream)); + return std::pair(std::move(uniq_tbl), + cudf::detail::make_std_vector_sync(partition_offsets, stream)); } else { //( num_partitions > nrows ) rmm::device_uvector d_row_indices(nrows, stream); @@ -140,8 +140,8 @@ std::pair, std::vector> degenerate nedges_iter_begin + num_partitions, partition_offsets.begin()); - return std::make_pair(std::move(uniq_tbl), - cudf::detail::make_std_vector_sync(partition_offsets, stream)); + return std::pair(std::move(uniq_tbl), + cudf::detail::make_std_vector_sync(partition_offsets, stream)); } } } // namespace @@ -230,7 +230,7 @@ std::pair, std::vector> round_robin_part auto uniq_tbl = cudf::detail::gather( input, iter_begin, iter_begin + nrows, cudf::out_of_bounds_policy::DONT_CHECK, stream, mr); - auto ret_pair = std::make_pair(std::move(uniq_tbl), std::vector(num_partitions)); + auto ret_pair = std::pair(std::move(uniq_tbl), std::vector(num_partitions)); // this has the effect of rotating the set of partition sizes // right by start_partition positions: diff --git a/cpp/src/quantiles/quantile.cu 
b/cpp/src/quantiles/quantile.cu index a71fc862bf3..f38d0a921b7 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -113,10 +113,7 @@ struct quantile_functor { ordered_indices, [input = *d_input] __device__(size_type idx) { return input.is_valid_nocheck(idx); }); - rmm::device_buffer mask; - size_type null_count; - - std::tie(mask, null_count) = valid_if( + auto [mask, null_count] = valid_if( q_device.begin(), q_device.end(), [sorted_validity, interp = interp, size = size] __device__(double q) { diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index 521f8e2d06f..0ababbf0a3d 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -102,16 +102,15 @@ std::unique_ptr inclusive_rank_scan(column_view const& order_by, mr); } -std::unique_ptr inclusive_percent_rank_scan(column_view const& order_by, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr inclusive_one_normalized_percent_rank_scan( + column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { auto const rank_column = inclusive_rank_scan(order_by, stream, rmm::mr::get_current_device_resource()); auto const rank_view = rank_column->view(); - // Result type for PERCENT_RANK is independent of input type. - using result_type = cudf::detail::target_type_t; + // Result type for min 0-index percent rank is independent of input type. + using result_type = double; auto percent_rank_result = cudf::make_fixed_width_column( data_type{type_to_id()}, rank_view.size(), mask_state::UNALLOCATED, stream, mr); diff --git a/cpp/src/reductions/scan/scan.cpp b/cpp/src/reductions/scan/scan.cpp index 52aaad5ddcf..b678b9441a5 100644 --- a/cpp/src/reductions/scan/scan.cpp +++ b/cpp/src/reductions/scan/scan.cpp @@ -35,17 +35,17 @@ std::unique_ptr scan(column_view const& input, if (agg->kind == aggregation::RANK) { CUDF_EXPECTS(inclusive == scan_type::INCLUSIVE, "Rank aggregation operator requires an inclusive scan"); - return inclusive_rank_scan(input, rmm::cuda_stream_default, mr); - } - if (agg->kind == aggregation::DENSE_RANK) { - CUDF_EXPECTS(inclusive == scan_type::INCLUSIVE, - "Dense rank aggregation operator requires an inclusive scan"); - return inclusive_dense_rank_scan(input, rmm::cuda_stream_default, mr); - } - if (agg->kind == aggregation::PERCENT_RANK) { - CUDF_EXPECTS(inclusive == scan_type::INCLUSIVE, - "Percent rank aggregation operator requires an inclusive scan"); - return inclusive_percent_rank_scan(input, rmm::cuda_stream_default, mr); + auto const& rank_agg = dynamic_cast(*agg); + if (rank_agg._method == rank_method::MIN) { + if (rank_agg._percentage == rank_percentage::NONE) { + return inclusive_rank_scan(input, rmm::cuda_stream_default, mr); + } else if (rank_agg._percentage == rank_percentage::ONE_NORMALIZED) { + return inclusive_one_normalized_percent_rank_scan(input, rmm::cuda_stream_default, mr); + } + } else if (rank_agg._method == rank_method::DENSE) { + return inclusive_dense_rank_scan(input, rmm::cuda_stream_default, mr); + } + CUDF_FAIL("Unsupported rank aggregation method for inclusive scan"); } return inclusive == scan_type::EXCLUSIVE diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index 8b696854c25..73b224b0c99 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -76,7 +76,7 @@ std::pair, std::unique_ptr> form_offsets_and_cha cudf::detail::get_value(offsets_column->view(), strings_count, stream); auto chars_column 
= cudf::strings::detail::create_chars_child_column(bytes, stream, mr); - return std::make_pair(std::move(offsets_column), std::move(chars_column)); + return std::pair(std::move(offsets_column), std::move(chars_column)); } template diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 9954cb4a299..d9e858e8e40 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -258,10 +258,7 @@ struct interleave_columns_impl()>> { func_value, func_validity); - rmm::device_buffer mask; - size_type null_count; - - std::tie(mask, null_count) = valid_if(index_begin, index_end, func_validity, stream, mr); + auto [mask, null_count] = valid_if(index_begin, index_end, func_validity, stream, mr); output->set_null_mask(std::move(mask), null_count); diff --git a/cpp/src/rolling/rolling_collect_list.cuh b/cpp/src/rolling/rolling_collect_list.cuh index 94703e320d0..13de4693e54 100644 --- a/cpp/src/rolling/rolling_collect_list.cuh +++ b/cpp/src/rolling/rolling_collect_list.cuh @@ -207,9 +207,7 @@ std::unique_ptr rolling_collect_list(column_view const& input, stream, mr); - rmm::device_buffer null_mask; - size_type null_count; - std::tie(null_mask, null_count) = valid_if( + auto [null_mask, null_count] = valid_if( thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.size()), [preceding_begin, following_begin, min_periods] __device__(auto i) { diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index 70a6252e9b3..9473bed963e 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -1086,7 +1086,7 @@ struct dispatch_from_timestamps_fn { thrust::make_counting_iterator(0), d_timestamps.size(), pfn); - return std::make_pair(std::move(offsets_column), std::move(chars_column)); + return std::pair(std::move(offsets_column), std::move(chars_column)); } template diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index b8a10a00f5b..89c00c63bb5 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -67,8 +67,8 @@ __device__ inline double stod(string_view const& d_str) // special strings: NaN, Inf if ((in_ptr < end) && *in_ptr > '9') { auto const inf_nan = string_view(in_ptr, static_cast(thrust::distance(in_ptr, end))); - if (string::is_nan_str(inf_nan)) return std::numeric_limits::quiet_NaN(); - if (string::is_inf_str(inf_nan)) return sign * std::numeric_limits::infinity(); + if (is_nan_str(inf_nan)) return std::numeric_limits::quiet_NaN(); + if (is_inf_str(inf_nan)) return sign * std::numeric_limits::infinity(); } // Parse and store the mantissa as much as we can, @@ -124,16 +124,27 @@ __device__ inline double stod(string_view const& d_str) exp_ten *= exp_sign; exp_ten += exp_off; exp_ten += num_digits - 1; - if (exp_ten > std::numeric_limits::max_exponent10) + if (exp_ten > std::numeric_limits::max_exponent10) { return sign > 0 ? 
std::numeric_limits::infinity() : -std::numeric_limits::infinity(); - else if (exp_ten < std::numeric_limits::min_exponent10) - return double{0}; + } + + double base = sign * static_cast(digits); exp_ten += 1 - num_digits; - // exp10() is faster than pow(10.0,exp_ten) + // If 10^exp_ten would result in a subnormal value, the base and + // exponent should be adjusted so that 10^exp_ten is a normal value + auto const subnormal_shift = std::numeric_limits::min_exponent10 - exp_ten; + if (subnormal_shift > 0) { + // Handle subnormal values. Ensure that both base and exponent are + // normal values before computing their product. + base = base / exp10(static_cast(num_digits - 1 + subnormal_shift)); + exp_ten += num_digits - 1; // adjust exponent + auto const exponent = exp10(static_cast(exp_ten + subnormal_shift)); + return base * exponent; + } + double const exponent = exp10(static_cast(std::abs(exp_ten))); - double const base = sign * static_cast(digits); return exp_ten < 0 ? base / exponent : base * exponent; } @@ -567,7 +578,7 @@ std::unique_ptr is_float( d_results, [d_column] __device__(size_type idx) { if (d_column.is_null(idx)) return false; - return string::is_float(d_column.element(idx)); + return strings::is_float(d_column.element(idx)); }); results->set_null_count(strings.null_count()); return results; diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu index 95ddf1822a7..75c2f851bab 100644 --- a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -150,14 +150,13 @@ std::unique_ptr is_integer( d_column->pair_begin(), d_column->pair_end(), d_results, - [] __device__(auto const& p) { return p.second ? string::is_integer(p.first) : false; }); + [] __device__(auto const& p) { return p.second ? strings::is_integer(p.first) : false; }); } else { - thrust::transform( - rmm::exec_policy(stream), - d_column->pair_begin(), - d_column->pair_end(), - d_results, - [] __device__(auto const& p) { return p.second ? string::is_integer(p.first) : false; }); + thrust::transform(rmm::exec_policy(stream), + d_column->pair_begin(), + d_column->pair_end(), + d_results, + [] __device__(auto const& p) { return strings::is_integer(p.first); }); } // Calling mutable_view() on a column invalidates it's null count so we need to set it back diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index fedb8d38a08..0ab7ef5ff2b 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -72,9 +72,7 @@ auto create_strings_device_views(host_span views, rmm::cuda_s { CUDF_FUNC_RANGE(); // Assemble contiguous array of device views - std::unique_ptr device_view_owners; - column_device_view* device_views_ptr; - std::tie(device_view_owners, device_views_ptr) = + auto [device_view_owners, device_views_ptr] = contiguous_copy_column_device_views(views, stream); // Compute the partition offsets and size of offset column diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index 30e8770c3c2..995b6223ddc 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -670,8 +670,8 @@ std::pair>, int> build_comma auto const is_empty = h_operators.size() == 1 && h_operators[0].type == path_operator_type::END; return is_empty - ? std::make_pair(thrust::nullopt, 0) - : std::make_pair( + ? 
std::pair(thrust::nullopt, 0) + : std::pair( thrust::make_optional(cudf::detail::make_device_uvector_sync(h_operators, stream)), max_stack_depth); } diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp index 6f36658523b..829230d0842 100644 --- a/cpp/src/strings/regex/regcomp.cpp +++ b/cpp/src/strings/regex/regcomp.cpp @@ -16,6 +16,7 @@ #include +#include #include #include @@ -58,6 +59,37 @@ const std::array escapable_chars{ {'.', '-', '+', '*', '\\', '?', '^', '$', '|', '{', '}', '(', ')', '[', ']', '<', '>', '"', '~', '\'', '`', '_', '@', '=', ';', ':', '!', '#', '%', '&', ',', '/', ' '}}; +/** + * @brief Converts UTF-8 string into fixed-width 32-bit character vector. + * + * No character conversion occurs. + * Each UTF-8 character is promoted into a 32-bit value. + * The last entry in the returned vector will be a 0 value. + * The fixed-width vector makes it easier to compile and faster to execute. + * + * @param pattern Regular expression encoded with UTF-8. + * @return Fixed-width 32-bit character vector. + */ +std::vector string_to_char32_vector(std::string_view pattern) +{ + size_type size = static_cast(pattern.size()); + size_type count = std::count_if(pattern.cbegin(), pattern.cend(), [](char ch) { + return is_begin_utf8_char(static_cast(ch)); + }); + std::vector result(count + 1); + char32_t* output_ptr = result.data(); + const char* input_ptr = pattern.data(); + for (size_type idx = 0; idx < size; ++idx) { + char_utf8 output_character = 0; + size_type ch_width = to_char_utf8(input_ptr, output_character); + input_ptr += ch_width; + idx += ch_width - 1; + *output_ptr++ = output_character; + } + result[count] = 0; // last entry set to 0 + return result; +} + } // namespace int32_t reprog::add_inst(int32_t t) @@ -838,10 +870,11 @@ class regex_compiler { }; // Convert pattern into program -reprog reprog::create_from(const char32_t* pattern, regex_flags const flags) +reprog reprog::create_from(std::string_view pattern, regex_flags const flags) { reprog rtn; - regex_compiler compiler(pattern, flags, rtn); + auto pattern32 = string_to_char32_vector(pattern); + regex_compiler compiler(pattern32.data(), flags, rtn); // for debugging, it can be helpful to call rtn.print(flags) here to dump // out the instructions that have been created from the given pattern return rtn; diff --git a/cpp/src/strings/regex/regcomp.h b/cpp/src/strings/regex/regcomp.h index 18735d0f980..798b43830b4 100644 --- a/cpp/src/strings/regex/regcomp.h +++ b/cpp/src/strings/regex/regcomp.h @@ -92,7 +92,7 @@ class reprog { * @brief Parses the given regex pattern and compiles * into a list of chained instructions. */ - static reprog create_from(const char32_t* pattern, regex_flags const flags); + static reprog create_from(std::string_view pattern, regex_flags const flags); int32_t add_inst(int32_t type); int32_t add_inst(reinst inst); diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh index b172ceae2a6..bcdd15bceda 100644 --- a/cpp/src/strings/regex/regex.cuh +++ b/cpp/src/strings/regex/regex.cuh @@ -25,7 +25,6 @@ #include #include -#include #include namespace cudf { @@ -35,9 +34,7 @@ class string_view; namespace strings { namespace detail { -struct reljunk; -struct reinst; -class reprog; +struct relist; using match_pair = thrust::pair; using match_result = thrust::optional; @@ -65,19 +62,18 @@ constexpr int32_t RX_LARGE_INSTS = (RX_STACK_LARGE / 11); * * This class holds the unique data for any regex CCLASS instruction. 
*/ -class reclass_device { - public: +struct alignas(16) reclass_device { int32_t builtins{}; int32_t count{}; - char32_t* literals{}; + char32_t const* literals{}; - __device__ bool is_match(char32_t ch, const uint8_t* flags); + __device__ inline bool is_match(char32_t const ch, uint8_t const* flags) const; }; /** * @brief Regex program of instructions/data for a specific regex pattern. * - * Once create, this find/extract methods are used to evaluating the regex instructions + * Once created, the find/extract methods are used to evaluate the regex instructions * against a single string. */ class reprog_device { @@ -132,15 +128,7 @@ class reprog_device { /** * @brief Returns the number of regex instructions. */ - [[nodiscard]] __host__ __device__ int32_t insts_counts() const { return _insts_count; } - - /** - * @brief Returns true if this is an empty program. - */ - [[nodiscard]] __device__ bool is_empty() const - { - return insts_counts() == 0 || get_inst(0)->type == END; - } + [[nodiscard]] CUDF_HOST_DEVICE int32_t insts_counts() const { return _insts_count; } /** * @brief Returns the number of regex groups found in the expression. @@ -151,19 +139,9 @@ class reprog_device { } /** - * @brief Returns the regex instruction object for a given index. - */ - [[nodiscard]] __device__ inline reinst* get_inst(int32_t idx) const; - - /** - * @brief Returns the regex class object for a given index. - */ - [[nodiscard]] __device__ inline reclass_device get_class(int32_t idx) const; - - /** - * @brief Returns the start-instruction-ids vector. + * @brief Returns true if this is an empty program. */ - [[nodiscard]] __device__ inline int32_t* startinst_ids() const; + [[nodiscard]] __device__ inline bool is_empty() const; /** * @brief Does a find evaluation using the compiled expression on the given string. @@ -180,9 +158,9 @@ class reprog_device { */ template __device__ inline int32_t find(int32_t idx, - string_view const& d_str, - int32_t& begin, - int32_t& end); + string_view const d_str, + cudf::size_type& begin, + cudf::size_type& end) const; /** * @brief Does an extract evaluation using the compiled expression on the given string. @@ -192,8 +170,8 @@ class reprog_device { * the matched section. * * @tparam stack_size One of the `RX_STACK_` values based on the `insts_count`. - * @param idx The string index used for mapping the state memory for this string in global memory - * (if necessary). + * @param idx The string index used for mapping the state memory for this string in global + * memory (if necessary). * @param d_str The string to search. * @param begin Position index to begin the search. If found, returns the position found * in the string. 
@@ -204,34 +182,65 @@ class reprog_device { */ template __device__ inline match_result extract(cudf::size_type idx, - string_view const& d_str, + string_view const d_str, cudf::size_type begin, cudf::size_type end, - cudf::size_type group_id); + cudf::size_type const group_id) const; private: - int32_t _startinst_id, _num_capturing_groups; - int32_t _insts_count, _starts_count, _classes_count; - const uint8_t* _codepoint_flags{}; // table of character types - reinst* _insts{}; // array of regex instructions - int32_t* _startinst_ids{}; // array of start instruction ids - reclass_device* _classes{}; // array of regex classes - void* _relists_mem{}; // runtime relist memory for regexec + struct reljunk { + relist* __restrict__ list1; + relist* __restrict__ list2; + int32_t starttype{}; + char32_t startchar{}; + + __device__ inline reljunk(relist* list1, relist* list2, reinst const inst); + __device__ inline void swaplist(); + }; + + /** + * @brief Returns the regex instruction object for a given id. + */ + __device__ inline reinst get_inst(int32_t id) const; + + /** + * @brief Returns the regex class object for a given id. + */ + __device__ inline reclass_device get_class(int32_t id) const; /** * @brief Executes the regex pattern on the given string. */ - __device__ inline int32_t regexec( - string_view const& d_str, reljunk& jnk, int32_t& begin, int32_t& end, int32_t group_id = 0); + __device__ inline int32_t regexec(string_view const d_str, + reljunk jnk, + cudf::size_type& begin, + cudf::size_type& end, + cudf::size_type const group_id = 0) const; /** * @brief Utility wrapper to setup state memory structures for calling regexec */ template - __device__ inline int32_t call_regexec( - int32_t idx, string_view const& d_str, int32_t& begin, int32_t& end, int32_t group_id = 0); - - reprog_device(reprog&); // must use create() + __device__ inline int32_t call_regexec(int32_t idx, + string_view const d_str, + cudf::size_type& begin, + cudf::size_type& end, + cudf::size_type const group_id = 0) const; + + reprog_device(reprog&); + + int32_t _startinst_id; // first instruction id + int32_t _num_capturing_groups; // instruction groups + int32_t _insts_count; // number of instructions + int32_t _starts_count; // number of start-insts ids + int32_t _classes_count; // number of classes + + uint8_t const* _codepoint_flags{}; // table of character types + reinst const* _insts{}; // array of regex instructions + int32_t const* _startinst_ids{}; // array of start instruction ids + reclass_device const* _classes{}; // array of regex classes + + void* _relists_mem{}; // runtime relist memory for regexec() }; } // namespace detail diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index 01e773960e4..9fe4440d7ec 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,16 +17,9 @@ #include #include -#include +#include #include -#include -#include -#include -#include -#include -#include - namespace cudf { namespace strings { namespace detail { @@ -40,95 +33,102 @@ namespace detail { * reflected here. The regexec function updates and manages this state data. 
*/ struct alignas(8) relist { - int16_t size{}; - int16_t listsize{}; - int32_t reserved; - int2* ranges{}; // pair per instruction - int16_t* inst_ids{}; // one per instruction - u_char* mask{}; // bit per instruction - - CUDF_HOST_DEVICE inline static int32_t data_size_for(int32_t insts) + /** + * @brief Compute the memory size for the state data. + */ + constexpr inline static std::size_t data_size_for(int32_t insts) { - return ((sizeof(ranges[0]) + sizeof(inst_ids[0])) * insts) + ((insts + 7) / 8); + return ((sizeof(ranges[0]) + sizeof(inst_ids[0])) * insts) + + cudf::util::div_rounding_up_unsafe(insts, 8); } - CUDF_HOST_DEVICE inline static int32_t alloc_size(int32_t insts) + /** + * @brief Compute the aligned memory allocation size. + */ + constexpr inline static std::size_t alloc_size(int32_t insts) { - int32_t size = sizeof(relist); - size += data_size_for(insts); - size = ((size + 7) / 8) * 8; // align it too - return size; + return cudf::util::round_up_unsafe(data_size_for(insts) + sizeof(relist), + sizeof(ranges[0])); } - CUDF_HOST_DEVICE inline relist() {} + struct alignas(16) restate { + int2 range; + int32_t inst_id; + int32_t reserved; + }; - CUDF_HOST_DEVICE inline relist(int16_t insts, u_char* data = nullptr) : listsize(insts) + __device__ __forceinline__ relist(int16_t insts, u_char* data = nullptr) + : masksize(cudf::util::div_rounding_up_unsafe(insts, 8)) { auto ptr = data == nullptr ? reinterpret_cast<u_char*>(this) + sizeof(relist) : data; ranges = reinterpret_cast<int2*>(ptr); - ptr += listsize * sizeof(ranges[0]); + ptr += insts * sizeof(ranges[0]); inst_ids = reinterpret_cast<int16_t*>(ptr); - ptr += listsize * sizeof(inst_ids[0]); + ptr += insts * sizeof(inst_ids[0]); mask = ptr; reset(); } - CUDF_HOST_DEVICE inline void reset() + __device__ __forceinline__ void reset() { - memset(mask, 0, (listsize + 7) / 8); + memset(mask, 0, masksize); size = 0; } - __device__ inline bool activate(int32_t i, int32_t begin, int32_t end) + __device__ __forceinline__ bool activate(int32_t id, int32_t begin, int32_t end) { - if (readMask(i)) return false; - writeMask(true, i); - inst_ids[size] = static_cast<int16_t>(i); + if (readMask(id)) { return false; } + writeMask(id); + inst_ids[size] = static_cast<int16_t>(id); ranges[size] = int2{begin, end}; ++size; return true; } - __device__ inline void writeMask(bool v, int32_t pos) + __device__ __forceinline__ restate get_state(int16_t idx) const { - u_char uc = 1 << (pos & 7); - if (v) - mask[pos >> 3] |= uc; - else - mask[pos >> 3] &= ~uc; + return restate{ranges[idx], inst_ids[idx]}; } - __device__ inline bool readMask(int32_t pos) + __device__ __forceinline__ int16_t get_size() const { return size; } + + private: + int16_t size{}; + int16_t const masksize; + int32_t reserved; + int2* __restrict__ ranges; // pair per instruction + int16_t* __restrict__ inst_ids; // one per instruction + u_char* __restrict__ mask; // bit per instruction + + __device__ __forceinline__ void writeMask(int32_t pos) const { - u_char uc = mask[pos >> 3]; - return static_cast<bool>((uc >> (pos & 7)) & 1); + u_char const uc = 1 << (pos & 7); + mask[pos >> 3] |= uc; } -}; -/** - * @brief This manages the two relist instances required by the regexec function.
- */ -struct reljunk { - relist* list1; - relist* list2; - int32_t starttype{}; - char32_t startchar{}; - - __host__ __device__ reljunk(relist* list1, relist* list2, int32_t stype, char32_t schar) - : list1(list1), list2(list2) + __device__ __forceinline__ bool readMask(int32_t pos) const { - if (starttype == CHAR || starttype == BOL) { - starttype = stype; - startchar = schar; - } + u_char const uc = mask[pos >> 3]; + return static_cast<bool>((uc >> (pos & 7)) & 1); } }; -__device__ inline void swaplist(relist*& l1, relist*& l2) +__device__ __forceinline__ reprog_device::reljunk::reljunk(relist* list1, + relist* list2, + reinst const inst) + : list1(list1), list2(list2) +{ + if (inst.type == CHAR || inst.type == BOL) { + starttype = inst.type; + startchar = inst.u1.c; + } +} + +__device__ __forceinline__ void reprog_device::reljunk::swaplist() { - relist* tmp = l1; - l1 = l2; - l2 = tmp; + auto tmp = list1; + list1 = list2; + list2 = tmp; } /** @@ -138,15 +138,13 @@ __device__ inline void swaplist(relist*& l1, relist*& l2) * @param codepoint_flags Used for mapping a character to type for builtin classes. * @return true if the character matches */ -__device__ inline bool reclass_device::is_match(char32_t ch, const uint8_t* codepoint_flags) +__device__ __forceinline__ bool reclass_device::is_match(char32_t const ch, + uint8_t const* codepoint_flags) const { - if (thrust::any_of(thrust::seq, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(count), - [ch, this] __device__(int i) { - return ((ch >= literals[i * 2]) && (ch <= literals[(i * 2) + 1])); - })) - return true; + for (int i = 0; i < count; ++i) { + if ((ch >= literals[i * 2]) && (ch <= literals[(i * 2) + 1])) { return true; } + } + if (!builtins) return false; uint32_t codept = utf8_to_codepoint(ch); if (codept > 0x00FFFF) return false; @@ -167,20 +165,18 @@ __device__ inline bool reclass_device::is_match(char32_t ch, const uint8_t* code return false; } -__device__ inline reinst* reprog_device::get_inst(int32_t idx) const +__device__ __forceinline__ reinst reprog_device::get_inst(int32_t id) const { return _insts[id]; } + +__device__ __forceinline__ reclass_device reprog_device::get_class(int32_t id) const { - assert((idx >= 0) && (idx < _insts_count)); - return _insts + idx; + return _classes[id]; } -__device__ inline reclass_device reprog_device::get_class(int32_t idx) const +__device__ __forceinline__ bool reprog_device::is_empty() const { - assert((idx >= 0) && (idx < _classes_count)); - return _classes[idx]; + return insts_counts() == 0 || get_inst(0).type == END; } -__device__ inline int32_t* reprog_device::startinst_ids() const { return _startinst_ids; } - /** * @brief Evaluate a specific string against regex pattern compiled to this instance. * @@ -195,35 +191,36 @@ __device__ inline int32_t* reprog_device::startinst_ids() const { return _starti * @param group_id Index of the group to match in a multi-group regex pattern.
* @return >0 if match found */ -__device__ inline int32_t reprog_device::regexec( - string_view const& dstr, reljunk& jnk, int32_t& begin, int32_t& end, int32_t group_id) +__device__ __forceinline__ int32_t reprog_device::regexec(string_view const dstr, + reljunk jnk, + cudf::size_type& begin, + cudf::size_type& end, + cudf::size_type const group_id) const { - int32_t match = 0; - auto checkstart = jnk.starttype; - auto pos = begin; - auto eos = end; - char32_t c = 0; - auto last_character = false; + int32_t match = 0; + auto pos = begin; + auto eos = end; + char_utf8 c = 0; + auto checkstart = jnk.starttype != 0; + auto last_character = false; + string_view::const_iterator itr = string_view::const_iterator(dstr, pos); jnk.list1->reset(); do { - /* fast check for first char */ + // fast check for first CHAR or BOL if (checkstart) { + auto startchar = static_cast<char_utf8>(jnk.startchar); switch (jnk.starttype) { - case CHAR: { - auto fidx = dstr.find(static_cast<char_utf8>(jnk.startchar), pos); - if (fidx < 0) return match; - pos = fidx; - break; - } - case BOL: { + case BOL: if (pos == 0) break; - if (jnk.startchar != '^') return match; + if (jnk.startchar != '^') { return match; } --pos; - int fidx = dstr.find(static_cast<char_utf8>('\n'), pos); - if (fidx < 0) return match; // update begin/end values? - pos = fidx + 1; + startchar = static_cast<char_utf8>('\n'); + case CHAR: { + auto const fidx = dstr.find(startchar, pos); + if (fidx < 0) { return match; } + pos = fidx + (jnk.starttype == BOL); break; } }
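One subtlety in the rewritten fast path above: the BOL case now deliberately falls through into the CHAR case, so a single dstr.find() serves both start types, and `pos = fidx + (jnk.starttype == BOL)` steps past the matched '\n' only on the BOL path. A sketch of the same switch with the intent annotated (the [[fallthrough]] marker is an editorial addition, not part of the patch):

switch (jnk.starttype) {
  case BOL:
    if (pos == 0) break;
    if (jnk.startchar != '^') { return match; }
    --pos;
    startchar = static_cast<char_utf8>('\n');  // now search for the line break
    [[fallthrough]];                           // shared find() below
  case CHAR: {
    auto const fidx = dstr.find(startchar, pos);
    if (fidx < 0) { return match; }
    pos = fidx + (jnk.starttype == BOL);       // BOL resumes just after the '\n'
    break;
  }
}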
@@ -231,128 +228,114 @@ __device__ inline int32_t reprog_device::regexec( } if (((eos < 0) || (pos < eos)) && match == 0) { - int32_t i = 0; - auto ids = startinst_ids(); - while (ids[i] >= 0) - jnk.list1->activate(ids[i++], (group_id == 0 ? pos : -1), -1); + auto ids = _startinst_ids; + while (*ids >= 0) + jnk.list1->activate(*ids++, (group_id == 0 ? pos : -1), -1); } - last_character = (pos >= dstr.length()); + last_character = itr.byte_offset() >= dstr.size_bytes(); - c = static_cast<char32_t>(last_character ? 0 : *itr); + c = last_character ? 0 : *itr; - // expand LBRA, RBRA, BOL, EOL, BOW, NBOW, and OR + // expand the non-character types like: LBRA, RBRA, BOL, EOL, BOW, NBOW, and OR bool expanded = false; do { jnk.list2->reset(); expanded = false; - for (int16_t i = 0; i < jnk.list1->size; i++) { - auto inst_id = static_cast<int32_t>(jnk.list1->inst_ids[i]); - int2& range = jnk.list1->ranges[i]; - const reinst* inst = get_inst(inst_id); + for (int16_t i = 0; i < jnk.list1->get_size(); i++) { + auto state = jnk.list1->get_state(i); + auto range = state.range; + auto const inst = get_inst(state.inst_id); int32_t id_activate = -1; - switch (inst->type) { + switch (inst.type) { case CHAR: case ANY: case ANYNL: case CCLASS: case NCCLASS: - case END: id_activate = inst_id; break; + case END: id_activate = state.inst_id; break; case LBRA: - if (inst->u1.subid == group_id) range.x = pos; - id_activate = inst->u2.next_id; + if (inst.u1.subid == group_id) range.x = pos; + id_activate = inst.u2.next_id; expanded = true; break; case RBRA: - if (inst->u1.subid == group_id) range.y = pos; - id_activate = inst->u2.next_id; + if (inst.u1.subid == group_id) range.y = pos; + id_activate = inst.u2.next_id; expanded = true; break; case BOL: - if ((pos == 0) || - ((inst->u1.c == '^') && (dstr[pos - 1] == static_cast<char_utf8>('\n')))) { - id_activate = inst->u2.next_id; + if ((pos == 0) || ((inst.u1.c == '^') && (dstr[pos - 1] == '\n'))) { + id_activate = inst.u2.next_id; expanded = true; } break; case EOL: - if (last_character || (c == '\n' && inst->u1.c == '$')) { - id_activate = inst->u2.next_id; - expanded = true; - } - break; - case BOW: { - auto codept = utf8_to_codepoint(c); - auto last_c = static_cast<char32_t>(pos ? dstr[pos - 1] : 0); - auto last_codept = utf8_to_codepoint(last_c); - bool cur_alphaNumeric = (codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[codept]); - bool last_alphaNumeric = - (last_codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[last_codept]); - if (cur_alphaNumeric != last_alphaNumeric) { - id_activate = inst->u2.next_id; + if (last_character || (c == '\n' && inst.u1.c == '$')) { + id_activate = inst.u2.next_id; expanded = true; } break; - } + case BOW: case NBOW: { - auto codept = utf8_to_codepoint(c); - auto last_c = static_cast<char32_t>(pos ? dstr[pos - 1] : 0); - auto last_codept = utf8_to_codepoint(last_c); - bool cur_alphaNumeric = (codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[codept]); - bool last_alphaNumeric = + auto const codept = utf8_to_codepoint(c); + auto const last_c = pos > 0 ? dstr[pos - 1] : 0; + auto const last_codept = utf8_to_codepoint(last_c); + + bool const cur_alphaNumeric = + (codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[codept]); + bool const last_alphaNumeric = (last_codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[last_codept]); - if (cur_alphaNumeric == last_alphaNumeric) { - id_activate = inst->u2.next_id; + if ((cur_alphaNumeric == last_alphaNumeric) != (inst.type == BOW)) { + id_activate = inst.u2.next_id; expanded = true; } break; }
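The merged BOW/NBOW case folds the two formerly duplicated blocks into one predicate: BOW fires when the alphanumeric class changes between the previous and current character, NBOW when it does not. A self-contained sanity check of `(cur_alphaNumeric == last_alphaNumeric) != (inst.type == BOW)` (names shortened; illustrative only):

// Folded word-boundary predicate: true means "activate the next instruction".
constexpr bool activates(bool cur_alnum, bool last_alnum, bool is_bow)
{
  return (cur_alnum == last_alnum) != is_bow;
}
static_assert(activates(true, false, true), "BOW fires on a class transition");
static_assert(!activates(true, true, true), "BOW stays quiet inside a word");
static_assert(activates(true, true, false), "NBOW fires when there is no transition");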
case OR: - jnk.list2->activate(inst->u1.right_id, range.x, range.y); - id_activate = inst->u2.left_id; + jnk.list2->activate(inst.u1.right_id, range.x, range.y); + id_activate = inst.u2.left_id; expanded = true; break; } if (id_activate >= 0) jnk.list2->activate(id_activate, range.x, range.y); } - swaplist(jnk.list1, jnk.list2); + jnk.swaplist(); } while (expanded); - // execute + // execute instructions bool continue_execute = true; jnk.list2->reset(); - for (int16_t i = 0; continue_execute && i < jnk.list1->size; i++) { - auto inst_id = static_cast<int32_t>(jnk.list1->inst_ids[i]); - int2& range = jnk.list1->ranges[i]; - const reinst* inst = get_inst(inst_id); + for (int16_t i = 0; continue_execute && i < jnk.list1->get_size(); i++) { + auto const state = jnk.list1->get_state(i); + auto const range = state.range; + auto const inst = get_inst(state.inst_id); int32_t id_activate = -1; - switch (inst->type) { + switch (inst.type) { case CHAR: - if (inst->u1.c == c) id_activate = inst->u2.next_id; + if (inst.u1.c == c) id_activate = inst.u2.next_id; break; case ANY: - if (c != '\n') id_activate = inst->u2.next_id; + if (c != '\n') id_activate = inst.u2.next_id; break; - case ANYNL: id_activate = inst->u2.next_id; break; + case ANYNL: id_activate = inst.u2.next_id; break; + case NCCLASS: case CCLASS: { - reclass_device cls = get_class(inst->u1.cls_id); - if (cls.is_match(c, _codepoint_flags)) id_activate = inst->u2.next_id; - break; - } - case NCCLASS: { - reclass_device cls = get_class(inst->u1.cls_id); - if (!cls.is_match(c, _codepoint_flags)) id_activate = inst->u2.next_id; + auto const cls = get_class(inst.u1.cls_id); + if (cls.is_match(static_cast<char32_t>(c), _codepoint_flags) == (inst.type == CCLASS)) { + id_activate = inst.u2.next_id; + } break; } case END: match = 1; begin = range.x; end = group_id == 0 ? pos : range.y; - + // done with execute continue_execute = false; break; } @@ -362,18 +345,18 @@ __device__ inline int32_t reprog_device::regexec( ++pos; ++itr; - swaplist(jnk.list1, jnk.list2); - checkstart = jnk.list1->size > 0 ?
0 : 1; - } while (!last_character && (jnk.list1->size > 0 || match == 0)); + jnk.swaplist(); + checkstart = jnk.list1->get_size() == 0; + } while (!last_character && (!checkstart || !match)); return match; } template <int stack_size> -__device__ inline int32_t reprog_device::find(int32_t idx, - string_view const& dstr, - int32_t& begin, - int32_t& end) +__device__ __forceinline__ int32_t reprog_device::find(int32_t idx, + string_view const dstr, + cudf::size_type& begin, + cudf::size_type& end) const { int32_t rtn = call_regexec<stack_size>(idx, dstr, begin, end); if (rtn <= 0) begin = end = -1; return rtn; } @@ -381,11 +364,11 @@ __device__ inline int32_t reprog_device::find(int32_t idx, template <int stack_size> -__device__ inline match_result reprog_device::extract(cudf::size_type idx, - string_view const& dstr, - cudf::size_type begin, - cudf::size_type end, - cudf::size_type group_id) +__device__ __forceinline__ match_result reprog_device::extract(cudf::size_type idx, + string_view const dstr, + cudf::size_type begin, + cudf::size_type end, + cudf::size_type const group_id) const { end = begin + 1; return call_regexec<stack_size>(idx, dstr, begin, end, group_id + 1) > 0 @@ -394,28 +377,29 @@ __device__ inline match_result reprog_device::extract(cudf::size_type idx, template <int stack_size> -__device__ inline int32_t reprog_device::call_regexec( - int32_t idx, string_view const& dstr, int32_t& begin, int32_t& end, int32_t group_id) +__device__ __forceinline__ int32_t reprog_device::call_regexec(int32_t idx, + string_view const dstr, + cudf::size_type& begin, + cudf::size_type& end, + cudf::size_type const group_id) const { u_char data1[stack_size], data2[stack_size]; - auto const stype = get_inst(_startinst_id)->type; - auto const schar = get_inst(_startinst_id)->u1.c; - relist list1(static_cast<int16_t>(_insts_count), data1); relist list2(static_cast<int16_t>(_insts_count), data2); - reljunk jnk(&list1, &list2, stype, schar); + reljunk jnk(&list1, &list2, get_inst(_startinst_id)); return regexec(dstr, jnk, begin, end, group_id); } template <> -__device__ inline int32_t reprog_device::call_regexec<RX_STACK_ANY>( - int32_t idx, string_view const& dstr, int32_t& begin, int32_t& end, int32_t group_id) +__device__ __forceinline__ int32_t +reprog_device::call_regexec<RX_STACK_ANY>(int32_t idx, + string_view const dstr, + cudf::size_type& begin, + cudf::size_type& end, + cudf::size_type const group_id) const { - auto const stype = get_inst(_startinst_id)->type; - auto const schar = get_inst(_startinst_id)->u1.c; - auto const relists_size = relist::alloc_size(_insts_count); auto* listmem = reinterpret_cast<u_char*>(_relists_mem); // beginning of relist buffer; listmem += (idx * relists_size * 2); // two relist ptrs in reljunk: @@ -423,7 +407,7 @@ __device__ inline int32_t reprog_device::call_regexec( auto* list1 = new (listmem) relist(static_cast<int16_t>(_insts_count)); auto* list2 = new (listmem + relists_size) relist(static_cast<int16_t>(_insts_count)); - reljunk jnk(list1, list2, stype, schar); + reljunk jnk(list1, list2, get_inst(_startinst_id)); return regexec(dstr, jnk, begin, end, group_id); }
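To make the state-memory arithmetic behind relist::alloc_size() concrete: data_size_for() packs one int2 range (8 bytes) and one int16_t instruction id per instruction, plus one mask bit per instruction rounded up to whole bytes, and alloc_size() then pads the total (including the relist header) to sizeof(int2). A standalone re-derivation for an arbitrary 100-instruction program (int2x is a host-side stand-in for CUDA's built-in int2):

#include <cstddef>
#include <cstdint>

struct int2x { int32_t x, y; };  // 8-byte stand-in for CUDA's int2

constexpr std::size_t data_size_for(int32_t insts)
{
  return (sizeof(int2x) + sizeof(int16_t)) * insts + (insts + 7) / 8;
}
// 100 instructions: 800 bytes of ranges + 200 bytes of ids + 13 mask bytes
static_assert(data_size_for(100) == 800 + 200 + 13, "packing math as in relist");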
diff --git a/cpp/src/strings/regex/regexec.cu b/cpp/src/strings/regex/regexec.cu index 3bcf55cf069..70d6079972a 100644 --- a/cpp/src/strings/regex/regexec.cu +++ b/cpp/src/strings/regex/regexec.cu @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -27,39 +28,6 @@ namespace cudf { namespace strings { namespace detail { -namespace { -/** - * @brief Converts UTF-8 string into fixed-width 32-bit character vector. - * - * No character conversion occurs. - * Each UTF-8 character is promoted into a 32-bit value. - * The last entry in the returned vector will be a 0 value. - * The fixed-width vector makes it easier to compile and faster to execute. - * - * @param pattern Regular expression encoded with UTF-8. - * @return Fixed-width 32-bit character vector. - */ -std::vector<char32_t> string_to_char32_vector(std::string const& pattern) -{ - size_type size = static_cast<size_type>(pattern.size()); - size_type count = std::count_if(pattern.cbegin(), pattern.cend(), [](char ch) { - return is_begin_utf8_char(static_cast<uint8_t>(ch)); - }); - std::vector<char32_t> result(count + 1); - char32_t* output_ptr = result.data(); - const char* input_ptr = pattern.data(); - for (size_type idx = 0; idx < size; ++idx) { - char_utf8 output_character = 0; - size_type ch_width = to_char_utf8(input_ptr, output_character); - input_ptr += ch_width; - idx += ch_width - 1; - *output_ptr++ = output_character; - } - result[count] = 0; // last entry set to 0 - return result; -} - -} // namespace // Copy reprog primitive values reprog_device::reprog_device(reprog& prog) @@ -89,75 +57,76 @@ std::unique_ptr<reprog_device, std::function<void(reprog_device*)>> reprog_devic size_type strings_count, rmm::cuda_stream_view stream) { - std::vector<char32_t> pattern32 = string_to_char32_vector(pattern); // compile pattern into host object - reprog h_prog = reprog::create_from(pattern32.data(), flags); + reprog h_prog = reprog::create_from(pattern, flags); + // compute size to hold all the member data - auto insts_count = h_prog.insts_count(); - auto classes_count = h_prog.classes_count(); - auto starts_count = h_prog.starts_count(); - // compute size of each section; make sure each is aligned appropriately - auto insts_size = - cudf::util::round_up_safe(insts_count * sizeof(_insts[0]), sizeof(size_t)); - auto startids_size = - cudf::util::round_up_safe(starts_count * sizeof(_startinst_ids[0]), sizeof(size_t)); - auto classes_size = - cudf::util::round_up_safe(classes_count * sizeof(_classes[0]), sizeof(size_t)); - for (int32_t idx = 0; idx < classes_count; ++idx) + auto const insts_count = h_prog.insts_count(); + auto const classes_count = h_prog.classes_count(); + auto const starts_count = h_prog.starts_count(); + + // compute size of each section + auto insts_size = insts_count * sizeof(_insts[0]); + auto startids_size = starts_count * sizeof(_startinst_ids[0]); + auto classes_size = classes_count * sizeof(_classes[0]); + for (auto idx = 0; idx < classes_count; ++idx) classes_size += static_cast<int32_t>((h_prog.class_at(idx).literals.size()) * sizeof(char32_t)); - size_t memsize = insts_size + startids_size + classes_size; - size_t rlm_size = 0; - // check memory size needed for executing regex - if (insts_count > RX_LARGE_INSTS) { - auto relist_alloc_size = relist::alloc_size(insts_count); - rlm_size = relist_alloc_size * 2L * strings_count; // reljunk has 2 relist ptrs - } + // make sure each section is aligned for the subsequent section's data type + auto const memsize = cudf::util::round_up_safe(insts_size, sizeof(_startinst_ids[0])) + + cudf::util::round_up_safe(startids_size, sizeof(_classes[0])) + + cudf::util::round_up_safe(classes_size, sizeof(char32_t));
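The chained round_up_safe() calls encode the layout rule for the flat buffer: each section is padded so the section that follows starts at an offset aligned for its element type (start ids are int32_t; reclass_device is declared alignas(16) earlier in this patch). A small standalone illustration of that rule, with the section sizes 93 and 100 chosen arbitrarily:

#include <cstddef>
#include <cstdint>

// Minimal stand-in for cudf::util::round_up_safe, shown only to make the
// padding rule visible; not the library's implementation.
constexpr std::size_t round_up(std::size_t value, std::size_t multiple)
{
  return ((value + multiple - 1) / multiple) * multiple;
}
static_assert(round_up(93, sizeof(int32_t)) == 96, "insts section padded for start ids");
static_assert(round_up(100, 16) == 112, "start ids padded for 16-byte-aligned classes");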
+ + // allocate memory to store all the prog data in a flat contiguous buffer + std::vector<u_char> h_buffer(memsize); // copy everything into here; + auto h_ptr = h_buffer.data(); // this is our running host ptr; + auto d_buffer = new rmm::device_buffer(memsize, stream); // output device memory; + auto d_ptr = reinterpret_cast<u_char*>(d_buffer->data()); // running device pointer - // allocate memory to store prog data - std::vector<u_char> h_buffer(memsize); - u_char* h_ptr = h_buffer.data(); // running pointer - auto* d_buffer = new rmm::device_buffer(memsize, stream); - u_char* d_ptr = reinterpret_cast<u_char*>(d_buffer->data()); // running device pointer // put everything into a flat host buffer first reprog_device* d_prog = new reprog_device(h_prog); - // copy the instructions array first (fixed-size structs) - reinst* insts = reinterpret_cast<reinst*>(h_ptr); - memcpy(insts, h_prog.insts_data(), insts_size); - h_ptr += insts_size; // next section + + // copy the instructions array first (fixed-sized structs) + memcpy(h_ptr, h_prog.insts_data(), insts_size); d_prog->_insts = reinterpret_cast<reinst*>(d_ptr); + + // point to the end for the next section + insts_size = cudf::util::round_up_safe(insts_size, sizeof(_startinst_ids[0])); + h_ptr += insts_size; d_ptr += insts_size; - // copy the startinst_ids next (ints) - int32_t* startinst_ids = reinterpret_cast<int32_t*>(h_ptr); - memcpy(startinst_ids, h_prog.starts_data(), startids_size); - h_ptr += startids_size; // next section + // copy the startinst_ids next + memcpy(h_ptr, h_prog.starts_data(), startids_size); d_prog->_startinst_ids = reinterpret_cast<int32_t*>(d_ptr); + + // next section; align the size for next data type + startids_size = cudf::util::round_up_safe(startids_size, sizeof(_classes[0])); + h_ptr += startids_size; d_ptr += startids_size; // copy classes into flat memory: [class1,class2,...][char32 arrays] - reclass_device* classes = reinterpret_cast<reclass_device*>(h_ptr); - d_prog->_classes = reinterpret_cast<reclass_device*>(d_ptr); + auto classes = reinterpret_cast<reclass_device*>(h_ptr); + d_prog->_classes = reinterpret_cast<reclass_device const*>(d_ptr); // get pointer to the end to handle variable length data - u_char* h_end = h_ptr + (classes_count * sizeof(reclass_device)); - u_char* d_end = d_ptr + (classes_count * sizeof(reclass_device)); + auto h_end = h_ptr + (classes_count * sizeof(reclass_device)); + auto d_end = d_ptr + (classes_count * sizeof(reclass_device)); // place each class and append the variable length data for (int32_t idx = 0; idx < classes_count; ++idx) { reclass& h_class = h_prog.class_at(idx); - reclass_device d_class; - d_class.builtins = h_class.builtins; - d_class.count = h_class.literals.size() / 2; - d_class.literals = reinterpret_cast<char32_t*>(d_end); - memcpy(classes++, &d_class, sizeof(d_class)); + reclass_device d_class{h_class.builtins, + static_cast<int32_t>(h_class.literals.size() / 2), + reinterpret_cast<char32_t const*>(d_end)}; + *classes++ = d_class; memcpy(h_end, h_class.literals.c_str(), h_class.literals.size() * sizeof(char32_t)); h_end += h_class.literals.size() * sizeof(char32_t); d_end += h_class.literals.size() * sizeof(char32_t); } + // initialize the rest of the elements - d_prog->_insts_count = insts_count; - d_prog->_starts_count = starts_count; - d_prog->_classes_count = classes_count; d_prog->_codepoint_flags = codepoint_flags; + // allocate execute memory if needed rmm::device_buffer* d_relists{}; - if (rlm_size > 0) { + if (insts_count > RX_LARGE_INSTS) { + // two relist state structures are needed for execute per string + auto const rlm_size = relist::alloc_size(insts_count) * 2 * strings_count; d_relists = new rmm::device_buffer(rlm_size, stream); d_prog->_relists_mem = d_relists->data(); } @@ -165,7 +134,8 @@ std::unique_ptr<reprog_device, std::function<void(reprog_device*)>> reprog_devic // copy flat prog to device memory CUDF_CUDA_TRY(cudaMemcpyAsync( d_buffer->data(), h_buffer.data(), memsize, cudaMemcpyHostToDevice, stream.value())); - // + + // build deleter to cleanup device memory auto deleter = [d_buffer, d_relists](reprog_device* t) { t->destroy(); delete d_buffer;
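create() hands the device program back through a unique_ptr whose deleter captures the flat buffer and the optional relist buffer, so all of the device allocations are released together when the handle is dropped. A reduced model of that ownership wiring (host-only toy types standing in for the device objects; not the patch's code):

#include <functional>
#include <memory>

struct device_prog { /* stand-in for the device-side program handle */ };

// The deleter owns the auxiliary buffers (raw arrays here in place of
// rmm::device_buffer) and frees them together with the program object.
std::unique_ptr<device_prog, std::function<void(device_prog*)>> make_prog()
{
  auto* buffer  = new char[64];  // stand-in for the flat device buffer
  auto* relists = new char[64];  // stand-in for the relist working memory
  auto deleter  = [buffer, relists](device_prog* p) {
    delete[] buffer;
    delete[] relists;
    delete p;
  };
  return {new device_prog{}, deleter};
}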
diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu index d496b46bc36..7a3e0fb0243 100644 --- a/cpp/src/strings/repeat_strings.cu +++ b/cpp/src/strings/repeat_strings.cu @@ -283,7 +283,7 @@ auto make_strings_children(Func fn, for_each_fn(fn); } - return std::make_pair(std::move(offsets_column), std::move(chars_column)); + return std::pair(std::move(offsets_column), std::move(chars_column)); } } // namespace @@ -345,7 +345,7 @@ std::pair<std::unique_ptr<column>, int64_t> repeat_strings_output_sizes( auto const strings_count = input.size(); if (strings_count == 0) { - return std::make_pair(make_empty_column(type_to_id<size_type>()), int64_t{0}); + return std::pair(make_empty_column(type_to_id<size_type>()), int64_t{0}); } auto output_sizes = make_numeric_column( @@ -374,7 +374,7 @@ std::pair<std::unique_ptr<column>, int64_t> repeat_strings_output_sizes( int64_t{0}, thrust::plus{}); - return std::make_pair(std::move(output_sizes), total_bytes); + return std::pair(std::move(output_sizes), total_bytes); } } // namespace detail diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index d7cc72fdfff..ac073f8efbc 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -28,12 +27,8 @@ #include #include -#include #include -#include -#include - -#include +#include namespace cudf { namespace strings { @@ -42,65 +37,33 @@ namespace detail { /** * @copydoc create_string_vector_from_column */ -rmm::device_uvector<string_view> create_string_vector_from_column(cudf::strings_column_view strings, - rmm::cuda_stream_view stream) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - - rmm::device_uvector<string_view> strings_vector(strings.size(), stream); - string_view* d_strings = strings_vector.data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator<size_type>(0), - strings.size(), - [d_column, d_strings] __device__(size_type idx) { - if (d_column.is_null(idx)) - d_strings[idx] = string_view(nullptr, 0); - else - d_strings[idx] = d_column.element<string_view>(idx); - }); - return strings_vector; -} - -/** - * @copydoc child_offsets_from_string_vector - */ -std::unique_ptr<column> child_offsets_from_string_vector( - cudf::device_span<string_view> strings, +rmm::device_uvector<string_view> create_string_vector_from_column( + cudf::strings_column_view const input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return child_offsets_from_string_iterator(strings.begin(), strings.size(), stream, mr); -} + auto d_strings = column_device_view::create(input.parent(), stream); + + auto strings_vector = rmm::device_uvector<string_view>(input.size(), stream, mr); + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator<size_type>(0), + thrust::make_counting_iterator<size_type>(input.size()), + strings_vector.begin(), + [d_strings = *d_strings] __device__(size_type idx) { + // placeholder for factory function that takes a span of string_views + auto const null_string_view = string_view{nullptr, 0}; + if (d_strings.is_null(idx)) { return null_string_view; } + auto const d_str = d_strings.element<string_view>(idx); + // special case when the entire column is filled with empty strings: + // here the empty d_str may have a d_str.data() == nullptr + auto const empty_string_view = string_view{}; + return d_str.empty() ?
empty_string_view : d_str; + }); -/** - * @copydoc child_chars_from_string_vector - */ -std::unique_ptr<column> child_chars_from_string_vector(cudf::device_span<string_view> strings, - column_view const& offsets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const d_strings = strings.data(); - auto const bytes = cudf::detail::get_value<size_type>(offsets, strings.size(), stream); - auto const d_offsets = offsets.data<int32_t>(); - - // create column - auto chars_column = create_chars_child_column(bytes, stream, mr); - // get it's view - auto d_chars = chars_column->mutable_view().data<char>(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator<size_type>(0), - strings.size(), - [d_strings, d_offsets, d_chars] __device__(size_type idx) { - string_view const d_str = d_strings[idx]; - memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes()); - }); - - return chars_column; + return strings_vector; } -// std::unique_ptr<column> create_chars_child_column(cudf::size_type total_bytes, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/structs/structs_column_view.cpp b/cpp/src/structs/structs_column_view.cpp index db9496f18be..7d8c8837d2d 100644 --- a/cpp/src/structs/structs_column_view.cpp +++ b/cpp/src/structs/structs_column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "cudf/utilities/error.hpp" #include #include +#include <cudf/utilities/error.hpp> namespace cudf { @@ -25,6 +25,8 @@ structs_column_view::structs_column_view(column_view const& rhs) : column_view{r CUDF_EXPECTS(type().id() == type_id::STRUCT, "structs_column_view only supports struct columns"); } +column_view structs_column_view::parent() const { return *this; } + column_view structs_column_view::get_sliced_child(int index) const { std::vector<column_view> children; diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 852a32bed3d..a2c173cae5f 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -371,7 +371,7 @@ std::tuple<cudf::table_view, std::vector<rmm::device_buffer>> superimpose_paren auto [new_child_mask, null_count] = [&] { if (not child.nullable()) { // Adopt parent STRUCT's null mask. - return std::make_pair(structs_column.null_mask(), 0); + return std::pair(structs_column.null_mask(), 0); } // Both STRUCT and child are nullable. AND() for the child's new null mask. @@ -387,8 +387,8 @@ std::tuple<cudf::table_view, std::vector<rmm::device_buffer>> superimpose_paren stream, mr); ret_validity_buffers.push_back(std::move(new_mask)); - return std::make_pair( - reinterpret_cast<bitmask_type const*>(ret_validity_buffers.back().data()), null_count); + return std::pair(reinterpret_cast<bitmask_type const*>(ret_validity_buffers.back().data()), + null_count); }(); return cudf::column_view( diff --git a/cpp/src/text/subword/bpe_tokenizer.cu b/cpp/src/text/subword/bpe_tokenizer.cu index fb631b3f31f..404ecf1248c 100644 --- a/cpp/src/text/subword/bpe_tokenizer.cu +++ b/cpp/src/text/subword/bpe_tokenizer.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -144,8 +145,8 @@ struct byte_pair_encoding_fn { * @param rhs Second string. * @return The hash value to match with `d_map`.
*/ - __device__ hash_value_type compute_hash(cudf::string_view const& lhs, - cudf::string_view const& rhs) + __device__ cudf::hash_value_type compute_hash(cudf::string_view const& lhs, + cudf::string_view const& rhs) { __shared__ char shmem[48 * 1024]; // max for Pascal auto const total_size = lhs.size_bytes() + rhs.size_bytes() + 1; diff --git a/cpp/src/text/subword/bpe_tokenizer.cuh b/cpp/src/text/subword/bpe_tokenizer.cuh index 31cc29a8d8a..24b10fc4a36 100644 --- a/cpp/src/text/subword/bpe_tokenizer.cuh +++ b/cpp/src/text/subword/bpe_tokenizer.cuh @@ -36,12 +36,12 @@ namespace detail { using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; -using merge_pairs_map_type = cuco::static_map; -using string_hasher_type = MurmurHash3_32; +using string_hasher_type = cudf::detail::MurmurHash3_32; } // namespace detail diff --git a/cpp/src/text/subword/data_normalizer.cu b/cpp/src/text/subword/data_normalizer.cu index 2ed59c3ae0c..71f9e3f7043 100644 --- a/cpp/src/text/subword/data_normalizer.cu +++ b/cpp/src/text/subword/data_normalizer.cu @@ -278,8 +278,8 @@ uvector_pair data_normalizer::normalize(char const* d_strings, rmm::cuda_stream_view stream) const { if (num_strings == 0) - return std::make_pair(std::make_unique>(0, stream), - std::make_unique>(0, stream)); + return std::pair(std::make_unique>(0, stream), + std::make_unique>(0, stream)); // copy offsets to working memory size_t const num_offsets = num_strings + 1; @@ -294,8 +294,8 @@ uvector_pair data_normalizer::normalize(char const* d_strings, }); uint32_t const bytes_count = d_strings_offsets->element(num_strings, stream); if (bytes_count == 0) // if no bytes, nothing to do - return std::make_pair(std::make_unique>(0, stream), - std::make_unique>(0, stream)); + return std::pair(std::make_unique>(0, stream), + std::make_unique>(0, stream)); cudf::detail::grid_1d const grid{static_cast(bytes_count), THREADS_PER_BLOCK, 1}; size_t const threads_on_device = grid.num_threads_per_block * grid.num_blocks; diff --git a/cpp/src/text/subword/load_merges_file.cu b/cpp/src/text/subword/load_merges_file.cu index 31f579dc9d4..1e0c9c81fcd 100644 --- a/cpp/src/text/subword/load_merges_file.cu +++ b/cpp/src/text/subword/load_merges_file.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -42,7 +43,7 @@ struct make_pair_function { /** * @brief Hash the merge pair entry */ - __device__ cuco::pair_type operator()(cudf::size_type idx) + __device__ cuco::pair_type operator()(cudf::size_type idx) { auto const result = _hasher(d_strings.element(idx)); return cuco::make_pair(result, idx); @@ -105,9 +106,9 @@ std::unique_ptr initialize_merge_pairs_map( // Ensure capacity is at least (size/0.7) as documented here: // https://github.com/NVIDIA/cuCollections/blob/6ec8b6dcdeceea07ab4456d32461a05c18864411/include/cuco/static_map.cuh#L179-L182 auto merge_pairs_map = std::make_unique( - static_cast(input.size() * 2), // capacity is 2x; - std::numeric_limits::max(), // empty key; - -1, // empty value is not used + static_cast(input.size() * 2), // capacity is 2x; + std::numeric_limits::max(), // empty key; + -1, // empty value is not used hash_table_allocator_type{default_allocator{}, stream}, stream.value()); @@ -117,8 +118,8 @@ std::unique_ptr initialize_merge_pairs_map( merge_pairs_map->insert(iter, iter + input.size(), - cuco::detail::MurmurHash3_32{}, - thrust::equal_to{}, + cuco::detail::MurmurHash3_32{}, + thrust::equal_to{}, stream.value()); return merge_pairs_map; diff --git a/cpp/src/transform/bools_to_mask.cu 
b/cpp/src/transform/bools_to_mask.cu index 2cf4771890b..a1f49a5685f 100644 --- a/cpp/src/transform/bools_to_mask.cu +++ b/cpp/src/transform/bools_to_mask.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ std::pair, cudf::size_type> bools_to_mask( { CUDF_EXPECTS(input.type().id() == type_id::BOOL8, "Input is not of type bool"); - if (input.is_empty()) { return std::make_pair(std::make_unique(), 0); } + if (input.is_empty()) { return std::pair(std::make_unique(), 0); } auto input_device_view_ptr = column_device_view::create(input, stream); auto input_device_view = *input_device_view_ptr; @@ -45,12 +45,12 @@ std::pair, cudf::size_type> bools_to_mask( auto mask = detail::valid_if(input_begin, input_begin + input.size(), pred, stream, mr); - return std::make_pair(std::make_unique(std::move(mask.first)), mask.second); + return std::pair(std::make_unique(std::move(mask.first)), mask.second); } else { auto mask = detail::valid_if( input_device_view.begin(), input_device_view.end(), pred, stream, mr); - return std::make_pair(std::make_unique(std::move(mask.first)), mask.second); + return std::pair(std::make_unique(std::move(mask.first)), mask.second); } } diff --git a/cpp/src/transform/encode.cu b/cpp/src/transform/encode.cu index 04821b09eab..60769665fca 100644 --- a/cpp/src/transform/encode.cu +++ b/cpp/src/transform/encode.cu @@ -57,7 +57,7 @@ std::pair, std::unique_ptr> encode( auto indices_column = cudf::detail::lower_bound( sorted_unique_keys->view(), input_table, column_order, null_precedence, stream, mr); - return std::make_pair(std::move(sorted_unique_keys), std::move(indices_column)); + return std::pair(std::move(sorted_unique_keys), std::move(indices_column)); } } // namespace detail diff --git a/cpp/src/transform/nans_to_nulls.cu b/cpp/src/transform/nans_to_nulls.cu index ee63e6d366f..42d41b44779 100644 --- a/cpp/src/transform/nans_to_nulls.cu +++ b/cpp/src/transform/nans_to_nulls.cu @@ -53,8 +53,7 @@ struct dispatch_nan_to_null { stream, mr); - return std::make_pair(std::make_unique(std::move(mask.first)), - mask.second); + return std::pair(std::make_unique(std::move(mask.first)), mask.second); } else { auto pred = [input_device_view] __device__(cudf::size_type idx) { return not(std::isnan(input_device_view.element(idx))); @@ -66,8 +65,7 @@ struct dispatch_nan_to_null { stream, mr); - return std::make_pair(std::make_unique(std::move(mask.first)), - mask.second); + return std::pair(std::make_unique(std::move(mask.first)), mask.second); } } @@ -85,7 +83,7 @@ struct dispatch_nan_to_null { std::pair, cudf::size_type> nans_to_nulls( column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (input.is_empty()) { return std::make_pair(std::make_unique(), 0); } + if (input.is_empty()) { return std::pair(std::make_unique(), 0); } return cudf::type_dispatcher(input.type(), dispatch_nan_to_null{}, input, stream, mr); } diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 16aee349bb5..b1a8858f847 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,7 +89,7 @@ struct one_hot_encode_launcher { auto views = cudf::split(all_encodings->view(), split_indices); table_view encodings_view{views}; - return std::make_pair(std::move(all_encodings), encodings_view); + return std::pair(std::move(all_encodings), encodings_view); } template , table_view> one_hot_encode(column_view const& { CUDF_EXPECTS(input.type() == categories.type(), "Mismatch type between input and categories."); - if (categories.is_empty()) { - return std::make_pair(make_empty_column(type_id::BOOL8), table_view{}); - } + if (categories.is_empty()) { return std::pair(make_empty_column(type_id::BOOL8), table_view{}); } if (input.is_empty()) { auto empty_data = make_empty_column(type_id::BOOL8); std::vector views(categories.size(), empty_data->view()); - return std::make_pair(std::move(empty_data), table_view{views}); + return std::pair(std::move(empty_data), table_view{views}); } return type_dispatcher(input.type(), one_hot_encode_launcher{}, input, categories, stream, mr); diff --git a/cpp/src/transpose/transpose.cu b/cpp/src/transpose/transpose.cu index b5b00b11a0f..a87cf60a252 100644 --- a/cpp/src/transpose/transpose.cu +++ b/cpp/src/transpose/transpose.cu @@ -37,7 +37,7 @@ std::pair, table_view> transpose(table_view const& input { // If there are no rows in the input, return successfully if (input.num_columns() == 0 || input.num_rows() == 0) { - return std::make_pair(std::make_unique(), table_view{}); + return std::pair(std::make_unique(), table_view{}); } // Check datatype homogeneity @@ -54,7 +54,7 @@ std::pair, table_view> transpose(table_view const& input auto splits = std::vector(splits_iter, splits_iter + input.num_rows() - 1); auto output_column_views = split(output_column->view(), splits, stream); - return std::make_pair(std::move(output_column), table_view(output_column_views)); + return std::pair(std::move(output_column), table_view(output_column_views)); } } // namespace detail diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index e016f47616b..95c54d7596e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -250,6 +250,7 @@ ConfigureTest( copying/gather_tests.cpp copying/get_value_tests.cpp copying/pack_tests.cpp + copying/purge_nonempty_nulls_tests.cpp copying/sample_tests.cpp copying/scatter_tests.cpp copying/scatter_list_tests.cpp diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp index 64462669f90..28df893aff1 100644 --- a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp @@ -20,13 +20,13 @@ #include #include #include +#include #include #include #include #include -#include "cudf/utilities/error.hpp" #include #include diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index 4e0e70bf15c..44a79e63cd8 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -645,7 +645,7 @@ TYPED_TEST(ListsStructsLeafTest, FromNonNested) 0, cudf::create_null_mask(2, cudf::mask_state::UNALLOCATED)); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col, *expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*col, *expected); } TYPED_TEST(ListsStructsLeafTest, FromNested) diff --git a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp new file mode 100644 index 
00000000000..77fd3f66ee5 --- /dev/null +++ b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp @@ -0,0 +1,437 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cudf::test { + +using iterators::no_nulls; +using iterators::null_at; +using iterators::nulls_at; +using T = int32_t; // The actual type of the leaf node isn't really important. +using values_col_t = fixed_width_column_wrapper; +using offsets_col_t = fixed_width_column_wrapper; +using gather_map_t = fixed_width_column_wrapper; + +template +using LCW = cudf::test::lists_column_wrapper; + +struct PurgeNonEmptyNullsTest : public cudf::test::BaseFixture { + /// Helper to run gather() on a single column, and extract the single column from the result. + std::unique_ptr gather(column_view const& input, gather_map_t const& gather_map) + { + auto gathered = + cudf::gather(cudf::table_view{{input}}, gather_map, out_of_bounds_policy::NULLIFY); + return std::move(gathered->release()[0]); + } + + /// Verify that the result of `sanitize()` is equivalent to the unsanitized input, + /// except that the null rows are also empty. + template + void test_purge(ColumnViewT const& unpurged) + { + auto const purged = cudf::purge_nonempty_nulls(unpurged); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(unpurged.parent(), *purged); + EXPECT_FALSE(cudf::has_nonempty_nulls(*purged)); + } +}; + +// List. +TEST_F(PurgeNonEmptyNullsTest, SingleLevelList) +{ + auto const input = LCW{{{{1, 2, 3, 4}, null_at(2)}, + {5}, + {6, 7}, // <--- Will be set to NULL. Unsanitized row. + {8, 9, 10}}, + no_nulls()} + .release(); + EXPECT_FALSE(cudf::may_have_nonempty_nulls(*input)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*input)); + + // Set nullmask, post construction. + cudf::detail::set_null_mask(input->mutable_view().null_mask(), 2, 3, false); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input)); + EXPECT_TRUE(cudf::has_nonempty_nulls(*input)); + + test_purge(lists_column_view{*input}); + + { + // Selecting all rows from input, in different order. + auto const results = gather(input->view(), {1, 2, 0, 3}); + auto const results_list_view = lists_column_view(*results); + + auto const expected = LCW{{{5}, + {}, // NULL. + {{1, 2, 3, 4}, null_at(2)}, + {8, 9, 10}}, + null_at(1)}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_list_view.offsets(), offsets_col_t{0, 1, 1, 5, 8}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_list_view.child(), + values_col_t{{5, 1, 2, 3, 4, 8, 9, 10}, null_at(3)}); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*results)); + } + { + // Test when gather selects rows preceded by unsanitized rows. + auto const results = gather(input->view(), {3, 100, 0}); + auto const expected = LCW{{ + {8, 9, 10}, + {}, // NULL. 
+ {{1, 2, 3, 4}, null_at(2)}, + }, + null_at(1)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*results)); + } + { + // Test when gather selects rows followed by unsanitized rows. + auto const results = gather(input->view(), {1, 100, 0}); + auto const expected = LCW{{ + {5}, + {}, // NULL. + {{1, 2, 3, 4}, null_at(2)}, + }, + null_at(1)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*results)); + } + { + // Test when gather selects unsanitized row specifically. + auto const results = gather(input->view(), {2}); + auto const results_lists_view = lists_column_view(*results); + auto const expected = LCW{{ + LCW{} // NULL. + }, + null_at(0)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.offsets(), offsets_col_t{0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.child(), values_col_t{}); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*results)); + } +} + +// List>. +TEST_F(PurgeNonEmptyNullsTest, TwoLevelList) +{ + auto const input = + LCW{ + {{{1, 2, 3}, {4, 5, 6, 7}, {8}, {9, 1}, {2}}, + {{11, 12}, {13, 14, 15}, {16, 17, 18}, {19}}, + {{21}, {22, 23}, {24, 25, 26}}, + {{31, 32}, {33, 34, 35, 36}, {}, {37, 38}}, //<--- Will be set to NULL. Unsanitized row. + {{41}, {42, 43}}}, + no_nulls()} + .release(); + EXPECT_FALSE(cudf::may_have_nonempty_nulls(*input)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*input)); + + // Set nullmask, post construction. + cudf::detail::set_null_mask(input->mutable_view().null_mask(), 3, 4, false); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input)); + EXPECT_TRUE(cudf::has_nonempty_nulls(*input)); + + test_purge(lists_column_view{*input}); + + { + // Verify that gather() output is sanitized. + auto const results = gather(input->view(), {100, 3, 0, 1}); + auto const results_lists_view = lists_column_view(*results); + + auto const expected = LCW{{ + LCW{}, // NULL, because of out of bounds. + LCW{}, // NULL, because input row was null. + {{1, 2, 3}, {4, 5, 6, 7}, {8}, {9, 1}, {2}}, // i.e. input[0] + {{11, 12}, {13, 14, 15}, {16, 17, 18}, {19}} // i.e. input[1] + }, + nulls_at({0, 1})}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.offsets(), offsets_col_t{0, 0, 0, 5, 9}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + results_lists_view.child(), + LCW{ + {1, 2, 3}, {4, 5, 6, 7}, {8}, {9, 1}, {2}, {11, 12}, {13, 14, 15}, {16, 17, 18}, {19}}); + + auto const child_lists_view = lists_column_view(results_lists_view.child()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(child_lists_view.offsets(), + offsets_col_t{0, 3, 7, 8, 10, 11, 13, 16, 19, 20}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + child_lists_view.child(), + values_col_t{1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 11, 12, 13, 14, 15, 16, 17, 18, 19}); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*results)); + } +} + +// List>>. +TEST_F(PurgeNonEmptyNullsTest, ThreeLevelList) +{ + auto const input = LCW{{{{{1, 2}, {3}}, {{4, 5}, {6, 7}}, {{8, 8}, {}}, {{9, 1}}, {{2, 3}}}, + {{{11, 12}}, {{13}, {14, 15}}, {{16, 17, 18}}, {{19, 19}, {}}}, + {{{21, 21}}, {{22, 23}, {}}, {{24, 25}, {26}}}, + {{{31, 32}, {}}, + {{33, 34, 35}, {36}}, + {}, + {{37, 38}}}, //<--- Will be set to NULL. Unsanitized row. 
+ {{{41, 41, 41}}, {{42, 43}}}}, + no_nulls()} + .release(); + EXPECT_FALSE(cudf::may_have_nonempty_nulls(*input)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*input)); + + // Set nullmask, post construction. + cudf::detail::set_null_mask(input->mutable_view().null_mask(), 3, 4, false); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input)); + EXPECT_TRUE(cudf::has_nonempty_nulls(*input)); + + test_purge(lists_column_view{*input}); + + { + auto const results = gather(input->view(), {100, 3, 0, 1}); + auto const results_lists_view = lists_column_view(*results); + + auto const expected = LCW{ + { + LCW{}, // NULL, because of out of bounds. + LCW{}, // NULL, because input row was null. + {{{1, 2}, {3}}, {{4, 5}, {6, 7}}, {{8, 8}, {}}, {{9, 1}}, {{2, 3}}}, // i.e. input[0] + {{{11, 12}}, {{13}, {14, 15}}, {{16, 17, 18}}, {{19, 19}, {}}} // i.e. input[1] + }, + nulls_at({0, 1})}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.offsets(), offsets_col_t{0, 0, 0, 5, 9}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.child(), + LCW{{{1, 2}, {3}}, + {{4, 5}, {6, 7}}, + {{8, 8}, {}}, + {{9, 1}}, + {{2, 3}}, + {{11, 12}}, + {{13}, {14, 15}}, + {{16, 17, 18}}, + {{19, 19}, {}}}); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*results)); + } +} + +// List. +TEST_F(PurgeNonEmptyNullsTest, ListOfStrings) +{ + using T = string_view; + + auto const input = LCW{{{{"1", "22", "", "4444"}, null_at(2)}, + {"55555"}, + {"666666", "7777777"}, // <--- Will be set to NULL. Unsanitized row. + {"88888888", "999999999", "1010101010"}, + {"11", "22", "33", "44"}, + {"55", "66", "77", "88"}}, + no_nulls()} + .release(); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*input)); + + // Set nullmask, post construction. + cudf::detail::set_null_mask(input->mutable_view().null_mask(), 2, 3, false); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input)); + EXPECT_TRUE(cudf::has_nonempty_nulls(*input)); + + test_purge(lists_column_view{*input}); + + { + // Selecting all rows from input, in different order. + auto const results = gather(input->view(), {1, 2, 0, 3}); + auto const results_list_view = lists_column_view(*results); + + auto const expected = LCW{{{"55555"}, + {}, // NULL. + {{"1", "22", "", "4444"}, null_at(2)}, + {"88888888", "999999999", "1010101010"}}, + null_at(1)}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_list_view.offsets(), offsets_col_t{0, 1, 1, 5, 8}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + results_list_view.child(), + strings_column_wrapper{ + {"55555", "1", "22", "", "4444", "88888888", "999999999", "1010101010"}, null_at(3)}); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*results)); + } + { + // Gathering from a sliced column. + auto const sliced = cudf::slice({input->view()}, {1, 5})[0]; // Lop off 1 row at each end. 
+  EXPECT_TRUE(cudf::may_have_nonempty_nulls(sliced)); + EXPECT_TRUE(cudf::has_nonempty_nulls(sliced)); + + auto const results = gather(sliced, {1, 2, 0, 3}); + auto const results_list_view = lists_column_view(*results); + auto const expected = LCW<T>{{ + {}, + {"88888888", "999999999", "1010101010"}, + {"55555"}, + {"11", "22", "33", "44"}, + }, + null_at(0)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_list_view.offsets(), offsets_col_t{0, 0, 3, 4, 8}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + results_list_view.child(), + strings_column_wrapper{ + "88888888", "999999999", "1010101010", "55555", "11", "22", "33", "44"}); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*results)); + } +} + +// List. +TEST_F(PurgeNonEmptyNullsTest, UnsanitizedListOfUnsanitizedStrings) +{ + auto strings = + strings_column_wrapper{ + {"1", "22", "3", "44", "5", "66", "7", "8888", "9", "1010"}, //<--- "8888" will be + // unsanitized. + no_nulls()} + .release(); + EXPECT_FALSE(cudf::may_have_nonempty_nulls(*strings)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*strings)); + + // Set strings nullmask, post construction. + set_null_mask(strings->mutable_view().null_mask(), 7, 8, false); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*strings)); + EXPECT_TRUE(cudf::has_nonempty_nulls(*strings)); + + test_purge(strings_column_view{*strings}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + strings_column_view(*strings).offsets(), offsets_col_t{0, 1, 3, 4, 6, 7, 9, 10, 14, 15, 19} + // 10-14 indicates that "8888" is unsanitized. + ); + + // Construct a list column from the strings column. + auto const lists = make_lists_column(4, + offsets_col_t{0, 4, 5, 7, 10}.release(), + std::move(strings), + 0, + detail::make_null_mask(no_nulls(), no_nulls() + 4)); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*lists)); + EXPECT_TRUE(cudf::has_nonempty_nulls(*lists)); + + // Set lists nullmask, post construction. + cudf::detail::set_null_mask(lists->mutable_view().null_mask(), 2, 3, false); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*lists)); + EXPECT_TRUE(cudf::has_nonempty_nulls(*lists)); + + test_purge(lists_column_view{*lists}); + + // At this point, + // 1. {"66", "7"} will be unsanitized. + // 2. {"8888", "9", "1010"} will actually be {NULL, "9", "1010"}. + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + lists_column_view(*lists).offsets(), + offsets_col_t{0, 4, 5, 7, 10}); // 5-7 indicates that list row#2 is unsanitized. + + auto const result = gather(lists->view(), {1, 2, 0, 3}); + auto const expected = LCW<cudf::string_view>{{{"5"}, + {}, // NULL. + {"1", "22", "3", "44"}, + {{"", "9", "1010"}, null_at(0)}}, + null_at(1)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); + + // Ensure row#2 has been sanitized. + auto const results_lists_view = lists_column_view(*result); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.offsets(), offsets_col_t{0, 1, 1, 5, 8} + // 1-1 indicates that row#2 is sanitized. + ); + + // Ensure that "8888" has been sanitized, and stored as "". + auto const child_strings_view = strings_column_view(results_lists_view.child()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(child_strings_view.offsets(), + offsets_col_t{0, 1, 2, 4, 5, 7, 7, 8, 12}); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*result)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*result)); +} + +// Struct<List>. +TEST_F(PurgeNonEmptyNullsTest, StructOfList) +{ + auto const structs_input = + [] { + auto child = LCW<T>{{{{1, 2, 3, 4}, null_at(2)}, + {5}, + {6, 7}, //<--- Unsanitized row.
+ {8, 9, 10}}, + no_nulls()}; + EXPECT_FALSE(cudf::has_nonempty_nulls(child)); + return structs_column_wrapper{{child}, null_at(2)}; + }() + .release(); + + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*structs_input)); + EXPECT_TRUE(cudf::has_nonempty_nulls(*structs_input)); + + test_purge(structs_column_view{*structs_input}); + + // At this point, even though the structs column has a null at index 2, + // the child column has a non-empty list row at index 2: {6, 7}. + CUDF_TEST_EXPECT_COLUMNS_EQUAL(lists_column_view(structs_input->child(0)).child(), + values_col_t{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, null_at(2)}); + + { + // Test rearrange. + auto const gather_map = gather_map_t{1, 2, 0, 3}; + auto const result = gather(structs_input->view(), gather_map); + auto const expected_result = [] { + auto child = LCW{{{5}, + LCW{}, //<--- Now, sanitized. + {{1, 2, 3, 4}, null_at(2)}, + {8, 9, 10}}, + null_at(1)}; + return structs_column_wrapper{{child}, null_at(1)}; + }(); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected_result); + auto const results_child = lists_column_view(result->child(0)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_child.offsets(), offsets_col_t{0, 1, 1, 5, 8}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_child.child(), + values_col_t{{5, 1, 2, 3, 4, 8, 9, 10}, null_at(3)}); + EXPECT_TRUE(cudf::may_have_nonempty_nulls(*result)); + EXPECT_FALSE(cudf::has_nonempty_nulls(*result)); + } +} + +} // namespace cudf::test diff --git a/cpp/tests/copying/scatter_tests.cpp b/cpp/tests/copying/scatter_tests.cpp index 28ebb6cbcb6..306ab8a3d5c 100644 --- a/cpp/tests/copying/scatter_tests.cpp +++ b/cpp/tests/copying/scatter_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -573,6 +573,17 @@ TEST_F(ScatterStringsTests, ScatterScalarNoNulls) CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } +TEST_F(ScatterStringsTests, EmptyStrings) +{ + cudf::test::strings_column_wrapper input{"", "", ""}; + cudf::table_view t({input}); + + // Test for issue 10717: all-empty-string column scatter + auto map = cudf::test::fixed_width_column_wrapper({0}); + auto result = cudf::scatter(t, map, t); + CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), t); +} + template class BooleanMaskScatter : public cudf::test::BaseFixture { }; diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu index 4327a8b694b..bde8ccc6de7 100644 --- a/cpp/tests/error/error_handling_test.cu +++ b/cpp/tests/error/error_handling_test.cu @@ -39,6 +39,7 @@ TEST(CudaTryTest, Error) CUDA_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorLaunchFailure), "cudaErrorLaunchFailure unspecified launch failure"); } + TEST(CudaTryTest, Success) { EXPECT_NO_THROW(CUDF_CUDA_TRY(cudaSuccess)); } TEST(CudaTryTest, TryCatch) diff --git a/cpp/tests/groupby/m2_tests.cpp b/cpp/tests/groupby/m2_tests.cpp index be7d6c1ce05..6f5a04e3752 100644 --- a/cpp/tests/groupby/m2_tests.cpp +++ b/cpp/tests/groupby/m2_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -48,8 +48,7 @@ auto compute_M2(cudf::column_view const& keys, cudf::column_view const& values) auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys})); auto result = gb_obj.aggregate(requests); - return std::make_pair(std::move(result.first->release()[0]), - std::move(result.second[0].results[0])); + return std::pair(std::move(result.first->release()[0]), std::move(result.second[0].results[0])); } } // namespace diff --git a/cpp/tests/groupby/merge_lists_tests.cpp b/cpp/tests/groupby/merge_lists_tests.cpp index 7c24c6267ca..593bb7c50af 100644 --- a/cpp/tests/groupby/merge_lists_tests.cpp +++ b/cpp/tests/groupby/merge_lists_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,8 +47,7 @@ auto merge_lists(vcol_views const& keys_cols, vcol_views const& values_cols) auto gb_obj = cudf::groupby::groupby(cudf::table_view({*keys})); auto result = gb_obj.aggregate(requests); - return std::make_pair(std::move(result.first->release()[0]), - std::move(result.second[0].results[0])); + return std::pair(std::move(result.first->release()[0]), std::move(result.second[0].results[0])); } } // namespace diff --git a/cpp/tests/groupby/merge_m2_tests.cpp b/cpp/tests/groupby/merge_m2_tests.cpp index 60067e78022..79ffebf146c 100644 --- a/cpp/tests/groupby/merge_m2_tests.cpp +++ b/cpp/tests/groupby/merge_m2_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,10 +67,9 @@ auto compute_partial_results(cudf::column_view const& keys, cudf::column_view co auto [out_keys, out_results] = gb_obj.aggregate(requests); auto const num_output_rows = out_keys->num_rows(); - return std::make_pair( - std::move(out_keys->release()[0]), - cudf::make_structs_column( - num_output_rows, std::move(out_results[0].results), 0, rmm::device_buffer{})); + return std::pair(std::move(out_keys->release()[0]), + cudf::make_structs_column( + num_output_rows, std::move(out_results[0].results), 0, rmm::device_buffer{})); } /** @@ -93,8 +92,7 @@ auto merge_M2(vcol_views const& keys_cols, vcol_views const& values_cols) auto gb_obj = cudf::groupby::groupby(cudf::table_view({*keys})); auto result = gb_obj.aggregate(requests); - return std::make_pair(std::move(result.first->release()[0]), - std::move(result.second[0].results[0])); + return std::pair(std::move(result.first->release()[0]), std::move(result.second[0].results[0])); } } // namespace diff --git a/cpp/tests/groupby/merge_sets_tests.cpp b/cpp/tests/groupby/merge_sets_tests.cpp index 1e2f0c9fa9e..57f67f6b81a 100644 --- a/cpp/tests/groupby/merge_sets_tests.cpp +++ b/cpp/tests/groupby/merge_sets_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,8 +47,7 @@ auto merge_sets(vcol_views const& keys_cols, vcol_views const& values_cols) auto gb_obj = cudf::groupby::groupby(cudf::table_view({*keys})); auto result = gb_obj.aggregate(requests); - return std::make_pair(std::move(result.first->release()[0]), - std::move(result.second[0].results[0])); + return std::pair(std::move(result.first->release()[0]), std::move(result.second[0].results[0])); } } // namespace diff --git a/cpp/tests/groupby/rank_scan_tests.cpp b/cpp/tests/groupby/rank_scan_tests.cpp index 81369beb2ec..d4e8b4cbf0f 100644 --- a/cpp/tests/groupby/rank_scan_tests.cpp +++ b/cpp/tests/groupby/rank_scan_tests.cpp @@ -29,11 +29,9 @@ namespace test { using namespace iterators; template -using input = fixed_width_column_wrapper; -using rank_result_col = fixed_width_column_wrapper; -using percent_result_t = - cudf::detail::target_type_t; -using percent_result_col = fixed_width_column_wrapper; +using input = fixed_width_column_wrapper; +using rank_result_col = fixed_width_column_wrapper; +using percent_result_col = fixed_width_column_wrapper; using null_iter_t = decltype(nulls_at({})); auto constexpr X = int32_t{0}; // Placeholder for NULL rows. @@ -45,27 +43,31 @@ inline void test_rank_scans(column_view const& keys, column_view const& expected_rank, column_view const& expected_percent_rank) { - test_single_scan(keys, - order, - keys, - expected_dense, - make_dense_rank_aggregation(), - null_policy::INCLUDE, - sorted::YES); - test_single_scan(keys, - order, - keys, - expected_rank, - make_rank_aggregation(), - null_policy::INCLUDE, - sorted::YES); - test_single_scan(keys, - order, - keys, - expected_percent_rank, - make_percent_rank_aggregation(), - null_policy::INCLUDE, - sorted::YES); + test_single_scan( + keys, + order, + keys, + expected_dense, + make_rank_aggregation(rank_method::DENSE, {}, null_policy::INCLUDE), + null_policy::INCLUDE, + sorted::YES); + test_single_scan( + keys, + order, + keys, + expected_rank, + make_rank_aggregation(rank_method::MIN, {}, null_policy::INCLUDE), + null_policy::INCLUDE, + sorted::YES); + test_single_scan( + keys, + order, + keys, + expected_percent_rank, + make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED), + null_policy::INCLUDE, + sorted::YES); } struct groupby_rank_scan_test : public BaseFixture { @@ -148,7 +150,7 @@ TYPED_TEST(typed_groupby_rank_scan_test, basic) { using T = TypeParam; - auto const keys = input{0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1}; + auto const keys = /* */ input{0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1}; auto const make_order_by = [&] { return input{5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 1, 1}; }; auto const order_by = make_order_by(); auto const order_by_struct = [&] { @@ -244,9 +246,12 @@ TYPED_TEST(typed_groupby_rank_scan_test, mixedStructs) std::vector requests; requests.emplace_back(groupby::scan_request()); requests[0].values = *struct_col; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); - requests[0].aggregations.push_back(make_percent_rank_aggregation()); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::DENSE, {}, null_policy::INCLUDE)); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::MIN, {}, null_policy::INCLUDE)); + requests[0].aggregations.push_back(make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED)); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, 
sorted::YES); auto [result_keys, agg_results] = gb_obj.scan(requests); @@ -288,13 +293,19 @@ TYPED_TEST(typed_groupby_rank_scan_test, nestedStructs) requests.emplace_back(groupby::scan_request()); requests.emplace_back(groupby::scan_request()); requests[0].values = *nested_structs; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); - requests[0].aggregations.push_back(make_percent_rank_aggregation()); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::DENSE)); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::MIN)); + requests[0].aggregations.push_back(make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED)); requests[1].values = *flat_struct; - requests[1].aggregations.push_back(make_dense_rank_aggregation()); - requests[1].aggregations.push_back(make_rank_aggregation()); - requests[1].aggregations.push_back(make_percent_rank_aggregation()); + requests[1].aggregations.push_back( + make_rank_aggregation(rank_method::DENSE)); + requests[1].aggregations.push_back( + make_rank_aggregation(rank_method::MIN)); + requests[1].aggregations.push_back(make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED)); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto [result_keys, agg_results] = gb_obj.scan(requests); @@ -339,13 +350,19 @@ TYPED_TEST(typed_groupby_rank_scan_test, structsWithNullPushdown) requests.emplace_back(groupby::scan_request()); requests.emplace_back(groupby::scan_request()); requests[0].values = *possibly_null_structs; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); - requests[0].aggregations.push_back(make_percent_rank_aggregation()); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::DENSE, {}, null_policy::INCLUDE)); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::MIN, {}, null_policy::INCLUDE)); + requests[0].aggregations.push_back(make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED)); requests[1].values = *definitely_null_structs; - requests[1].aggregations.push_back(make_dense_rank_aggregation()); - requests[1].aggregations.push_back(make_rank_aggregation()); - requests[1].aggregations.push_back(make_percent_rank_aggregation()); + requests[1].aggregations.push_back( + make_rank_aggregation(rank_method::DENSE, {}, null_policy::INCLUDE)); + requests[1].aggregations.push_back( + make_rank_aggregation(rank_method::MIN, {}, null_policy::INCLUDE)); + requests[1].aggregations.push_back(make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED)); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto [result_keys, agg_results] = gb_obj.scan(requests); @@ -405,11 +422,11 @@ TYPED_TEST(list_groupby_rank_scan_test, lists) requests.emplace_back(groupby::aggregation_request()); requests.emplace_back(groupby::aggregation_request()); requests[0].values = list_col; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); + requests[0].aggregations.push_back(make_rank_aggregation(rank_method::DENSE)); + requests[0].aggregations.push_back(make_rank_aggregation(rank_method::MIN)); requests[1].values = 
struct_col; - requests[1].aggregations.push_back(make_dense_rank_aggregation()); - requests[1].aggregations.push_back(make_rank_aggregation()); + requests[1].aggregations.push_back(make_rank_aggregation(rank_method::DENSE)); + requests[1].aggregations.push_back(make_rank_aggregation(rank_method::MIN)); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto result = gb_obj.scan(requests); @@ -484,7 +501,7 @@ TEST(groupby_rank_scan_test, strings) keys, order_by_structs_with_nulls, expected_dense, expected_rank, expected_percent); } -TEST_F(groupby_rank_scan_test_failures, test_exception_triggers) +TEST_F(groupby_rank_scan_test_failures, DISABLED_test_exception_triggers) { using T = uint32_t; @@ -496,57 +513,60 @@ TEST_F(groupby_rank_scan_test_failures, test_exception_triggers) col, keys, col, - make_dense_rank_aggregation(), + make_rank_aggregation(rank_method::DENSE), null_policy::INCLUDE, sorted::NO), - "Dense rank aggregate in groupby scan requires the keys to be presorted"); + "Rank aggregate in groupby scan requires the keys to be presorted"); - CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, - col, - keys, - col, - make_rank_aggregation(), - null_policy::INCLUDE, - sorted::NO), - "Rank aggregate in groupby scan requires the keys to be presorted"); + CUDF_EXPECT_THROW_MESSAGE( + test_single_scan(keys, + col, + keys, + col, + make_rank_aggregation(rank_method::MIN), + null_policy::INCLUDE, + sorted::NO), + "Rank aggregate in groupby scan requires the keys to be presorted"); + + CUDF_EXPECT_THROW_MESSAGE( + test_single_scan(keys, + col, + keys, + col, + make_rank_aggregation(rank_method::DENSE), + null_policy::EXCLUDE, + sorted::YES), + "Rank aggregate in groupby scan requires the keys to be presorted"); CUDF_EXPECT_THROW_MESSAGE( test_single_scan(keys, col, keys, col, - make_dense_rank_aggregation(), + make_rank_aggregation(rank_method::MIN), null_policy::EXCLUDE, sorted::YES), - "Dense rank aggregate in groupby scan requires the keys to be presorted"); + "Rank aggregate in groupby scan requires the keys to be presorted"); - CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, - col, - keys, - col, - make_rank_aggregation(), - null_policy::EXCLUDE, - sorted::YES), - "Rank aggregate in groupby scan requires the keys to be presorted"); + CUDF_EXPECT_THROW_MESSAGE( + test_single_scan(keys, + col, + keys, + col, + make_rank_aggregation(rank_method::DENSE), + null_policy::EXCLUDE, + sorted::NO), + "Rank aggregate in groupby scan requires the keys to be presorted"); CUDF_EXPECT_THROW_MESSAGE( test_single_scan(keys, col, keys, col, - make_dense_rank_aggregation(), + make_rank_aggregation(rank_method::MIN), null_policy::EXCLUDE, sorted::NO), - "Dense rank aggregate in groupby scan requires the keys to be presorted"); - - CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, - col, - keys, - col, - make_rank_aggregation(), - null_policy::EXCLUDE, - sorted::NO), - "Rank aggregate in groupby scan requires the keys to be presorted"); + "Rank aggregate in groupby scan requires the keys to be presorted"); } } // namespace test diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu index d69aee57756..f42549514e6 100644 --- a/cpp/tests/hash_map/map_test.cu +++ b/cpp/tests/hash_map/map_test.cu @@ -23,12 +23,12 @@ #include #include +#include #include #include #include -#include "rmm/exec_policy.hpp" #include #include #include diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index d1dc60119b6..4b481ade83f 100644 --- 
a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -148,7 +148,7 @@ std::pair, std::shared_ptr> get_table auto schema = std::make_shared(schema_vector); - return std::make_pair( + return std::pair( std::make_unique(std::move(columns)), arrow::Table::Make( schema, {int64array, string_array, dict_array, boolarray, list_array, struct_array})); diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp index dd00b201df9..a325cadf6a5 100644 --- a/cpp/tests/io/comp/decomp_test.cpp +++ b/cpp/tests/io/comp/decomp_test.cpp @@ -15,6 +15,7 @@ */ #include +#include #include @@ -24,6 +25,8 @@ #include +using cudf::device_span; + /** * @brief Base test fixture for decompression * @@ -32,19 +35,6 @@ */ template struct DecompressTest : public cudf::test::BaseFixture { - void SetUp() override - { - ASSERT_CUDA_SUCCEEDED(cudaMallocHost((void**)&inf_args, sizeof(cudf::io::gpu_inflate_input_s))); - ASSERT_CUDA_SUCCEEDED( - cudaMallocHost((void**)&inf_stat, sizeof(cudf::io::gpu_inflate_status_s))); - } - - void TearDown() override - { - ASSERT_CUDA_SUCCEEDED(cudaFreeHost(inf_stat)); - ASSERT_CUDA_SUCCEEDED(cudaFreeHost(inf_args)); - } - std::vector vector_from_string(const char* str) const { return std::vector(reinterpret_cast(str), @@ -55,49 +45,43 @@ struct DecompressTest : public cudf::test::BaseFixture { const uint8_t* compressed, size_t compressed_size) { - rmm::device_buffer src{compressed, compressed_size, rmm::cuda_stream_default}; - rmm::device_buffer dst{decompressed->size(), rmm::cuda_stream_default}; - - inf_args->srcDevice = static_cast(src.data()); - inf_args->dstDevice = static_cast(dst.data()); - inf_args->srcSize = src.size(); - inf_args->dstSize = dst.size(); - rmm::device_uvector d_inf_args(1, rmm::cuda_stream_default); - rmm::device_uvector d_inf_stat(1, rmm::cuda_stream_default); - ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_args.data(), - inf_args, - sizeof(cudf::io::gpu_inflate_input_s), - cudaMemcpyHostToDevice, - 0)); - ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_stat.data(), - inf_stat, - sizeof(cudf::io::gpu_inflate_status_s), - cudaMemcpyHostToDevice, - 0)); - ASSERT_CUDA_SUCCEEDED( - static_cast(this)->dispatch(d_inf_args.data(), d_inf_stat.data())); - ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(inf_stat, - d_inf_stat.data(), - sizeof(cudf::io::gpu_inflate_status_s), - cudaMemcpyDeviceToHost, - 0)); - ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync( - decompressed->data(), inf_args->dstDevice, inf_args->dstSize, cudaMemcpyDeviceToHost, 0)); - ASSERT_CUDA_SUCCEEDED(cudaStreamSynchronize(0)); + auto stream = rmm::cuda_stream_default; + rmm::device_buffer src{compressed, compressed_size, stream}; + rmm::device_uvector dst{decompressed->size(), stream}; + + hostdevice_vector> inf_in(1, stream); + inf_in[0] = {static_cast(src.data()), src.size()}; + inf_in.host_to_device(stream); + + hostdevice_vector> inf_out(1, stream); + inf_out[0] = dst; + inf_out.host_to_device(stream); + + hostdevice_vector inf_stat(1, stream); + inf_stat[0] = {}; + inf_stat.host_to_device(stream); + + static_cast(this)->dispatch(inf_in, inf_out, inf_stat); + cudaMemcpyAsync( + decompressed->data(), dst.data(), dst.size(), cudaMemcpyDeviceToHost, stream.value()); + inf_stat.device_to_host(stream, true); + ASSERT_EQ(inf_stat[0].status, 0); } - - cudf::io::gpu_inflate_input_s* inf_args = nullptr; - cudf::io::gpu_inflate_status_s* inf_stat = nullptr; }; /** * @brief Derived fixture for GZIP decompression */ struct GzipDecompressTest : public DecompressTest { - cudaError_t 
dispatch(cudf::io::gpu_inflate_input_s* d_inf_args, - cudf::io::gpu_inflate_status_s* d_inf_stat) + void dispatch(device_span> d_inf_in, + device_span> d_inf_out, + device_span d_inf_stat) { - return cudf::io::gpuinflate(d_inf_args, d_inf_stat, 1, 1, rmm::cuda_stream_default); + cudf::io::gpuinflate(d_inf_in, + d_inf_out, + d_inf_stat, + cudf::io::gzip_header_included::YES, + rmm::cuda_stream_default); } }; @@ -105,10 +89,11 @@ struct GzipDecompressTest : public DecompressTest { * @brief Derived fixture for Snappy decompression */ struct SnappyDecompressTest : public DecompressTest { - cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args, - cudf::io::gpu_inflate_status_s* d_inf_stat) + void dispatch(device_span> d_inf_in, + device_span> d_inf_out, + device_span d_inf_stat) { - return cudf::io::gpu_unsnap(d_inf_args, d_inf_stat, 1, rmm::cuda_stream_default); + cudf::io::gpu_unsnap(d_inf_in, d_inf_out, d_inf_stat, rmm::cuda_stream_default); } }; @@ -116,14 +101,19 @@ struct SnappyDecompressTest : public DecompressTest { * @brief Derived fixture for Brotli decompression */ struct BrotliDecompressTest : public DecompressTest { - cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args, - cudf::io::gpu_inflate_status_s* d_inf_stat) + void dispatch(device_span> d_inf_in, + device_span> d_inf_out, + device_span d_inf_stat) { rmm::device_buffer d_scratch{cudf::io::get_gpu_debrotli_scratch_size(1), rmm::cuda_stream_default}; - return cudf::io::gpu_debrotli( - d_inf_args, d_inf_stat, d_scratch.data(), d_scratch.size(), 1, rmm::cuda_stream_default); + cudf::io::gpu_debrotli(d_inf_in, + d_inf_out, + d_inf_stat, + d_scratch.data(), + d_scratch.size(), + rmm::cuda_stream_default); } }; diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index cd0aab3caeb..3905df2b274 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -219,15 +219,21 @@ struct ParquetWriterTimestampTypeTest : public ParquetWriterTest { auto type() { return cudf::data_type{cudf::type_to_id()}; } }; +// Typed test fixture for all types +template +struct ParquetWriterSchemaTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + // Declare typed test cases // TODO: Replace with `NumericTypes` when unsigned support is added. Issue #5352 using SupportedTypes = cudf::test::Types; TYPED_TEST_SUITE(ParquetWriterNumericTypeTest, SupportedTypes); -using SupportedChronoTypes = cudf::test::Concat; -TYPED_TEST_SUITE(ParquetWriterChronoTypeTest, SupportedChronoTypes); +TYPED_TEST_SUITE(ParquetWriterChronoTypeTest, cudf::test::ChronoTypes); using SupportedTimestampTypes = cudf::test::Types; TYPED_TEST_SUITE(ParquetWriterTimestampTypeTest, SupportedTimestampTypes); +TYPED_TEST_SUITE(ParquetWriterSchemaTest, cudf::test::AllTypes); // Base test fixture for chunked writer tests struct ParquetChunkedWriterTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/iterator/value_iterator_test_strings.cu b/cpp/tests/iterator/value_iterator_test_strings.cu index 5bddbfbd4aa..9aa18eb844f 100644 --- a/cpp/tests/iterator/value_iterator_test_strings.cu +++ b/cpp/tests/iterator/value_iterator_test_strings.cu @@ -12,10 +12,12 @@ * or implied. See the License for the specific language governing permissions and limitations under * the License. 
*/ -#include "cudf/detail/utilities/vector_factories.hpp" -#include "rmm/cuda_stream_view.hpp" -#include "rmm/device_uvector.hpp" -#include +#include "iterator_tests.cuh" + +#include + +#include +#include #include #include diff --git a/cpp/tests/join/conditional_join_tests.cu b/cpp/tests/join/conditional_join_tests.cu index 73b355d496d..13852027bf0 100644 --- a/cpp/tests/join/conditional_join_tests.cu +++ b/cpp/tests/join/conditional_join_tests.cu @@ -93,7 +93,7 @@ std::pair, std::vector> gen_random_repeated_columns( std::mt19937 gen(rd()); std::shuffle(left.begin(), left.end(), gen); std::shuffle(right.begin(), right.end(), gen); - return std::make_pair(std::move(left), std::move(right)); + return std::pair(std::move(left), std::move(right)); } // Generate a single pair of left/right nullable columns of random data @@ -120,8 +120,8 @@ gen_random_nullable_repeated_columns(unsigned int N = 10000, unsigned int num_re return uniform_dist(gen) > 0.5; }); - return std::make_pair(std::make_pair(std::move(left), std::move(left_nulls)), - std::make_pair(std::move(right), std::move(right_nulls))); + return std::pair(std::pair(std::move(left), std::move(left_nulls)), + std::pair(std::move(right), std::move(right_nulls))); } } // namespace diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index f560ce7f20c..8ed50c8fb39 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -67,7 +67,7 @@ struct JoinTest : public cudf::test::BaseFixture { auto gold_sort_order = cudf::sorted_order(gold); auto sorted_gold = cudf::gather(gold, *gold_sort_order); - return std::make_pair(std::move(sorted_gold), std::move(sorted_result)); + return std::pair(std::move(sorted_gold), std::move(sorted_result)); } }; diff --git a/cpp/tests/join/mixed_join_tests.cu b/cpp/tests/join/mixed_join_tests.cu index df5b1f5c14a..edcf1d1be27 100644 --- a/cpp/tests/join/mixed_join_tests.cu +++ b/cpp/tests/join/mixed_join_tests.cu @@ -94,7 +94,7 @@ std::pair, std::vector> gen_random_repeated_columns( std::mt19937 gen(rd()); std::shuffle(left.begin(), left.end(), gen); std::shuffle(right.begin(), right.end(), gen); - return std::make_pair(std::move(left), std::move(right)); + return std::pair(std::move(left), std::move(right)); } // Generate a single pair of left/right nullable columns of random data @@ -121,8 +121,8 @@ gen_random_nullable_repeated_columns(unsigned int N = 10000, unsigned int num_re return uniform_dist(gen) > 0.5; }); - return std::make_pair(std::make_pair(std::move(left), std::move(left_nulls)), - std::make_pair(std::move(right), std::move(right_nulls))); + return std::pair(std::pair(std::move(left), std::move(left_nulls)), + std::pair(std::move(right), std::move(right_nulls))); } } // namespace diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index ea26cad3b59..129d1ad66f3 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -652,8 +652,8 @@ TYPED_TEST(MergeTest_, NMerge1KeyColumns) std::vector> facts{}; std::vector tables{}; for (int i = 0; i < num_tables; ++i) { - facts.emplace_back(std::make_pair(PairT0(sequence0, sequence0 + inputRows), - PairT1(sequence1, sequence1 + inputRows))); + facts.emplace_back(std::pair(PairT0(sequence0, sequence0 + inputRows), + PairT1(sequence1, sequence1 + inputRows))); tables.push_back(cudf::table_view{{facts.back().first, facts.back().second}}); } std::vector key_cols{0}; diff --git a/cpp/tests/partitioning/hash_partition_test.cpp b/cpp/tests/partitioning/hash_partition_test.cpp 
index befd9884b11..3ec6ae97595 100644 --- a/cpp/tests/partitioning/hash_partition_test.cpp +++ b/cpp/tests/partitioning/hash_partition_test.cpp @@ -67,9 +67,7 @@ TEST_F(HashPartition, ZeroPartitions) auto columns_to_hash = std::vector({2}); cudf::size_type const num_partitions = 0; - std::unique_ptr output; - std::vector offsets; - std::tie(output, offsets) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output, offsets] = cudf::hash_partition(input, columns_to_hash, num_partitions); // Expect empty table with same number of columns and zero partitions EXPECT_EQ(input.num_columns(), output->num_columns()); @@ -87,9 +85,7 @@ TEST_F(HashPartition, ZeroRows) auto columns_to_hash = std::vector({2}); cudf::size_type const num_partitions = 3; - std::unique_ptr output; - std::vector offsets; - std::tie(output, offsets) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output, offsets] = cudf::hash_partition(input, columns_to_hash, num_partitions); // Expect empty table with same number of columns and zero partitions EXPECT_EQ(input.num_columns(), output->num_columns()); @@ -104,9 +100,7 @@ TEST_F(HashPartition, ZeroColumns) auto columns_to_hash = std::vector({}); cudf::size_type const num_partitions = 3; - std::unique_ptr output; - std::vector offsets; - std::tie(output, offsets) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output, offsets] = cudf::hash_partition(input, columns_to_hash, num_partitions); // Expect empty table with same number of columns and zero partitions EXPECT_EQ(input.num_columns(), output->num_columns()); @@ -124,10 +118,8 @@ TEST_F(HashPartition, MixedColumnTypes) auto columns_to_hash = std::vector({0, 2}); cudf::size_type const num_partitions = 3; - std::unique_ptr output1, output2; - std::vector offsets1, offsets2; - std::tie(output1, offsets1) = cudf::hash_partition(input, columns_to_hash, num_partitions); - std::tie(output2, offsets2) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output1, offsets1] = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output2, offsets2] = cudf::hash_partition(input, columns_to_hash, num_partitions); // Expect output to have size num_partitions EXPECT_EQ(static_cast(num_partitions), offsets1.size()); @@ -148,9 +140,7 @@ TEST_F(HashPartition, NullableStrings) std::vector const columns_to_hash({0}); cudf::size_type const num_partitions = 3; - std::unique_ptr result; - std::vector offsets; - std::tie(result, offsets) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [result, offsets] = cudf::hash_partition(input, columns_to_hash, num_partitions); auto const& col = result->get_column(0); EXPECT_EQ(0, col.null_count()); @@ -167,11 +157,9 @@ TEST_F(HashPartition, ColumnsToHash) auto columns_to_hash = std::vector({0}); cudf::size_type const num_partitions = 3; - std::unique_ptr first_result, second_result; - std::vector first_offsets, second_offsets; - std::tie(first_result, first_offsets) = + auto [first_result, first_offsets] = cudf::hash_partition(first_input, columns_to_hash, num_partitions); - std::tie(second_result, second_offsets) = + auto [second_result, second_offsets] = cudf::hash_partition(second_input, columns_to_hash, num_partitions); // Expect offsets to be equal and num_partitions in length @@ -228,11 +216,9 @@ TEST_F(HashPartition, CustomSeedValue) auto columns_to_hash = std::vector({0, 2}); cudf::size_type const num_partitions = 3; - std::unique_ptr output1, output2; - std::vector offsets1, 
offsets2; - std::tie(output1, offsets1) = cudf::hash_partition( + auto [output1, offsets1] = cudf::hash_partition( input, columns_to_hash, num_partitions, cudf::hash_id::HASH_MURMUR3, 12345); - std::tie(output2, offsets2) = cudf::hash_partition( + auto [output2, offsets2] = cudf::hash_partition( input, columns_to_hash, num_partitions, cudf::hash_id::HASH_MURMUR3, 12345); // Expect output to have size num_partitions @@ -260,9 +246,7 @@ TYPED_TEST(HashPartitionFixedWidth, NullableFixedWidth) std::vector const columns_to_hash({0}); cudf::size_type const num_partitions = 3; - std::unique_ptr result; - std::vector offsets; - std::tie(result, offsets) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [result, offsets] = cudf::hash_partition(input, columns_to_hash, num_partitions); auto const& col = result->get_column(0); EXPECT_EQ(0, col.null_count()); @@ -294,10 +278,8 @@ void run_fixed_width_test(size_t cols, auto columns_to_hash = std::vector(cols); std::iota(columns_to_hash.begin(), columns_to_hash.end(), 0); - std::unique_ptr output1, output2; - std::vector offsets1, offsets2; - std::tie(output1, offsets1) = cudf::hash_partition(input, columns_to_hash, num_partitions); - std::tie(output2, offsets2) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output1, offsets1] = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output2, offsets2] = cudf::hash_partition(input, columns_to_hash, num_partitions); // Expect output to have size num_partitions EXPECT_EQ(static_cast(num_partitions), offsets1.size()); @@ -367,9 +349,7 @@ TEST_F(HashPartition, FixedPointColumnsToHash) auto columns_to_hash = std::vector({0}); cudf::size_type const num_partitions = 1; - std::unique_ptr first_result; - std::vector first_offsets; - std::tie(first_result, first_offsets) = + auto [first_result, first_offsets] = cudf::hash_partition(first_input, columns_to_hash, num_partitions); // Expect offsets to be equal and num_partitions in length diff --git a/cpp/tests/partitioning/partition_test.cpp b/cpp/tests/partitioning/partition_test.cpp index 785af409c4c..014a19e93a9 100644 --- a/cpp/tests/partitioning/partition_test.cpp +++ b/cpp/tests/partitioning/partition_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include #include #include #include #include #include -#include "cudf/sorting.hpp" +#include +#include +#include +#include template class PartitionTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/reductions/list_rank_test.cpp b/cpp/tests/reductions/list_rank_test.cpp index d263677f23b..b3a8e7e0c28 100644 --- a/cpp/tests/reductions/list_rank_test.cpp +++ b/cpp/tests/reductions/list_rank_test.cpp @@ -42,10 +42,11 @@ TEST_F(ListRankScanTest, BasicList) auto const expected_dense_vals = cudf::test::fixed_width_column_wrapper{1, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 9}; - this->test_ungrouped_rank_scan(col, - expected_dense_vals, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + col, + expected_dense_vals, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } TEST_F(ListRankScanTest, DeepList) @@ -73,20 +74,22 @@ TEST_F(ListRankScanTest, DeepList) { // Non-sliced auto const expected_dense_vals = cudf::test::fixed_width_column_wrapper{ 1, 1, 2, 3, 4, 5, 5, 5, 6, 6, 7, 7, 8, 9, 10, 11}; - this->test_ungrouped_rank_scan(col, - expected_dense_vals, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + col, + expected_dense_vals, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } { // sliced auto sliced_col = cudf::slice(col, {3, 12})[0]; auto const expected_dense_vals = cudf::test::fixed_width_column_wrapper{1, 2, 3, 3, 3, 4, 4, 5, 5}; - this->test_ungrouped_rank_scan(sliced_col, - expected_dense_vals, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + sliced_col, + expected_dense_vals, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } } @@ -138,10 +141,11 @@ TEST_F(ListRankScanTest, ListOfStruct) auto expect = cudf::test::fixed_width_column_wrapper{ 1, 1, 2, 2, 3, 4, 4, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10}; - this->test_ungrouped_rank_scan(list_column, - expect, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + list_column, + expect, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } { // Sliced @@ -149,10 +153,11 @@ TEST_F(ListRankScanTest, ListOfStruct) auto expect = cudf::test::fixed_width_column_wrapper{1, 2, 3, 3, 3, 4, 5, 6, 7, 7, 8, 8}; - this->test_ungrouped_rank_scan(sliced_col, - expect, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + sliced_col, + expect, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } } @@ -192,10 +197,11 @@ TEST_F(ListRankScanTest, ListOfEmptyStruct) auto expect = cudf::test::fixed_width_column_wrapper{1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6}; - this->test_ungrouped_rank_scan(*list_column, - expect, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + *list_column, + expect, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } TEST_F(ListRankScanTest, EmptyDeepList) @@ -221,8 +227,9 @@ TEST_F(ListRankScanTest, EmptyDeepList) auto expect = cudf::test::fixed_width_column_wrapper{1, 1, 2, 2}; - this->test_ungrouped_rank_scan(*list_column, - expect, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + *list_column, + expect, + 
cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } diff --git a/cpp/tests/reductions/rank_tests.cpp b/cpp/tests/reductions/rank_tests.cpp index fb2cd17fe30..3bf2899ce2f 100644 --- a/cpp/tests/reductions/rank_tests.cpp +++ b/cpp/tests/reductions/rank_tests.cpp @@ -36,15 +36,14 @@ namespace cudf::test { using namespace iterators; template -using input = fixed_width_column_wrapper; -using rank_result_col = fixed_width_column_wrapper; -using percent_result_t = - cudf::detail::target_type_t; -using percent_result_col = fixed_width_column_wrapper; +using input = fixed_width_column_wrapper; +using rank_result_col = fixed_width_column_wrapper; +using percent_result_col = fixed_width_column_wrapper; -auto const rank = cudf::make_rank_aggregation(); -auto const dense_rank = cudf::make_dense_rank_aggregation(); -auto const percent_rank = cudf::make_percent_rank_aggregation(); +auto const rank = cudf::make_rank_aggregation(cudf::rank_method::MIN); +auto const dense_rank = cudf::make_rank_aggregation(cudf::rank_method::DENSE); +auto const percent_rank = cudf::make_rank_aggregation( + cudf::rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED); auto constexpr INCLUSIVE_SCAN = cudf::scan_type::INCLUSIVE; auto constexpr INCLUDE_NULLS = cudf::null_policy::INCLUDE; @@ -56,6 +55,8 @@ struct TypedRankScanTest : BaseScanTest { std::unique_ptr const& agg) { auto col_out = cudf::scan(input, agg, INCLUSIVE_SCAN, INCLUDE_NULLS); + std::cout << "expect type: " << static_cast(expect_vals.type().id()) << std::endl; + std::cout << "out type: " << static_cast(col_out->type().id()) << std::endl; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expect_vals, col_out->view()); } }; @@ -318,11 +319,11 @@ TEST(RankScanTest, ExclusiveScan) auto const vals = input{3, 4, 5}; CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, dense_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS), - "Dense rank aggregation operator requires an inclusive scan"); + "Rank aggregation operator requires an inclusive scan"); CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, rank, scan_type::EXCLUSIVE, INCLUDE_NULLS), "Rank aggregation operator requires an inclusive scan"); CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, percent_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS), - "Percent rank aggregation operator requires an inclusive scan"); + "Rank aggregation operator requires an inclusive scan"); } } // namespace cudf::test diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index d533a91f4d0..68b4d85db2a 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -84,6 +84,7 @@ struct ScanTest : public BaseScanTest { case aggregation::PRODUCT: return std::is_invocable_v; case aggregation::MIN: return std::is_invocable_v; case aggregation::MAX: return std::is_invocable_v; + case aggregation::RANK: return std::is_invocable_v; // comparable default: return false; } return false; diff --git a/cpp/tests/rolling/grouped_rolling_test.cpp b/cpp/tests/rolling/grouped_rolling_test.cpp index f484661eee8..3a69c13c889 100644 --- a/cpp/tests/rolling/grouped_rolling_test.cpp +++ b/cpp/tests/rolling/grouped_rolling_test.cpp @@ -340,10 +340,8 @@ class GroupedRollingTest : public cudf::test::BaseFixture { thrust::host_vector ref_valid(num_rows); // input data and mask - thrust::host_vector in_col; - std::vector in_valid; - std::tie(in_col, in_valid) = cudf::test::to_host(input); - bitmask_type* valid_mask = in_valid.data(); + auto [in_col, in_valid] = 
cudf::test::to_host(input); + bitmask_type* valid_mask = in_valid.data(); agg_op op; for (size_type i = 0; i < num_rows; i++) { @@ -973,10 +971,8 @@ class GroupedTimeRangeRollingTest : public cudf::test::BaseFixture { thrust::host_vector ref_valid(num_rows); // input data and mask - thrust::host_vector in_col; - std::vector in_valid; - std::tie(in_col, in_valid) = cudf::test::to_host(input); - bitmask_type* valid_mask = in_valid.data(); + auto [in_col, in_valid] = cudf::test::to_host(input); + bitmask_type* valid_mask = in_valid.data(); agg_op op; for (size_type i = 0; i < num_rows; i++) { diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index c54fe073e3a..9549569d9f6 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -552,10 +552,8 @@ class RollingTest : public cudf::test::BaseFixture { thrust::host_vector ref_valid(num_rows); // input data and mask - thrust::host_vector in_col; - std::vector in_valid; - std::tie(in_col, in_valid) = cudf::test::to_host(input); - bitmask_type* valid_mask = in_valid.data(); + auto [in_col, in_valid] = cudf::test::to_host(input); + bitmask_type* valid_mask = in_valid.data(); agg_op op; for (size_type i = 0; i < num_rows; i++) { diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index a1f0b1d81cf..159b082890a 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -57,7 +57,7 @@ auto search_bounds(cudf::column_view const& t_col_view, auto const values = cudf::table_view{std::vector{values_col->view()}}; auto result_lower_bound = cudf::lower_bound(t, values, column_orders, null_precedence); auto result_upper_bound = cudf::upper_bound(t, values, column_orders, null_precedence); - return std::make_pair(std::move(result_lower_bound), std::move(result_upper_bound)); + return std::pair(std::move(result_lower_bound), std::move(result_upper_bound)); } auto search_bounds(std::unique_ptr const& t_col, diff --git a/cpp/tests/sort/rank_test.cpp b/cpp/tests/sort/rank_test.cpp index c4d0b6b04f4..28c9b40de11 100644 --- a/cpp/tests/sort/rank_test.cpp +++ b/cpp/tests/sort/rank_test.cpp @@ -91,8 +91,7 @@ struct Rank : public BaseFixture { test_case_t{table_view{{col1, col2, col3}}, table_view{{col1_rank, col2_rank, col3_rank}}}, }) { - table_view input, output; - std::tie(input, output) = test_case; + auto [input, output] = test_case; run_rank_test(input, output, diff --git a/cpp/tests/stream_compaction/distinct_count_tests.cpp b/cpp/tests/stream_compaction/distinct_count_tests.cpp index 0529539c4b2..31bbd43c78d 100644 --- a/cpp/tests/stream_compaction/distinct_count_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_count_tests.cpp @@ -71,7 +71,7 @@ TYPED_TEST(TypedDistinctCount, TableNoNull) std::vector> pair_input; std::transform( input1.begin(), input1.end(), input2.begin(), std::back_inserter(pair_input), [](T a, T b) { - return std::make_pair(a, b); + return std::pair(a, b); }); cudf::test::fixed_width_column_wrapper input_col1(input1.begin(), input1.end()); diff --git a/cpp/tests/stream_compaction/unique_count_tests.cpp b/cpp/tests/stream_compaction/unique_count_tests.cpp index 3285cd1a711..591fe042592 100644 --- a/cpp/tests/stream_compaction/unique_count_tests.cpp +++ b/cpp/tests/stream_compaction/unique_count_tests.cpp @@ -71,7 +71,7 @@ TYPED_TEST(TypedUniqueCount, TableNoNull) std::vector> pair_input; std::transform( input1.begin(), input1.end(), input2.begin(), std::back_inserter(pair_input), [](T a, 
T b) { - return std::make_pair(a, b); + return std::pair(a, b); }); cudf::test::fixed_width_column_wrapper input_col1(input1.begin(), input1.end()); diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index bec06f7e601..360ea8be178 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -125,11 +125,15 @@ TEST_F(StringsConvertTest, FromFloats32) TEST_F(StringsConvertTest, ToFloats64) { + // clang-format off std::vector h_strings{ "1234", nullptr, "-876", "543.2", "-0.12", ".25", "-.002", "", "-0.0", "1.28e256", "NaN", "abc123", "123abc", "456e", "-1.78e+5", "-122.33644782", "12e+309", "1.7976931348623159E308", - "-Inf", "-INFINITY"}; + "-Inf", "-INFINITY", "1.0", "1.7976931348623157e+308", "1.7976931348623157e-307", + // subnormal numbers: v--- smallest double v--- result is 0 + "4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324" }; + // clang-format on cudf::test::strings_column_wrapper strings( h_strings.begin(), h_strings.end(), diff --git a/cpp/tests/strings/translate_tests.cpp b/cpp/tests/strings/translate_tests.cpp index e928065dca4..53c6982b880 100644 --- a/cpp/tests/strings/translate_tests.cpp +++ b/cpp/tests/strings/translate_tests.cpp @@ -38,7 +38,7 @@ std::pair make_entry(const char* from, const c cudf::char_utf8 out = 0; cudf::strings::detail::to_char_utf8(from, in); if (to) cudf::strings::detail::to_char_utf8(to, out); - return std::make_pair(in, out); + return std::pair(in, out); } TEST_F(StringsTranslateTest, Translate) diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu index 8ed50b6eae0..9c3326cf575 100644 --- a/cpp/tests/transform/row_bit_count_test.cu +++ b/cpp/tests/transform/row_bit_count_test.cu @@ -123,9 +123,7 @@ TYPED_TEST(RowBitCountTyped, Lists) { using T = TypeParam; - std::unique_ptr col; - std::unique_ptr expected_sizes; - std::tie(col, expected_sizes) = build_list_column(); + auto [col, expected_sizes] = build_list_column(); table_view t({*col}); auto result = cudf::row_bit_count(t); @@ -326,9 +324,7 @@ TEST_F(RowBitCount, StructsNoNulls) TEST_F(RowBitCount, StructsNulls) { - std::unique_ptr struct_col; - std::unique_ptr expected_sizes; - std::tie(struct_col, expected_sizes) = build_struct_column(); + auto [struct_col, expected_sizes] = build_struct_column(); table_view t({*struct_col}); auto result = cudf::row_bit_count(t); @@ -440,9 +436,7 @@ TEST_F(RowBitCount, NestedTypes) { // List, float, List, int16> { - std::unique_ptr col_no_nulls; - std::unique_ptr expected_sizes; - std::tie(col_no_nulls, expected_sizes) = + auto [col_no_nulls, expected_sizes] = build_nested_and_expected_column({1, 1, 1, 1, 1, 1, 1, 1}); table_view no_nulls_t({*col_no_nulls}); auto no_nulls_result = cudf::row_bit_count(no_nulls_t); @@ -600,19 +594,13 @@ struct sum_functor { TEST_F(RowBitCount, Table) { // complex nested column - std::unique_ptr col0; - std::unique_ptr col0_sizes; - std::tie(col0, col0_sizes) = build_nested_and_expected_column({1, 1, 1, 1, 1, 1, 1, 1}); + auto [col0, col0_sizes] = build_nested_and_expected_column({1, 1, 1, 1, 1, 1, 1, 1}); // struct column - std::unique_ptr col1; - std::unique_ptr col1_sizes; - std::tie(col1, col1_sizes) = build_struct_column(); + auto [col1, col1_sizes] = build_struct_column(); // list column - std::unique_ptr col2; - std::unique_ptr col2_sizes; - std::tie(col2, col2_sizes) = build_list_column(); + auto [col2, col2_sizes] = build_list_column(); table_view t({*col0, *col1, *col2}); auto result = 
cudf::row_bit_count(t); diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index f53498bccec..16fb02e06bc 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -174,9 +174,7 @@ void validate_cast_result(cudf::column_view expected, cudf::column_view actual) { using namespace cudf::test; // round-trip through the host because sizeof(T) may not equal sizeof(R) - thrust::host_vector h_data; - std::vector null_mask; - std::tie(h_data, null_mask) = to_host(expected); + auto [h_data, null_mask] = to_host(expected); if (null_mask.empty()) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(make_column(h_data), actual); } else { diff --git a/docs/cudf/source/_static/params.css b/docs/cudf/source/_static/params.css index 2bdd6f5a299..9e6be7ca75f 100644 --- a/docs/cudf/source/_static/params.css +++ b/docs/cudf/source/_static/params.css @@ -52,4 +52,9 @@ table.io-supported-types-table thead{ .special-table td, .special-table th { border: 1px solid #dee2e6; -} \ No newline at end of file +} + +/* Needed to resolve https://github.com/executablebooks/jupyter-book/issues/1611 */ +.output.text_html { + overflow: auto; +} diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 1aa1ea8beac..e0ef3cb2ff0 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -105,6 +105,7 @@ Function application, GroupBy & window :toctree: api/ DataFrame.apply + DataFrame.applymap DataFrame.apply_chunks DataFrame.apply_rows DataFrame.pipe @@ -148,6 +149,7 @@ Computations / descriptive stats DataFrame.round DataFrame.skew DataFrame.sum + DataFrame.sum_of_squares DataFrame.std DataFrame.var DataFrame.nunique @@ -247,9 +249,11 @@ Serialization / IO / conversion DataFrame.to_dlpack DataFrame.to_parquet DataFrame.to_csv + DataFrame.to_cupy DataFrame.to_hdf DataFrame.to_dict DataFrame.to_json + DataFrame.to_numpy DataFrame.to_pandas DataFrame.to_feather DataFrame.to_records diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 6f5affd0ecd..8e0e3bbd411 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -92,7 +92,9 @@ Conversion Index.astype Index.to_arrow + Index.to_cupy Index.to_list + Index.to_numpy Index.to_series Index.to_frame Index.to_pandas diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 95aa71919e4..d7015c9348d 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -390,10 +390,12 @@ Serialization / IO / conversion :toctree: api/ Series.to_arrow + Series.to_cupy Series.to_dlpack Series.to_frame Series.to_hdf Series.to_json + Series.to_numpy Series.to_pandas Series.to_string Series.from_arrow diff --git a/docs/cudf/source/api_docs/string_handling.rst b/docs/cudf/source/api_docs/string_handling.rst index 3087bcaa826..8d4646c47a7 100644 --- a/docs/cudf/source/api_docs/string_handling.rst +++ b/docs/cudf/source/api_docs/string_handling.rst @@ -83,7 +83,6 @@ strings and apply several methods to it. These can be accessed like rsplit startswith strip - subword_tokenize swapcase title token_count diff --git a/docs/cudf/source/basics/basics.rst b/docs/cudf/source/basics/basics.rst index 60a65558033..9b8983fba49 100644 --- a/docs/cudf/source/basics/basics.rst +++ b/docs/cudf/source/basics/basics.rst @@ -15,36 +15,40 @@ The following table lists all of cudf types. For methods requiring dtype argumen .. 
rst-class:: special-table .. table:: - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Kind of Data | Data Type | Scalar | String Aliases | - +========================+==================+=====================================================================================+=============================================+ - | Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, np.uint16_, | ``'int8'``, ``'int16'``, ``'int32'``, | - | | | np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, | - | | | | ``'uint32'``, ``'uint64'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Float | | np.float32_, np.float64_ | ``'float32'``, ``'float64'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Strings | | `str `_ | ``'string'``, ``'object'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, | - | | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``,| - | (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'``| - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Categorical | CategoricalDtype | (none) | ``'category'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Boolean | | np.bool_ | ``'bool'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Decimal | Decimal32Dtype, | (none) | (none) | - | | Decimal64Dtype, | | | - | | Decimal128Dtype | | | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Kind of Data | Data Type | Scalar | String Aliases | + +=================+==================+==============================================================+==============================================+ + | Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, | ``'int8'``, ``'int16'``, ``'int32'``, | + | | | np.uint16_, np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, | + | | | | ``'uint32'``, ``'uint64'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Float | | np.float32_, np.float64_ | 
``'float32'``, ``'float64'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Strings | | `str `_ | ``'string'``, ``'object'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, | + | | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``, | + | (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Categorical | CategoricalDtype | (none) | ``'category'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Boolean | | np.bool_ | ``'bool'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Decimal | Decimal32Dtype, | (none) | (none) | + | | Decimal64Dtype, | | | + | | Decimal128Dtype | | | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Lists | ListDtype | list | ``'list'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Structs | StructDtype | dict | ``'struct'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ **Note: All dtypes above are Nullable** -.. _np.int8: -.. _np.int16: +.. _np.int8: +.. _np.int16: .. _np.int32: .. _np.int64: .. _np.uint8: diff --git a/docs/cudf/source/basics/internals.rst b/docs/cudf/source/basics/internals.rst index 60b63c6fab8..96ef40d51e6 100644 --- a/docs/cudf/source/basics/internals.rst +++ b/docs/cudf/source/basics/internals.rst @@ -54,7 +54,7 @@ As another example, the ``StringColumn`` backing the Series 2. No mask buffer as there are no nulls in the Series 3. Two children columns: - - A column of 8-bit characters + - A column of UTF-8 characters ``['d', 'o', 'y', 'o', 'u', h' ... 
'?']`` - A column of "offsets" to the characters column (in this case, ``[0, 2, 5, 9, 12, 19]``) @@ -172,7 +172,7 @@ Selecting columns by index: >>> ca.select_by_index(1) ColumnAccessor(OrderedColumnDict([('y', )]), multiindex=False, level_names=(None,)) >>> ca.select_by_index([0, 1]) - ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) + ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) >>> ca.select_by_index(slice(1, 3)) ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,)) diff --git a/docs/cudf/source/basics/io-gds-integration.rst b/docs/cudf/source/basics/io-gds-integration.rst index 71c114e9149..ce774453386 100644 --- a/docs/cudf/source/basics/io-gds-integration.rst +++ b/docs/cudf/source/basics/io-gds-integration.rst @@ -1,39 +1,42 @@ GPUDirect Storage Integration ============================= -Many IO APIs can use GPUDirect Storage (GDS) library to optimize IO operations. -GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU. -GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer. +Many IO APIs can use GPUDirect Storage (GDS) library to optimize IO operations. +GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU. +GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer. The SDK is available for download `here `_. GDS is also included in CUDA Toolkit 11.4 and higher. -Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``. -This variable also controls the GDS compatibility mode. +Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``. +This variable also controls the GDS compatibility mode. -There are three valid values for the environment variable: +There are four valid values for the environment variable: - "GDS": Enable GDS use; GDS compatibility mode is *off*. - "ALWAYS": Enable GDS use; GDS compatibility mode is *on*. +- "KVIKIO": Enable GDS through `KvikIO `_. - "OFF": Completely disable GDS use. If no value is set, behavior will be the same as the "GDS" option. This environment variable also affects how cuDF treats GDS errors. When ``LIBCUDF_CUFILE_POLICY`` is set to "GDS" and a GDS API call fails for any reason, cuDF falls back to the internal implementation with bounce buffers. -When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on), -cuDF throws an exception to propagate the error to te user. +When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on), +cuDF throws an exception to propagate the error to the user. +When ``LIBCUDF_CUFILE_POLICY`` is set to "KVIKIO" and a KvikIO API call fails for any reason (unlikely, given that KvikIO implements its own compatibility mode) cuDF throws an exception to propagate the error to the user. 
+For more information about error handling, compatibility mode, and tuning parameters in KvikIO, see: https://github.com/rapidsai/kvikio

 Operations that support the use of GPUDirect Storage:
-- `read_avro`
-- `read_parquet`
-- `read_orc`
-- `to_csv`
-- `to_parquet`
-- `to_orc`
+- :py:func:`cudf.read_avro`
+- :py:func:`cudf.read_parquet`
+- :py:func:`cudf.read_orc`
+- :py:meth:`cudf.DataFrame.to_csv`
+- :py:meth:`cudf.DataFrame.to_parquet`
+- :py:meth:`cudf.DataFrame.to_orc`

 Several parameters that can be used to tune the performance of GDS-enabled I/O are exposed through environment variables:
 - ``LIBCUDF_CUFILE_THREAD_COUNT``: Integral value, maximum number of parallel reads/writes per file (default 16);
 - ``LIBCUDF_CUFILE_SLICE_SIZE``: Integral value, maximum size of each GDS read/write, in bytes (default 4MB).
-  Larger I/O operations are split into multiple calls.
\ No newline at end of file
+  Larger I/O operations are split into multiple calls.
diff --git a/docs/cudf/source/basics/io-nvcomp-integration.rst b/docs/cudf/source/basics/io-nvcomp-integration.rst
index 521833e2afd..fc24e0c15f4 100644
--- a/docs/cudf/source/basics/io-nvcomp-integration.rst
+++ b/docs/cudf/source/basics/io-nvcomp-integration.rst
@@ -1,14 +1,14 @@
 nvCOMP Integration
 =============================
-Some types of compression/decompression can be performed using either `nvCOMP library `_ or the internal implementation.
+Some types of compression/decompression can be performed using either the `nvCOMP library `_ or the internal implementation.
 Which implementation is used by default depends on the data format and the compression type.
 Behavior can be influenced through environment variable ``LIBCUDF_NVCOMP_POLICY``.

 There are three valid values for the environment variable:
-- "STABLE": Only enable the nvCOMP in places where it has been deemed stable for production use.
+- "STABLE": Only enable nvCOMP in places where it has been deemed stable for production use.
- "ALWAYS": Enable all available uses of nvCOMP, including new, experimental combinations.
- "OFF": Disable nvCOMP use whenever possible and use the internal implementations instead.
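Both policies are plain environment variables read by libcudf at startup, so they apply to any frontend that wraps it, including the Java bindings changed later in this diff. Below is a minimal sketch of exercising them from Java; it assumes the existing `ai.rapids.cudf.Table.readParquet` entry point (not part of this change) and a placeholder input file at `/tmp/example.parquet`, and the variable values shown are illustrative rather than defaults.

```java
// Sketch: the variables must be set in the environment before the process launches, e.g.
//   LIBCUDF_CUFILE_POLICY=ALWAYS LIBCUDF_NVCOMP_POLICY=STABLE \
//   LIBCUDF_CUFILE_THREAD_COUNT=32 java -cp ... GdsPolicyDemo
import java.io.File;
import ai.rapids.cudf.Table;

public class GdsPolicyDemo {
  public static void main(String[] args) {
    // null means the variable is unset and the documented default applies
    // ("GDS" for the cuFile policy; format-dependent behavior for nvCOMP).
    System.out.println("LIBCUDF_CUFILE_POLICY=" + System.getenv("LIBCUDF_CUFILE_POLICY"));
    System.out.println("LIBCUDF_NVCOMP_POLICY=" + System.getenv("LIBCUDF_NVCOMP_POLICY"));
    // read_parquet is one of the GDS-enabled operations listed above.
    try (Table t = Table.readParquet(new File("/tmp/example.parquet"))) {
      System.out.println("rows read: " + t.getRowCount());
    }
  }
}
```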
diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index dbdf8e59e6a..c8b30120924 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -46,10 +46,13 @@ "numpydoc", "IPython.sphinxext.ipython_console_highlighting", "IPython.sphinxext.ipython_directive", - "nbsphinx", "PandasCompat", + "myst_nb", ] +jupyter_execute_notebooks = "force" +execution_timeout = 300 + copybutton_prompt_text = ">>> " autosummary_generate = True ipython_mplbackend = "str" @@ -252,6 +255,7 @@ def process_class_docstrings(app, what, name, obj, options, lines): lines[:] = lines[:cut_index] +nitpick_ignore = [("py:class", "SeriesOrIndex"),] def setup(app): diff --git a/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb b/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb index 1bcb9335256..35ca21f380e 100644 --- a/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb +++ b/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb @@ -1067,7 +1067,7 @@ " if sparseformat == 'row':\n", " _sparse_constructor = cp.sparse.csr_matrix\n", "\n", - " return _sparse_constructor(cp.from_dlpack(data.to_dlpack()))" + " return _sparse_constructor(cupy_from_dlpack(data.to_dlpack()))" ] }, { diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb index ab006847fc6..9bb95406e8a 100644 --- a/docs/cudf/source/user_guide/10min.ipynb +++ b/docs/cudf/source/user_guide/10min.ipynb @@ -2484,6 +2484,14 @@ "execution_count": 35, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/mmccarty/miniconda3/envs/cudf_dev/lib/python3.8/site-packages/cudf/core/series.py:2223: FutureWarning: Series.applymap is deprecated and will be removed in a future cuDF release. Use Series.apply instead.\n", + " warnings.warn(\n" + ] + }, { "data": { "text/plain": [ @@ -3024,7 +3032,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/ashwin/workspace/rapids/cudf/python/cudf/cudf/core/indexed_frame.py:2271: FutureWarning: append is deprecated and will be removed in a future version. Use concat instead.\n", + "/home/mmccarty/miniconda3/envs/cudf_dev/lib/python3.8/site-packages/cudf/core/indexed_frame.py:2329: FutureWarning: append is deprecated and will be removed in a future version. Use concat instead.\n", " warnings.warn(\n" ] }, @@ -5850,7 +5858,32 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/mmccarty/sandbox/rapids/cudf/python/cudf/cudf/tests/data/orc/TestOrcFile.test1.orc'" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "from pathlib import Path\n", + "current_dir = os.path.dirname(os.path.realpath(\"__file__\"))\n", + "cudf_root = Path(current_dir).parents[3]\n", + "file_path = os.path.join(cudf_root, \"python\", \"cudf\", \"cudf\", \"tests\", \"data\", \"orc\", \"TestOrcFile.test1.orc\")\n", + "file_path" + ] + }, + { + "cell_type": "code", + "execution_count": 81, "metadata": {}, "outputs": [ { @@ -5941,13 +5974,13 @@ "1 [{'key': 'chani', 'value': {'int1': 5, 'string... 
" ] }, - "execution_count": 79, + "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df2 = cudf.read_orc('/rapids/cudf/python/cudf/cudf/tests/data/orc/TestOrcFile.test1.orc')\n", + "df2 = cudf.read_orc(file_path)\n", "df2" ] }, @@ -5974,15 +6007,17 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 82, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2022-03-29 12:21:32,328 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n", - "2022-03-29 12:21:32,394 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n" + "2022-04-21 10:11:07,360 - distributed.diskutils - INFO - Found stale lock file and directory '/home/mmccarty/sandbox/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-ghcx5g0e', purging\n", + "2022-04-21 10:11:07,360 - distributed.diskutils - INFO - Found stale lock file and directory '/home/mmccarty/sandbox/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-wh16f0h3', purging\n", + "2022-04-21 10:11:07,360 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n", + "2022-04-21 10:11:07,388 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n" ] }, { @@ -5992,7 +6027,7 @@ "
 [Stripped HTML widget output: the Dask ``Client`` / ``LocalCUDACluster`` / ``Scheduler`` summary tables (2 workers, 1 thread each, started just now); only the run-specific values changed:]
- Client-4be800f5-af7c-11ec-8df8-c8d9d2247354
+ Client-e3492c89-c17c-11ec-813e-fc3497a62adc
- LocalCUDACluster 137d0882
+ LocalCUDACluster db2501e1
- Scheduler-08f95e9e-2c10-4d66-a103-955ab4218e91
+ Scheduler-6f476508-e52f-49e9-8f1f-6a8641e177bd
- Scheduler Comm: tcp://127.0.0.1:35157, Total memory: 45.79 GiB
+ Scheduler Comm: tcp://127.0.0.1:39755, Total memory: 125.65 GiB
- Worker Comm: tcp://127.0.0.1:41411, Dashboard: http://127.0.0.1:40997/status, Memory: 22.89 GiB, Nanny: tcp://127.0.0.1:42959
+ Worker Comm: tcp://127.0.0.1:33491, Dashboard: http://127.0.0.1:34333/status, Memory: 62.82 GiB, Nanny: tcp://127.0.0.1:43093
- Local directory: /home/ashwin/workspace/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-ruvvgno2
+ Local directory: /home/mmccarty/sandbox/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-jsuvfju4
- GPU: Quadro GV100, GPU memory: 31.75 GiB
+ GPU: NVIDIA RTX A6000, GPU memory: 47.51 GiB
@@ -6216,10 +6251,10 @@
 "" ],
 "text/plain": [
- ""
+ ""
 ] },
- "execution_count": 80,
+ "execution_count": 82,
 "metadata": {},
 "output_type": "execute_result" }
@@ -6245,7 +6280,7 @@ }, { "cell_type": "code",
- "execution_count": 81,
+ "execution_count": 83,
 "metadata": {},
 "outputs": [ {
@@ -6321,7 +6356,7 @@
 "" ] },
- "execution_count": 81,
+ "execution_count": 83,
 "metadata": {},
 "output_type": "execute_result" }
@@ -6337,14 +6372,14 @@ }, { "cell_type": "code",
- "execution_count": 82,
+ "execution_count": 84,
 "metadata": {},
 "outputs": [ {
 "name": "stdout",
 "output_type": "stream",
 "text": [
- "Tue Mar 29 12:21:33 2022 \n",
+ "Thu Apr 21 10:11:07 2022 \n",
 "+-----------------------------------------------------------------------------+\n",
 "| NVIDIA-SMI 495.29.05 Driver Version: 495.29.05 CUDA Version: 11.5 |\n",
"|-------------------------------+----------------------+----------------------+\n", @@ -6486,12 +6530,12 @@ "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. |\n", "|===============================+======================+======================|\n", - "| 0 Quadro GV100 Off | 00000000:15:00.0 Off | Off |\n", - "| 36% 49C P2 50W / 250W | 1113MiB / 32508MiB | 0% Default |\n", + "| 0 NVIDIA RTX A6000 On | 00000000:01:00.0 On | Off |\n", + "| 30% 48C P2 84W / 300W | 2970MiB / 48651MiB | 3% Default |\n", "| | | N/A |\n", "+-------------------------------+----------------------+----------------------+\n", - "| 1 Quadro GV100 Off | 00000000:2D:00.0 Off | Off |\n", - "| 40% 54C P2 50W / 250W | 306MiB / 32498MiB | 0% Default |\n", + "| 1 NVIDIA RTX A6000 On | 00000000:02:00.0 Off | Off |\n", + "| 30% 36C P2 37W / 300W | 265MiB / 48685MiB | 0% Default |\n", "| | | N/A |\n", "+-------------------------------+----------------------+----------------------+\n", " \n", @@ -6500,6 +6544,15 @@ "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=============================================================================|\n", + "| 0 N/A N/A 2292 G /usr/lib/xorg/Xorg 871MiB |\n", + "| 0 N/A N/A 2441 G /usr/bin/gnome-shell 316MiB |\n", + "| 0 N/A N/A 1240494 G ...AAAAAAAAA= --shared-files 68MiB |\n", + "| 0 N/A N/A 1240525 G ...RendererForSitePerProcess 41MiB |\n", + "| 0 N/A N/A 1243689 C .../envs/cudf_dev/bin/python 593MiB |\n", + "| 0 N/A N/A 1245502 C .../envs/cudf_dev/bin/python 753MiB |\n", + "| 0 N/A N/A 1245751 C .../envs/cudf_dev/bin/python 257MiB |\n", + "| 1 N/A N/A 2292 G /usr/lib/xorg/Xorg 4MiB |\n", + "| 1 N/A N/A 1245748 C .../envs/cudf_dev/bin/python 257MiB |\n", "+-----------------------------------------------------------------------------+\n" ] } @@ -6527,7 +6580,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 87, "metadata": {}, "outputs": [], "source": [ @@ -6552,7 +6605,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 88, "metadata": {}, "outputs": [], "source": [ @@ -6569,16 +6622,16 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 89, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "DoneAndNotDoneFutures(done={, , , , }, not_done=set())" + "DoneAndNotDoneFutures(done={, , , , }, not_done=set())" ] }, - "execution_count": 87, + "execution_count": 89, "metadata": {}, "output_type": "execute_result" } diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb index 41bce8b865e..8026c378156 100644 --- a/docs/cudf/source/user_guide/guide-to-udfs.ipynb +++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb @@ -4,7 +4,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Overview of User Defined Functions with cuDF" + "Overview of User Defined Functions with cuDF\n", + "====================================" ] }, { @@ -40,7 +41,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Series UDFs\n", + "Series UDFs\n", + "--------------\n", "\n", "You can execute UDFs on Series in two ways:\n", "\n", @@ -54,7 +56,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# `cudf.Series.apply`" + "`cudf.Series.apply`\n", + "---------------------" ] }, { @@ -126,7 +129,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Functions with Additional Scalar Arguments" + "Functions with Additional Scalar Arguments\n", + 
"---------------------------------------------------" ] }, { @@ -138,7 +142,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -148,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -160,7 +164,7 @@ "dtype: int64" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -181,7 +185,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Nullable Data" + "Nullable Data\n", + "----------------" ] }, { @@ -193,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -205,7 +210,7 @@ "dtype: int64" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -218,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -229,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -241,7 +246,7 @@ "dtype: int64" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -260,7 +265,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -274,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -286,7 +291,7 @@ "dtype: int64" ] }, - "execution_count": 14, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -307,7 +312,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Lower level control with custom `numba` kernels" + "Lower level control with custom `numba` kernels\n", + "---------------------------------------------------------" ] }, { @@ -322,7 +328,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -331,7 +337,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -355,7 +361,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -373,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -452,7 +458,7 @@ "4 979 982 1011 9790.0" ] }, - "execution_count": 19, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -472,7 +478,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## DataFrame UDFs\n", + "DataFrame UDFs\n", + "--------------------\n", "\n", "Like `cudf.Series`, there are multiple ways of using UDFs on dataframes, which essentially amount to UDFs that expect multiple columns as input:\n", "\n", @@ -485,7 +492,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# `cudf.DataFrame.apply`" + "`cudf.DataFrame.apply`\n", + "---------------------------" ] }, { @@ -497,7 +505,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -514,7 +522,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -569,7 +577,7 @@ "2 3 6" ] }, - "execution_count": 21, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -591,7 +599,7 @@ }, { "cell_type": "code", - 
"execution_count": 22, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -603,7 +611,7 @@ "dtype: int64" ] }, - "execution_count": 22, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -621,7 +629,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -633,7 +641,7 @@ "dtype: object" ] }, - "execution_count": 23, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -658,7 +666,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -709,7 +717,7 @@ "2 3" ] }, - "execution_count": 24, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -728,7 +736,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -740,7 +748,7 @@ "dtype: int64" ] }, - "execution_count": 25, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -758,7 +766,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -813,7 +821,7 @@ "2 3 1" ] }, - "execution_count": 26, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -836,7 +844,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -848,7 +856,7 @@ "dtype: int64" ] }, - "execution_count": 27, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -866,7 +874,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -921,7 +929,7 @@ "2 3 3.14" ] }, - "execution_count": 28, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -939,7 +947,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -951,7 +959,7 @@ "dtype: float64" ] }, - "execution_count": 29, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -982,7 +990,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1033,7 +1041,7 @@ "2 5" ] }, - "execution_count": 30, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1054,7 +1062,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -1066,7 +1074,7 @@ "dtype: float64" ] }, - "execution_count": 31, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1084,7 +1092,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1151,7 +1159,7 @@ "2 3 6 4 8 6" ] }, - "execution_count": 32, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1172,7 +1180,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -1184,7 +1192,7 @@ "dtype: float64" ] }, - "execution_count": 33, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1197,7 +1205,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Numba kernels for DataFrames" + "Numba kernels for DataFrames\n", + "------------------------------------" ] }, { @@ -1212,7 +1221,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -1241,7 +1250,7 @@ }, { "cell_type": "code", - "execution_count": 35, 
+ "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1312,7 +1321,7 @@ "2 3 6 4 8 6 9.0" ] }, - "execution_count": 35, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1344,7 +1353,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -1417,7 +1426,7 @@ "4 979 982 1011" ] }, - "execution_count": 36, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1443,7 +1452,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -1522,7 +1531,7 @@ "4 979 982 1011 1961.0" ] }, - "execution_count": 37, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1546,7 +1555,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Rolling Window UDFs\n", + "Rolling Window UDFs\n", + "-------------------------\n", "\n", "For time-series data, we may need to operate on a small \\\"window\\\" of our column at a time, processing each portion independently. We could slide (\\\"roll\\\") this window over the entire column to answer questions like \\\"What is the 3-day moving average of a stock price over the past year?\"\n", "\n", @@ -1555,7 +1565,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1570,7 +1580,7 @@ "dtype: float64" ] }, - "execution_count": 38, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1582,7 +1592,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1591,7 +1601,7 @@ "Rolling [window=3,min_periods=3,center=False]" ] }, - "execution_count": 39, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1610,7 +1620,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -1634,7 +1644,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -1649,7 +1659,7 @@ "dtype: float64" ] }, - "execution_count": 41, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1667,7 +1677,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -1734,7 +1744,7 @@ "4 59.0 59.0" ] }, - "execution_count": 42, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1748,7 +1758,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -1845,7 +1855,7 @@ "9 100.0 100.0" ] }, - "execution_count": 43, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -1859,16 +1869,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## GroupBy DataFrame UDFs\n", + "GroupBy DataFrame UDFs\n", + "-------------------------------\n", "\n", "We can also apply UDFs to grouped DataFrames using `apply_grouped`. This example is also drawn and adapted from the RAPIDS [API documentation]().\n", "\n", - "First, we'll group our DataFrame based on column `b`, which is either True or False. Note that we currently need to pass `method=\"cudf\"` to use UDFs with GroupBy objects." + "First, we'll group our DataFrame based on column `b`, which is either True or False." 
] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -1947,7 +1958,7 @@ "4 -0.970850 False Sarah 0.342905" ] }, - "execution_count": 44, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1959,7 +1970,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -1975,7 +1986,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -2002,7 +2013,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -2132,7 +2143,7 @@ "9 -0.725581 True George 0.405245 0.271319" ] }, - "execution_count": 47, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -2155,14 +2166,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Numba Kernels on CuPy Arrays\n", + "Numba Kernels on CuPy Arrays\n", + "-------------------------------------\n", "\n", "We can also execute Numba kernels on CuPy NDArrays, again thanks to the `__cuda_array_interface__`. We can even run the same UDF on the Series and the CuPy array. First, we define a Series and then create a CuPy array from that Series." ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -2171,7 +2183,7 @@ "array([ 1., 2., 3., 4., 10.])" ] }, - "execution_count": 48, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -2193,7 +2205,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -2207,14 +2219,12 @@ "dtype: int32" ] }, - "execution_count": 49, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from cudf.utils import cudautils\n", - "\n", "@cuda.jit\n", "def multiply_by_5(x, out):\n", " i = cuda.grid(1)\n", @@ -2235,7 +2245,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -2244,7 +2254,7 @@ "array([ 5., 10., 15., 20., 50.])" ] }, - "execution_count": 50, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -2259,7 +2269,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Caveats" + "Caveats\n", + "---------" ] }, { @@ -2274,7 +2285,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Summary\n", + "Summary\n", + "-----------\n", "\n", "This guide has covered a lot of content. At this point, you should hopefully feel comfortable writing UDFs (with or without null values) that operate on\n", "\n", diff --git a/java/README.md b/java/README.md index afd69df11ef..ea1b9e3e4e4 100644 --- a/java/README.md +++ b/java/README.md @@ -75,16 +75,6 @@ If you decide to build without Docker and the build script, examining the cmake settings in the [Java CI build script](ci/build-in-docker.sh) can be helpful if you are encountering difficulties during the build. -## Dynamically Linking Arrow - -Since libcudf builds by default with a dynamically linked Arrow dependency, it may be -desirable to build the Java bindings without requiring a statically-linked Arrow to avoid -rebuilding an already built libcudf.so. To do so, specify the additional command-line flag -`-DCUDF_JNI_ARROW_STATIC=OFF` when building the Java bindings with Maven. 
However this will
-result in a jar that requires the correct Arrow version to be available in the runtime
-environment, and therefore is not recommended unless you are only performing local testing
-within the libcudf build environment.
-
 ## Statically Linking the CUDA Runtime

 If you use the default cmake options libcudart will be dynamically linked to libcudf and libcudfjni.
diff --git a/java/ci/build-in-docker.sh b/java/ci/build-in-docker.sh
index d6a193fbeaf..d21010ba30e 100755
--- a/java/ci/build-in-docker.sh
+++ b/java/ci/build-in-docker.sh
@@ -78,7 +78,6 @@ BUILD_ARG="-Dmaven.repo.local=\"$WORKSPACE/.m2\"\
 -DPER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS\
 -DCUDA_STATIC_RUNTIME=$ENABLE_CUDA_STATIC_RUNTIME\
 -DCUDF_JNI_LIBCUDF_STATIC=ON\
- -DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL\
 -DUSE_GDS=$ENABLE_GDS -Dtest=*,!CuFileTest"

 if [ "$SIGN_FILE" == true ]; then
diff --git a/java/pom.xml b/java/pom.xml
index 8eccd652a46..50b6ca59440 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -147,7 +147,7 @@
 org.apache.hadoop
 hadoop-common
- 3.1.4
+ 3.2.3
 test
@@ -165,10 +165,8 @@
 OFF
 OFF
 OFF
- INFO
 OFF
 ALL
- ON
 OFF
 ${project.build.directory}/cmake-build
 1.7.30
@@ -386,13 +384,11 @@
-
-
diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index ed3ac124216..cc1bc35f951 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -1546,6 +1546,31 @@ public ColumnVector segmentedReduce(ColumnView offsets, SegmentedReductionAggreg
     }
   }

+  /**
+   * Segmented gather of the elements within a list element in each row of a list column.
+   * For each list of size N, the valid gather-map indices range over [-N, N); out-of-bounds
+   * indices map to null.
+   * @param gatherMap ListColumnView carrying lists of integral indices that map the elements
+   * within each list row of the source column to the elements of the corresponding list row
+   * in the result column.
+   * @return the result.
+   */
+  public ColumnVector segmentedGather(ColumnView gatherMap) {
+    return segmentedGather(gatherMap, OutOfBoundsPolicy.NULLIFY);
+  }
+
+  /**
+   * Segmented gather of the elements within a list element in each row of a list column.
+   * @param gatherMap ListColumnView carrying lists of integral indices that map the elements
+   * within each list row of the source column to the elements of the corresponding list row
+   * in the result column.
+   * @param policy OutOfBoundsPolicy; `DONT_CHECK` leads to undefined behaviour, while `NULLIFY`
+   * replaces out-of-bounds indices with null.
+   * @return the result.
+   */
+  public ColumnVector segmentedGather(ColumnView gatherMap, OutOfBoundsPolicy policy) {
+    return new ColumnVector(segmentedGather(getNativeView(), gatherMap.getNativeView(),
+        policy.equals(OutOfBoundsPolicy.NULLIFY)));
+  }
+
   /**
    * Do a reduction on the values in a list. The output type will be the type of the data column
    * of this list.
@@ -3448,6 +3473,16 @@ public final ColumnVector listSortRows(boolean isDescending, boolean isNullSmall
     return new ColumnVector(listSortRows(getNativeView(), isDescending, isNullSmallest));
   }

+  /**
+   * Generate list offsets from the sizes of each list.
+   * NOTICE: This API only works for INT32 columns; otherwise, the behavior is undefined.
+   * Null and negative values are not allowed.
+   *
+   * @return a column of list offsets whose size is N + 1, where N is the number of rows
+   */
+  public final ColumnVector generateListOffsets() {
+    return new ColumnVector(generateListOffsets(getNativeView()));
+  }
+
   /**
    * Get a single item from the column at the specified index as a Scalar.
* @@ -3998,6 +4033,9 @@ private static native long scan(long viewHandle, long aggregation, private static native long segmentedReduce(long dataViewHandle, long offsetsViewHandle, long aggregation, boolean includeNulls, int dtype, int scale) throws CudfException; + private static native long segmentedGather(long sourceColumnHandle, long gatherMapListHandle, + boolean isNullifyOutBounds) throws CudfException; + private static native long isNullNative(long viewHandle); private static native long isNanNative(long viewHandle); @@ -4134,6 +4172,8 @@ static native long makeCudfColumnView(int type, int scale, long data, long dataS static native long copyColumnViewToCV(long viewHandle) throws CudfException; + static native long generateListOffsets(long handle) throws CudfException; + /** * A utility class to create column vector like objects without refcounts and other APIs when * creating the device side vector from host side nested vectors. Eventually this can go away or diff --git a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java index 78b3d5d52ec..f3fb7de6abe 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java @@ -33,9 +33,15 @@ public class ColumnWriterOptions { private boolean isNullable; private boolean isMap = false; private String columnName; + // only for Parquet + private boolean hasParquetFieldId; + private int parquetFieldId; + private ColumnWriterOptions(AbstractStructBuilder builder) { this.columnName = builder.name; this.isNullable = builder.isNullable; + this.hasParquetFieldId = builder.hasParquetFieldId; + this.parquetFieldId = builder.parquetFieldId; this.childColumnOptions = (ColumnWriterOptions[]) builder.children.toArray(new ColumnWriterOptions[0]); } @@ -67,6 +73,10 @@ public AbstractStructBuilder(String name, boolean isNullable) { super(name, isNullable); } + public AbstractStructBuilder(String name, boolean isNullable, int parquetFieldId) { + super(name, isNullable, parquetFieldId); + } + protected AbstractStructBuilder() { super(); } @@ -84,6 +94,9 @@ public static abstract class NestedBuilder children = new ArrayList<>(); protected boolean isNullable = true; protected String name = ""; + // Parquet structure needs + protected boolean hasParquetFieldId; + protected int parquetFieldId; /** * Builder specific to build a Struct meta @@ -93,22 +106,43 @@ protected NestedBuilder(String name, boolean isNullable) { this.isNullable = isNullable; } + protected NestedBuilder(String name, boolean isNullable, int parquetFieldId) { + this.name = name; + this.isNullable = isNullable; + this.hasParquetFieldId = true; + this.parquetFieldId = parquetFieldId; + } + protected NestedBuilder() {} - protected ColumnWriterOptions withColumns(String name, boolean isNullable) { + protected ColumnWriterOptions withColumn(String name, boolean isNullable) { return new ColumnWriterOptions(name, isNullable); } + protected ColumnWriterOptions withColumn(String name, boolean isNullable, int parquetFieldId) { + return new ColumnWriterOptions(name, isNullable, parquetFieldId); + } + protected ColumnWriterOptions withDecimal(String name, int precision, boolean isNullable) { return new ColumnWriterOptions(name, false, precision, isNullable); } + protected ColumnWriterOptions withDecimal(String name, int precision, + boolean isNullable, int parquetFieldId) { + return new ColumnWriterOptions(name, false, precision, isNullable, parquetFieldId); + } + 
protected ColumnWriterOptions withTimestamp(String name, boolean isInt96, boolean isNullable) { return new ColumnWriterOptions(name, isInt96, UNKNOWN_PRECISION, isNullable); } + protected ColumnWriterOptions withTimestamp(String name, boolean isInt96, + boolean isNullable, int parquetFieldId) { + return new ColumnWriterOptions(name, isInt96, UNKNOWN_PRECISION, isNullable, parquetFieldId); + } + /** * Set the list column meta. * Lists should have only one child in ColumnVector, but the metadata expects a @@ -155,16 +189,16 @@ public T withStructColumn(StructColumnWriterOptions child) { /** * Set column name */ - public T withNonNullableColumns(String... name) { - withColumns(false, name); + public T withNonNullableColumns(String... names) { + withColumns(false, names); return (T) this; } /** * Set nullable column meta data */ - public T withNullableColumns(String... name) { - withColumns(true, name); + public T withNullableColumns(String... names) { + withColumns(true, names); return (T) this; } @@ -172,13 +206,22 @@ public T withNullableColumns(String... name) { * Set a simple child meta data * @return this for chaining. */ - public T withColumns(boolean nullable, String... name) { - for (String n : name) { - children.add(withColumns(n, nullable)); + public T withColumns(boolean nullable, String... names) { + for (String n : names) { + children.add(withColumn(n, nullable)); } return (T) this; } + /** + * Set a simple child meta data + * @return this for chaining. + */ + public T withColumn(boolean nullable, String name, int parquetFieldId) { + children.add(withColumn(name, nullable, parquetFieldId)); + return (T) this; + } + /** * Set a Decimal child meta data * @return this for chaining. @@ -188,6 +231,15 @@ public T withDecimalColumn(String name, int precision, boolean nullable) { return (T) this; } + /** + * Set a Decimal child meta data + * @return this for chaining. + */ + public T withDecimalColumn(String name, int precision, boolean nullable, int parquetFieldId) { + children.add(withDecimal(name, precision, nullable, parquetFieldId)); + return (T) this; + } + /** * Set a Decimal child meta data * @return this for chaining. @@ -206,6 +258,15 @@ public T withDecimalColumn(String name, int precision) { return (T) this; } + /** + * Set a timestamp child meta data + * @return this for chaining. + */ + public T withTimestampColumn(String name, boolean isInt96, boolean nullable, int parquetFieldId) { + children.add(withTimestamp(name, isInt96, nullable, parquetFieldId)); + return (T) this; + } + /** * Set a timestamp child meta data * @return this for chaining. 
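The overloads above thread an optional `parquetFieldId` through every node of the writer metadata (plain, decimal, timestamp, and struct columns). A hedged sketch of how a caller might use them follows; it assumes the `ParquetWriterOptions.builder()` entry point and `Table.writeParquetChunked(...)` from the existing Java API (neither appears in this hunk), plus a placeholder input file whose schema matches the declared metadata.

```java
import java.io.File;
import ai.rapids.cudf.ColumnWriterOptions;
import ai.rapids.cudf.ParquetWriterOptions;
import ai.rapids.cudf.Table;
import ai.rapids.cudf.TableWriter;

public class FieldIdWriteDemo {
  public static void main(String[] args) {
    // Attach explicit parquet field IDs so readers that resolve columns by ID
    // rather than by name can match the written schema.
    ParquetWriterOptions options = ParquetWriterOptions.builder()
        .withColumn(false, "id", 1)                 // non-nullable column, field ID 1
        .withDecimalColumn("price", 10, true, 2)    // precision 10, nullable, field ID 2
        .withTimestampColumn("ts", false, true, 3)  // not INT96, nullable, field ID 3
        .withStructColumn(ColumnWriterOptions.structBuilder("meta", true, 4)
            .withNullableColumns("tag")             // child keeps name-based matching
            .build())
        .build();

    // in.parquet is a placeholder assumed to hold a table matching the schema above.
    try (Table t = Table.readParquet(new File("in.parquet"));
         TableWriter writer = Table.writeParquetChunked(options, new File("out.parquet"))) {
      writer.write(t);
    }
  }
}
```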
@@ -244,6 +305,13 @@ public ColumnWriterOptions(String columnName, boolean isTimestampTypeInt96, this.columnName = columnName; } + public ColumnWriterOptions(String columnName, boolean isTimestampTypeInt96, + int precision, boolean isNullable, int parquetFieldId) { + this(columnName, isTimestampTypeInt96, precision, isNullable); + this.hasParquetFieldId = true; + this.parquetFieldId = parquetFieldId; + } + public ColumnWriterOptions(String columnName, boolean isNullable) { this.isTimestampTypeInt96 = false; this.precision = UNKNOWN_PRECISION; @@ -251,6 +319,12 @@ public ColumnWriterOptions(String columnName, boolean isNullable) { this.columnName = columnName; } + public ColumnWriterOptions(String columnName, boolean isNullable, int parquetFieldId) { + this(columnName, isNullable); + this.hasParquetFieldId = true; + this.parquetFieldId = parquetFieldId; + } + public ColumnWriterOptions(String columnName) { this(columnName, true); } @@ -302,6 +376,24 @@ int[] getFlatPrecision() { } } + boolean[] getFlatHasParquetFieldId() { + boolean[] ret = {hasParquetFieldId}; + if (childColumnOptions.length > 0) { + return getFlatBooleans(ret, (opt) -> opt.getFlatHasParquetFieldId()); + } else { + return ret; + } + } + + int[] getFlatParquetFieldId() { + int[] ret = {parquetFieldId}; + if (childColumnOptions.length > 0) { + return getFlatInts(ret, (opt) -> opt.getFlatParquetFieldId()); + } else { + return ret; + } + } + boolean[] getFlatIsNullable() { boolean[] ret = {isNullable}; if (childColumnOptions.length > 0) { @@ -418,6 +510,13 @@ public static StructBuilder structBuilder(String name, boolean isNullable) { return new StructBuilder(name, isNullable); } + /** + * Creates a StructBuilder for column called 'name' + */ + public static StructBuilder structBuilder(String name, boolean isNullable, int parquetFieldId) { + return new StructBuilder(name, isNullable, parquetFieldId); + } + /** * Creates a StructBuilder for column called 'name' */ @@ -477,6 +576,10 @@ public StructBuilder(String name, boolean isNullable) { super(name, isNullable); } + public StructBuilder(String name, boolean isNullable, int parquetFieldId) { + super(name, isNullable, parquetFieldId); + } + public StructColumnWriterOptions build() { return new StructColumnWriterOptions(this); } diff --git a/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java b/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java index 9292975d0ce..3a3b7d721b7 100644 --- a/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java @@ -41,6 +41,16 @@ int[] getFlatPrecision() { return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatPrecision()); } + @Override + boolean[] getFlatHasParquetFieldId() { + return super.getFlatBooleans(new boolean[]{}, (opt) -> opt.getFlatHasParquetFieldId()); + } + + @Override + int[] getFlatParquetFieldId() { + return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatParquetFieldId()); + } + @Override int[] getFlatNumChildren() { return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatNumChildren()); diff --git a/java/src/main/java/ai/rapids/cudf/CudaException.java b/java/src/main/java/ai/rapids/cudf/CudaException.java index 2d862b47ef8..ff7ca308f3c 100755 --- a/java/src/main/java/ai/rapids/cudf/CudaException.java +++ b/java/src/main/java/ai/rapids/cudf/CudaException.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -15,6 +15,9 @@
  */
 package ai.rapids.cudf;

+import java.util.HashMap;
+import java.util.Map;
+
 /**
  * Exception from the cuda language/library. Be aware that because of how cuda does asynchronous
  * processing exceptions from cuda can be thrown by method calls that did not cause the exception
  * don't switch between threads for different parts of processing that can be retried as a chunk.
  */
 public class CudaException extends RuntimeException {
-  CudaException(String message) {
+  CudaException(String message, int errorCode) {
     super(message);
+    cudaError = CudaError.parseErrorCode(errorCode);
   }

-  CudaException(String message, Throwable cause) {
+  CudaException(String message, int errorCode, Throwable cause) {
     super(message, cause);
+    cudaError = CudaError.parseErrorCode(errorCode);
+  }
+
+  public final CudaError cudaError;
+
+  /**
+   * The Java mirror of cudaError, which facilitates the tracking of CUDA errors in the JVM.
+   */
+  public enum CudaError {
+    UnknownNativeError(-1), // a native CUDA error type for which Java has no representation
+    cudaErrorInvalidValue(1),
+    cudaErrorMemoryAllocation(2),
+    cudaErrorInitializationError(3),
+    cudaErrorCudartUnloading(4),
+    cudaErrorProfilerDisabled(5),
+    cudaErrorProfilerNotInitialized(6),
+    cudaErrorProfilerAlreadyStarted(7),
+    cudaErrorProfilerAlreadyStopped(8),
+    cudaErrorInvalidConfiguration(9),
+    cudaErrorInvalidPitchValue(12),
+    cudaErrorInvalidSymbol(13),
+    cudaErrorInvalidHostPointer(16),
+    cudaErrorInvalidDevicePointer(17),
+    cudaErrorInvalidTexture(18),
+    cudaErrorInvalidTextureBinding(19),
+    cudaErrorInvalidChannelDescriptor(20),
+    cudaErrorInvalidMemcpyDirection(21),
+    cudaErrorAddressOfConstant(22),
+    cudaErrorTextureFetchFailed(23),
+    cudaErrorTextureNotBound(24),
+    cudaErrorSynchronizationError(25),
+    cudaErrorInvalidFilterSetting(26),
+    cudaErrorInvalidNormSetting(27),
+    cudaErrorMixedDeviceExecution(28),
+    cudaErrorNotYetImplemented(31),
+    cudaErrorMemoryValueTooLarge(32),
+    cudaErrorStubLibrary(34),
+    cudaErrorInsufficientDriver(35),
+    cudaErrorCallRequiresNewerDriver(36),
+    cudaErrorInvalidSurface(37),
+    cudaErrorDuplicateVariableName(43),
+    cudaErrorDuplicateTextureName(44),
+    cudaErrorDuplicateSurfaceName(45),
+    cudaErrorDevicesUnavailable(46),
+    cudaErrorIncompatibleDriverContext(49),
+    cudaErrorMissingConfiguration(52),
+    cudaErrorPriorLaunchFailure(53),
+    cudaErrorLaunchMaxDepthExceeded(65),
+    cudaErrorLaunchFileScopedTex(66),
+    cudaErrorLaunchFileScopedSurf(67),
+    cudaErrorSyncDepthExceeded(68),
+    cudaErrorLaunchPendingCountExceeded(69),
+    cudaErrorInvalidDeviceFunction(98),
+    cudaErrorNoDevice(100),
+    cudaErrorInvalidDevice(101),
+    cudaErrorDeviceNotLicensed(102),
+    cudaErrorSoftwareValidityNotEstablished(103),
+    cudaErrorStartupFailure(127),
+    cudaErrorInvalidKernelImage(200),
+    cudaErrorDeviceUninitialized(201),
+    cudaErrorMapBufferObjectFailed(205),
+    cudaErrorUnmapBufferObjectFailed(206),
+    cudaErrorArrayIsMapped(207),
+    cudaErrorAlreadyMapped(208),
+    cudaErrorNoKernelImageForDevice(209),
+    cudaErrorAlreadyAcquired(210),
+    cudaErrorNotMapped(211),
+    cudaErrorNotMappedAsArray(212),
+    cudaErrorNotMappedAsPointer(213),
+    cudaErrorECCUncorrectable(214),
+    cudaErrorUnsupportedLimit(215),
+    cudaErrorDeviceAlreadyInUse(216),
+    cudaErrorPeerAccessUnsupported(217),
+    cudaErrorInvalidPtx(218),
+    cudaErrorInvalidGraphicsContext(219),
+    cudaErrorNvlinkUncorrectable(220),
+
cudaErrorJitCompilerNotFound(221), + cudaErrorUnsupportedPtxVersion(222), + cudaErrorJitCompilationDisabled(223), + cudaErrorUnsupportedExecAffinity(224), + cudaErrorInvalidSource(300), + cudaErrorFileNotFound(301), + cudaErrorSharedObjectSymbolNotFound(302), + cudaErrorSharedObjectInitFailed(303), + cudaErrorOperatingSystem(304), + cudaErrorInvalidResourceHandle(400), + cudaErrorIllegalState(401), + cudaErrorSymbolNotFound(500), + cudaErrorNotReady(600), + cudaErrorIllegalAddress(700), + cudaErrorLaunchOutOfResources(701), + cudaErrorLaunchTimeout(702), + cudaErrorLaunchIncompatibleTexturing(703), + cudaErrorPeerAccessAlreadyEnabled(704), + cudaErrorPeerAccessNotEnabled(705), + cudaErrorSetOnActiveProcess(708), + cudaErrorContextIsDestroyed(709), + cudaErrorAssert(710), + cudaErrorTooManyPeers(711), + cudaErrorHostMemoryAlreadyRegistered(712), + cudaErrorHostMemoryNotRegistered(713), + cudaErrorHardwareStackError(714), + cudaErrorIllegalInstruction(715), + cudaErrorMisalignedAddress(716), + cudaErrorInvalidAddressSpace(717), + cudaErrorInvalidPc(718), + cudaErrorLaunchFailure(719), + cudaErrorCooperativeLaunchTooLarge(720), + cudaErrorNotPermitted(800), + cudaErrorNotSupported(801), + cudaErrorSystemNotReady(802), + cudaErrorSystemDriverMismatch(803), + cudaErrorCompatNotSupportedOnDevice(804), + cudaErrorMpsConnectionFailed(805), + cudaErrorMpsRpcFailure(806), + cudaErrorMpsServerNotReady(807), + cudaErrorMpsMaxClientsReached(808), + cudaErrorMpsMaxConnectionsReached(809), + cudaErrorStreamCaptureUnsupported(900), + cudaErrorStreamCaptureInvalidated(901), + cudaErrorStreamCaptureMerge(902), + cudaErrorStreamCaptureUnmatched(903), + cudaErrorStreamCaptureUnjoined(904), + cudaErrorStreamCaptureIsolation(905), + cudaErrorStreamCaptureImplicit(906), + cudaErrorCapturedEvent(907), + cudaErrorStreamCaptureWrongThread(908), + cudaErrorTimeout(909), + cudaErrorGraphExecUpdateFailure(910), + cudaErrorExternalDevice(911), + cudaErrorUnknown(999), + cudaErrorApiFailureBase(10000); + + final int code; + + private static Map codeToError = new HashMap(){{ + put(cudaErrorInvalidValue.code, cudaErrorInvalidValue); + put(cudaErrorMemoryAllocation.code, cudaErrorMemoryAllocation); + put(cudaErrorInitializationError.code, cudaErrorInitializationError); + put(cudaErrorCudartUnloading.code, cudaErrorCudartUnloading); + put(cudaErrorProfilerDisabled.code, cudaErrorProfilerDisabled); + put(cudaErrorProfilerNotInitialized.code, cudaErrorProfilerNotInitialized); + put(cudaErrorProfilerAlreadyStarted.code, cudaErrorProfilerAlreadyStarted); + put(cudaErrorProfilerAlreadyStopped.code, cudaErrorProfilerAlreadyStopped); + put(cudaErrorInvalidConfiguration.code, cudaErrorInvalidConfiguration); + put(cudaErrorInvalidPitchValue.code, cudaErrorInvalidPitchValue); + put(cudaErrorInvalidSymbol.code, cudaErrorInvalidSymbol); + put(cudaErrorInvalidHostPointer.code, cudaErrorInvalidHostPointer); + put(cudaErrorInvalidDevicePointer.code, cudaErrorInvalidDevicePointer); + put(cudaErrorInvalidTexture.code, cudaErrorInvalidTexture); + put(cudaErrorInvalidTextureBinding.code, cudaErrorInvalidTextureBinding); + put(cudaErrorInvalidChannelDescriptor.code, cudaErrorInvalidChannelDescriptor); + put(cudaErrorInvalidMemcpyDirection.code, cudaErrorInvalidMemcpyDirection); + put(cudaErrorAddressOfConstant.code, cudaErrorAddressOfConstant); + put(cudaErrorTextureFetchFailed.code, cudaErrorTextureFetchFailed); + put(cudaErrorTextureNotBound.code, cudaErrorTextureNotBound); + put(cudaErrorSynchronizationError.code, 
cudaErrorSynchronizationError); + put(cudaErrorInvalidFilterSetting.code, cudaErrorInvalidFilterSetting); + put(cudaErrorInvalidNormSetting.code, cudaErrorInvalidNormSetting); + put(cudaErrorMixedDeviceExecution.code, cudaErrorMixedDeviceExecution); + put(cudaErrorNotYetImplemented.code, cudaErrorNotYetImplemented); + put(cudaErrorMemoryValueTooLarge.code, cudaErrorMemoryValueTooLarge); + put(cudaErrorStubLibrary.code, cudaErrorStubLibrary); + put(cudaErrorInsufficientDriver.code, cudaErrorInsufficientDriver); + put(cudaErrorCallRequiresNewerDriver.code, cudaErrorCallRequiresNewerDriver); + put(cudaErrorInvalidSurface.code, cudaErrorInvalidSurface); + put(cudaErrorDuplicateVariableName.code, cudaErrorDuplicateVariableName); + put(cudaErrorDuplicateTextureName.code, cudaErrorDuplicateTextureName); + put(cudaErrorDuplicateSurfaceName.code, cudaErrorDuplicateSurfaceName); + put(cudaErrorDevicesUnavailable.code, cudaErrorDevicesUnavailable); + put(cudaErrorIncompatibleDriverContext.code, cudaErrorIncompatibleDriverContext); + put(cudaErrorMissingConfiguration.code, cudaErrorMissingConfiguration); + put(cudaErrorPriorLaunchFailure.code, cudaErrorPriorLaunchFailure); + put(cudaErrorLaunchMaxDepthExceeded.code, cudaErrorLaunchMaxDepthExceeded); + put(cudaErrorLaunchFileScopedTex.code, cudaErrorLaunchFileScopedTex); + put(cudaErrorLaunchFileScopedSurf.code, cudaErrorLaunchFileScopedSurf); + put(cudaErrorSyncDepthExceeded.code, cudaErrorSyncDepthExceeded); + put(cudaErrorLaunchPendingCountExceeded.code, cudaErrorLaunchPendingCountExceeded); + put(cudaErrorInvalidDeviceFunction.code, cudaErrorInvalidDeviceFunction); + put(cudaErrorNoDevice.code, cudaErrorNoDevice); + put(cudaErrorInvalidDevice.code, cudaErrorInvalidDevice); + put(cudaErrorDeviceNotLicensed.code, cudaErrorDeviceNotLicensed); + put(cudaErrorSoftwareValidityNotEstablished.code, cudaErrorSoftwareValidityNotEstablished); + put(cudaErrorStartupFailure.code, cudaErrorStartupFailure); + put(cudaErrorInvalidKernelImage.code, cudaErrorInvalidKernelImage); + put(cudaErrorDeviceUninitialized.code, cudaErrorDeviceUninitialized); + put(cudaErrorMapBufferObjectFailed.code, cudaErrorMapBufferObjectFailed); + put(cudaErrorUnmapBufferObjectFailed.code, cudaErrorUnmapBufferObjectFailed); + put(cudaErrorArrayIsMapped.code, cudaErrorArrayIsMapped); + put(cudaErrorAlreadyMapped.code, cudaErrorAlreadyMapped); + put(cudaErrorNoKernelImageForDevice.code, cudaErrorNoKernelImageForDevice); + put(cudaErrorAlreadyAcquired.code, cudaErrorAlreadyAcquired); + put(cudaErrorNotMapped.code, cudaErrorNotMapped); + put(cudaErrorNotMappedAsArray.code, cudaErrorNotMappedAsArray); + put(cudaErrorNotMappedAsPointer.code, cudaErrorNotMappedAsPointer); + put(cudaErrorECCUncorrectable.code, cudaErrorECCUncorrectable); + put(cudaErrorUnsupportedLimit.code, cudaErrorUnsupportedLimit); + put(cudaErrorDeviceAlreadyInUse.code, cudaErrorDeviceAlreadyInUse); + put(cudaErrorPeerAccessUnsupported.code, cudaErrorPeerAccessUnsupported); + put(cudaErrorInvalidPtx.code, cudaErrorInvalidPtx); + put(cudaErrorInvalidGraphicsContext.code, cudaErrorInvalidGraphicsContext); + put(cudaErrorNvlinkUncorrectable.code, cudaErrorNvlinkUncorrectable); + put(cudaErrorJitCompilerNotFound.code, cudaErrorJitCompilerNotFound); + put(cudaErrorUnsupportedPtxVersion.code, cudaErrorUnsupportedPtxVersion); + put(cudaErrorJitCompilationDisabled.code, cudaErrorJitCompilationDisabled); + put(cudaErrorUnsupportedExecAffinity.code, cudaErrorUnsupportedExecAffinity); + put(cudaErrorInvalidSource.code, 
cudaErrorInvalidSource); + put(cudaErrorFileNotFound.code, cudaErrorFileNotFound); + put(cudaErrorSharedObjectSymbolNotFound.code, cudaErrorSharedObjectSymbolNotFound); + put(cudaErrorSharedObjectInitFailed.code, cudaErrorSharedObjectInitFailed); + put(cudaErrorOperatingSystem.code, cudaErrorOperatingSystem); + put(cudaErrorInvalidResourceHandle.code, cudaErrorInvalidResourceHandle); + put(cudaErrorIllegalState.code, cudaErrorIllegalState); + put(cudaErrorSymbolNotFound.code, cudaErrorSymbolNotFound); + put(cudaErrorNotReady.code, cudaErrorNotReady); + put(cudaErrorIllegalAddress.code, cudaErrorIllegalAddress); + put(cudaErrorLaunchOutOfResources.code, cudaErrorLaunchOutOfResources); + put(cudaErrorLaunchTimeout.code, cudaErrorLaunchTimeout); + put(cudaErrorLaunchIncompatibleTexturing.code, cudaErrorLaunchIncompatibleTexturing); + put(cudaErrorPeerAccessAlreadyEnabled.code, cudaErrorPeerAccessAlreadyEnabled); + put(cudaErrorPeerAccessNotEnabled.code, cudaErrorPeerAccessNotEnabled); + put(cudaErrorSetOnActiveProcess.code, cudaErrorSetOnActiveProcess); + put(cudaErrorContextIsDestroyed.code, cudaErrorContextIsDestroyed); + put(cudaErrorAssert.code, cudaErrorAssert); + put(cudaErrorTooManyPeers.code, cudaErrorTooManyPeers); + put(cudaErrorHostMemoryAlreadyRegistered.code, cudaErrorHostMemoryAlreadyRegistered); + put(cudaErrorHostMemoryNotRegistered.code, cudaErrorHostMemoryNotRegistered); + put(cudaErrorHardwareStackError.code, cudaErrorHardwareStackError); + put(cudaErrorIllegalInstruction.code, cudaErrorIllegalInstruction); + put(cudaErrorMisalignedAddress.code, cudaErrorMisalignedAddress); + put(cudaErrorInvalidAddressSpace.code, cudaErrorInvalidAddressSpace); + put(cudaErrorInvalidPc.code, cudaErrorInvalidPc); + put(cudaErrorLaunchFailure.code, cudaErrorLaunchFailure); + put(cudaErrorCooperativeLaunchTooLarge.code, cudaErrorCooperativeLaunchTooLarge); + put(cudaErrorNotPermitted.code, cudaErrorNotPermitted); + put(cudaErrorNotSupported.code, cudaErrorNotSupported); + put(cudaErrorSystemNotReady.code, cudaErrorSystemNotReady); + put(cudaErrorSystemDriverMismatch.code, cudaErrorSystemDriverMismatch); + put(cudaErrorCompatNotSupportedOnDevice.code, cudaErrorCompatNotSupportedOnDevice); + put(cudaErrorMpsConnectionFailed.code, cudaErrorMpsConnectionFailed); + put(cudaErrorMpsRpcFailure.code, cudaErrorMpsRpcFailure); + put(cudaErrorMpsServerNotReady.code, cudaErrorMpsServerNotReady); + put(cudaErrorMpsMaxClientsReached.code, cudaErrorMpsMaxClientsReached); + put(cudaErrorMpsMaxConnectionsReached.code, cudaErrorMpsMaxConnectionsReached); + put(cudaErrorStreamCaptureUnsupported.code, cudaErrorStreamCaptureUnsupported); + put(cudaErrorStreamCaptureInvalidated.code, cudaErrorStreamCaptureInvalidated); + put(cudaErrorStreamCaptureMerge.code, cudaErrorStreamCaptureMerge); + put(cudaErrorStreamCaptureUnmatched.code, cudaErrorStreamCaptureUnmatched); + put(cudaErrorStreamCaptureUnjoined.code, cudaErrorStreamCaptureUnjoined); + put(cudaErrorStreamCaptureIsolation.code, cudaErrorStreamCaptureIsolation); + put(cudaErrorStreamCaptureImplicit.code, cudaErrorStreamCaptureImplicit); + put(cudaErrorCapturedEvent.code, cudaErrorCapturedEvent); + put(cudaErrorStreamCaptureWrongThread.code, cudaErrorStreamCaptureWrongThread); + put(cudaErrorTimeout.code, cudaErrorTimeout); + put(cudaErrorGraphExecUpdateFailure.code, cudaErrorGraphExecUpdateFailure); + put(cudaErrorExternalDevice.code, cudaErrorExternalDevice); + put(cudaErrorUnknown.code, cudaErrorUnknown); + put(cudaErrorApiFailureBase.code, 
cudaErrorApiFailureBase); + }}; + + CudaError(int errorCode) { + this.code = errorCode; + } + + public static CudaError parseErrorCode(int errorCode) { + if (!codeToError.containsKey(errorCode)) { + return UnknownNativeError; + } + return codeToError.get(errorCode); + } + } } diff --git a/java/src/main/java/ai/rapids/cudf/CudaFatalException.java b/java/src/main/java/ai/rapids/cudf/CudaFatalException.java new file mode 100644 index 00000000000..cf36726aa80 --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/CudaFatalException.java @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ai.rapids.cudf; + +/** + * CudaFatalException is a kind of CudaException which leaves the process in an inconsistent state + * and any further CUDA work will return the same error. + * To continue using CUDA, the process must be terminated and relaunched. + */ +public class CudaFatalException extends CudaException { + CudaFatalException(String message, int errorCode) { + super(message, errorCode); + } + + CudaFatalException(String message, int errorCode, Throwable cause) { + super(message, errorCode, cause); + } +} diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index ff966643866..24f7d44ed28 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -289,7 +289,10 @@ private static native long writeParquetFileBegin(String[] columnNames, int statsFreq, boolean[] isInt96, int[] precisions, - boolean[] isMapValues, String filename) throws CudfException; + boolean[] isMapValues, + boolean[] hasParquetFieldIds, + int[] parquetFieldIds, + String filename) throws CudfException; /** * Setup everything to write parquet formatted data to a buffer. 
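With the constructor changes above, every `CudaException` now carries the parsed `cudaError` enum value, and the new `CudaFatalException` subtype marks errors that leave the CUDA context unusable. A minimal sketch of how calling code might branch on the two; the `doCudfWork()` helper is a hypothetical stand-in for any cudf/JNI call:

```java
import ai.rapids.cudf.CudaException;
import ai.rapids.cudf.CudaFatalException;

public class CudaErrorHandlingDemo {
  // Hypothetical stand-in for any cudf/JNI call that can surface a CUDA error.
  static void doCudfWork() { /* e.g. Table.readParquet(...), kernel launches, ... */ }

  public static void main(String[] args) {
    try {
      doCudfWork();
    } catch (CudaFatalException e) {
      // Per the CudaFatalException docs: the process is in an inconsistent state
      // and must be terminated and relaunched to keep using CUDA.
      System.exit(1);
    } catch (CudaException e) {
      // Non-fatal errors expose the enum code for targeted recovery.
      if (e.cudaError == CudaException.CudaError.cudaErrorMemoryAllocation) {
        // e.g. spill, back off, and retry the chunk
      } else {
        throw e;
      }
    }
  }
}
```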
@@ -319,6 +322,8 @@ private static native long writeParquetBufferBegin(String[] columnNames, boolean[] isInt96, int[] precisions, boolean[] isMapValues, + boolean[] hasParquetFieldIds, + int[] parquetFieldIds, HostBufferConsumer consumer) throws CudfException; /** @@ -1201,6 +1206,8 @@ private ParquetTableWriter(ParquetWriterOptions options, File outputFile) { boolean[] timeInt96Values = options.getFlatIsTimeTypeInt96(); boolean[] isMapValues = options.getFlatIsMap(); int[] precisions = options.getFlatPrecision(); + boolean[] hasParquetFieldIds = options.getFlatHasParquetFieldId(); + int[] parquetFieldIds = options.getFlatParquetFieldId(); int[] flatNumChildren = options.getFlatNumChildren(); this.consumer = null; @@ -1215,6 +1222,8 @@ private ParquetTableWriter(ParquetWriterOptions options, File outputFile) { timeInt96Values, precisions, isMapValues, + hasParquetFieldIds, + parquetFieldIds, outputFile.getAbsolutePath()); } @@ -1224,6 +1233,8 @@ private ParquetTableWriter(ParquetWriterOptions options, HostBufferConsumer cons boolean[] timeInt96Values = options.getFlatIsTimeTypeInt96(); boolean[] isMapValues = options.getFlatIsMap(); int[] precisions = options.getFlatPrecision(); + boolean[] hasParquetFieldIds = options.getFlatHasParquetFieldId(); + int[] parquetFieldIds = options.getFlatParquetFieldId(); int[] flatNumChildren = options.getFlatNumChildren(); this.consumer = consumer; @@ -1238,6 +1249,8 @@ private ParquetTableWriter(ParquetWriterOptions options, HostBufferConsumer cons timeInt96Values, precisions, isMapValues, + hasParquetFieldIds, + parquetFieldIds, consumer); } diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 9851102d011..3a375412bbd 100755 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -41,7 +41,6 @@ option(BUILD_TESTS "Configure CMake to build tests" ON) option(PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF) option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) option(USE_GDS "Build with GPUDirect Storage (GDS)/cuFile support" OFF) -option(CUDF_JNI_ARROW_STATIC "Statically link Arrow" ON) option(CUDF_JNI_LIBCUDF_STATIC "Link with libcudf.a" OFF) message(VERBOSE "CUDF_JNI: Build with NVTX support: ${USE_NVTX}") @@ -50,7 +49,6 @@ message(VERBOSE "CUDF_JNI: Configure CMake to build tests: ${BUILD_TESTS}") message(VERBOSE "CUDF_JNI: Build with per-thread default stream: ${PER_THREAD_DEFAULT_STREAM}") message(VERBOSE "CUDF_JNI: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}") message(VERBOSE "CUDF_JNI: Build with GPUDirect Storage support: ${USE_GDS}") -message(VERBOSE "CUDF_JNI: Build with static Arrow library: ${CUDF_JNI_ARROW_STATIC}") message(VERBOSE "CUDF_JNI: Link with libcudf statically: ${CUDF_JNI_LIBCUDF_STATIC}") set(CUDF_SOURCE_DIR "${PROJECT_SOURCE_DIR}/../../../../cpp") @@ -93,67 +91,16 @@ endif() rapids_cmake_build_type("Release") # ################################################################################################## -# * Thrust/CUB -# ------------------------------------------------------------------------------------ -include(${CUDF_SOURCE_DIR}/cmake/thirdparty/get_thrust.cmake) +# * nvcomp------------------------------------------------------------------------------------------ -# ################################################################################################## -# * CUDF ------------------------------------------------------------------------------------------ - -set(CUDF_INCLUDE 
"${PROJECT_SOURCE_DIR}/../../../../cpp/include" - "${PROJECT_SOURCE_DIR}/../../../../cpp/src/" -) - -set(CUDF_LIB_HINTS HINTS "$ENV{CUDF_ROOT}" "$ENV{CUDF_ROOT}/lib" "$ENV{CONDA_PREFIX}/lib" - "${CUDF_CPP_BUILD_DIR}" -) - -find_library(CUDF_LIB "cudf" REQUIRED HINTS ${CUDF_LIB_HINTS}) - -# ################################################################################################## -# * ZLIB ------------------------------------------------------------------------------------------ - -# find zlib -rapids_find_package(ZLIB REQUIRED) +set(nvcomp_DIR "${CUDF_CPP_BUILD_DIR}/_deps/nvcomp-build") +rapids_find_package(nvcomp REQUIRED) # ################################################################################################## -# * RMM ------------------------------------------------------------------------------------------- +# * CUDF ------------------------------------------------------------------------------------------ -include(${CUDF_SOURCE_DIR}/cmake/thirdparty/get_rmm.cmake) - -# ################################################################################################## -# * ARROW ----------------------------------------------------------------------------------------- - -find_path(ARROW_INCLUDE "arrow" HINTS "$ENV{ARROW_ROOT}/include" - "${CUDF_CPP_BUILD_DIR}/_deps/arrow-src/cpp/src" -) - -message(STATUS "ARROW: ARROW_INCLUDE set to ${ARROW_INCLUDE}") - -if(CUDF_JNI_ARROW_STATIC) - # Find static version of Arrow lib - set(CUDF_JNI_ARROW_LIBNAME "libarrow.a") -else() - set(CUDF_JNI_ARROW_LIBNAME "arrow") -endif() - -find_library( - ARROW_LIBRARY ${CUDF_JNI_ARROW_LIBNAME} REQUIRED - HINTS "$ENV{ARROW_ROOT}/lib" "${CUDF_CPP_BUILD_DIR}/_deps/arrow-build/release" - "${CUDF_CPP_BUILD_DIR}/_deps/arrow-build/debug" -) - -if(NOT ARROW_LIBRARY) - if(CUDF_JNI_ARROW_STATIC) - message( - FATAL_ERROR "Arrow static library not found. Was libcudf built with CUDF_USE_ARROW_STATIC=ON?" 
- ) - else() - message(FATAL_ERROR "Arrow dynamic library not found.") - endif() -else() - message(STATUS "ARROW: ARROW_LIBRARY set to ${ARROW_LIBRARY}") -endif() +set(cudf_ROOT "${CUDF_CPP_BUILD_DIR}") +rapids_find_package(cudf REQUIRED) # ################################################################################################## # * find JNI ------------------------------------------------------------------------------------- @@ -164,27 +111,6 @@ else() message(FATAL_ERROR "JDK with JNI not found, please check your settings.") endif() -# ################################################################################################## -# * nvcomp ---------------------------------------------------------------------------------------- - -find_path(NVCOMP_INCLUDE "nvcomp" HINTS "${CUDF_CPP_BUILD_DIR}/_deps/nvcomp-src/include" - "$ENV{CONDA_PREFIX}/include" -) - -message(STATUS "NVCOMP: NVCOMP_INCLUDE set to ${NVCOMP_INCLUDE}") - -set(CUDF_JNI_NVCOMP_LIBNAME "libnvcomp.a") -find_library( - NVCOMP_LIBRARY ${CUDF_JNI_NVCOMP_LIBNAME} REQUIRED HINTS "${CUDF_CPP_BUILD_DIR}/lib" - "$ENV{CONDA_PREFIX}/lib" -) - -if(NOT NVCOMP_LIBRARY) - message(FATAL_ERROR "nvcomp static library not found.") -else() - message(STATUS "NVCOMP: NVCOMP_LIBRARY set to ${NVCOMP_LIBRARY}") -endif() - # ################################################################################################## # * GDS/cufile ------------------------------------------------------------------------------------ @@ -238,17 +164,8 @@ endif() # * include paths --------------------------------------------------------------------------------- target_include_directories( - cudfjni - PUBLIC cudf::Thrust - "${LIBCUDACXX_INCLUDE}" - "${CUDAToolkit_INCLUDE_DIRS}" - "${NVCOMP_INCLUDE}" - "${CMAKE_BINARY_DIR}/include" - "${CMAKE_SOURCE_DIR}/include" - "${CMAKE_SOURCE_DIR}/src" - "${JNI_INCLUDE_DIRS}" - "${CUDF_INCLUDE}" - "${ARROW_INCLUDE}" + cudfjni PUBLIC "${CMAKE_BINARY_DIR}/include" "${CMAKE_SOURCE_DIR}/include" + "${CMAKE_SOURCE_DIR}/src" "${JNI_INCLUDE_DIRS}" ) # ################################################################################################## @@ -291,39 +208,24 @@ if(USE_GDS) POSITION_INDEPENDENT_CODE ON INTERFACE_POSITION_INDEPENDENT_CODE ON ) - target_include_directories( - cufilejni - PUBLIC "${LIBCUDACXX_INCLUDE}" "${CUDF_INCLUDE}" - PRIVATE "${cuFile_INCLUDE_DIRS}" - ) - target_link_libraries(cufilejni PRIVATE cudfjni rmm::rmm "${cuFile_LIBRARIES}") + target_include_directories(cufilejni PRIVATE "${cuFile_INCLUDE_DIRS}") + target_link_libraries(cufilejni PRIVATE cudfjni "${cuFile_LIBRARIES}") endif() -# ################################################################################################## -# * rmm logging level ----------------------------------------------------------------------------- - -set(RMM_LOGGING_LEVEL - "INFO" - CACHE STRING "Choose the logging level." 
-) -# Set the possible values of build type for cmake-gui -set_property( - CACHE RMM_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" "OFF" -) -message(STATUS "RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'.") - -target_compile_definitions(cudfjni PUBLIC SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}) - # ################################################################################################## # * link libraries -------------------------------------------------------------------------------- -set(CUDF_LINK ${CUDF_LIB}) +set(CUDF_LINK PUBLIC cudf::cudf) if(CUDF_JNI_LIBCUDF_STATIC) - set(CUDF_LINK -Wl,--whole-archive ${CUDF_LIB} -Wl,--no-whole-archive ZLIB::ZLIB) + # Whole-link libcudf.a into the shared library but not its dependencies + set(CUDF_LINK PRIVATE -Wl,--whole-archive cudf::cudf -Wl,--no-whole-archive PUBLIC cudf::cudf) endif() +# When nvcomp is installed, we need to use nvcomp::nvcomp, but from the cudf build directory it +# will just be nvcomp. target_link_libraries( - cudfjni PRIVATE ${CUDF_LINK} ${NVCOMP_LIBRARY} ${ARROW_LIBRARY} rmm::rmm CUDA::cuda_driver + cudfjni ${CUDF_LINK} PRIVATE $ + $ ) # ################################################################################################## diff --git a/java/src/main/native/include/jni_utils.hpp b/java/src/main/native/include/jni_utils.hpp index a45716a89b3..eca424132a5 100644 --- a/java/src/main/native/include/jni_utils.hpp +++ b/java/src/main/native/include/jni_utils.hpp @@ -30,6 +30,7 @@ namespace jni { constexpr jint MINIMUM_JNI_VERSION = JNI_VERSION_1_6; constexpr char const *CUDA_ERROR_CLASS = "ai/rapids/cudf/CudaException"; +constexpr char const *CUDA_FATAL_ERROR_CLASS = "ai/rapids/cudf/CudaFatalException"; constexpr char const *CUDF_ERROR_CLASS = "ai/rapids/cudf/CudfException"; constexpr char const *INDEX_OOB_CLASS = "java/lang/ArrayIndexOutOfBoundsException"; constexpr char const *ILLEGAL_ARG_CLASS = "java/lang/IllegalArgumentException"; @@ -737,12 +738,26 @@ class native_jstringArray { * @brief create a cuda exception from a given cudaError_t */ inline jthrowable cuda_exception(JNIEnv *const env, cudaError_t status, jthrowable cause = NULL) { - jclass ex_class = env->FindClass(cudf::jni::CUDA_ERROR_CLASS); + const char *ex_class_name; + + // Calls cudaGetLastError twice. It is nearly certain that a fatal error occurred if the second + // call doesn't return with cudaSuccess. + cudaGetLastError(); + auto const last = cudaGetLastError(); + // Call cudaDeviceSynchronize to ensure `last` did not result from an asynchronous error that + // arrived between the two calls.
+ if (status == last && last == cudaDeviceSynchronize()) { + ex_class_name = cudf::jni::CUDA_FATAL_ERROR_CLASS; + } else { + ex_class_name = cudf::jni::CUDA_ERROR_CLASS; + } + + jclass ex_class = env->FindClass(ex_class_name); if (ex_class == NULL) { return NULL; } jmethodID ctor_id = - env->GetMethodID(ex_class, "<init>", "(Ljava/lang/String;Ljava/lang/Throwable;)V"); + env->GetMethodID(ex_class, "<init>", "(Ljava/lang/String;ILjava/lang/Throwable;)V"); if (ctor_id == NULL) { return NULL; } @@ -752,19 +767,20 @@ inline jthrowable cuda_exception(JNIEnv *const env, cudaError_t status, jthrowab return NULL; } - jobject ret = env->NewObject(ex_class, ctor_id, msg, cause); + jint err_code = static_cast<jint>(status); + + jobject ret = env->NewObject(ex_class, ctor_id, msg, err_code, cause); return (jthrowable)ret; } inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) { if (cudaSuccess != cuda_status) { - // Clear the last error so it does not propagate. - cudaGetLastError(); jthrowable jt = cuda_exception(env, cuda_status); if (jt != NULL) { env->Throw(jt); - throw jni_exception("CUDA ERROR"); } + throw jni_exception(std::string("CUDA ERROR: code ") + + std::to_string(static_cast<int>(cuda_status))); } } @@ -790,18 +806,26 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) { JNI_THROW_NEW(env, class_name, message, ret_val) \ } -#define JNI_CUDA_TRY(env, ret_val, call) \ +// Throw a new exception only if one is not already pending, then always return the specified value +#define JNI_CHECK_CUDA_ERROR(env, class_name, e, ret_val) \ { \ - cudaError_t internal_cuda_status = (call); \ - if (cudaSuccess != internal_cuda_status) { \ - /* Clear the last error so it does not propagate.*/ \ - cudaGetLastError(); \ - jthrowable jt = cudf::jni::cuda_exception(env, internal_cuda_status); \ - if (jt != NULL) { \ - env->Throw(jt); \ - } \ + if (env->ExceptionOccurred()) { \ return ret_val; \ } \ + std::string n_msg = e.what() == nullptr ? "" : e.what(); \ + jstring j_msg = env->NewStringUTF(n_msg.c_str()); \ + jint e_code = static_cast<jint>(e.error_code()); \ + jclass ex_class = env->FindClass(class_name); \ + if (ex_class != NULL) { \ + jmethodID ctor_id = env->GetMethodID(ex_class, "<init>", "(Ljava/lang/String;I)V"); \ + if (ctor_id != NULL) { \ + jobject cuda_error = env->NewObject(ex_class, ctor_id, j_msg, e_code); \ + if (cuda_error != NULL) { \ + env->Throw((jthrowable)cuda_error); \ + } \ + } \ + } \ + return ret_val; \ } #define JNI_NULL_CHECK(env, obj, error_msg, ret_val) \ @@ -831,6 +855,12 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) { std::string("Could not allocate native memory: ") + (e.what() == nullptr ? "" : e.what()); \ JNI_CHECK_THROW_NEW(env, cudf::jni::OOM_CLASS, what.c_str(), ret_val); \ } \ + catch (const cudf::fatal_cuda_error &e) { \ + JNI_CHECK_CUDA_ERROR(env, cudf::jni::CUDA_FATAL_ERROR_CLASS, e, ret_val); \ + } \ + catch (const cudf::cuda_error &e) { \ + JNI_CHECK_CUDA_ERROR(env, cudf::jni::CUDA_ERROR_CLASS, e, ret_val); \ + } \ catch (const std::exception &e) { \ /* If a jni_exception was caught then a Java exception is pending and this will not overwrite it.
*/ \ JNI_CHECK_THROW_NEW(env, class_name, e.what(), ret_val); \ diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index f8c448566c8..6ac73282615 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -82,11 +82,14 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createNoParamAgg(JNIEnv case 27: // MERGE_M2 return cudf::make_merge_m2_aggregation(); case 28: // RANK - return cudf::make_rank_aggregation(); + return cudf::make_rank_aggregation(cudf::rank_method::MIN, {}, + cudf::null_policy::INCLUDE); case 29: // DENSE_RANK - return cudf::make_dense_rank_aggregation(); - case 30: // PERCENT_RANK - return cudf::make_percent_rank_aggregation(); + return cudf::make_rank_aggregation(cudf::rank_method::DENSE, {}, + cudf::null_policy::INCLUDE); + case 30: // ANSI SQL PERCENT_RANK + return cudf::make_rank_aggregation(cudf::rank_method::MIN, {}, cudf::null_policy::INCLUDE, + {}, cudf::rank_percentage::ONE_NORMALIZED); default: throw std::logic_error("Unsupported No Parameter Aggregation Operation"); } }(); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 8c8e9b91e8d..e074180c312 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -288,6 +289,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_segmentedReduce( CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_segmentedGather( + JNIEnv *env, jclass, jlong source_column, jlong gather_map_list, jboolean nullify_out_bounds) { + JNI_NULL_CHECK(env, source_column, "source column view is null", 0); + JNI_NULL_CHECK(env, gather_map_list, "gather map is null", 0); + try { + cudf::jni::auto_set_device(env); + auto const &src_col = + cudf::lists_column_view(*reinterpret_cast<cudf::column_view const *>(source_column)); + auto const &gather_map = + cudf::lists_column_view(*reinterpret_cast<cudf::column_view const *>(gather_map_list)); + auto out_bounds_policy = nullify_out_bounds ? cudf::out_of_bounds_policy::NULLIFY : + cudf::out_of_bounds_policy::DONT_CHECK; + return release_as_jlong(cudf::lists::segmented_gather(src_col, gather_map, out_bounds_policy)); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_scan(JNIEnv *env, jclass, jlong j_col_view, jlong j_agg, jboolean is_inclusive, jboolean include_nulls) { @@ -606,6 +624,17 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listSortRows(JNIEnv *env, CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_generateListOffsets(JNIEnv *env, jclass, + jlong handle) { + JNI_NULL_CHECK(env, handle, "handle is null", 0) + try { + cudf::jni::auto_set_device(env); + auto const cv = reinterpret_cast<cudf::column_view const *>(handle); + return release_as_jlong(cudf::jni::generate_list_offsets(*cv)); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu index 47055ca1611..6b4db39eb34 100644 --- a/java/src/main/native/src/ColumnViewJni.cu +++ b/java/src/main/native/src/ColumnViewJni.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,11 @@ */ #include +#include #include #include +#include +#include #include "ColumnViewJni.hpp" @@ -51,4 +54,22 @@ new_column_with_boolean_column_as_validity(cudf::column_view const &exemplar, return deep_copy; } +std::unique_ptr<cudf::column> generate_list_offsets(cudf::column_view const &list_length, + rmm::cuda_stream_view stream) { + CUDF_EXPECTS(list_length.type().id() == cudf::type_id::INT32, + "Input column does not have type INT32."); + + auto const begin_iter = list_length.template begin<int32_t>(); + auto const end_iter = list_length.template end<int32_t>(); + + auto offsets_column = make_numeric_column(data_type{type_id::INT32}, list_length.size() + 1, + mask_state::UNALLOCATED, stream); + auto offsets_view = offsets_column->mutable_view(); + auto d_offsets = offsets_view.template begin<int32_t>(); + + thrust::inclusive_scan(rmm::exec_policy(stream), begin_iter, end_iter, d_offsets + 1); + CUDF_CUDA_TRY(cudaMemsetAsync(d_offsets, 0, sizeof(int32_t), stream)); + + return offsets_column; +} } // namespace cudf::jni diff --git a/java/src/main/native/src/ColumnViewJni.hpp b/java/src/main/native/src/ColumnViewJni.hpp index 37e58ecb63a..429f36bcb1d 100644 --- a/java/src/main/native/src/ColumnViewJni.hpp +++ b/java/src/main/native/src/ColumnViewJni.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #include +#include namespace cudf::jni { @@ -35,4 +36,19 @@ std::unique_ptr<cudf::column> new_column_with_boolean_column_as_validity(cudf::column_view const &exemplar, cudf::column_view const &bool_column); +/** + * @brief Generates list offsets from the lengths of each list. + * + * For example, + * given a list column: [[1,2,3], [4,5], [6], [], [7,8]], + * its list lengths are: [3, 2, 1, 0, 2], + * and its list offsets are: [0, 3, 5, 6, 6, 8]. + * + * @param list_length The column representing list lengths. + * @return The column of generated list offsets.
+ */ +std::unique_ptr<cudf::column> +generate_list_offsets(cudf::column_view const &list_length, + rmm::cuda_stream_view stream = rmm::cuda_stream_default); + } // namespace cudf::jni diff --git a/java/src/main/native/src/CudaJni.cpp b/java/src/main/native/src/CudaJni.cpp index 9862c3bface..926521c55f9 100644 --- a/java/src/main/native/src/CudaJni.cpp +++ b/java/src/main/native/src/CudaJni.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include "jni_utils.hpp" @@ -66,7 +67,7 @@ JNIEXPORT jobject JNICALL Java_ai_rapids_cudf_Cuda_memGetInfo(JNIEnv *env, jclas cudf::jni::auto_set_device(env); size_t free, total; - JNI_CUDA_TRY(env, NULL, cudaMemGetInfo(&free, &total)); + CUDF_CUDA_TRY(cudaMemGetInfo(&free, &total)); jclass info_class = env->FindClass("Lai/rapids/cudf/CudaMemInfo;"); if (info_class == NULL) { @@ -90,7 +91,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_hostAllocPinned(JNIEnv *env, jc try { cudf::jni::auto_set_device(env); void *ret = nullptr; - JNI_CUDA_TRY(env, 0, cudaMallocHost(&ret, size)); + CUDF_CUDA_TRY(cudaMallocHost(&ret, size)); return reinterpret_cast<jlong>(ret); } CATCH_STD(env, 0); @@ -99,7 +100,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_hostAllocPinned(JNIEnv *env, jc JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_freePinned(JNIEnv *env, jclass, jlong ptr) { try { cudf::jni::auto_set_device(env); - JNI_CUDA_TRY(env, , cudaFreeHost(reinterpret_cast<void *>(ptr))); + CUDF_CUDA_TRY(cudaFreeHost(reinterpret_cast<void *>(ptr))); } CATCH_STD(env, ); } @@ -109,8 +110,8 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_memset(JNIEnv *env, jclass, jlon JNI_NULL_CHECK(env, dst, "dst memory pointer is null", ); try { cudf::jni::auto_set_device(env); - JNI_CUDA_TRY(env, , cudaMemsetAsync((void *)dst, value, count)); - JNI_CUDA_TRY(env, , cudaStreamSynchronize(0)); + CUDF_CUDA_TRY(cudaMemsetAsync((void *)dst, value, count)); + CUDF_CUDA_TRY(cudaStreamSynchronize(0)); } CATCH_STD(env, ); } @@ -120,7 +121,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_asyncMemset(JNIEnv *env, jclass, JNI_NULL_CHECK(env, dst, "dst memory pointer is null", ); try { cudf::jni::auto_set_device(env); - JNI_CUDA_TRY(env, , cudaMemsetAsync((void *)dst, value, count)); + CUDF_CUDA_TRY(cudaMemsetAsync((void *)dst, value, count)); } CATCH_STD(env, ); } @@ -129,7 +130,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getDevice(JNIEnv *env, jclass) { try { cudf::jni::auto_set_device(env); jint dev; - JNI_CUDA_TRY(env, -2, cudaGetDevice(&dev)); + CUDF_CUDA_TRY(cudaGetDevice(&dev)); return dev; } CATCH_STD(env, -2); @@ -139,7 +140,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getDeviceCount(JNIEnv *env, jcla try { cudf::jni::auto_set_device(env); jint count; - JNI_CUDA_TRY(env, -2, cudaGetDeviceCount(&count)); + CUDF_CUDA_TRY(cudaGetDeviceCount(&count)); return count; } CATCH_STD(env, -2); @@ -151,7 +152,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_setDevice(JNIEnv *env, jclass, j cudf::jni::throw_java_exception(env, cudf::jni::CUDF_ERROR_CLASS, "Cannot change device after RMM init"); } - JNI_CUDA_TRY(env, , cudaSetDevice(dev)); + CUDF_CUDA_TRY(cudaSetDevice(dev)); } CATCH_STD(env, ); } @@ -167,7 +168,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getDriverVersion(JNIEnv *env, jc try { cudf::jni::auto_set_device(env); jint driver_version; - JNI_CUDA_TRY(env, -2, cudaDriverGetVersion(&driver_version)); + CUDF_CUDA_TRY(cudaDriverGetVersion(&driver_version)); return driver_version; } CATCH_STD(env, -2); @@ -177,7 +178,7 @@ JNIEXPORT jint JNICALL
Java_ai_rapids_cudf_Cuda_getRuntimeVersion(JNIEnv *env, j try { cudf::jni::auto_set_device(env); jint runtime_version; - JNI_CUDA_TRY(env, -2, cudaRuntimeGetVersion(&runtime_version)); + CUDF_CUDA_TRY(cudaRuntimeGetVersion(&runtime_version)); return runtime_version; } CATCH_STD(env, -2); @@ -187,9 +188,9 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getNativeComputeMode(JNIEnv *env try { cudf::jni::auto_set_device(env); int device; - JNI_CUDA_TRY(env, -2, cudaGetDevice(&device)); + CUDF_CUDA_TRY(cudaGetDevice(&device)); cudaDeviceProp device_prop; - JNI_CUDA_TRY(env, -2, cudaGetDeviceProperties(&device_prop, device)); + CUDF_CUDA_TRY(cudaGetDeviceProperties(&device_prop, device)); return device_prop.computeMode; } CATCH_STD(env, -2); @@ -199,10 +200,9 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMajor(JNIEnv try { cudf::jni::auto_set_device(env); int device; - JNI_CUDA_TRY(env, -2, ::cudaGetDevice(&device)); + CUDF_CUDA_TRY(::cudaGetDevice(&device)); int attribute_value; - JNI_CUDA_TRY( - env, -2, + CUDF_CUDA_TRY( ::cudaDeviceGetAttribute(&attribute_value, ::cudaDevAttrComputeCapabilityMajor, device)); return attribute_value; } @@ -213,10 +213,9 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMinor(JNIEnv try { cudf::jni::auto_set_device(env); int device; - JNI_CUDA_TRY(env, -2, ::cudaGetDevice(&device)); + CUDF_CUDA_TRY(::cudaGetDevice(&device)); int attribute_value; - JNI_CUDA_TRY( - env, -2, + CUDF_CUDA_TRY( ::cudaDeviceGetAttribute(&attribute_value, ::cudaDevAttrComputeCapabilityMinor, device)); return attribute_value; } @@ -226,7 +225,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMinor(JNIEnv JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_freeZero(JNIEnv *env, jclass) { try { cudf::jni::auto_set_device(env); - JNI_CUDA_TRY(env, , cudaFree(0)); + CUDF_CUDA_TRY(cudaFree(0)); } CATCH_STD(env, ); } @@ -237,7 +236,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_createStream(JNIEnv *env, jclas cudf::jni::auto_set_device(env); cudaStream_t stream = nullptr; auto flags = isNonBlocking ? 
cudaStreamNonBlocking : cudaStreamDefault; - JNI_CUDA_TRY(env, 0, cudaStreamCreateWithFlags(&stream, flags)); + CUDF_CUDA_TRY(cudaStreamCreateWithFlags(&stream, flags)); return reinterpret_cast<jlong>(stream); } CATCH_STD(env, 0); @@ -247,7 +246,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_destroyStream(JNIEnv *env, jclas try { cudf::jni::auto_set_device(env); auto stream = reinterpret_cast<cudaStream_t>(jstream); - JNI_CUDA_TRY(env, , cudaStreamDestroy(stream)); + CUDF_CUDA_TRY(cudaStreamDestroy(stream)); } CATCH_STD(env, ); } @@ -258,7 +257,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_streamWaitEvent(JNIEnv *env, jcl cudf::jni::auto_set_device(env); auto stream = reinterpret_cast<cudaStream_t>(jstream); auto event = reinterpret_cast<cudaEvent_t>(jevent); - JNI_CUDA_TRY(env, , cudaStreamWaitEvent(stream, event, 0)); + CUDF_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0)); } CATCH_STD(env, ); } @@ -268,7 +267,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_streamSynchronize(JNIEnv *env, j try { cudf::jni::auto_set_device(env); auto stream = reinterpret_cast<cudaStream_t>(jstream); - JNI_CUDA_TRY(env, , cudaStreamSynchronize(stream)); + CUDF_CUDA_TRY(cudaStreamSynchronize(stream)); } CATCH_STD(env, ); } @@ -286,7 +285,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_createEvent(JNIEnv *env, jclass if (blockingSync) { flags = flags | cudaEventBlockingSync; } - JNI_CUDA_TRY(env, 0, cudaEventCreateWithFlags(&event, flags)); + CUDF_CUDA_TRY(cudaEventCreateWithFlags(&event, flags)); return reinterpret_cast<jlong>(event); } CATCH_STD(env, 0); @@ -296,7 +295,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_destroyEvent(JNIEnv *env, jclass try { cudf::jni::auto_set_device(env); auto event = reinterpret_cast<cudaEvent_t>(jevent); - JNI_CUDA_TRY(env, , cudaEventDestroy(event)); + CUDF_CUDA_TRY(cudaEventDestroy(event)); } CATCH_STD(env, ); } @@ -311,7 +310,7 @@ JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_Cuda_eventQuery(JNIEnv *env, jcla } else if (result == cudaErrorNotReady) { return false; } // else - JNI_CUDA_TRY(env, false, result); + CUDF_CUDA_TRY(result); } CATCH_STD(env, false); return false; @@ -323,7 +322,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_eventRecord(JNIEnv *env, jclass, cudf::jni::auto_set_device(env); auto event = reinterpret_cast<cudaEvent_t>(jevent); auto stream = reinterpret_cast<cudaStream_t>(jstream); - JNI_CUDA_TRY(env, , cudaEventRecord(event, stream)); + CUDF_CUDA_TRY(cudaEventRecord(event, stream)); } CATCH_STD(env, ); } @@ -333,7 +332,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_eventSynchronize(JNIEnv *env, jc try { cudf::jni::auto_set_device(env); auto event = reinterpret_cast<cudaEvent_t>(jevent); - JNI_CUDA_TRY(env, , cudaEventSynchronize(event)); + CUDF_CUDA_TRY(cudaEventSynchronize(event)); } CATCH_STD(env, ); } @@ -352,8 +351,8 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_memcpyOnStream(JNIEnv *env, jcla auto dst = reinterpret_cast<void *>(jdst); auto src = reinterpret_cast<void *>(jsrc); auto kind = static_cast<cudaMemcpyKind>(jkind); auto stream = reinterpret_cast<cudaStream_t>(jstream); - JNI_CUDA_TRY(env, , cudaMemcpyAsync(dst, src, count, kind, stream)); - JNI_CUDA_TRY(env, , cudaStreamSynchronize(stream)); + CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, count, kind, stream)); + CUDF_CUDA_TRY(cudaStreamSynchronize(stream)); } CATCH_STD(env, ); } @@ -372,7 +371,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_asyncMemcpyOnStream(JNIEnv *env, auto dst = reinterpret_cast<void *>(jdst); auto src = reinterpret_cast<void *>(jsrc); auto kind = static_cast<cudaMemcpyKind>(jkind); auto stream = reinterpret_cast<cudaStream_t>(jstream); - JNI_CUDA_TRY(env, , cudaMemcpyAsync(dst, src, count, kind, stream)); + CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, count, kind, stream)); } CATCH_STD(env, ); }
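With the `JNI_CUDA_TRY` call sites above now routed through `CUDF_CUDA_TRY`, CUDA failures propagate as `cudf::cuda_error` (or `cudf::fatal_cuda_error`) and surface in Java as `CudaException` or `CudaFatalException` via the `CATCH_STD` handlers in `jni_utils.hpp`. A minimal caller-side sketch of consuming that split follows; the recovery policy shown is hypothetical, not part of this change:

    import ai.rapids.cudf.Cuda;
    import ai.rapids.cudf.CudaException;
    import ai.rapids.cudf.CudaFatalException;

    public class CudaErrorHandlingSketch {
      public static void memsetChecked(long devicePtr, long sizeBytes) {
        try {
          Cuda.memset(devicePtr, (byte) 0, sizeBytes);
        } catch (CudaFatalException fatal) {
          // CudaFatalException extends CudaException, so it must be caught first.
          // The process is in an inconsistent state and every later CUDA call
          // will keep failing; the only safe recovery is a process restart.
          System.err.println("Fatal CUDA error: " + fatal.getMessage());
          System.exit(1);
        } catch (CudaException ex) {
          // Non-fatal: the concrete error code travels with the exception and
          // can drive a retry/abort decision without restarting the process.
          System.err.println("Recoverable CUDA error: " + ex.cudaError);
          throw ex;
        }
      }
    }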
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index cebe476dd87..919958d4db2 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -673,6 +673,8 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, cudf::jni::native_jbooleanArray &is_int96, cudf::jni::native_jintArray &precisions, cudf::jni::native_jbooleanArray &is_map, + cudf::jni::native_jbooleanArray &hasParquetFieldIds, + cudf::jni::native_jintArray &parquetFieldIds, cudf::jni::native_jintArray &children, int num_children, int read_index) { int write_index = 0; for (int i = 0; i < num_children; i++, write_index++) { @@ -687,12 +689,15 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, if (is_map[read_index]) { child.set_list_column_as_map(); } + if (!parquetFieldIds.is_null() && hasParquetFieldIds[read_index]) { + child.set_parquet_field_id(parquetFieldIds[read_index]); + } column_metadata.add_child(child); int childs_children = children[read_index++]; if (childs_children > 0) { - read_index = - set_column_metadata(column_metadata.child(write_index), col_names, nullability, is_int96, - precisions, is_map, children, childs_children, read_index); + read_index = set_column_metadata(column_metadata.child(write_index), col_names, nullability, + is_int96, precisions, is_map, hasParquetFieldIds, + parquetFieldIds, children, childs_children, read_index); } } return read_index; @@ -701,12 +706,15 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_names, jintArray &j_children, jbooleanArray &j_col_nullability, jbooleanArray &j_is_int96, jintArray &j_precisions, - jbooleanArray &j_is_map, cudf::io::table_input_metadata &metadata) { + jbooleanArray &j_is_map, cudf::io::table_input_metadata &metadata, + jbooleanArray &j_hasParquetFieldIds, jintArray &j_parquetFieldIds) { cudf::jni::auto_set_device(env); cudf::jni::native_jstringArray col_names(env, j_col_names); cudf::jni::native_jbooleanArray col_nullability(env, j_col_nullability); cudf::jni::native_jbooleanArray is_int96(env, j_is_int96); cudf::jni::native_jintArray precisions(env, j_precisions); + cudf::jni::native_jbooleanArray hasParquetFieldIds(env, j_hasParquetFieldIds); + cudf::jni::native_jintArray parquetFieldIds(env, j_parquetFieldIds); cudf::jni::native_jintArray children(env, j_children); cudf::jni::native_jbooleanArray is_map(env, j_is_map); @@ -729,11 +737,14 @@ void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_nam if (is_map[read_index]) { metadata.column_metadata[write_index].set_list_column_as_map(); } + if (!parquetFieldIds.is_null() && hasParquetFieldIds[read_index]) { + metadata.column_metadata[write_index].set_parquet_field_id(parquetFieldIds[read_index]); + } int childs_children = children[read_index++]; if (childs_children > 0) { - read_index = - set_column_metadata(metadata.column_metadata[write_index], cpp_names, col_nullability, - is_int96, precisions, is_map, children, childs_children, read_index); + read_index = set_column_metadata( + metadata.column_metadata[write_index], cpp_names, col_nullability, is_int96, precisions, + is_map, hasParquetFieldIds, parquetFieldIds, children, childs_children, read_index); } } } @@ -1539,7 +1550,8 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin( JNIEnv *env, jclass, jobjectArray j_col_names, jint j_num_children, jintArray j_children, jbooleanArray 
j_col_nullability, jobjectArray j_metadata_keys, jobjectArray j_metadata_values, jint j_compression, jint j_stats_freq, jbooleanArray j_isInt96, jintArray j_precisions, - jbooleanArray j_is_map, jobject consumer) { + jbooleanArray j_is_map, jbooleanArray j_hasParquetFieldIds, jintArray j_parquetFieldIds, + jobject consumer) { JNI_NULL_CHECK(env, j_col_names, "null columns", 0); JNI_NULL_CHECK(env, j_col_nullability, "null nullability", 0); JNI_NULL_CHECK(env, j_metadata_keys, "null metadata keys", 0); @@ -1554,7 +1566,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin( sink_info sink{data_sink.get()}; table_input_metadata metadata; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_isInt96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); @@ -1583,7 +1595,8 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin( JNIEnv *env, jclass, jobjectArray j_col_names, jint j_num_children, jintArray j_children, jbooleanArray j_col_nullability, jobjectArray j_metadata_keys, jobjectArray j_metadata_values, jint j_compression, jint j_stats_freq, jbooleanArray j_isInt96, jintArray j_precisions, - jbooleanArray j_is_map, jstring j_output_path) { + jbooleanArray j_is_map, jbooleanArray j_hasParquetFieldIds, jintArray j_parquetFieldIds, + jstring j_output_path) { JNI_NULL_CHECK(env, j_col_names, "null columns", 0); JNI_NULL_CHECK(env, j_col_nullability, "null nullability", 0); JNI_NULL_CHECK(env, j_metadata_keys, "null metadata keys", 0); @@ -1596,7 +1609,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin( using namespace cudf::jni; table_input_metadata metadata; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_isInt96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); @@ -1721,8 +1734,12 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin( table_input_metadata metadata; // ORC has no `j_is_int96`, but `createTableMetaData` needs an lvalue. jbooleanArray j_is_int96 = NULL; + // ORC has no `j_parquetFieldIds`, but `createTableMetaData` needs an lvalue. + jbooleanArray j_hasParquetFieldIds = NULL; + jintArray j_parquetFieldIds = NULL; + createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_is_int96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); @@ -1766,8 +1783,11 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin( table_input_metadata metadata; // ORC has no `j_is_int96`, but `createTableMetaData` needs an lvalue. jbooleanArray j_is_int96 = NULL; + // ORC has no `j_parquetFieldIds`, but `createTableMetaData` needs an lvalue.
+ jbooleanArray j_hasParquetFieldIds = NULL; + jintArray j_parquetFieldIds = NULL; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_is_int96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 58901d5743b..a42846aac05 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -20,6 +20,7 @@ import ai.rapids.cudf.ColumnView.FindOptions; import ai.rapids.cudf.HostColumnVector.*; +import com.google.common.collect.Lists; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -6259,4 +6260,43 @@ void testCopyWithBooleanColumnAsValidity() { }); assertTrue(x.getMessage().contains("Exemplar and validity columns must have the same size")); } + + @Test + void testSegmentedGather() { + HostColumnVector.DataType dt = new ListType(true, new BasicType(true, DType.STRING)); + try (ColumnVector source = ColumnVector.fromLists(dt, + Lists.newArrayList("a", "b", null, "c"), + null, + Lists.newArrayList(), + Lists.newArrayList(null, "A", "B", "C", "D")); + ColumnVector gatherMap = ColumnVector.fromLists( + new ListType(false, new BasicType(false, DType.INT32)), + Lists.newArrayList(-3, 0, 2, 3, 4), + Lists.newArrayList(), + Lists.newArrayList(1), + Lists.newArrayList(1, -4, 5, -1, -6)); + ColumnVector actual = source.segmentedGather(gatherMap); + ColumnVector expected = ColumnVector.fromLists(dt, + Lists.newArrayList("b", "a", null, "c", null), + null, + Lists.newArrayList((String) null), + Lists.newArrayList("A", "A", null, "D", null))) { + assertColumnsAreEqual(expected, actual); + } + } + + @Test + void testGenerateListOffsets() { + try (ColumnVector index = ColumnVector.fromInts(1, 3, 3, 0, 2, 0, 0, 5, 10, 25); + ColumnVector actual = index.generateListOffsets(); + ColumnVector expected = ColumnVector.fromInts(0, 1, 4, 7, 7, 9, 9, 9, 14, 24, 49)) { + assertColumnsAreEqual(expected, actual); + } + + try (ColumnVector index = ColumnVector.fromInts(0, 0, 1, 0, 0); + ColumnVector actual = index.generateListOffsets(); + ColumnVector expected = ColumnVector.fromInts(0, 0, 0, 1, 1, 1)) { + assertColumnsAreEqual(expected, actual); + } + } } diff --git a/java/src/test/java/ai/rapids/cudf/CudaTest.java b/java/src/test/java/ai/rapids/cudf/CudaTest.java index 8905c2edd56..1a86dbb374d 100644 --- a/java/src/test/java/ai/rapids/cudf/CudaTest.java +++ b/java/src/test/java/ai/rapids/cudf/CudaTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,7 +18,7 @@ import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; public class CudaTest { @@ -32,4 +32,17 @@ public void testGetCudaRuntimeInfo() { assertEquals(Cuda.getNativeComputeMode(), Cuda.getComputeMode().nativeId); } + @Test + public void testCudaException() { + assertThrows(CudaException.class, () -> { + try { + Cuda.memset(Long.MAX_VALUE, (byte) 0, 1024); + } catch (CudaFatalException ignored) { + } catch (CudaException ex) { + assertEquals(CudaException.CudaError.cudaErrorInvalidValue, ex.cudaError); + throw ex; + } + } + ); + } } diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 7be1ca2118b..af28cfb6d6c 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -36,6 +36,7 @@ import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.hadoop.util.HadoopInputFile; +import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; import org.junit.jupiter.api.Test; @@ -7899,6 +7900,126 @@ void testParquetWriteToFileUncompressedNoStats() throws IOException { } } + @Test + void testParquetWriteWithFieldId() throws IOException { + // field IDs are: + // c1: -1, c2: 2, c3: 3, c31: 31, c32: 32, c4: -4, c5: not specified + ColumnWriterOptions.StructBuilder sBuilder = + structBuilder("c3", true, 3) + .withColumn(true, "c31", 31) + .withColumn(true, "c32", 32); + ParquetWriterOptions options = ParquetWriterOptions.builder() + .withColumn(true, "c1", -1) + .withDecimalColumn("c2", 9, true, 2) + .withStructColumn(sBuilder.build()) + .withTimestampColumn("c4", true, true, -4) + .withColumns( true, "c5") + .build(); + + File tempFile = File.createTempFile("test-field-id", ".parquet"); + try { + HostColumnVector.StructType structType = new HostColumnVector.StructType( + true, + new HostColumnVector.BasicType(true, DType.STRING), + new HostColumnVector.BasicType(true, DType.STRING)); + + try (Table table0 = new Table.TestBuilder() + .column(true, false) // c1 + .decimal32Column(0, 298, 2473) // c2 + .column(structType, // c3 + new HostColumnVector.StructData("a", "b"), new HostColumnVector.StructData("a", "b")) + .timestampMicrosecondsColumn(1000L, 2000L) // c4 + .column("a", "b") // c5 + .build()) { + try (TableWriter writer = Table.writeParquetChunked(options, tempFile.getAbsoluteFile())) { + writer.write(table0); + } + } + + try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath( + new Path(tempFile.getAbsolutePath()), + new Configuration()))) { + MessageType schema = reader.getFooter().getFileMetaData().getSchema(); + assert (schema.getFields().get(0).getId().intValue() == -1); + assert (schema.getFields().get(1).getId().intValue() == 2); + assert (schema.getFields().get(2).getId().intValue() == 3); + assert (((GroupType) schema.getFields().get(2)).getFields().get(0).getId().intValue() == 31); + assert (((GroupType) schema.getFields().get(2)).getFields().get(1).getId().intValue() == 32); + assert (schema.getFields().get(3).getId().intValue() == -4); + assert (schema.getFields().get(4).getId() == null); + } + } finally { + tempFile.delete(); + } + } + + @Test + void testParquetWriteWithFieldIdNestNotSpecified() throws IOException { + // field IDs are: + // c0: no field ID + // c1: 1 + // c2: no field ID + // c21: 21 + 
// c22: no field ID + // c3: 3 + // c31: 31 + // c32: no field ID + // c4: 0 + ColumnWriterOptions.StructBuilder c2Builder = + structBuilder("c2", true) + .withColumn(true, "c21", 21) + .withColumns(true, "c22"); + ColumnWriterOptions.StructBuilder c3Builder = + structBuilder("c3", true, 3) + .withColumn(true, "c31", 31) + .withColumns(true, "c32"); + ParquetWriterOptions options = ParquetWriterOptions.builder() + .withColumns(true, "c0") + .withDecimalColumn("c1", 9, true, 1) + .withStructColumn(c2Builder.build()) + .withStructColumn(c3Builder.build()) + .withColumn(true, "c4", 0) + .build(); + + File tempFile = File.createTempFile("test-field-id", ".parquet"); + try { + HostColumnVector.StructType structType = new HostColumnVector.StructType( + true, + new HostColumnVector.BasicType(true, DType.STRING), + new HostColumnVector.BasicType(true, DType.STRING)); + + try (Table table0 = new Table.TestBuilder() + .column(true, false) // c0 + .decimal32Column(0, 298, 2473) // c1 + .column(structType, // c2 + new HostColumnVector.StructData("a", "b"), new HostColumnVector.StructData("a", "b")) + .column(structType, // c3 + new HostColumnVector.StructData("a", "b"), new HostColumnVector.StructData("a", "b")) + .column("a", "b") // c4 + .build()) { + try (TableWriter writer = Table.writeParquetChunked(options, tempFile.getAbsoluteFile())) { + writer.write(table0); + } + } + + try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath( + new Path(tempFile.getAbsolutePath()), + new Configuration()))) { + MessageType schema = reader.getFooter().getFileMetaData().getSchema(); + assert (schema.getFields().get(0).getId() == null); + assert (schema.getFields().get(1).getId().intValue() == 1); + assert (schema.getFields().get(2).getId() == null); + assert (((GroupType) schema.getFields().get(2)).getFields().get(0).getId().intValue() == 21); + assert (((GroupType) schema.getFields().get(2)).getFields().get(1).getId() == null); + assert (((GroupType) schema.getFields().get(3)).getFields().get(0).getId().intValue() == 31); + assert (((GroupType) schema.getFields().get(3)).getFields().get(1).getId() == null); + assert (schema.getFields().get(4).getId().intValue() == 0); + } + } finally { + tempFile.delete(); + } + } + /** Return a column where DECIMAL64 has been up-casted to DECIMAL128 */ private ColumnVector castDecimal64To128(ColumnView c) { DType dtype = c.getType(); diff --git a/python/.flake8 b/python/.flake8 index c645c46a216..667875030cc 100644 --- a/python/.flake8 +++ b/python/.flake8 @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. [flake8] exclude = __init__.py @@ -9,14 +9,14 @@ ignore = E203 [pydocstyle] -match = ^(.*abc\.py|.*api/types\.py|.*single_column_frame\.py|.*indexed_frame\.py)$ -# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather than include using match-dir. -match-dir = ^(?!ci|cpp|python/dask_cudf|python/cudf_kafka|python/custreamz).*$ -# In addition to numpy style, we additionally ignore: -add-ignore = - # magic methods - D105, - # no docstring in __init__ - D107, - # newlines before docstrings - D204 +# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather +# than include using match-dir. 
Note that as discussed in +# https://stackoverflow.com/questions/65478393/how-to-filter-directories-using-the-match-dir-flag-for-pydocstyle, +# unlike the match option above this match-dir will have no effect when +# pydocstyle is invoked from pre-commit. Therefore this exclusion list must +# also be maintained in the pre-commit config file. +match-dir = ^(?!(ci|cpp|conda|docs|java|notebooks|dask_cudf|cudf_kafka|custreamz|tests)).*$ +# Allow missing docstrings for docutils +ignore-decorators = .*(docutils|doc_apply|copy_docstring).* +select = + D30 diff --git a/python/cudf/cudf/_fuzz_testing/fuzzer.py b/python/cudf/cudf/_fuzz_testing/fuzzer.py index b99cd938a63..59d6f198681 100644 --- a/python/cudf/cudf/_fuzz_testing/fuzzer.py +++ b/python/cudf/cudf/_fuzz_testing/fuzzer.py @@ -57,7 +57,7 @@ def log_stats(self): logging.info(f"Run-Time elapsed (hh:mm:ss.ms) {total_time_taken}") def write_crash(self, error): - error_file_name = datetime.datetime.now().__str__() + error_file_name = str(datetime.datetime.now()) if self._crash_dir: crash_path = os.path.join( self._crash_dir, diff --git a/python/cudf/cudf/_fuzz_testing/json.py b/python/cudf/cudf/_fuzz_testing/json.py index f850a7e79f9..29e0aeb7050 100644 --- a/python/cudf/cudf/_fuzz_testing/json.py +++ b/python/cudf/cudf/_fuzz_testing/json.py @@ -2,7 +2,7 @@ import logging import random -from collections import abc as abc +from collections import abc import numpy as np diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index bd25aa53405..542262b7908 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import numpy as np from . import ( @@ -8,6 +8,7 @@ copying, csv, datetime, + expressions, filling, gpuarrow, groupby, diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx index 4dc91268d57..84dd9c3a576 100644 --- a/python/cudf/cudf/_lib/aggregation.pyx +++ b/python/cudf/cudf/_lib/aggregation.pyx @@ -30,7 +30,10 @@ from cudf._lib.types import Interpolation cimport cudf._lib.cpp.aggregation as libcudf_aggregation cimport cudf._lib.cpp.types as libcudf_types -from cudf._lib.cpp.aggregation cimport underlying_type_t_correlation_type +from cudf._lib.cpp.aggregation cimport ( + underlying_type_t_correlation_type, + underlying_type_t_rank_method, +) import cudf @@ -54,6 +57,7 @@ class AggregationKind(Enum): ARGMIN = libcudf_aggregation.aggregation.Kind.ARGMIN NUNIQUE = libcudf_aggregation.aggregation.Kind.NUNIQUE NTH = libcudf_aggregation.aggregation.Kind.NTH_ELEMENT + RANK = libcudf_aggregation.aggregation.Kind.RANK COLLECT = libcudf_aggregation.aggregation.Kind.COLLECT UNIQUE = libcudf_aggregation.aggregation.Kind.COLLECT_SET PTX = libcudf_aggregation.aggregation.Kind.PTX @@ -77,6 +81,14 @@ class CorrelationType(IntEnum): ) +class RankMethod(IntEnum): + FIRST = libcudf_aggregation.rank_method.FIRST + AVERAGE = libcudf_aggregation.rank_method.AVERAGE + MIN = libcudf_aggregation.rank_method.MIN + MAX = libcudf_aggregation.rank_method.MAX + DENSE = libcudf_aggregation.rank_method.DENSE + + cdef class RollingAggregation: """A Cython wrapper for rolling window aggregations. 
@@ -564,6 +576,33 @@ cdef class GroupbyScanAggregation: cummin = min cummax = max + @classmethod + def rank(cls, method, ascending, na_option, pct): + cdef GroupbyScanAggregation agg = cls() + cdef libcudf_aggregation.rank_method c_method = ( + <libcudf_aggregation.rank_method> ( + <underlying_type_t_rank_method> ( + RankMethod[method.upper()] + ) + ) + ) + agg.c_obj = move( + libcudf_aggregation. + make_rank_aggregation[groupby_scan_aggregation]( + c_method, + (libcudf_types.order.ASCENDING if ascending else + libcudf_types.order.DESCENDING), + (libcudf_types.null_policy.EXCLUDE if na_option == "keep" else + libcudf_types.null_policy.INCLUDE), + (libcudf_types.null_order.BEFORE + if (na_option == "top") == ascending else + libcudf_types.null_order.AFTER), + (libcudf_aggregation.rank_percentage.ZERO_NORMALIZED + if pct else + libcudf_aggregation.rank_percentage.NONE) + )) + return agg + cdef class ReduceAggregation: """A Cython wrapper for reduce aggregations. diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 448a22425a4..8cbadfa19a5 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -7,7 +7,7 @@ import pandas as pd import rmm import cudf -import cudf._lib as libcudfxx +import cudf._lib as libcudf from cudf.api.types import is_categorical_dtype, is_list_dtype, is_struct_dtype from cudf.core.buffer import Buffer @@ -160,7 +160,7 @@ cdef class Column: if self.base_mask is None or self.offset == 0: self._mask = self.base_mask else: - self._mask = libcudfxx.null_mask.copy_bitmask(self) + self._mask = libcudf.null_mask.copy_bitmask(self) return self._mask @property diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd index 399deb74c9c..a1d1485e1e8 100644 --- a/python/cudf/cudf/_lib/cpp/aggregation.pxd +++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION.
from libc.stdint cimport int32_t +from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector @@ -7,11 +8,14 @@ from libcpp.vector cimport vector from cudf._lib.cpp.types cimport ( data_type, interpolation, + null_order, null_policy, + order, size_type, ) ctypedef int32_t underlying_type_t_correlation_type +ctypedef int32_t underlying_type_t_rank_method cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: @@ -35,6 +39,7 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: ARGMIN 'cudf::aggregation::ARGMIN' NUNIQUE 'cudf::aggregation::NUNIQUE' NTH_ELEMENT 'cudf::aggregation::NTH_ELEMENT' + RANK 'cudf::aggregation::RANK' COLLECT 'cudf::aggregation::COLLECT_LIST' COLLECT_SET 'cudf::aggregation::COLLECT_SET' PTX 'cudf::aggregation::PTX' @@ -68,6 +73,18 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: KENDALL 'cudf::correlation_type::KENDALL' SPEARMAN 'cudf::correlation_type::SPEARMAN' + ctypedef enum rank_method: + FIRST "cudf::rank_method::FIRST" + AVERAGE "cudf::rank_method::AVERAGE" + MIN "cudf::rank_method::MIN" + MAX "cudf::rank_method::MAX" + DENSE "cudf::rank_method::DENSE" + + ctypedef enum rank_percentage: + NONE "cudf::rank_percentage::NONE" + ZERO_NORMALIZED "cudf::rank_percentage::ZERO_NORMALIZED" + ONE_NORMALIZED "cudf::rank_percentage::ONE_NORMALIZED" + cdef unique_ptr[T] make_sum_aggregation[T]() except + cdef unique_ptr[T] make_product_aggregation[T]() except + @@ -127,3 +144,10 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: cdef unique_ptr[T] make_covariance_aggregation[T]( size_type min_periods, size_type ddof) except + + + cdef unique_ptr[T] make_rank_aggregation[T]( + rank_method method, + order column_order, + null_policy null_handling, + null_order null_precedence, + rank_percentage percentage) except + diff --git a/python/cudf/cudf/_lib/cpp/expressions.pxd b/python/cudf/cudf/_lib/cpp/expressions.pxd new file mode 100644 index 00000000000..1721f8aa734 --- /dev/null +++ b/python/cudf/cudf/_lib/cpp/expressions.pxd @@ -0,0 +1,88 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. 
+ +from libcpp.memory cimport unique_ptr + +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.scalar.scalar cimport ( + duration_scalar, + numeric_scalar, + timestamp_scalar, +) +from cudf._lib.cpp.table.table_view cimport table_view +from cudf._lib.cpp.types cimport size_type + + +cdef extern from "cudf/ast/expressions.hpp" namespace "cudf::ast" nogil: + ctypedef enum ast_operator: + # Binary operators + ADD "cudf::ast::ast_operator::ADD" + SUB "cudf::ast::ast_operator::SUB" + MUL "cudf::ast::ast_operator::MUL" + DIV "cudf::ast::ast_operator::DIV" + TRUE_DIV "cudf::ast::ast_operator::TRUE_DIV" + FLOOR_DIV "cudf::ast::ast_operator::FLOOR_DIV" + MOD "cudf::ast::ast_operator::MOD" + PYMOD "cudf::ast::ast_operator::PYMOD" + POW "cudf::ast::ast_operator::POW" + EQUAL "cudf::ast::ast_operator::EQUAL" + NULL_EQUAL "cudf::ast::ast_operator::NULL_EQUAL" + NOT_EQUAL "cudf::ast::ast_operator::NOT_EQUAL" + LESS "cudf::ast::ast_operator::LESS" + GREATER "cudf::ast::ast_operator::GREATER" + LESS_EQUAL "cudf::ast::ast_operator::LESS_EQUAL" + GREATER_EQUAL "cudf::ast::ast_operator::GREATER_EQUAL" + BITWISE_AND "cudf::ast::ast_operator::BITWISE_AND" + BITWISE_OR "cudf::ast::ast_operator::BITWISE_OR" + BITWISE_XOR "cudf::ast::ast_operator::BITWISE_XOR" + NULL_LOGICAL_AND "cudf::ast::ast_operator::NULL_LOGICAL_AND" + LOGICAL_AND "cudf::ast::ast_operator::LOGICAL_AND" + NULL_LOGICAL_OR "cudf::ast::ast_operator::NULL_LOGICAL_OR" + LOGICAL_OR "cudf::ast::ast_operator::LOGICAL_OR" + # Unary operators + IDENTITY "cudf::ast::ast_operator::IDENTITY" + SIN "cudf::ast::ast_operator::SIN" + COS "cudf::ast::ast_operator::COS" + TAN "cudf::ast::ast_operator::TAN" + ARCSIN "cudf::ast::ast_operator::ARCSIN" + ARCCOS "cudf::ast::ast_operator::ARCCOS" + ARCTAN "cudf::ast::ast_operator::ARCTAN" + SINH "cudf::ast::ast_operator::SINH" + COSH "cudf::ast::ast_operator::COSH" + TANH "cudf::ast::ast_operator::TANH" + ARCSINH "cudf::ast::ast_operator::ARCSINH" + ARCCOSH "cudf::ast::ast_operator::ARCCOSH" + ARCTANH "cudf::ast::ast_operator::ARCTANH" + EXP "cudf::ast::ast_operator::EXP" + LOG "cudf::ast::ast_operator::LOG" + SQRT "cudf::ast::ast_operator::SQRT" + CBRT "cudf::ast::ast_operator::CBRT" + CEIL "cudf::ast::ast_operator::CEIL" + FLOOR "cudf::ast::ast_operator::FLOOR" + ABS "cudf::ast::ast_operator::ABS" + RINT "cudf::ast::ast_operator::RINT" + BIT_INVERT "cudf::ast::ast_operator::BIT_INVERT" + NOT "cudf::ast::ast_operator::NOT" + + cdef cppclass expression: + pass + + ctypedef enum table_reference: + LEFT "cudf::ast::table_reference::LEFT" + RIGHT "cudf::ast::table_reference::RIGHT" + + cdef cppclass literal(expression): + # Due to https://github.com/cython/cython/issues/3198, we need to + # specify a return type for templated constructors. + literal literal[T](numeric_scalar[T] &) except + + literal literal[T](timestamp_scalar[T] &) except + + literal literal[T](duration_scalar[T] &) except + + + cdef cppclass column_reference(expression): + # Allow for default C++ parameters by declaring multiple constructors + # with the default parameters optionally omitted. 
+ column_reference(size_type) except + + column_reference(size_type, table_reference) except + + + cdef cppclass operation(expression): + operation(ast_operator, const expression &) + operation(ast_operator, const expression &, const expression&) diff --git a/python/cudf/cudf/_lib/cpp/lists/contains.pxd b/python/cudf/cudf/_lib/cpp/lists/contains.pxd index 46aea37643f..e3cb01721a0 100644 --- a/python/cudf/cudf/_lib/cpp/lists/contains.pxd +++ b/python/cudf/cudf/_lib/cpp/lists/contains.pxd @@ -18,3 +18,8 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: lists_column_view lists, scalar search_key, ) except + + + cdef unique_ptr[column] index_of( + lists_column_view lists, + column_view search_keys, + ) except + diff --git a/python/cudf/cudf/_lib/cpp/sorting.pxd b/python/cudf/cudf/_lib/cpp/sorting.pxd index 243b841ce4b..c6c42c327ac 100644 --- a/python/cudf/cudf/_lib/cpp/sorting.pxd +++ b/python/cudf/cudf/_lib/cpp/sorting.pxd @@ -7,20 +7,13 @@ from libcpp.vector cimport vector from cudf._lib.types import cudf_to_np_types, np_to_cudf_types cimport cudf._lib.cpp.types as libcudf_types +from cudf._lib.cpp.aggregation cimport rank_method from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: - ctypedef enum rank_method: - FIRST "cudf::rank_method::FIRST" - AVERAGE "cudf::rank_method::AVERAGE" - MIN "cudf::rank_method::MIN" - MAX "cudf::rank_method::MAX" - DENSE "cudf::rank_method::DENSE" - cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: cdef unique_ptr[column] sorted_order( table_view source_table, diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index 590a371ff52..d9de04b676e 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -9,6 +9,7 @@ from rmm._lib.device_buffer cimport device_buffer from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.cpp.expressions cimport expression from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport bitmask_type, data_type, size_type @@ -42,3 +43,8 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: column_view input_column, column_view categories ) + + cdef unique_ptr[column] compute_column( + const table_view table, + const expression& expr + ) except + diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx index 05ff32392fe..f1a75baa951 100644 --- a/python/cudf/cudf/_lib/csv.pyx +++ b/python/cudf/cudf/_lib/csv.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
from libcpp cimport bool from libcpp.map cimport map @@ -19,9 +19,9 @@ import cudf from cudf._lib.cpp.types cimport size_type -import collections.abc as abc import errno import os +from collections import abc from enum import IntEnum from io import BytesIO, StringIO @@ -238,7 +238,7 @@ cdef csv_reader_options make_csv_reader_options( "`parse_dates`: dictionaries are unsupported") if not isinstance(parse_dates, abc.Iterable): raise NotImplementedError( - "`parse_dates`: non-lists are unsupported") + "`parse_dates`: an iterable is required") for col in parse_dates: if isinstance(col, str): c_parse_dates_names.push_back(str(col).encode()) @@ -279,7 +279,7 @@ cdef csv_reader_options make_csv_reader_options( ) csv_reader_options_c.set_dtypes(c_dtypes_list) csv_reader_options_c.set_parse_hex(c_hex_col_indexes) - elif isinstance(dtype, abc.Iterable): + elif isinstance(dtype, abc.Collection): c_dtypes_list.reserve(len(dtype)) for index, col_dtype in enumerate(dtype): if col_dtype in CSV_HEX_TYPE_MAP: diff --git a/python/cudf/cudf/_lib/expressions.pxd b/python/cudf/cudf/_lib/expressions.pxd new file mode 100644 index 00000000000..85665822174 --- /dev/null +++ b/python/cudf/cudf/_lib/expressions.pxd @@ -0,0 +1,38 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +from libc.stdint cimport int32_t, int64_t +from libcpp.memory cimport unique_ptr + +from cudf._lib.cpp.expressions cimport ( + column_reference, + expression, + literal, + operation, +) +from cudf._lib.cpp.scalar.scalar cimport numeric_scalar + +ctypedef enum scalar_type_t: + INT + DOUBLE + + +ctypedef union int_or_double_scalar_ptr: + unique_ptr[numeric_scalar[int64_t]] int_ptr + unique_ptr[numeric_scalar[double]] double_ptr + + +cdef class Expression: + cdef unique_ptr[expression] c_obj + + +cdef class Literal(Expression): + cdef scalar_type_t c_scalar_type + cdef int_or_double_scalar_ptr c_scalar + + +cdef class ColumnReference(Expression): + pass + + +cdef class Operation(Expression): + pass diff --git a/python/cudf/cudf/_lib/expressions.pyx b/python/cudf/cudf/_lib/expressions.pyx new file mode 100644 index 00000000000..f069bcdbe73 --- /dev/null +++ b/python/cudf/cudf/_lib/expressions.pyx @@ -0,0 +1,130 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +from enum import Enum + +from cython.operator cimport dereference +from libc.stdint cimport int64_t +from libcpp.memory cimport make_unique, unique_ptr +from libcpp.utility cimport move + +from cudf._lib.cpp cimport expressions as libcudf_exp +from cudf._lib.cpp.types cimport size_type + +# Necessary for proper casting, see below. 
+ctypedef int32_t underlying_type_ast_operator + + +# Aliases for simplicity +ctypedef unique_ptr[libcudf_exp.expression] expression_ptr + + +class ASTOperator(Enum): + ADD = libcudf_exp.ast_operator.ADD + SUB = libcudf_exp.ast_operator.SUB + MUL = libcudf_exp.ast_operator.MUL + DIV = libcudf_exp.ast_operator.DIV + TRUE_DIV = libcudf_exp.ast_operator.TRUE_DIV + FLOOR_DIV = libcudf_exp.ast_operator.FLOOR_DIV + MOD = libcudf_exp.ast_operator.MOD + PYMOD = libcudf_exp.ast_operator.PYMOD + POW = libcudf_exp.ast_operator.POW + EQUAL = libcudf_exp.ast_operator.EQUAL + NULL_EQUAL = libcudf_exp.ast_operator.NULL_EQUAL + NOT_EQUAL = libcudf_exp.ast_operator.NOT_EQUAL + LESS = libcudf_exp.ast_operator.LESS + GREATER = libcudf_exp.ast_operator.GREATER + LESS_EQUAL = libcudf_exp.ast_operator.LESS_EQUAL + GREATER_EQUAL = libcudf_exp.ast_operator.GREATER_EQUAL + BITWISE_AND = libcudf_exp.ast_operator.BITWISE_AND + BITWISE_OR = libcudf_exp.ast_operator.BITWISE_OR + BITWISE_XOR = libcudf_exp.ast_operator.BITWISE_XOR + LOGICAL_AND = libcudf_exp.ast_operator.LOGICAL_AND + NULL_LOGICAL_AND = libcudf_exp.ast_operator.NULL_LOGICAL_AND + LOGICAL_OR = libcudf_exp.ast_operator.LOGICAL_OR + NULL_LOGICAL_OR = libcudf_exp.ast_operator.NULL_LOGICAL_OR + # Unary operators + IDENTITY = libcudf_exp.ast_operator.IDENTITY + SIN = libcudf_exp.ast_operator.SIN + COS = libcudf_exp.ast_operator.COS + TAN = libcudf_exp.ast_operator.TAN + ARCSIN = libcudf_exp.ast_operator.ARCSIN + ARCCOS = libcudf_exp.ast_operator.ARCCOS + ARCTAN = libcudf_exp.ast_operator.ARCTAN + SINH = libcudf_exp.ast_operator.SINH + COSH = libcudf_exp.ast_operator.COSH + TANH = libcudf_exp.ast_operator.TANH + ARCSINH = libcudf_exp.ast_operator.ARCSINH + ARCCOSH = libcudf_exp.ast_operator.ARCCOSH + ARCTANH = libcudf_exp.ast_operator.ARCTANH + EXP = libcudf_exp.ast_operator.EXP + LOG = libcudf_exp.ast_operator.LOG + SQRT = libcudf_exp.ast_operator.SQRT + CBRT = libcudf_exp.ast_operator.CBRT + CEIL = libcudf_exp.ast_operator.CEIL + FLOOR = libcudf_exp.ast_operator.FLOOR + ABS = libcudf_exp.ast_operator.ABS + RINT = libcudf_exp.ast_operator.RINT + BIT_INVERT = libcudf_exp.ast_operator.BIT_INVERT + NOT = libcudf_exp.ast_operator.NOT + + +class TableReference(Enum): + LEFT = libcudf_exp.table_reference.LEFT + RIGHT = libcudf_exp.table_reference.RIGHT + + +# Note that this function only currently supports numeric literals. libcudf +# expressions don't really support other types yet though, so this isn't +# restrictive at the moment. +cdef class Literal(Expression): + def __cinit__(self, value): + # TODO: Would love to find a better solution than unions for literals. 
+ cdef int64_t intval
+ cdef double doubleval
+
+ if isinstance(value, int):
+ self.c_scalar_type = scalar_type_t.INT
+ intval = value
+ self.c_scalar.int_ptr = make_unique[numeric_scalar[int64_t]](
+ intval, True
+ )
+ self.c_obj = make_unique[libcudf_exp.literal](
+ dereference(self.c_scalar.int_ptr)
+ )
+ elif isinstance(value, float):
+ self.c_scalar_type = scalar_type_t.DOUBLE
+ doubleval = value
+ self.c_scalar.double_ptr = make_unique[numeric_scalar[double]](
+ doubleval, True
+ )
+ self.c_obj = make_unique[libcudf_exp.literal](
+ dereference(self.c_scalar.double_ptr)
+ )
+
+
+cdef class ColumnReference(Expression):
+ def __cinit__(self, size_type index):
+ self.c_obj = make_unique[libcudf_exp.column_reference](
+ index
+ )
+
+
+cdef class Operation(Expression):
+ def __cinit__(self, op, Expression left, Expression right=None):
+ # This awkward double casting is the only way to get Cython to generate
+ # valid C++. Cython doesn't support scoped enumerations, so it assumes
+ # that enums correspond to their underlying value types and will thus
+ # attempt operations that are invalid without first explicitly casting
+ # to the underlying before casting to the desired type.
+ cdef libcudf_exp.ast_operator op_value = <libcudf_exp.ast_operator>(
+ <underlying_type_ast_operator> op.value
+ )
+
+ if right is None:
+ self.c_obj = make_unique[libcudf_exp.operation](
+ op_value, dereference(left.c_obj)
+ )
+ else:
+ self.c_obj = make_unique[libcudf_exp.operation](
+ op_value, dereference(left.c_obj), dereference(right.c_obj)
+ )
diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index 48f566b846d..be5bb2741b4 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -341,7 +341,7 @@ cdef class GroupBy:
return columns_from_unique_ptr(move(c_result.second))
-_GROUPBY_SCANS = {"cumcount", "cumsum", "cummin", "cummax"}
+_GROUPBY_SCANS = {"cumcount", "cumsum", "cummin", "cummax", "rank"}
def _is_all_scan_aggregate(all_aggs):
diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx
index 301f571f5fb..8bb8ab92a48 100644
--- a/python/cudf/cudf/_lib/hash.pyx
+++ b/python/cudf/cudf/_lib/hash.pyx
@@ -14,16 +14,14 @@ from cudf._lib.cpp.hash cimport hash as cpp_hash, hash_id as cpp_hash_id
from cudf._lib.cpp.partitioning cimport hash_partition as cpp_hash_partition
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
-def hash_partition(source_table, object columns_to_hash,
- int num_partitions, bool keep_index=True):
+def hash_partition(list source_columns, object columns_to_hash,
+ int num_partitions):
cdef vector[libcudf_types.size_type] c_columns_to_hash = columns_to_hash
cdef int c_num_partitions = num_partitions
- cdef table_view c_source_view = table_view_from_table(
- source_table, not keep_index
- )
+ cdef table_view c_source_view = table_view_from_columns(source_columns)
cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
with nogil:
@@ -36,27 +34,17 @@ def hash_partition(source_table, object columns_to_hash,
)
# Note that the offsets (`c_result.second`) may be empty when
- # the original table (`source_table`) is empty. We need to
+ # the original table (`source_columns`) is empty. We need to
# return a list of zeros in this case.
return ( - *data_from_unique_ptr( - move(c_result.first), - column_names=source_table._column_names, - index_names=( - source_table._index_names - if keep_index is True - else None - ) - - ), - list(c_result.second) if c_result.second.size() - else [0] * num_partitions + columns_from_unique_ptr(move(c_result.first)), + list(c_result.second) + if c_result.second.size() else [0] * num_partitions ) -def hash(source_table, str method, int seed=0): - cdef table_view c_source_view = table_view_from_table( - source_table, ignore_index=True) +def hash(list source_columns, str method, int seed=0): + cdef table_view c_source_view = table_view_from_columns(source_columns) cdef unique_ptr[column] c_result cdef cpp_hash_id c_hash_function if method == "murmur3": diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 06e287ee670..88c8b19ded0 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import cudf @@ -20,12 +20,12 @@ from cudf._lib.cpp.interop cimport ( ) from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table +from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns def from_dlpack(dlpack_capsule): """ - Converts a DLPack Tensor PyCapsule into a cudf Frame object. + Converts a DLPack Tensor PyCapsule into a list of columns. DLPack Tensor PyCapsule is expected to have the name "dltensor". """ @@ -40,31 +40,25 @@ def from_dlpack(dlpack_capsule): cpp_from_dlpack(dlpack_tensor) ) - res = data_from_unique_ptr( - move(c_result), - column_names=range(0, c_result.get()[0].num_columns()) - ) + res = columns_from_unique_ptr(move(c_result)) dlpack_tensor.deleter(dlpack_tensor) return res -def to_dlpack(source_table): +def to_dlpack(list source_columns): """ - Converts a cudf Frame into a DLPack Tensor PyCapsule. + Converts a list of columns into a DLPack Tensor PyCapsule. DLPack Tensor PyCapsule will have the name "dltensor". """ - for column in source_table._columns: - if column.null_count: - raise ValueError( - "Cannot create a DLPack tensor with null values. \ - Input is required to have null count as zero." - ) + if any(column.null_count for column in source_columns): + raise ValueError( + "Cannot create a DLPack tensor with null values. \ + Input is required to have null count as zero." + ) cdef DLManagedTensor *dlpack_tensor - cdef table_view source_table_view = table_view_from_table( - source_table, ignore_index=True - ) + cdef table_view source_table_view = table_view_from_columns(source_columns) with nogil: dlpack_tensor = cpp_to_dlpack( @@ -110,17 +104,14 @@ cdef vector[column_metadata] gather_metadata(object metadata) except *: raise ValueError("Malformed metadata has been encountered") -def to_arrow(input_table, - object metadata, - bool keep_index=True): - """Convert from cudf Frame to PyArrow Table. +def to_arrow(list source_columns, object metadata): + """Convert a list of columns from + cudf Frame to a PyArrow Table. 
Parameters ---------- - input_table : cudf table - column_names : names for the pyarrow arrays - field_names : field names for nested type arrays - keep_index : whether index needs to be part of arrow table + source_columns : a list of columns to convert + metadata : a list of metadata, see `gather_metadata` for layout Returns ------- @@ -128,9 +119,7 @@ def to_arrow(input_table, """ cdef vector[column_metadata] cpp_metadata = gather_metadata(metadata) - cdef table_view input_table_view = ( - table_view_from_table(input_table, not keep_index) - ) + cdef table_view input_table_view = table_view_from_columns(source_columns) cdef shared_ptr[CTable] cpp_arrow_table with nogil: @@ -141,22 +130,16 @@ def to_arrow(input_table, return pyarrow_wrap_table(cpp_arrow_table) -def from_arrow( - object input_table, - object column_names=None, - object index_names=None -): - """Convert from PyArrow Table to cudf Frame. +def from_arrow(object input_table): + """Convert from PyArrow Table to a list of columns. Parameters ---------- input_table : PyArrow table - column_names : names for the cudf table data columns - index_names : names for the cudf table index columns Returns ------- - cudf Frame + A list of columns to construct Frame object """ cdef shared_ptr[CTable] cpp_arrow_table = ( pyarrow_unwrap_table(input_table) @@ -166,8 +149,4 @@ def from_arrow( with nogil: c_result = move(cpp_from_arrow(cpp_arrow_table.get()[0])) - return data_from_unique_ptr( - move(c_result), - column_names=column_names, - index_names=index_names - ) + return columns_from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx index 5921f06d36e..1baef266dab 100644 --- a/python/cudf/cudf/_lib/join.pyx +++ b/python/cudf/cudf/_lib/join.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from itertools import chain @@ -16,31 +16,25 @@ from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport data_type, size_type, type_id -from cudf._lib.utils cimport table_view_from_table +from cudf._lib.utils cimport table_view_from_columns # The functions below return the *gathermaps* that represent # the join result when joining on the keys `lhs` and `rhs`. 
-cpdef join(lhs, rhs, how=None): +cpdef join(list lhs, list rhs, how=None): cdef pair[cpp_join.gather_map_type, cpp_join.gather_map_type] c_result - cdef table_view c_lhs = table_view_from_table(lhs) - cdef table_view c_rhs = table_view_from_table(rhs) + cdef table_view c_lhs = table_view_from_columns(lhs) + cdef table_view c_rhs = table_view_from_columns(rhs) if how == "inner": - c_result = move(cpp_join.inner_join( - c_lhs, - c_rhs - )) + with nogil: + c_result = move(cpp_join.inner_join(c_lhs, c_rhs)) elif how == "left": - c_result = move(cpp_join.left_join( - c_lhs, - c_rhs - )) + with nogil: + c_result = move(cpp_join.left_join(c_lhs, c_rhs)) elif how == "outer": - c_result = move(cpp_join.full_join( - c_lhs, - c_rhs - )) + with nogil: + c_result = move(cpp_join.full_join(c_lhs, c_rhs)) else: raise ValueError(f"Invalid join type {how}") @@ -49,30 +43,23 @@ cpdef join(lhs, rhs, how=None): return left_rows, right_rows -cpdef semi_join(lhs, rhs, how=None): +cpdef semi_join(list lhs, list rhs, how=None): # left-semi and left-anti joins cdef cpp_join.gather_map_type c_result - cdef table_view c_lhs = table_view_from_table(lhs) - cdef table_view c_rhs = table_view_from_table(rhs) + cdef table_view c_lhs = table_view_from_columns(lhs) + cdef table_view c_rhs = table_view_from_columns(rhs) if how == "leftsemi": - c_result = move(cpp_join.left_semi_join( - c_lhs, - c_rhs - )) + with nogil: + c_result = move(cpp_join.left_semi_join(c_lhs, c_rhs)) elif how == "leftanti": - c_result = move(cpp_join.left_anti_join( - c_lhs, - c_rhs - )) + with nogil: + c_result = move(cpp_join.left_anti_join(c_lhs, c_rhs)) else: raise ValueError(f"Invalid join type {how}") cdef Column left_rows = _gather_map_as_column(move(c_result)) - return ( - left_rows, - None - ) + return left_rows, None cdef Column _gather_map_as_column(cpp_join.gather_map_type gather_map): diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index 48da83450d7..263d70afe26 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -1,11 +1,11 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. 
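# Usage sketch for the gathermap-returning joins above (internal API,
# hypothetical key columns built via the internal Series._column accessor):
import cudf
from cudf._lib.join import join

lhs_key = cudf.Series([0, 1, 2])._column
rhs_key = cudf.Series([1, 2, 3])._column
left_rows, right_rows = join([lhs_key], [rhs_key], how="inner")
# Output row i pairs lhs row left_rows[i] with rhs row right_rows[i];
# callers then gather their payload columns with these maps.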
# cython: boundscheck = False -import collections.abc as abc import io import os +from collections import abc import cudf @@ -82,15 +82,15 @@ cpdef read_json(object filepaths_or_buffers, for k, v in dtype.items(): c_dtypes_map[str(k).encode()] = \ _get_cudf_data_type_from_dtype(v) - elif not isinstance(dtype, abc.Iterable): - raise TypeError("`dtype` must be 'list like' or 'dict'") - else: + elif isinstance(dtype, abc.Collection): is_list_like_dtypes = True c_dtypes_list.reserve(len(dtype)) for col_dtype in dtype: c_dtypes_list.push_back( _get_cudf_data_type_from_dtype( col_dtype)) + else: + raise TypeError("`dtype` must be 'list like' or 'dict'") cdef json_reader_options opts = move( json_reader_options.builder(make_source_info(filepaths_or_buffers)) diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 523686fafe6..025fb0665d3 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -42,7 +42,7 @@ from cudf.core.dtypes import ListDtype from cudf._lib.cpp.lists.contains cimport contains, index_of as cpp_index_of from cudf._lib.cpp.lists.extract cimport extract_list_element -from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table +from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns def count_elements(Column col): @@ -61,8 +61,10 @@ def count_elements(Column col): return result -def explode_outer(tbl, int explode_column_idx, bool ignore_index=False): - cdef table_view c_table_view = table_view_from_table(tbl, ignore_index) +def explode_outer( + list source_columns, int explode_column_idx +): + cdef table_view c_table_view = table_view_from_columns(source_columns) cdef size_type c_explode_column_idx = explode_column_idx cdef unique_ptr[table] c_result @@ -70,11 +72,7 @@ def explode_outer(tbl, int explode_column_idx, bool ignore_index=False): with nogil: c_result = move(cpp_explode_outer(c_table_view, c_explode_column_idx)) - return data_from_unique_ptr( - move(c_result), - column_names=tbl._column_names, - index_names=None if ignore_index else tbl._index_names - ) + return columns_from_unique_ptr(move(c_result)) def drop_list_duplicates(Column col, bool nulls_equal, bool nans_all_equal): @@ -178,7 +176,7 @@ def contains_scalar(Column col, object py_search_key): return result -def index_of(Column col, object py_search_key): +def index_of_scalar(Column col, object py_search_key): cdef DeviceScalar search_key = py_search_key.device_value @@ -197,18 +195,35 @@ def index_of(Column col, object py_search_key): return Column.from_unique_ptr(move(c_result)) -def concatenate_rows(tbl): +def index_of_column(Column col, Column search_keys): + + cdef column_view keys_view = search_keys.view() + + cdef shared_ptr[lists_column_view] list_view = ( + make_shared[lists_column_view](col.view()) + ) + cdef unique_ptr[column] c_result - cdef table_view c_table_view = table_view_from_table(tbl) + with nogil: + c_result = move(cpp_index_of( + list_view.get()[0], + keys_view, + )) + return Column.from_unique_ptr(move(c_result)) + + +def concatenate_rows(list source_columns): + cdef unique_ptr[column] c_result + + cdef table_view c_table_view = table_view_from_columns(source_columns) with nogil: c_result = move(cpp_concatenate_rows( c_table_view, )) - result = Column.from_unique_ptr(move(c_result)) - return result + return Column.from_unique_ptr(move(c_result)) def concatenate_list_elements(Column input_column, dropna=False): diff --git a/python/cudf/cudf/_lib/merge.pyx b/python/cudf/cudf/_lib/merge.pyx index 
915b46c5691..dae2c466266 100644 --- a/python/cudf/cudf/_lib/merge.pyx +++ b/python/cudf/cudf/_lib/merge.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -10,79 +10,43 @@ from cudf._lib.column cimport Column from cudf._lib.cpp.merge cimport merge as cpp_merge from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table +from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns def merge_sorted( - object tables, - object keys=None, - bool by_index=False, - bool ignore_index=False, + list input_columns, + list key_columns_indices, bool ascending=True, - object na_position="last", + str na_position="last", ): - cdef vector[libcudf_types.size_type] c_column_keys + """Merge multiple lists of lexicographically sorted columns into one list + of sorted columns. `input_columns` is a list of lists of columns to be + merged. + """ + cdef vector[libcudf_types.size_type] c_column_keys = key_columns_indices cdef vector[table_view] c_input_tables cdef vector[libcudf_types.order] c_column_order cdef vector[libcudf_types.null_order] c_null_precedence - cdef libcudf_types.order column_order - cdef libcudf_types.null_order null_precedence - cdef source_table - # Create vector of tables - # Use metadata from 0th table for names, etc - c_input_tables.reserve(len(tables)) - for source_table in tables: + c_input_tables.reserve(len(input_columns)) + for source_columns in input_columns: c_input_tables.push_back( - table_view_from_table(source_table, ignore_index)) - source_table = tables[0] + table_view_from_columns(source_columns)) - # Define sorting order and null precedence - column_order = (libcudf_types.order.ASCENDING - if ascending - else libcudf_types.order.DESCENDING) + num_keys = len(key_columns_indices) - if ascending is False: - if na_position == "last": - na_position = "first" - else: - na_position = "last" - null_precedence = ( + cdef libcudf_types.order column_order = ( + libcudf_types.order.ASCENDING if ascending + else libcudf_types.order.DESCENDING + ) + c_column_order = vector[libcudf_types.order](num_keys, column_order) + + if not ascending: + na_position = "last" if na_position == "first" else "first" + cdef libcudf_types.null_order null_precedence = ( libcudf_types.null_order.BEFORE if na_position == "first" else libcudf_types.null_order.AFTER ) - - # Determine index-column offset and index names - if ignore_index: - num_index_columns = 0 - index_names = None - else: - num_index_columns = ( - 0 if source_table._index is None - else source_table._index._num_columns - ) - index_names = source_table._index_names - - # Define C vectors for each key column - if not by_index and keys is not None: - num_keys = len(keys) - c_column_keys.reserve(num_keys) - for name in keys: - c_column_keys.push_back( - num_index_columns + source_table._column_names.index(name) - ) - else: - if by_index: - start = 0 - stop = num_index_columns - else: - start = num_index_columns - stop = num_index_columns + source_table._num_columns - num_keys = stop - start - c_column_keys.reserve(num_keys) - for key in range(start, stop): - c_column_keys.push_back(key) - c_column_order = vector[libcudf_types.order](num_keys, column_order) c_null_precedence = vector[libcudf_types.null_order]( num_keys, null_precedence @@ -100,8 +64,4 @@ def merge_sorted( ) ) - return 
data_from_unique_ptr( - move(c_result), - column_names=source_table._column_names, - index_names=index_names, - ) + return columns_from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/null_mask.pyx b/python/cudf/cudf/_lib/null_mask.pyx index b6e26fe594f..ce83a6f0f18 100644 --- a/python/cudf/cudf/_lib/null_mask.pyx +++ b/python/cudf/cudf/_lib/null_mask.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from enum import Enum @@ -8,9 +8,6 @@ from libcpp.utility cimport move from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer from cudf._lib.column cimport Column - -import cudf._lib as libcudfxx - from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.null_mask cimport ( bitmask_allocation_size_bytes as cpp_bitmask_allocation_size_bytes, diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 8cb7dd942c1..e363ea875f0 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # cython: boundscheck = False @@ -17,7 +17,7 @@ except ImportError: import json import numpy as np -from cython.operator import dereference +from cython.operator cimport dereference from cudf.api.types import ( is_categorical_dtype, diff --git a/python/cudf/cudf/_lib/partitioning.pyx b/python/cudf/cudf/_lib/partitioning.pyx index e53667e7589..f2f5a92aca1 100644 --- a/python/cudf/cudf/_lib/partitioning.pyx +++ b/python/cudf/cudf/_lib/partitioning.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -11,21 +11,19 @@ from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.partitioning cimport partition as cpp_partition from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table +from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns from cudf._lib.stream_compaction import distinct_count as cpp_distinct_count cimport cudf._lib.cpp.types as libcudf_types -def partition(source_table, Column partition_map, - object num_partitions, bool keep_index=True): +def partition(list source_columns, Column partition_map, + object num_partitions): if num_partitions is None: num_partitions = cpp_distinct_count(partition_map, ignore_nulls=True) cdef int c_num_partitions = num_partitions - cdef table_view c_source_view = table_view_from_table( - source_table, not keep_index - ) + cdef table_view c_source_view = table_view_from_columns(source_columns) cdef column_view c_partition_map_view = partition_map.view() @@ -40,13 +38,5 @@ def partition(source_table, Column partition_map, ) return ( - *data_from_unique_ptr( - move(c_result.first), - column_names=source_table._column_names, - index_names=source_table._index_names if( - keep_index is True) - else None - - ), - list(c_result.second) + columns_from_unique_ptr(move(c_result.first)), list(c_result.second) ) diff --git a/python/cudf/cudf/_lib/quantiles.pyx b/python/cudf/cudf/_lib/quantiles.pyx index 497a71df89d..f65c29a55a8 100644 --- a/python/cudf/cudf/_lib/quantiles.pyx +++ b/python/cudf/cudf/_lib/quantiles.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
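# Illustrative call for the reworked merge_sorted above (internal API,
# hypothetical pre-sorted inputs): each inner list is one table, and
# key_columns_indices selects merge keys by position.
import cudf
from cudf._lib.merge import merge_sorted

col_a = cudf.Series([1, 3, 5])._column
col_b = cudf.Series([2, 4, 6])._column
merged = merge_sorted(
    [[col_a], [col_b]],  # two single-column tables, each already sorted
    [0],                 # merge on the first (only) column of each table
    ascending=True,
    na_position="last",
)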
from libcpp cimport bool
from libcpp.memory cimport unique_ptr
@@ -31,7 +31,7 @@ from cudf._lib.cpp.types cimport (
order_info,
sorted,
)
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
def quantile(
@@ -74,14 +74,13 @@ def quantile(
return Column.from_unique_ptr(move(c_result))
-def quantiles(source_table,
+def quantiles(list source_columns,
vector[double] q,
object interp,
object is_input_sorted,
list column_order,
list null_precedence):
- cdef table_view c_input = table_view_from_table(
- source_table, ignore_index=True)
+ cdef table_view c_input = table_view_from_columns(source_columns)
cdef vector[double] c_q = q
cdef interpolation c_interp = <interpolation>(
<underlying_type_t_interpolation> interp
@@ -119,7 +118,4 @@
)
)
- return data_from_unique_ptr(
- move(c_result),
- column_names=source_table._column_names
- )
+ return columns_from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/_lib/reshape.pyx b/python/cudf/cudf/_lib/reshape.pyx
index d64d0543892..29223947eea 100644
--- a/python/cudf/cudf/_lib/reshape.pyx
+++ b/python/cudf/cudf/_lib/reshape.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
@@ -13,32 +13,25 @@ from cudf._lib.cpp.reshape cimport (
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport size_type
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
-def interleave_columns(source_table):
- cdef table_view c_view = table_view_from_table(
- source_table, ignore_index=True)
+def interleave_columns(list source_columns):
+ cdef table_view c_view = table_view_from_columns(source_columns)
cdef unique_ptr[column] c_result
with nogil:
c_result = move(cpp_interleave_columns(c_view))
- return Column.from_unique_ptr(
- move(c_result)
- )
+ return Column.from_unique_ptr(move(c_result))
-def tile(source_table, size_type count):
+def tile(list source_columns, size_type count):
cdef size_type c_count = count
- cdef table_view c_view = table_view_from_table(source_table)
+ cdef table_view c_view = table_view_from_columns(source_columns)
cdef unique_ptr[table] c_result
with nogil:
c_result = move(cpp_tile(c_view, c_count))
- return data_from_unique_ptr(
- move(c_result),
- column_names=source_table._column_names,
- index_names=source_table._index_names
- )
+ return columns_from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/_lib/rolling.pyx b/python/cudf/cudf/_lib/rolling.pyx
index b4b3384032c..a2cb115f668 100644
--- a/python/cudf/cudf/_lib/rolling.pyx
+++ b/python/cudf/cudf/_lib/rolling.pyx
@@ -1,6 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
-
-from __future__ import print_function
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
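# Quick sketch of the list-of-columns reshape API above (internal API,
# hypothetical columns): tile repeats rows, while interleave_columns weaves
# same-typed columns together row by row.
import cudf
from cudf._lib.reshape import interleave_columns, tile

col_a = cudf.Series([1, 2])._column
col_b = cudf.Series([3, 4])._column
tiled = tile([col_a], 3)                    # rows of col_a repeated 3 times
woven = interleave_columns([col_a, col_b])  # 1, 3, 2, 4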
import pandas as pd
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 32d6cb2ea6d..71ac022ba2d 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -65,6 +65,7 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
timestamp_us,
)
from cudf._lib.utils cimport (
+ columns_from_table_view,
data_from_table_view,
table_view_from_columns,
table_view_from_table,
@@ -172,10 +173,10 @@ cdef class DeviceScalar:
if self.value is cudf.NA:
return (
f"{self.__class__.__name__}"
- f"({self.value}, {self.dtype.__repr__()})"
+ f"({self.value}, {repr(self.dtype)})"
)
else:
- return f"{self.__class__.__name__}({self.value.__repr__()})"
+ return f"{self.__class__.__name__}({repr(self.value)})"
@staticmethod
cdef DeviceScalar from_unique_ptr(unique_ptr[scalar] ptr, dtype=None):
@@ -361,8 +362,8 @@ cdef _set_struct_from_pydict(unique_ptr[scalar]& s,
names=columns
)
- data, _ = from_arrow(pyarrow_table, column_names=columns)
- cdef table_view struct_view = table_view_from_columns(data.values())
+ data = from_arrow(pyarrow_table)
+ cdef table_view struct_view = table_view_from_columns(data)
s.reset(
new struct_scalar(struct_view, valid)
)
@@ -373,18 +374,10 @@ cdef _get_py_dict_from_struct(unique_ptr[scalar]& s):
return cudf.NA
cdef table_view struct_table_view = (<struct_scalar*>s.get()).view()
- columns = [str(i) for i in range(struct_table_view.num_columns())]
+ column_names = [str(i) for i in range(struct_table_view.num_columns())]
- data, _ = data_from_table_view(
- struct_table_view,
- None,
- column_names=columns
- )
- to_arrow_table = cudf.core.frame.Frame(
- cudf.core.column_accessor.ColumnAccessor(data)
- )
-
- python_dict = to_arrow(to_arrow_table, columns).to_pydict()
+ columns = columns_from_table_view(struct_table_view, None)
+ python_dict = to_arrow(columns, column_names).to_pydict()
return {k: _nested_na_replace(python_dict[k])[0] for k in python_dict}
@@ -415,9 +408,8 @@ cdef _get_py_list_from_list(unique_ptr[scalar]& s):
cdef column_view list_col_view = (<list_scalar*>s.get()).view()
cdef Column list_col = Column.from_column_view(list_col_view, None)
- to_arrow_table = cudf.core.frame.Frame({"col": list_col})
- arrow_table = to_arrow(to_arrow_table, [["col", []]])
+ arrow_table = to_arrow([list_col], [["col", []]])
result = arrow_table['col'].to_pylist()
return _nested_na_replace(result)
diff --git a/python/cudf/cudf/_lib/search.pyx b/python/cudf/cudf/_lib/search.pyx
index f92ef753fc2..d5568f53231 100644
--- a/python/cudf/cudf/_lib/search.pyx
+++ b/python/cudf/cudf/_lib/search.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
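# The `metadata` argument threaded through to_arrow above mirrors the
# column-name hierarchy: a plain name for a flat column, or a
# [name, [child-metadata, ...]] pair for nested columns, matching the two
# call sites in scalar.pyx. Hypothetical columns; the layout is inferred
# from gather_metadata's usage in this diff.
import cudf
from cudf._lib.interop import to_arrow

flat = cudf.Series([1, 2, 3])._column
nested = cudf.Series([[1, 2], [3]])._column
flat_tbl = to_arrow([flat], ["a"])              # flat column named "a"
nested_tbl = to_arrow([nested], [["col", []]])  # list column, unnamed children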
from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -10,20 +10,20 @@ from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.utils cimport table_view_from_table +from cudf._lib.utils cimport table_view_from_columns def search_sorted( - table, values, side, ascending=True, na_position="last" + list source, list values, side, ascending=True, na_position="last" ): """Find indices where elements should be inserted to maintain order Parameters ---------- - table : Frame - Frame to search in - values : Frame - Frame of values to search for + source : list of columns + List of columns to search in + values : List of columns + List of value columns to search for side : str {‘left’, ‘right’} optional If ‘left’, the index of the first suitable location is given. If ‘right’, return the last such index @@ -33,10 +33,8 @@ def search_sorted( cdef vector[libcudf_types.null_order] c_null_precedence cdef libcudf_types.order c_order cdef libcudf_types.null_order c_null_order - cdef table_view c_table_data = table_view_from_table( - table, ignore_index=True) - cdef table_view c_values_data = table_view_from_table( - values, ignore_index=True) + cdef table_view c_table_data = table_view_from_columns(source) + cdef table_view c_values_data = table_view_from_columns(values) # Note: We are ignoring index columns here c_order = (libcudf_types.order.ASCENDING @@ -47,9 +45,9 @@ def search_sorted( if na_position=="last" else libcudf_types.null_order.BEFORE ) - c_column_order = vector[libcudf_types.order](table._num_columns, c_order) + c_column_order = vector[libcudf_types.order](len(source), c_order) c_null_precedence = vector[libcudf_types.null_order]( - table._num_columns, c_null_order + len(source), c_null_order ) if side == 'left': diff --git a/python/cudf/cudf/_lib/sort.pxd b/python/cudf/cudf/_lib/sort.pxd deleted file mode 100644 index d7488889555..00000000000 --- a/python/cudf/cudf/_lib/sort.pxd +++ /dev/null @@ -1,3 +0,0 @@ -from libc.stdint cimport int32_t - -ctypedef int32_t underlying_type_t_rank_method diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx index 3aa0b35e90e..1d7204a0a39 100644 --- a/python/cudf/cudf/_lib/sort.pyx +++ b/python/cudf/cudf/_lib/sort.pyx @@ -1,6 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. - -import pandas as pd +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
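# Usage sketch for the list-based search_sorted above (internal API,
# hypothetical columns; the source columns must already be sorted):
import cudf
from cudf._lib.search import search_sorted

haystack = cudf.Series([1, 3, 5, 7])._column
needles = cudf.Series([2, 6])._column
positions = search_sorted([haystack], [needles], "left")  # -> [1, 3]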
from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -10,32 +8,34 @@ from libcpp.vector cimport vector from enum import IntEnum from cudf._lib.column cimport Column +from cudf._lib.cpp.aggregation cimport ( + rank_method, + underlying_type_t_rank_method, +) from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.search cimport lower_bound, upper_bound from cudf._lib.cpp.sorting cimport ( is_sorted as cpp_is_sorted, rank, - rank_method, sorted_order, ) from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport null_order, null_policy, order -from cudf._lib.sort cimport underlying_type_t_rank_method -from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table +from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns def is_sorted( - source_table, object ascending=None, object null_position=None + list source_columns, object ascending=None, object null_position=None ): """ Checks whether the rows of a `table` are sorted in lexicographical order. Parameters ---------- - source_table : Frame - Frame whose columns are to be checked for sort order + source_columns : list of columns + columns to be checked for sort order ascending : None or list-like of booleans None or list-like of boolean values indicating expected sort order of each column. If list-like, size of list-like must be len(columns). If @@ -58,51 +58,39 @@ def is_sorted( cdef vector[null_order] null_precedence if ascending is None: - column_order = vector[order]( - source_table._num_columns, order.ASCENDING - ) - elif pd.api.types.is_list_like(ascending): - if len(ascending) != source_table._num_columns: + column_order = vector[order](len(source_columns), order.ASCENDING) + else: + if len(ascending) != len(source_columns): raise ValueError( - f"Expected a list-like of length {source_table._num_columns}, " + f"Expected a list-like of length {len(source_columns)}, " f"got length {len(ascending)} for `ascending`" ) column_order = vector[order]( - source_table._num_columns, order.DESCENDING + len(source_columns), order.DESCENDING ) for idx, val in enumerate(ascending): if val: column_order[idx] = order.ASCENDING - else: - raise TypeError( - f"Expected a list-like or None for `ascending`, got " - f"{type(ascending)}" - ) if null_position is None: null_precedence = vector[null_order]( - source_table._num_columns, null_order.AFTER + len(source_columns), null_order.AFTER ) - elif pd.api.types.is_list_like(null_position): - if len(null_position) != source_table._num_columns: + else: + if len(null_position) != len(source_columns): raise ValueError( - f"Expected a list-like of length {source_table._num_columns}, " + f"Expected a list-like of length {len(source_columns)}, " f"got length {len(null_position)} for `null_position`" ) null_precedence = vector[null_order]( - source_table._num_columns, null_order.AFTER + len(source_columns), null_order.AFTER ) for idx, val in enumerate(null_position): if val: null_precedence[idx] = null_order.BEFORE - else: - raise TypeError( - f"Expected a list-like or None for `null_position`, got " - f"{type(null_position)}" - ) cdef bool c_result - cdef table_view source_table_view = table_view_from_table(source_table) + cdef table_view source_table_view = table_view_from_columns(source_columns) with nogil: c_result = cpp_is_sorted( source_table_view, @@ -113,34 +101,34 @@ def is_sorted( return c_result -def 
order_by(source_table, object ascending, str na_position): +def order_by(list columns_from_table, object ascending, str na_position): """ - Sorting the table ascending/descending + Get index to sort the table in ascending/descending order. Parameters ---------- - source_table : table which will be sorted - ascending : list of boolean values which correspond to each column + columns_from_table : columns from the table which will be sorted + ascending : sequence of boolean values which correspond to each column in source_table signifying order of each column True - Ascending and False - Descending na_position : whether null value should show up at the "first" or "last" position of **all** sorted column. """ - cdef table_view source_table_view = table_view_from_table( - source_table, ignore_index=True + cdef table_view source_table_view = table_view_from_columns( + columns_from_table ) cdef vector[order] column_order column_order.reserve(len(ascending)) cdef vector[null_order] null_precedence null_precedence.reserve(len(ascending)) - for i in ascending: - if i is True: + for asc in ascending: + if asc: column_order.push_back(order.ASCENDING) else: column_order.push_back(order.DESCENDING) - if i ^ (na_position == "first"): + if asc ^ (na_position == "first"): null_precedence.push_back(null_order.AFTER) else: null_precedence.push_back(null_order.BEFORE) @@ -154,21 +142,21 @@ def order_by(source_table, object ascending, str na_position): return Column.from_unique_ptr(move(c_result)) -def digitize(source_values_table, bins, bool right=False): +def digitize(list source_columns, list bins, bool right=False): """ Return the indices of the bins to which each value in source_table belongs. Parameters ---------- - source_table : Input table to be binned. - bins : Frame containing columns of bins + source_columns : Input columns to be binned. + bins : List containing columns of bins right : Indicating whether the intervals include the right or the left bin edge. 
""" - cdef table_view bins_view = table_view_from_table(bins) - cdef table_view source_values_table_view = table_view_from_table( - source_values_table + cdef table_view bins_view = table_view_from_columns(bins) + cdef table_view source_table_view = table_view_from_columns( + source_columns ) cdef vector[order] column_order = ( vector[order]( @@ -184,11 +172,11 @@ def digitize(source_values_table, bins, bool right=False): ) cdef unique_ptr[column] c_result - if right is True: + if right: with nogil: c_result = move(lower_bound( bins_view, - source_values_table_view, + source_table_view, column_order, null_precedence) ) @@ -196,7 +184,7 @@ def digitize(source_values_table, bins, bool right=False): with nogil: c_result = move(upper_bound( bins_view, - source_values_table_view, + source_table_view, column_order, null_precedence) ) @@ -204,23 +192,13 @@ def digitize(source_values_table, bins, bool right=False): return Column.from_unique_ptr(move(c_result)) -class RankMethod(IntEnum): - FIRST = < underlying_type_t_rank_method > rank_method.FIRST - AVERAGE = < underlying_type_t_rank_method > rank_method.AVERAGE - MIN = < underlying_type_t_rank_method > rank_method.MIN - MAX = < underlying_type_t_rank_method > rank_method.MAX - DENSE = < underlying_type_t_rank_method > rank_method.DENSE - - -def rank_columns(source_table, object method, str na_option, +def rank_columns(list source_columns, object method, str na_option, bool ascending, bool pct ): """ Compute numerical data ranks (1 through n) of each column in the dataframe """ - cdef table_view source_table_view = table_view_from_table( - source_table, ignore_index=True - ) + cdef table_view source_table_view = table_view_from_columns(source_columns) cdef rank_method c_rank_method = < rank_method > ( < underlying_type_t_rank_method > method @@ -260,7 +238,7 @@ def rank_columns(source_table, object method, str na_option, cdef vector[unique_ptr[column]] c_results cdef column_view c_view cdef Column col - for col in source_table._columns: + for col in source_columns: c_view = col.view() with nogil: c_results.push_back(move( @@ -274,11 +252,6 @@ def rank_columns(source_table, object method, str na_option, ) )) - cdef unique_ptr[table] c_result - c_result.reset(new table(move(c_results))) - data, _ = data_from_unique_ptr( - move(c_result), - column_names=source_table._column_names, - index_names=None - ) - return data, source_table._index + return [Column.from_unique_ptr( + move(c_results[i]) + ) for i in range(c_results.size())] diff --git a/python/cudf/cudf/_lib/strings/combine.pyx b/python/cudf/cudf/_lib/strings/combine.pyx index 3b5ef33a668..eeb39f70728 100644 --- a/python/cudf/cudf/_lib/strings/combine.pyx +++ b/python/cudf/cudf/_lib/strings/combine.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
@@ -18,10 +18,10 @@ from cudf._lib.cpp.strings.combine cimport (
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport size_type
from cudf._lib.scalar cimport DeviceScalar
-from cudf._lib.utils cimport table_view_from_table
+from cudf._lib.utils cimport table_view_from_columns
-def concatenate(source_strings,
+def concatenate(list source_strings,
object sep,
object na_rep):
"""
@@ -33,8 +33,7 @@ def concatenate(source_strings,
cdef DeviceScalar narep = na_rep.device_value
cdef unique_ptr[column] c_result
- cdef table_view source_view = table_view_from_table(
- source_strings, ignore_index=True)
+ cdef table_view source_view = table_view_from_columns(source_strings)
cdef const string_scalar* scalar_separator = \
<const string_scalar*>(separator.get_raw_ptr())
diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx
index 96d25cb92c9..2d94ef2cedf 100644
--- a/python/cudf/cudf/_lib/transform.pyx
+++ b/python/cudf/cudf/_lib/transform.pyx
@@ -1,13 +1,15 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
import numpy as np
from numba.np import numpy_support
import cudf
from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
+from cudf.core._internals.expressions import parse_expression
from cudf.core.buffer import Buffer
from cudf.utils import cudautils
+from cython.operator cimport dereference
from libc.stdint cimport uintptr_t
from libcpp.memory cimport unique_ptr
from libcpp.pair cimport pair
@@ -20,13 +22,17 @@ cimport cudf._lib.cpp.transform as libcudf_transform
from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view
+from cudf._lib.cpp.expressions cimport expression
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport bitmask_type, data_type, size_type, type_id
+from cudf._lib.expressions cimport Expression
from cudf._lib.types cimport underlying_type_t_type_id
from cudf._lib.utils cimport (
+ columns_from_unique_ptr,
data_from_table_view,
data_from_unique_ptr,
+ table_view_from_columns,
table_view_from_table,
)
@@ -123,21 +129,15 @@ def transform(Column input, op):
return Column.from_unique_ptr(move(c_output))
-def table_encode(input):
- cdef table_view c_input = table_view_from_table(
- input, ignore_index=True)
+def table_encode(list source_columns):
+ cdef table_view c_input = table_view_from_columns(source_columns)
cdef pair[unique_ptr[table], unique_ptr[column]] c_result
with nogil:
c_result = move(libcudf_transform.encode(c_input))
- return (
- *data_from_unique_ptr(
- move(c_result.first),
- column_names=input._column_names,
- ),
- Column.from_unique_ptr(move(c_result.second))
- )
+ return columns_from_unique_ptr(
+ move(c_result.first)), Column.from_unique_ptr(move(c_result.second))
def one_hot_encode(Column input_column, Column categories):
@@ -162,3 +162,34 @@ def one_hot_encode(Column input_column, Column categories):
)
return encodings
+
+
+def compute_column(list columns, tuple column_names, expr: str):
+ """Compute a new column by evaluating an expression on a set of columns.
+
+ Parameters
+ ----------
+ columns : list
+ The set of columns forming the table to evaluate the expression on.
+ column_names : tuple[str]
+ The names associated with each column.
These names are necessary to map + column names in the expression to indices in the provided list of + columns, which are what will be used by libcudf to evaluate the + expression on the table. + expr : str + The expression to evaluate. + """ + visitor = parse_expression(expr, column_names) + + # At the end, all the stack contains is the expression to evaluate. + cdef Expression cudf_expr = visitor.expression + cdef table_view tbl = table_view_from_columns(columns) + cdef unique_ptr[column] col + with nogil: + col = move( + libcudf_transform.compute_column( + tbl, + dereference(cudf_expr.c_obj.get()) + ) + ) + return Column.from_unique_ptr(move(col)) diff --git a/python/cudf/cudf/_lib/transpose.pyx b/python/cudf/cudf/_lib/transpose.pyx index 931a2702612..b9eea6169bd 100644 --- a/python/cudf/cudf/_lib/transpose.pyx +++ b/python/cudf/cudf/_lib/transpose.pyx @@ -1,7 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. - -import cudf -from cudf.api.types import is_categorical_dtype +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair @@ -9,65 +6,22 @@ from libcpp.utility cimport move from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column -from cudf._lib.cpp.column.column_view cimport column_view -from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.transpose cimport transpose as cpp_transpose -from cudf._lib.utils cimport data_from_table_view, table_view_from_table - +from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns -def transpose(source): - """Transpose index and columns. - See Also - -------- - cudf.core.DataFrame.transpose +def transpose(list source_columns): + """Transpose m n-row columns into n m-row columns """ - - if source._num_columns == 0: - return source - - cats = None - columns = source._columns - dtype = columns[0].dtype - - if is_categorical_dtype(dtype): - if any(not is_categorical_dtype(c.dtype) for c in columns): - raise ValueError('Columns must all have the same dtype') - cats = list(c.categories for c in columns) - cats = cudf.core.column.concat_columns(cats).unique() - source = cudf.core.frame.Frame(index=source._index, data=[ - (name, col._set_categories(cats, is_unique=True).codes) - for name, col in source._data.items() - ]) - elif any(c.dtype != dtype for c in columns): - raise ValueError('Columns must all have the same dtype') - cdef pair[unique_ptr[column], table_view] c_result - cdef table_view c_input = table_view_from_table( - source, ignore_index=True) + cdef table_view c_input = table_view_from_columns(source_columns) with nogil: c_result = move(cpp_transpose(c_input)) result_owner = Column.from_unique_ptr(move(c_result.first)) - data, _ = data_from_table_view( + return columns_from_table_view( c_result.second, - owner=result_owner, - column_names=range(c_input.num_rows()) + owners=[result_owner] * c_result.second.num_columns() ) - - if cats is not None: - data= [ - (name, cudf.core.column.column.build_categorical_column( - codes=cudf.core.column.column.build_column( - col.base_data, dtype=col.dtype), - mask=col.base_mask, - size=col.size, - categories=cats, - offset=col.offset, - )) - for name, col in data.items() - ] - - return data diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 8557f430e25..643a1adca9f 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -317,10 +317,10 @@ cdef columns_from_table_view( ): 
""" Given a ``cudf::table_view``, construsts a list of columns from it, - along with referencing an ``owner`` Python object that owns the memory - lifetime. ``owner`` must be either None or a list of column. If ``owner`` - is a list of columns, the owner of the `i`th ``cudf::column_view`` in the - table view is ``owners[i]``. For more about memory ownership, + along with referencing an owner Python object that owns the memory + lifetime. owner must be either None or a list of column. If owner + is a list of columns, the owner of the `i`th ``cudf::column_view`` + in the table view is ``owners[i]``. For more about memory ownership, see ``Column.from_column_view``. """ diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py index fad2a973681..62f8377a323 100644 --- a/python/cudf/cudf/api/types.py +++ b/python/cudf/cudf/api/types.py @@ -4,7 +4,7 @@ from __future__ import annotations -from collections.abc import Sequence +from collections import abc from functools import wraps from inspect import isclass from typing import List, Union @@ -48,16 +48,12 @@ def is_numeric_dtype(obj): if issubclass(obj, _BaseDtype): return False else: - if isinstance(obj, cudf.Decimal128Dtype) or isinstance( - getattr(obj, "dtype", None), cudf.Decimal128Dtype - ): - return True - if isinstance(obj, cudf.Decimal64Dtype) or isinstance( - getattr(obj, "dtype", None), cudf.Decimal64Dtype - ): - return True - if isinstance(obj, cudf.Decimal32Dtype) or isinstance( - getattr(obj, "dtype", None), cudf.Decimal32Dtype + if isinstance( + obj, + (cudf.Decimal128Dtype, cudf.Decimal64Dtype, cudf.Decimal32Dtype), + ) or isinstance( + getattr(obj, "dtype", None), + (cudf.Decimal128Dtype, cudf.Decimal64Dtype, cudf.Decimal32Dtype), ): return True if isinstance(obj, _BaseDtype) or isinstance( @@ -129,12 +125,14 @@ def is_scalar(val): bool Return True if given object is scalar. """ - return ( - isinstance(val, cudf._lib.scalar.DeviceScalar) - or isinstance(val, cudf.Scalar) - or isinstance(val, cudf.core.tools.datetimes.DateOffset) - or pd_types.is_scalar(val) - ) + return isinstance( + val, + ( + cudf.Scalar, + cudf._lib.scalar.DeviceScalar, + cudf.core.tools.datetimes.DateOffset, + ), + ) or pd_types.is_scalar(val) def _is_scalar_or_zero_d_array(val): @@ -174,7 +172,7 @@ def is_list_like(obj): bool Return True if given object is list-like. """ - return isinstance(obj, (Sequence, np.ndarray)) and not isinstance( + return isinstance(obj, (abc.Sequence, np.ndarray)) and not isinstance( obj, (str, bytes) ) diff --git a/python/cudf/cudf/comm/gpuarrow.py b/python/cudf/cudf/comm/gpuarrow.py index f21eb4e4d8c..0c4d9d7f77e 100644 --- a/python/cudf/cudf/comm/gpuarrow.py +++ b/python/cudf/cudf/comm/gpuarrow.py @@ -1,6 +1,5 @@ # Copyright (c) 2019-2022, NVIDIA CORPORATION. 
-from collections import OrderedDict
-from collections.abc import Sequence
+from collections import OrderedDict, abc
import numpy as np
import pandas as pd
@@ -32,7 +31,7 @@ def __init__(self, source, schema=None):
self._open(source, schema)
-class GpuArrowReader(Sequence):
+class GpuArrowReader(abc.Sequence):
def __init__(self, schema, dev_ary):
self._table = CudaRecordBatchStreamReader(dev_ary, schema).read_all()
@@ -120,12 +119,12 @@ def null(self):
@property
def data_raw(self):
- "Accessor for the data buffer as a device array"
+ """Accessor for the data buffer as a device array"""
return self._series._column.data_array_view
@property
def null_raw(self):
- "Accessor for the null buffer as a device array"
+ """Accessor for the null buffer as a device array"""
return self._series._column.mask_array_view
def make_series(self):
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 259a7f711c3..8dbd71739b5 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -3,6 +3,7 @@ from __future__ import annotations
import pickle
+import warnings
from functools import cached_property
from typing import Any, Set
@@ -118,7 +119,7 @@ def get_level_values(self, level):
See Also
--------
- cudf.core.multiindex.MultiIndex.get_level_values : Get values for
+ cudf.MultiIndex.get_level_values : Get values for
a level of a MultiIndex.
Notes
@@ -707,7 +708,18 @@ def difference(self, other, sort=None):
if is_mixed_with_object_dtype(self, other):
difference = self.copy()
else:
- difference = self.join(other, how="leftanti")
+ other = other.copy(deep=False)
+ other.names = self.names
+ difference = cudf.core.index._index_from_data(
+ cudf.DataFrame._from_data(self._data)
+ ._merge(
+ cudf.DataFrame._from_data(other._data),
+ how="leftanti",
+ on=self.name,
+ )
+ ._data
+ )
+
if self.dtype != other.dtype:
difference = difference.astype(self.dtype)
@@ -989,7 +1001,17 @@ def _union(self, other, sort=None):
return union_result
def _intersection(self, other, sort=None):
- intersection_result = self.unique().join(other.unique(), how="inner")
+ other_unique = other.unique()
+ other_unique.names = self.names
+ intersection_result = cudf.core.index._index_from_data(
+ cudf.DataFrame._from_data(self.unique()._data)
+ ._merge(
+ cudf.DataFrame._from_data(other_unique._data),
+ how="inner",
+ on=self.name,
+ )
+ ._data
+ )
if sort is None and len(other):
return intersection_result.sort_values()
@@ -1141,6 +1163,9 @@ def join(
(1, 2)],
names=['a', 'b'])
"""
+ warnings.warn(
+ "Index.join is deprecated and will be removed", FutureWarning
+ )
if isinstance(self, cudf.MultiIndex) and isinstance(
other, cudf.MultiIndex
diff --git a/python/cudf/cudf/core/_internals/expressions.py b/python/cudf/cudf/core/_internals/expressions.py
new file mode 100644
index 00000000000..bc587d4e1e2
--- /dev/null
+++ b/python/cudf/cudf/core/_internals/expressions.py
@@ -0,0 +1,222 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+import ast
+import functools
+from typing import List, Tuple
+
+from cudf._lib.expressions import (
+ ASTOperator,
+ ColumnReference,
+ Expression,
+ Literal,
+ Operation,
+)
+
+# This dictionary encodes the mapping from Python AST operators to their cudf
+# counterparts.
+python_cudf_operator_map = {
+ # Binary operators
+ ast.Add: ASTOperator.ADD,
+ ast.Sub: ASTOperator.SUB,
+ ast.Mult: ASTOperator.MUL,
+ ast.Div: ASTOperator.DIV,
+ ast.FloorDiv: ASTOperator.FLOOR_DIV,
+ ast.Mod: ASTOperator.PYMOD,
+ ast.Pow: ASTOperator.POW,
+ ast.Eq: ASTOperator.EQUAL,
+ ast.NotEq: ASTOperator.NOT_EQUAL,
+ ast.Lt: ASTOperator.LESS,
+ ast.Gt: ASTOperator.GREATER,
+ ast.LtE: ASTOperator.LESS_EQUAL,
+ ast.GtE: ASTOperator.GREATER_EQUAL,
+ ast.BitXor: ASTOperator.BITWISE_XOR,
+ # TODO: The mapping of logical/bitwise operators here is inconsistent with
+ # pandas. In pandas, both `BitAnd` and `And` map to
+ # `ASTOperator.LOGICAL_AND` for booleans, while they map to
+ # `ASTOperator.BITWISE_AND` for integers. However, there is no good way to
+ # encode this at present because expressions can be arbitrarily nested so
+ # we won't know the dtype of the input without inserting a much more
+ # complex traversal of the expression tree to determine the output types at
+ # each node. For now, we'll rely on users to use the appropriate operator.
+ ast.BitAnd: ASTOperator.BITWISE_AND,
+ ast.BitOr: ASTOperator.BITWISE_OR,
+ ast.And: ASTOperator.LOGICAL_AND,
+ ast.Or: ASTOperator.LOGICAL_OR,
+ # Unary operators
+ ast.Invert: ASTOperator.BIT_INVERT,
+ ast.Not: ASTOperator.NOT,
+ # TODO: Missing USub, possibly other unary ops?
+}
+
+
+# Mapping between Python function names encoded in an ast.Call node and the
+# corresponding libcudf C++ AST operators.
+python_cudf_function_map = {
+ # TODO: Operators listed on
+ # https://pandas.pydata.org/pandas-docs/stable/user_guide/enhancingperf.html#expression-evaluation-via-eval # noqa: E501
+ # that we don't support yet:
+ # expm1, log1p, arctan2 and log10.
+ "sin": ASTOperator.SIN,
+ "cos": ASTOperator.COS,
+ "tan": ASTOperator.TAN,
+ "arcsin": ASTOperator.ARCSIN,
+ "arccos": ASTOperator.ARCCOS,
+ "arctan": ASTOperator.ARCTAN,
+ "sinh": ASTOperator.SINH,
+ "cosh": ASTOperator.COSH,
+ "tanh": ASTOperator.TANH,
+ "arcsinh": ASTOperator.ARCSINH,
+ "arccosh": ASTOperator.ARCCOSH,
+ "arctanh": ASTOperator.ARCTANH,
+ "exp": ASTOperator.EXP,
+ "log": ASTOperator.LOG,
+ "sqrt": ASTOperator.SQRT,
+ "abs": ASTOperator.ABS,
+ "ceil": ASTOperator.CEIL,
+ "floor": ASTOperator.FLOOR,
+ # TODO: Operators supported by libcudf with no Python function analog.
+ # ast.rint: ASTOperator.RINT,
+ # ast.cbrt: ASTOperator.CBRT,
+}
+
+
+class libcudfASTVisitor(ast.NodeVisitor):
+ """A NodeVisitor specialized for constructing a libcudf expression tree.
+
+ This visitor is designed to handle AST nodes that have libcudf equivalents.
+ It constructs column references from names and literals from constants,
+ then builds up operations. The final result can be accessed using the
+ `expression` property. The visitor must be kept in scope for as long as the
+ expression is needed because all of the underlying libcudf expressions will
+ be destroyed when the libcudfASTVisitor is.
+
+ Parameters
+ ----------
+ col_names : Tuple[str]
+ The column names used to map the names in an expression.
+ """ + + def __init__(self, col_names: Tuple[str]): + self.stack: List[Expression] = [] + self.nodes: List[Expression] = [] + self.col_names = col_names + + @property + def expression(self): + """Expression: The result of parsing an AST.""" + assert len(self.stack) == 1 + return self.stack[-1] + + def visit_Name(self, node): + try: + col_id = self.col_names.index(node.id) + except ValueError: + raise ValueError(f"Unknown column name {node.id}") + self.stack.append(ColumnReference(col_id)) + + def visit_Constant(self, node): + if not isinstance(node, ast.Num): + raise ValueError( + f"Unsupported literal {repr(node.value)} of type " + "{type(node.value).__name__}" + ) + self.stack.append(Literal(node.value)) + + def visit_UnaryOp(self, node): + self.visit(node.operand) + self.nodes.append(self.stack.pop()) + if isinstance(node.op, ast.USub): + # TODO: Except for leaf nodes, we won't know the type of the + # operand, so there's no way to know whether this should be a float + # or an int. We should maybe see what Spark does, and this will + # probably require casting. + self.nodes.append(Literal(-1)) + op = ASTOperator.MUL + self.stack.append(Operation(op, self.nodes[-1], self.nodes[-2])) + elif isinstance(node.op, ast.UAdd): + self.stack.append(self.nodes[-1]) + else: + op = python_cudf_operator_map[type(node.op)] + self.stack.append(Operation(op, self.nodes[-1])) + + def visit_BinOp(self, node): + self.visit(node.left) + self.visit(node.right) + self.nodes.append(self.stack.pop()) + self.nodes.append(self.stack.pop()) + + op = python_cudf_operator_map[type(node.op)] + self.stack.append(Operation(op, self.nodes[-1], self.nodes[-2])) + + def _visit_BoolOp_Compare(self, operators, operands, has_multiple_ops): + # Helper function handling the common components of parsing BoolOp and + # Compare AST nodes. These two types of nodes both support chaining + # (e.g. `a > b > c` is equivalent to `a > b and b > c`, so this + # function helps standardize that. + + # TODO: Whether And/Or and BitAnd/BitOr actually correspond to + # logical or bitwise operators depends on the data types that they + # are applied to. We'll need to add logic to map to that. + inner_ops = [] + for op, (left, right) in zip(operators, operands): + # Note that this will lead to duplicate nodes, e.g. if + # the comparison is `a < b < c` that will be encoded as + # `a < b and b < c`. We could potentially optimize by caching + # expressions by name so that we only construct them once. + self.visit(left) + self.visit(right) + + self.nodes.append(self.stack.pop()) + self.nodes.append(self.stack.pop()) + + op = python_cudf_operator_map[type(op)] + inner_ops.append(Operation(op, self.nodes[-1], self.nodes[-2])) + + self.nodes.extend(inner_ops) + + # If we have more than one comparator, we need to link them + # together with LOGICAL_AND operators. 
+ if has_multiple_ops: + op = ASTOperator.LOGICAL_AND + + def _combine_compare_ops(left, right): + self.nodes.append(Operation(op, left, right)) + return self.nodes[-1] + + functools.reduce(_combine_compare_ops, inner_ops) + + self.stack.append(self.nodes[-1]) + + def visit_BoolOp(self, node): + operators = [node.op] * (len(node.values) - 1) + operands = zip(node.values[:-1], node.values[1:]) + self._visit_BoolOp_Compare(operators, operands, len(node.values) > 2) + + def visit_Compare(self, node): + operands = (node.left, *node.comparators) + has_multiple_ops = len(operands) > 2 + operands = zip(operands[:-1], operands[1:]) + self._visit_BoolOp_Compare(node.ops, operands, has_multiple_ops) + + def visit_Call(self, node): + try: + op = python_cudf_function_map[node.func.id] + except KeyError: + raise ValueError(f"Unsupported function {node.func}.") + # Assuming only unary functions are supported, which is checked above. + if len(node.args) != 1 or node.keywords: + raise ValueError( + f"Function {node.func} only accepts one positional " + "argument." + ) + self.visit(node.args[0]) + + self.nodes.append(self.stack.pop()) + self.stack.append(Operation(op, self.nodes[-1])) + + +@functools.lru_cache(256) +def parse_expression(expr: str, col_names: Tuple[str]): + visitor = libcudfASTVisitor(col_names) + visitor.visit(ast.parse(expr)) + return visitor diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py index 22a5666ef3f..d13c55dfcc0 100644 --- a/python/cudf/cudf/core/algorithms.py +++ b/python/cudf/cudf/core/algorithms.py @@ -1,5 +1,5 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. -from warnings import warn +import warnings import cupy as cp import numpy as np @@ -50,7 +50,7 @@ def factorize(values, sort=False, na_sentinel=-1, size_hint=None): raise NotImplementedError("na_sentinel can not be None.") if size_hint: - warn("size_hint is not applicable for cudf.factorize") + warnings.warn("size_hint is not applicable for cudf.factorize") return_cupy_array = isinstance(values, cp.ndarray) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 911391ef984..f9bb7ea2f1a 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -3,7 +3,7 @@ from __future__ import annotations import pickle -from collections.abc import MutableSequence +from collections import abc from functools import cached_property from typing import ( TYPE_CHECKING, @@ -911,8 +911,8 @@ def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: ) return other - ary = cudf.utils.utils.scalar_broadcast_to( - self._encode(other), size=len(self), dtype=self.codes.dtype + ary = column.full( + len(self), self._encode(other), dtype=self.codes.dtype ) return column.build_categorical_column( categories=self.dtype.categories._values, @@ -1379,7 +1379,9 @@ def view(self, dtype: Dtype) -> ColumnBase: ) @staticmethod - def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: + def _concat( + objs: abc.MutableSequence[CategoricalColumn], + ) -> CategoricalColumn: # TODO: This function currently assumes it is being called from # column.concat_columns, at least to the extent that all the # preprocessing in that function has already been done. 
That should be @@ -1627,9 +1629,9 @@ def _create_empty_categorical_column( return column.build_categorical_column( categories=column.as_column(dtype.categories), codes=column.as_column( - cudf.utils.utils.scalar_broadcast_to( - _DEFAULT_CATEGORICAL_VALUE, + column.full( categorical_column.size, + _DEFAULT_CATEGORICAL_VALUE, categorical_column.codes.dtype, ) ), diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index b2e3e42531b..3fb71173178 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -68,7 +68,6 @@ StructDtype, ) from cudf.core.mixins import BinaryOperand, Reducible -from cudf.utils import utils from cudf.utils.dtypes import ( cudf_dtype_from_pa_type, get_time_unit, @@ -229,13 +228,9 @@ def to_arrow(self) -> pa.Array: 4 ] """ - return libcudf.interop.to_arrow( - cudf.core.frame.Frame( - cudf.core.column_accessor.ColumnAccessor({"None": self}) - ), - [["None"]], - keep_index=False, - )["None"].chunk(0) + return libcudf.interop.to_arrow([self], [["None"]],)[ + "None" + ].chunk(0) @classmethod def from_arrow(cls, array: pa.Array) -> ColumnBase: @@ -280,12 +275,8 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase: } ) - codes = libcudf.interop.from_arrow( - indices_table, indices_table.column_names - )[0]["None"] - categories = libcudf.interop.from_arrow( - dictionaries_table, dictionaries_table.column_names - )[0]["None"] + codes = libcudf.interop.from_arrow(indices_table)[0] + categories = libcudf.interop.from_arrow(dictionaries_table)[0] return build_categorical_column( categories=categories, @@ -301,7 +292,7 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase: ): return cudf.core.column.IntervalColumn.from_arrow(array) - result = libcudf.interop.from_arrow(data, data.column_names)[0]["None"] + result = libcudf.interop.from_arrow(data)[0] return result._with_type_metadata(cudf_dtype_from_pa_type(array.type)) @@ -1782,9 +1773,7 @@ def as_column( if dtype is None: dtype = cudf.dtype("float64") - data = as_column( - utils.scalar_broadcast_to(arbitrary, length, dtype=dtype) - ) + data = as_column(full(length, arbitrary, dtype=dtype)) if not nan_as_null and not is_decimal_dtype(data.dtype): if np.issubdtype(data.dtype, np.floating): data = data.fillna(np.nan) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index fac8af652c1..375a19f5423 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -2,7 +2,7 @@ from __future__ import annotations -import datetime as dt +import datetime import locale import re from locale import nl_langinfo @@ -237,9 +237,9 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: if isinstance(other, (cudf.Scalar, ColumnBase, cudf.DateOffset)): return other - if isinstance(other, dt.datetime): + if isinstance(other, datetime.datetime): other = np.datetime64(other) - elif isinstance(other, dt.timedelta): + elif isinstance(other, datetime.timedelta): other = np.timedelta64(other) elif isinstance(other, pd.Timestamp): other = other.to_datetime64() diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index f10e257d359..d8ddb3d8d1a 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -1,8 +1,8 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. 
+import warnings
 from decimal import Decimal
 from typing import Any, Sequence, Tuple, Union, cast
-from warnings import warn

 import cupy as cp
 import numpy as np
@@ -43,7 +43,7 @@ def as_decimal_column(
             isinstance(dtype, cudf.core.dtypes.DecimalDtype)
             and dtype.scale < self.dtype.scale
         ):
-            warn(
+            warnings.warn(
                 "cuDF truncates when downcasting decimals to a lower scale. "
                 "To round, use Series.round() or DataFrame.round()."
             )
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index 8578bfe8147..2964378d114 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -17,7 +17,8 @@
     drop_list_duplicates,
     extract_element_column,
     extract_element_scalar,
-    index_of,
+    index_of_column,
+    index_of_scalar,
     sort_lists,
 )
 from cudf._lib.strings.convert.convert_lists import format_list_column
@@ -113,9 +114,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
             return NotImplemented
         if isinstance(other.dtype, ListDtype):
             if op == "__add__":
-                return concatenate_rows(
-                    cudf.core.frame.Frame({0: self, 1: other})
-                )
+                return concatenate_rows([self, other])
             else:
                 raise NotImplementedError(
                     "Lists concatenation for this operation is not yet"
@@ -301,16 +300,31 @@ def as_string_column(
         """
         Create a strings column from a list column
         """
-        # Convert the leaf child column to strings column
+        lc = self._transform_leaves(
+            lambda col, dtype: col.as_string_column(dtype), dtype
+        )
+
+        # Separator strings to match the Python format
+        separators = as_column([", ", "[", "]"])
+
+        # Call libcudf to format the list column
+        return format_list_column(lc, separators)
+
+    def _transform_leaves(self, func, *args, **kwargs):
+        # return a new list column with the same nested structure
+        # as ``self``, but with the leaf column transformed
+        # by applying ``func`` to it
+
         cc: List[ListColumn] = []
         c: ColumnBase = self
+
         while isinstance(c, ListColumn):
             cc.insert(0, c)
             c = c.children[1]
-        s = c.as_string_column(dtype)
+
+        lc = func(c, *args, **kwargs)

         # Rebuild the list column replacing just the leaf child
-        lc = s
         for c in cc:
             o = c.children[0]
             lc = cudf.core.column.ListColumn(  # type: ignore
@@ -321,12 +335,7 @@
                 null_count=c.null_count,
                 children=(o, lc),
             )
-
-        # Separator strings to match the Python format
-        separators = as_column([", ", "[", "]"])
-
-        # Call libcudf to format the list column
-        return format_list_column(lc, separators)
+        return lc


 class ListMethods(ColumnMethods):
@@ -455,10 +464,61 @@ def contains(self, search_key: ScalarLike) -> ParentType:
                 raise
         return res

-    def index(self, search_key: ScalarLike) -> ParentType:
-        search_key = cudf.Scalar(search_key)
+    def index(self, search_key: Union[ScalarLike, ColumnLike]) -> ParentType:
+        """
+        Returns integers representing the index of the search key for each
+        row.
+
+        If ``search_key`` is a sequence, it must be the same length as the
+        Series and ``search_key[i]`` represents the search key for the
+        ``i``-th row of the Series.
+
+        If the search key is not contained in a row, -1 is returned. If
+        either the row or the search key is null, ``<NA>`` is returned. If
+        the search key is contained multiple times, the smallest matching
+        index is returned.
+
+        Parameters
+        ----------
+        search_key : scalar or sequence of scalars
+            Element or elements being searched for in each row of the list
+            column
+
+        Returns
+        -------
+        Series or Index
+
+        Examples
+        --------
+        >>> s = cudf.Series([[1, 2, 3], [3, 4, 5], [4, 5, 6]])
+        >>> s.list.index(4)
+        0   -1
+        1    1
+        2    0
+        dtype: int32
+
+        >>> s = cudf.Series([["a", "b", "c"], ["x", "y", "z"]])
+        >>> s.list.index(["b", "z"])
+        0    1
+        1    2
+        dtype: int32
+
+        >>> s = cudf.Series([[4, 5, 6], None, [-3, -2, -1]])
+        >>> s.list.index([None, 3, -2])
+        0    <NA>
+        1    <NA>
+        2       1
+        dtype: int32
+        """
+
         try:
-            res = self._return_or_inplace(index_of(self._column, search_key))
+            if is_scalar(search_key):
+                return self._return_or_inplace(
+                    index_of_scalar(self._column, cudf.Scalar(search_key))
+                )
+            else:
+                return self._return_or_inplace(
+                    index_of_column(self._column, as_column(search_key))
+                )
+
         except RuntimeError as e:
             if (
                 "Type/Scale of search key does not "
@@ -466,7 +526,6 @@
             ):
                 raise TypeError(str(e)) from e
             raise
-        return res

     @property
     def leaves(self) -> ParentType:
@@ -717,3 +776,31 @@ def concat(self, dropna=True) -> ParentType:
                 "of nesting"
             )
         return self._return_or_inplace(result)
+
+    def astype(self, dtype):
+        """
+        Return a new list Series with the leaf values cast
+        to the specified data type.
+
+        Parameters
+        ----------
+        dtype : data type to cast leaf values to
+
+        Returns
+        -------
+        A new Series of lists
+
+        Examples
+        --------
+        >>> s = cudf.Series([[1, 2], [3, 4]])
+        >>> s.dtype
+        ListDtype(int64)
+        >>> s2 = s.list.astype("float64")
+        >>> s2.dtype
+        ListDtype(float64)
+        """
+        return self._return_or_inplace(
+            self._column._transform_leaves(
+                lambda col, dtype: col.astype(dtype), dtype
+            )
+        )
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 216faaa8250..d30026e8bfa 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -40,11 +40,12 @@
     as_column,
     build_column,
     column,
+    full,
     string,
 )
 from cudf.core.dtypes import CategoricalDtype
 from cudf.core.mixins import BinaryOperand
-from cudf.utils import cudautils, utils
+from cudf.utils import cudautils
 from cudf.utils.dtypes import (
     NUMERIC_TYPES,
     min_column_type,
@@ -254,9 +255,7 @@ def normalize_binop_value(
             if np.isscalar(other):
                 return cudf.dtype(other_dtype).type(other)
             else:
-                ary = utils.scalar_broadcast_to(
-                    other, size=len(self), dtype=other_dtype
-                )
+                ary = full(len(self), other, dtype=other_dtype)
                 return column.build_column(
                     data=Buffer(ary),
                     dtype=ary.dtype,
@@ -438,9 +437,7 @@ def find_and_replace(
             )
         if len(replacement_col) == 1 and len(to_replace_col) > 1:
             replacement_col = column.as_column(
-                utils.scalar_broadcast_to(
-                    replacement[0], (len(to_replace_col),), self.dtype
-                )
+                full(len(to_replace_col), replacement[0], self.dtype)
             )
         elif len(replacement_col) == 1 and len(to_replace_col) == 0:
             return self.copy()
@@ -774,6 +771,4 @@ def digitize(
     if bin_col.nullable:
         raise ValueError("`bins` cannot contain null entries.")

-    return as_column(
-        libcudf.sort.digitize(column.as_frame(), bin_col.as_frame(), right)
-    )
+    return as_column(libcudf.sort.digitize([column], [bin_col], right))
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index d5d45c341d5..0db7e7d9a27 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -37,7 +37,6 @@
 from cudf.core.buffer import Buffer
 from cudf.core.column import
column, datetime from cudf.core.column.methods import ColumnMethods, ParentType -from cudf.utils import utils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import can_convert_to_column @@ -202,7 +201,7 @@ def __getitem__(self, key): return self.get(key) def len(self) -> SeriesOrIndex: - """ + r""" Computes the length of each element in the Series/Index. Returns @@ -214,7 +213,7 @@ def len(self) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> s = cudf.Series(["dog", "", "\\n", None]) + >>> s = cudf.Series(["dog", "", "\n", None]) >>> s.str.len() 0 3 1 0 @@ -365,9 +364,7 @@ def cat(self, others=None, sep=None, na_rep=None): other_cols = _get_cols_list(self._parent, others) all_cols = [self._column] + other_cols data = libstrings.concatenate( - cudf.DataFrame( - {index: value for index, value in enumerate(all_cols)} - ), + all_cols, cudf.Scalar(sep), cudf.Scalar(na_rep, "str"), ) @@ -963,7 +960,7 @@ def replace( ) def replace_with_backrefs(self, pat: str, repl: str) -> SeriesOrIndex: - """ + r""" Use the ``repl`` back-ref template to create a new string with the extracted elements found using the ``pat`` expression. @@ -983,7 +980,7 @@ def replace_with_backrefs(self, pat: str, repl: str) -> SeriesOrIndex: -------- >>> import cudf >>> s = cudf.Series(["A543","Z756"]) - >>> s.str.replace_with_backrefs('(\\\\d)(\\\\d)', 'V\\\\2\\\\1') + >>> s.str.replace_with_backrefs('(\\d)(\\d)', 'V\\2\\1') 0 AV453 1 ZV576 dtype: object @@ -1198,7 +1195,7 @@ def istimestamp(self, format: str) -> SeriesOrIndex: ) def isfloat(self) -> SeriesOrIndex: - """ + r""" Check whether all characters in each string form floating value. If a string has zero characters, False is returned for @@ -1252,7 +1249,7 @@ def isfloat(self) -> SeriesOrIndex: 4 True 5 False dtype: bool - >>> s = cudf.Series(["this is plain text", "\\t\\n", "9.9", "9.9.9"]) + >>> s = cudf.Series(["this is plain text", "\t\n", "9.9", "9.9.9"]) >>> s.str.isfloat() 0 False 1 False @@ -2242,7 +2239,7 @@ def get(self, i: int = 0) -> SeriesOrIndex: return self._return_or_inplace(libstrings.get(self._column, i)) def get_json_object(self, json_path): - """ + r""" Applies a JSONPath string to an input strings column where each row in the column is a valid json string @@ -2261,7 +2258,7 @@ def get_json_object(self, json_path): >>> import cudf >>> s = cudf.Series( [ - \\"\\"\\" + \"\"\" { "store":{ "book":[ @@ -2280,13 +2277,13 @@ def get_json_object(self, json_path): ] } } - \\"\\"\\" + \"\"\" ]) >>> s - 0 {"store": {\\n "book": [\\n { "cat... + 0 {"store": {\n "book": [\n { "cat... dtype: object >>> s.str.get_json_object("$.store.book") - 0 [\\n { "category": "reference",\\n ... + 0 [\n { "category": "reference",\n ... dtype: object """ @@ -3141,7 +3138,7 @@ def rjust(self, width: int, fillchar: str = " ") -> SeriesOrIndex: ) def strip(self, to_strip: str = None) -> SeriesOrIndex: - """ + r""" Remove leading and trailing characters. Strip whitespaces (including newlines) or a set of @@ -3172,11 +3169,11 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> s = cudf.Series(['1. Ant. ', '2. Bee!\\n', '3. Cat?\\t', None]) + >>> s = cudf.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t', None]) >>> s 0 1. Ant. - 1 2. Bee!\\n - 2 3. Cat?\\t + 1 2. Bee!\n + 2 3. Cat?\t 3 dtype: object >>> s.str.strip() @@ -3185,7 +3182,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex: 2 3. Cat? 3 dtype: object - >>> s.str.strip('123.!? \\n\\t') + >>> s.str.strip('123.!? 
\n\t') 0 Ant 1 Bee 2 Cat @@ -3200,7 +3197,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex: ) def lstrip(self, to_strip: str = None) -> SeriesOrIndex: - """ + r""" Remove leading and trailing characters. Strip whitespaces (including newlines) @@ -3231,11 +3228,11 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> s = cudf.Series(['1. Ant. ', '2. Bee!\\n', '3. Cat?\\t', None]) + >>> s = cudf.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t', None]) >>> s.str.lstrip('123.') 0 Ant. - 1 Bee!\\n - 2 Cat?\\t + 1 Bee!\n + 2 Cat?\t 3 dtype: object """ @@ -3247,7 +3244,7 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex: ) def rstrip(self, to_strip: str = None) -> SeriesOrIndex: - """ + r""" Remove leading and trailing characters. Strip whitespaces (including newlines) @@ -3280,14 +3277,14 @@ def rstrip(self, to_strip: str = None) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> s = cudf.Series(['1. Ant. ', '2. Bee!\\n', '3. Cat?\\t', None]) + >>> s = cudf.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t', None]) >>> s 0 1. Ant. - 1 2. Bee!\\n - 2 3. Cat?\\t + 1 2. Bee!\n + 2 3. Cat?\t 3 dtype: object - >>> s.str.rstrip('.!? \\n\\t') + >>> s.str.rstrip('.!? \n\t') 0 1. Ant 1 2. Bee 2 3. Cat @@ -3302,7 +3299,7 @@ def rstrip(self, to_strip: str = None) -> SeriesOrIndex: ) def wrap(self, width: int, **kwargs) -> SeriesOrIndex: - """ + r""" Wrap long strings in the Series/Index to be formatted in paragraphs with length less than a given width. @@ -3343,8 +3340,8 @@ def wrap(self, width: int, **kwargs) -> SeriesOrIndex: >>> data = ['line to be wrapped', 'another line to be wrapped'] >>> s = cudf.Series(data) >>> s.str.wrap(12) - 0 line to be\\nwrapped - 1 another line\\nto be\\nwrapped + 0 line to be\nwrapped + 1 another line\nto be\nwrapped dtype: object """ if not is_integer(width): @@ -3578,7 +3575,7 @@ def isempty(self) -> SeriesOrIndex: return self._return_or_inplace((self._column == "").fillna(False)) def isspace(self) -> SeriesOrIndex: - """ + r""" Check whether all characters in each string are whitespace. This is equivalent to running the Python string method @@ -3626,7 +3623,7 @@ def isspace(self) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> s = cudf.Series([' ', '\\t\\r\\n ', '']) + >>> s = cudf.Series([' ', '\t\r\n ', '']) >>> s.str.isspace() 0 True 1 True @@ -4274,7 +4271,7 @@ def normalize_spaces(self) -> SeriesOrIndex: ) def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex: - """ + r""" Normalizes strings characters for tokenizing. This uses the normalizer that is built into the @@ -4283,7 +4280,7 @@ def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex: - adding padding around punctuation (unicode category starts with "P") as well as certain ASCII symbols like "^" and "$" - adding padding around the CJK Unicode block characters - - changing whitespace (e.g. ``\\t``, ``\\n``, ``\\r``) to space + - changing whitespace (e.g. ``\t``, ``\n``, ``\r``) to space - removing control characters (unicode categories "Cc" and "Cf") If `do_lower_case = true`, lower-casing also removes the accents. 
@@ -4306,7 +4303,7 @@ def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> ser = cudf.Series(["héllo, \\tworld","ĂĆCĖÑTED","$99"]) + >>> ser = cudf.Series(["héllo, \tworld","ĂĆCĖÑTED","$99"]) >>> ser.str.normalize_characters() 0 hello , world 1 accented @@ -5523,15 +5520,21 @@ def _binaryop( if isinstance(other, (StringColumn, str, cudf.Scalar)): if op == "__add__": if isinstance(other, cudf.Scalar): - other = utils.scalar_broadcast_to( - other, size=len(self), dtype="object" + other = cast( + StringColumn, + column.full(len(self), other, dtype="object"), ) + + # Explicit types are necessary because mypy infers ColumnBase + # rather than StringColumn and sometimes forgets Scalar. + lhs: Union[cudf.Scalar, StringColumn] + rhs: Union[cudf.Scalar, StringColumn] lhs, rhs = (other, self) if reflect else (self, other) return cast( "column.ColumnBase", libstrings.concatenate( - cudf.DataFrame._from_data(data={0: lhs, 1: rhs}), + [lhs, rhs], sep=cudf.Scalar(""), na_rep=cudf.Scalar(None, "str"), ), diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 15815427aca..810624e9f4e 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -2,7 +2,7 @@ from __future__ import annotations -import datetime as dt +import datetime from typing import Any, Sequence, cast import numpy as np @@ -211,7 +211,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: def normalize_binop_value(self, other) -> ColumnBinaryOperand: if isinstance(other, (ColumnBase, cudf.Scalar)): return other - if isinstance(other, dt.timedelta): + if isinstance(other, datetime.timedelta): other = np.timedelta64(other) elif isinstance(other, pd.Timestamp): other = other.to_datetime64() diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 291e50386cc..34236a8c09e 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -3,7 +3,7 @@ from __future__ import annotations import itertools -from collections.abc import MutableMapping +from collections import abc from functools import cached_property, reduce from typing import ( TYPE_CHECKING, @@ -78,7 +78,7 @@ def _to_flat_dict(d): return {k: v for k, v in _to_flat_dict_inner(d)} -class ColumnAccessor(MutableMapping): +class ColumnAccessor(abc.MutableMapping): """ Parameters ---------- @@ -99,7 +99,7 @@ class ColumnAccessor(MutableMapping): def __init__( self, - data: Union[MutableMapping, ColumnAccessor] = None, + data: Union[abc.MutableMapping, ColumnAccessor] = None, multiindex: bool = False, level_names=None, ): @@ -149,7 +149,7 @@ def _create_unsafe( return obj def __iter__(self): - return self._data.__iter__() + return iter(self._data) def __getitem__(self, key: Any) -> ColumnBase: return self._data[key] @@ -158,7 +158,7 @@ def __setitem__(self, key: Any, value: Any): self.set_by_label(key, value) def __delitem__(self, key: Any): - self._data.__delitem__(key) + del self._data[key] self._clear_cache() def __len__(self) -> int: @@ -213,7 +213,7 @@ def columns(self) -> Tuple[ColumnBase, ...]: return tuple(self.values()) @cached_property - def _grouped_data(self) -> MutableMapping: + def _grouped_data(self) -> abc.MutableMapping: """ If self.multiindex is True, return the underlying mapping as a nested mapping. 
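# The collections.abc changes above (and in categorical.py, cut.py,
# dataframe.py, df_protocol.py, and groupby.py in this patch) matter on
# Python >= 3.10, where the ABC aliases on the bare collections module were
# removed. A minimal sketch of the behavior these imports rely on:
#
#     from collections import abc
#     isinstance(["a", "b"], abc.Sequence)        # True
#     isinstance({"a": 1}, abc.MutableMapping)    # True
#
#     import collections
#     collections.Sequence  # AttributeError on Python 3.10+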
diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 7c585602c23..2ec39043eb2 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -1,4 +1,6 @@ -from collections.abc import Sequence +# Copyright (c) 2021-2022, NVIDIA CORPORATION. + +from collections import abc import cupy import numpy as np @@ -21,21 +23,27 @@ def cut( duplicates: str = "raise", ordered: bool = True, ): + """Bin values into discrete intervals. - """ - Bin values into discrete intervals. Use cut when you need to segment and sort data values into bins. This function is also useful for going from a continuous variable to a categorical variable. + Parameters ---------- x : array-like The input array to be binned. Must be 1-dimensional. bins : int, sequence of scalars, or IntervalIndex The criteria to bin by. - * int : Defines the number of equal-width bins in the - range of x. The range of x is extended by .1% on each - side to include the minimum and maximum values of x. + + * int : Defines the number of equal-width bins in the range of `x`. The + range of `x` is extended by .1% on each side to include the minimum + and maximum values of `x`. + * sequence of scalars : Defines the bin edges allowing for non-uniform + width. No extension of the range of `x` is done. + * IntervalIndex : Defines the exact bins to be used. Note that + IntervalIndex for `bins` must be non-overlapping. + right : bool, default True Indicates whether bins includes the rightmost edge or not. labels : array or False, default None @@ -56,6 +64,7 @@ def cut( Categorical and Series (with Categorical dtype). If True, the resulting categorical will be ordered. If False, the resulting categorical will be unordered (labels must be provided). + Returns ------- out : CategoricalIndex @@ -66,30 +75,38 @@ def cut( For scalar or sequence bins, this is an ndarray with the computed bins. If set duplicates=drop, bins will drop non-unique bin. For an IntervalIndex bins, this is equal to bins. + Examples -------- Discretize into three equal-sized bins. + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3) CategoricalIndex([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], - ... (5.0, 7.0],(0.994, 3.0]], categories=[(0.994, 3.0], - ... (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category') + (5.0, 7.0], (0.994, 3.0]], categories=[(0.994, 3.0], + (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category') + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True) (CategoricalIndex([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], - ... (5.0, 7.0],(0.994, 3.0]],categories=[(0.994, 3.0], - ... (3.0, 5.0], (5.0, 7.0]],ordered=True, dtype='category'), - array([0.994, 3. , 5. , 7. ])) + (5.0, 7.0], (0.994, 3.0]], categories=[(0.994, 3.0], + (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category'), + array([0.994, 3. , 5. , 7. ])) + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), - ... 3, labels=["bad", "medium", "good"]) + ... 3, labels=["bad", "medium", "good"]) CategoricalIndex(['bad', 'good', 'medium', 'medium', 'good', 'bad'], - ... categories=['bad', 'medium', 'good'],ordered=True, - ... dtype='category') + categories=['bad', 'medium', 'good'],ordered=True, + dtype='category') + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3, - ... labels=["B", "A", "B"], ordered=False) + ... labels=["B", "A", "B"], ordered=False) CategoricalIndex(['B', 'B', 'A', 'A', 'B', 'B'], categories=['A', 'B'], - ... 
ordered=False, dtype='category') + ordered=False, dtype='category') + >>> cudf.cut([0, 1, 1, 2], bins=4, labels=False) array([0, 1, 1, 3], dtype=int32) + Passing a Series as an input returns a Series with categorical dtype: + >>> s = cudf.Series(np.array([2, 4, 6, 8, 10]), ... index=['a', 'b', 'c', 'd', 'e']) >>> cudf.cut(s, 3) @@ -123,7 +140,7 @@ def cut( ) # bins can either be an int, sequence of scalars or an intervalIndex - if isinstance(bins, Sequence): + if isinstance(bins, abc.Sequence): if len(set(bins)) is not len(bins): if duplicates == "raise": raise ValueError( @@ -141,7 +158,7 @@ def cut( # create bins if given an int or single scalar if not isinstance(bins, pd.IntervalIndex): - if not isinstance(bins, (Sequence)): + if not isinstance(bins, (abc.Sequence)): if isinstance( x, (pd.Series, cudf.Series, np.ndarray, cupy.ndarray) ): diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 277fd5aae57..0d3b3ee0300 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -7,12 +7,13 @@ import itertools import numbers import pickle +import re import sys import warnings -from collections import defaultdict -from collections.abc import Iterable, Mapping, Sequence +from collections import abc, defaultdict from typing import ( Any, + Callable, Dict, List, MutableMapping, @@ -25,6 +26,7 @@ ) import cupy +import numba import numpy as np import pandas as pd import pyarrow as pa @@ -78,7 +80,7 @@ from cudf.core.resample import DataFrameResampler from cudf.core.series import Series from cudf.core.udf.row_function import _get_row_kernel -from cudf.utils import applyutils, docutils, ioutils, queryutils, utils +from cudf.utils import applyutils, docutils, ioutils, queryutils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( can_convert_to_column, @@ -1103,7 +1105,7 @@ def __getitem__(self, arg): elif can_convert_to_column(arg): mask = arg if is_list_like(mask): - mask = cudf.utils.utils._create_pandas_series(data=mask) + mask = pd.Series(mask) if mask.dtype == "bool": return self._apply_boolean_mask(mask) else: @@ -1172,9 +1174,7 @@ def __setitem__(self, arg, value): allow_non_unique=True, ) if is_scalar(value): - self._data[arg] = utils.scalar_broadcast_to( - value, len(self) - ) + self._data[arg] = column.full(len(self), value) else: value = as_column(value) self._data[arg] = value @@ -1855,7 +1855,7 @@ def _make_operands_and_index_for_binop( Optional[BaseIndex], ]: # Check built-in types first for speed. - if isinstance(other, (list, dict, Sequence, Mapping)): + if isinstance(other, (list, dict, abc.Sequence, abc.Mapping)): warnings.warn( "Binary operations between host objects such as " f"{type(other)} and cudf.DataFrame are deprecated and will be " @@ -1876,7 +1876,7 @@ def _make_operands_and_index_for_binop( if _is_scalar_or_zero_d_array(other): rhs = {name: other for name in self._data} - elif isinstance(other, (list, Sequence)): + elif isinstance(other, (list, abc.Sequence)): rhs = {name: o for (name, o) in zip(self._data, other)} elif isinstance(other, Series): rhs = dict(zip(other.index.values_host, other.values_host)) @@ -1905,7 +1905,7 @@ def _make_operands_and_index_for_binop( # the fill value. 
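            # (For example, df.add(other, fill_value=0) treats an index label
            # present in only one operand as 0 on the missing side instead of
            # propagating null into the result.)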
left_default = fill_value - if not isinstance(rhs, (dict, Mapping)): + if not isinstance(rhs, (dict, abc.Mapping)): return NotImplemented, None operands = { @@ -2571,8 +2571,24 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): f"{num_cols * (num_cols > 0)}" ) + # TODO: This check is currently necessary because + # _is_scalar_or_zero_d_array below will treat a length 1 pd.Categorical + # as a scalar and attempt to use column.full, which can't handle it. + # Maybe _is_scalar_or_zero_d_array should be changed, or maybe we just + # shouldn't support pd.Categorical at all, but those changes will at + # least require a deprecation cycle because we currently support + # inserting a pd.Categorical. + if isinstance(value, pd.Categorical): + value = cudf.core.column.categorical.pandas_categorical_as_column( + value + ) + if _is_scalar_or_zero_d_array(value): - value = utils.scalar_broadcast_to(value, len(self)) + value = column.full( + len(self), + value, + "str" if libcudf.scalar._is_null_host_scalar(value) else None, + ) if len(self) == 0: if isinstance(value, (pd.Series, Series)): @@ -2658,11 +2674,6 @@ def diff(self, periods=1, axis=0): if axis != 0: raise NotImplementedError("Only axis=0 is supported.") - if not all(is_numeric_dtype(i) for i in self.dtypes): - raise NotImplementedError( - "DataFrame.diff only supports numeric dtypes" - ) - if abs(periods) > len(self): df = cudf.DataFrame._from_data( { @@ -2964,7 +2975,9 @@ def agg(self, aggs, axis=None): if axis == 0 or axis is not None: raise NotImplementedError("axis not implemented yet") - if isinstance(aggs, Iterable) and not isinstance(aggs, (str, dict)): + if isinstance(aggs, abc.Iterable) and not isinstance( + aggs, (str, dict) + ): result = DataFrame() # TODO : Allow simultaneous pass for multi-aggregation as # a future optimization @@ -3000,13 +3013,13 @@ def agg(self, aggs, axis=None): f"'Series' object" ) result[key] = getattr(col, value)() - elif all([isinstance(val, Iterable) for val in aggs.values()]): + elif all([isinstance(val, abc.Iterable) for val in aggs.values()]): idxs = set() for val in aggs.values(): - if isinstance(val, Iterable): - idxs.update(val) - elif isinstance(val, str): + if isinstance(val, str): idxs.add(val) + elif isinstance(val, abc.Iterable): + idxs.update(val) idxs = sorted(list(idxs)) for agg in idxs: if agg is callable: @@ -3020,7 +3033,7 @@ def agg(self, aggs, axis=None): len(idxs), dtype=col.dtype, masked=True ) ans = cudf.Series(data=col_empty, index=idxs) - if isinstance(aggs.get(key), Iterable): + if isinstance(aggs.get(key), abc.Iterable): # TODO : Allow simultaneous pass for multi-aggregation # as a future optimization for agg in aggs.get(key): @@ -3196,17 +3209,42 @@ def transpose(self): Difference from pandas: Not supporting *copy* because default and only behavior is copy=True """ - # Never transpose a MultiIndex - remove the existing columns and - # replace with a RangeIndex. Afterward, reassign. - columns = self.index.copy(deep=False) + index = self._data.to_pandas_index() + columns = self.index.copy(deep=False) if self._num_columns == 0 or self._num_rows == 0: return DataFrame(index=index, columns=columns) + + # No column from index is transposed with libcudf. 
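+        # (The block below first unifies categorical columns: each column is
+        # recoded against the union of all columns' categories so that the
+        # integer codes are comparable, the codes are transposed with
+        # libcudf, and the categorical dtype is reattached to the transposed
+        # columns afterwards.)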
+ source_columns = [*self._columns] + source_dtype = source_columns[0].dtype + if is_categorical_dtype(source_dtype): + if any(not is_categorical_dtype(c.dtype) for c in source_columns): + raise ValueError("Columns must all have the same dtype") + cats = list(c.categories for c in source_columns) + cats = cudf.core.column.concat_columns(cats).unique() + source_columns = [ + col._set_categories(cats, is_unique=True).codes + for col in source_columns + ] + + if any(c.dtype != source_columns[0].dtype for c in source_columns): + raise ValueError("Columns must all have the same dtype") + + result_columns = libcudf.transpose.transpose(source_columns) + + if is_categorical_dtype(source_dtype): + result_columns = [ + codes._with_type_metadata( + cudf.core.dtypes.CategoricalDtype(categories=cats) + ) + for codes in result_columns + ] + # Set the old column names as the new index result = self.__class__._from_data( - # Cython renames the columns to the range [0...ncols] - libcudf.transpose.transpose(self), - as_index(index), + {i: col for i, col in enumerate(result_columns)}, + index=as_index(index), ) # Set the old index as the new column names result.columns = columns @@ -3564,12 +3602,13 @@ def apply( ): """ Apply a function along an axis of the DataFrame. - - Designed to mimic `pandas.DataFrame.apply`. Applies a user - defined function row wise over a dataframe, with true null - handling. Works with UDFs using `core.udf.pipeline.nulludf` - and returns a single series. Uses numba to jit compile the - function to PTX via LLVM. + ``apply`` relies on Numba to JIT compile ``func``. + Thus the allowed operations within ``func`` are limited + to the ones specified + [here](https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html). + For more information, see the cuDF guide + to user defined functions found + [here](https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html). Parameters ---------- @@ -3590,7 +3629,7 @@ def apply( Examples -------- - Simple function of a single variable which could be NA + Simple function of a single variable which could be NA: >>> def f(row): ... if row['a'] is cudf.NA: @@ -3606,7 +3645,7 @@ def apply( dtype: int64 Function of multiple variables will operate in - a null aware manner + a null aware manner: >>> def f(row): ... return row['a'] - row['b'] @@ -3622,7 +3661,7 @@ def apply( 3 dtype: int64 - Functions may conditionally return NA as in pandas + Functions may conditionally return NA as in pandas: >>> def f(row): ... if row['a'] + row['b'] > 3: @@ -3641,7 +3680,7 @@ def apply( dtype: int64 Mixed types are allowed, but will return the common - type, rather than object as in pandas + type, rather than object as in pandas: >>> def f(row): ... return row['a'] + row['b'] @@ -3658,7 +3697,7 @@ def apply( Functions may also return scalar values, however the result will be promoted to a safe type regardless of - the data + the data: >>> def f(row): ... if row['a'] > 3: @@ -3675,7 +3714,7 @@ def apply( 2 5.0 dtype: float64 - Ops against N columns are supported generally + Ops against N columns are supported generally: >>> def f(row): ... v, w, x, y, z = ( @@ -3707,6 +3746,68 @@ def apply( return self._apply(func, _get_row_kernel, *args, **kwargs) + def applymap( + self, + func: Callable[[Any], Any], + na_action: Union[str, None] = None, + **kwargs, + ) -> DataFrame: + + """ + Apply a function to a Dataframe elementwise. + + This method applies a function that accepts and returns a scalar + to every element of a DataFrame. 
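+        For example, ``df.applymap(lambda x: x ** 2)`` squares each element;
+        with ``na_action='ignore'``, null elements are returned as null
+        without being passed to ``func``.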
+ + Parameters + ---------- + func : callable + Python function, returns a single value from a single value. + na_action : {None, 'ignore'}, default None + If 'ignore', propagate NaN values, without passing them to func. + + Returns + ------- + DataFrame + Transformed DataFrame. + """ + + if kwargs: + raise NotImplementedError( + "DataFrame.applymap does not yet support **kwargs." + ) + + if na_action not in {"ignore", None}: + raise ValueError( + f"na_action must be 'ignore' or None. Got {repr(na_action)}" + ) + + if na_action == "ignore": + devfunc = numba.cuda.jit(device=True)(func) + + # promote to a null-ignoring function + # this code is never run in python, it only + # exists to provide numba with the correct + # bytecode to generate the equivalent PTX + # as a null-ignoring version of the function + def _func(x): # pragma: no cover + if x is cudf.NA: + return cudf.NA + else: + return devfunc(x) + + else: + _func = func + + # TODO: naive implementation + # this could be written as a single kernel + result = {} + for name, col in self._data.items(): + apply_sr = Series._from_data({None: col}) + result[name] = apply_sr.apply(_func) + + return DataFrame._from_data(result, index=self.index) + @_cudf_nvtx_annotate @applyutils.doc_apply() def apply_rows( @@ -3872,19 +3973,16 @@ def partition_by_hash(self, columns, nparts, keep_index=True): ------- partitioned: list of DataFrame """ - idx = ( - 0 - if (self._index is None or keep_index is False) - else self._index._num_columns - ) - key_indices = [self._data.names.index(k) + idx for k in columns] - output_data, output_index, offsets = libcudf.hash.hash_partition( - self, key_indices, nparts, keep_index + key_indices = [self._column_names.index(k) for k in columns] + output_columns, offsets = libcudf.hash.hash_partition( + [*self._columns], key_indices, nparts + ) + outdf = self._from_columns_like_self( + [*(self._index._columns if keep_index else ()), *output_columns], + self._column_names, + self._index_names if keep_index else None, ) - outdf = self.__class__._from_data(output_data, output_index) - outdf._copy_type_metadata(self, include_index=keep_index) - # Slice into partition return [outdf[s:e] for s, e in zip(offsets, offsets[1:] + [None])] @@ -5115,26 +5213,33 @@ def _reduce( if level is not None: raise NotImplementedError("level parameter is not implemented yet") - if numeric_only not in (None, True): - raise NotImplementedError( - "numeric_only parameter is not implemented yet" + source = self + if numeric_only: + numeric_cols = ( + name + for name in self._data.names + if is_numeric_dtype(self._data[name]) ) - axis = self._get_axis_from_axis_arg(axis) + source = self._get_columns_by_label(numeric_cols) + if source.empty: + return Series(index=cudf.StringIndex([])) + + axis = source._get_axis_from_axis_arg(axis) if axis == 0: try: result = [ - getattr(self._data[col], op)(**kwargs) - for col in self._data.names + getattr(source._data[col], op)(**kwargs) + for col in source._data.names ] except AttributeError: - raise TypeError(f"cannot perform {op} with type {self.dtype}") + raise TypeError(f"Not all column dtypes support op {op}") return Series._from_data( - {None: result}, as_index(self._data.names) + {None: result}, as_index(source._data.names) ) elif axis == 1: - return self._apply_cupy_method_axis_1(op, **kwargs) + return source._apply_cupy_method_axis_1(op, **kwargs) @_cudf_nvtx_annotate def _scan( @@ -5529,14 +5634,14 @@ def select_dtypes(self, include=None, exclude=None): @ioutils.doc_to_parquet() def to_parquet(self, 
path, *args, **kwargs): """{docstring}""" - from cudf.io import parquet as pq + from cudf.io import parquet - return pq.to_parquet(self, path, *args, **kwargs) + return parquet.to_parquet(self, path, *args, **kwargs) @ioutils.doc_to_feather() def to_feather(self, path, *args, **kwargs): """{docstring}""" - from cudf.io import feather as feather + from cudf.io import feather feather.to_feather(self, path, *args, **kwargs) @@ -5556,7 +5661,7 @@ def to_csv( **kwargs, ): """{docstring}""" - from cudf.io import csv as csv + from cudf.io import csv return csv.to_csv( self, @@ -5576,7 +5681,7 @@ def to_csv( @ioutils.doc_to_orc() def to_orc(self, fname, compression=None, *args, **kwargs): """{docstring}""" - from cudf.io import orc as orc + from cudf.io import orc orc.to_orc(self, fname, compression, *args, **kwargs) @@ -5610,22 +5715,24 @@ def stack(self, level=-1, dropna=True): """ assert level in (None, -1) repeated_index = self.index.repeat(self.shape[1]) - name_index = cudf.DataFrame._from_data({0: self._column_names}).tile( - self.shape[0] + name_index = libcudf.reshape.tile( + [as_column(self._column_names)], self.shape[0] ) - new_index = list(repeated_index._columns) + [name_index._columns[0]] + new_index_columns = [*repeated_index._columns, *name_index] if isinstance(self._index, MultiIndex): index_names = self._index.names + [None] else: - index_names = [None] * len(new_index) + index_names = [None] * len(new_index_columns) new_index = MultiIndex.from_frame( - DataFrame(dict(zip(range(0, len(new_index)), new_index))), + DataFrame._from_data( + dict(zip(range(0, len(new_index_columns)), new_index_columns)) + ), names=index_names, ) # Collect datatypes and cast columns as that type common_type = np.result_type(*self.dtypes) - homogenized = DataFrame( + homogenized = DataFrame._from_data( { c: ( self._data[c].astype(common_type) @@ -5636,9 +5743,15 @@ def stack(self, level=-1, dropna=True): } ) - data_col = libcudf.reshape.interleave_columns(homogenized) + result = Series._from_data( + { + None: libcudf.reshape.interleave_columns( + [*homogenized._columns] + ) + }, + index=new_index, + ) - result = Series(data=data_col, index=new_index) if dropna: return result.dropna() else: @@ -6090,7 +6203,7 @@ def _sample_axis_1( def _from_columns_like_self( self, columns: List[ColumnBase], - column_names: Iterable[str], + column_names: abc.Iterable[str], index_names: Optional[List[str]] = None, ) -> DataFrame: result = super()._from_columns_like_self( @@ -6099,6 +6212,207 @@ def _from_columns_like_self( result._set_column_names_like(self) return result + @_cudf_nvtx_annotate + def interleave_columns(self): + """ + Interleave Series columns of a table into a single column. + + Converts the column major table `cols` into a row major column. + + Parameters + ---------- + cols : input Table containing columns to interleave. + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame({0: ['A1', 'A2', 'A3'], 1: ['B1', 'B2', 'B3']}) + >>> df + 0 1 + 0 A1 B1 + 1 A2 B2 + 2 A3 B3 + >>> df.interleave_columns() + 0 A1 + 1 B1 + 2 A2 + 3 B2 + 4 A3 + 5 B3 + dtype: object + + Returns + ------- + The interleaved columns as a single column + """ + if ("category" == self.dtypes).any(): + raise ValueError( + "interleave_columns does not support 'category' dtype." 
+ ) + + return self._constructor_sliced._from_data( + {None: libcudf.reshape.interleave_columns([*self._columns])} + ) + + @_cudf_nvtx_annotate + def eval(self, expr: str, inplace: bool = False, **kwargs): + """Evaluate a string describing operations on DataFrame columns. + + Operates on columns only, not specific rows or elements. + + Parameters + ---------- + expr : str + The expression string to evaluate. + inplace : bool, default False + If the expression contains an assignment, whether to perform the + operation inplace and mutate the existing DataFrame. Otherwise, + a new DataFrame is returned. + **kwargs + Not supported. + + Returns + ------- + DataFrame, Series, or None + Series if a single column is returned (the typical use case), + DataFrame if any assignment statements are included in + ``expr``, or None if ``inplace=True``. + + Notes + ----- + Difference from pandas: + * Additional kwargs are not supported. + * Bitwise and logical operators are not dtype-dependent. + Specifically, `&` must be used for bitwise operators on integers, + not `and`, which is specifically for the logical and between + booleans. + * Only numerical types are currently supported. + * Operators generally will not cast automatically. Users are + responsible for casting columns to suitable types before + evaluating a function. + * Multiple assignments to the same name (i.e. a sequence of + assignment statements where later statements are conditioned upon + the output of earlier statements) is not supported. + + Examples + -------- + >>> df = cudf.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)}) + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + >>> df.eval('A + B') + 0 11 + 1 10 + 2 9 + 3 8 + 4 7 + dtype: int64 + + Assignment is allowed though by default the original DataFrame is not + modified. + + >>> df.eval('C = A + B') + A B C + 0 1 10 11 + 1 2 8 10 + 2 3 6 9 + 3 4 4 8 + 4 5 2 7 + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + + Use ``inplace=True`` to modify the original DataFrame. + + >>> df.eval('C = A + B', inplace=True) + >>> df + A B C + 0 1 10 11 + 1 2 8 10 + 2 3 6 9 + 3 4 4 8 + 4 5 2 7 + + Multiple columns can be assigned to using multi-line expressions: + + >>> df.eval( + ... ''' + ... C = A + B + ... D = A - B + ... ''' + ... ) + A B C D + 0 1 10 11 -9 + 1 2 8 10 -6 + 2 3 6 9 -3 + 3 4 4 8 0 + 4 5 2 7 3 + """ + if kwargs: + raise ValueError( + "Keyword arguments other than `inplace` are not supported" + ) + + # Have to use a regex match to avoid capturing "==" + includes_assignment = re.search("[^=]=[^=]", expr) is not None + + # Check if there were multiple statements. Filter out empty lines. + statements = tuple(filter(None, expr.strip().split("\n"))) + if len(statements) > 1 and any( + re.search("[^=]=[^=]", st) is None for st in statements + ): + raise ValueError( + "Multi-line expressions are only valid if all expressions " + "contain an assignment." 
+ ) + + if not includes_assignment: + if inplace: + raise ValueError( + "Cannot operate inplace if there is no assignment" + ) + return Series._from_data( + { + None: libcudf.transform.compute_column( + [*self._columns], self._column_names, statements[0] + ) + } + ) + + targets = [] + exprs = [] + for st in statements: + try: + t, e = re.split("[^=]=[^=]", st) + except ValueError as err: + if "too many values" in str(err): + raise ValueError( + f"Statement {st} contains too many assignments ('=')" + ) + raise + targets.append(t.strip()) + exprs.append(e.strip()) + + cols = ( + libcudf.transform.compute_column( + [*self._columns], self._column_names, e + ) + for e in exprs + ) + ret = self if inplace else self.copy(deep=False) + for name, col in zip(targets, cols): + ret._data[name] = col + if not inplace: + return ret + def from_dataframe(df, allow_copy=False): return df_protocol.from_dataframe(df, allow_copy=allow_copy) diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py index 4a30a78bf65..f4ce658bff3 100644 --- a/python/cudf/cudf/core/df_protocol.py +++ b/python/cudf/cudf/core/df_protocol.py @@ -1,7 +1,7 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. -import collections import enum +from collections import abc from typing import ( Any, Dict, @@ -569,13 +569,13 @@ def get_columns(self) -> Iterable[_CuDFColumn]: ] def select_columns(self, indices: Sequence[int]) -> "_CuDFDataFrame": - if not isinstance(indices, collections.abc.Sequence): + if not isinstance(indices, abc.Sequence): raise ValueError("`indices` is not a sequence") return _CuDFDataFrame(self._df.iloc[:, indices]) def select_columns_by_name(self, names: Sequence[str]) -> "_CuDFDataFrame": - if not isinstance(names, collections.Sequence): + if not isinstance(names, abc.Sequence): raise ValueError("`names` is not a sequence") return _CuDFDataFrame( diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 21cae5f05b7..585e8b94e80 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -237,7 +237,7 @@ def __eq__(self, other): def __repr__(self): if isinstance(self.element_type, (ListDtype, StructDtype)): - return f"{type(self).__name__}({self.element_type.__repr__()})" + return f"{type(self).__name__}({repr(self.element_type)})" else: return f"{type(self).__name__}({self.element_type})" @@ -681,8 +681,13 @@ def is_interval_dtype(obj): # TODO: Should there be any branch in this function that calls # pd.api.types.is_interval_dtype? return ( - isinstance(obj, cudf.core.dtypes.IntervalDtype) - or isinstance(obj, pd.core.dtypes.dtypes.IntervalDtype) + isinstance( + obj, + ( + cudf.core.dtypes.IntervalDtype, + pd.core.dtypes.dtypes.IntervalDtype, + ), + ) or obj is cudf.core.dtypes.IntervalDtype or ( isinstance(obj, str) and obj == cudf.core.dtypes.IntervalDtype.name diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 5185fb05cb4..d0e9e6d94c1 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -40,6 +40,7 @@ as_column, build_categorical_column, deserialize_columns, + full, serialize_columns, ) from cudf.core.column_accessor import ColumnAccessor @@ -917,6 +918,12 @@ def scatter_by_map( ------- A list of cudf.DataFrame objects. 
""" + if not isinstance(self, cudf.DataFrame): + warnings.warn( + f"{self.__class__.__name__}.scatter_by_map is deprecated and " + "will be removed.", + FutureWarning, + ) # map_index might be a column name or array, # make it a Column @@ -954,10 +961,16 @@ def scatter_by_map( f"ERROR: map_size must be >= {count} (got {map_size})." ) - data, index, output_offsets = libcudf.partitioning.partition( - self, map_index, map_size, keep_index + partitioned_columns, output_offsets = libcudf.partitioning.partition( + [*(self._index._columns if keep_index else ()), *self._columns], + map_index, + map_size, + ) + partitioned = self._from_columns_like_self( + partitioned_columns, + column_names=self._column_names, + index_names=self._index_names if keep_index else None, ) - partitioned = self.__class__._from_data(data, index) # due to the split limitation mentioned # here: https://github.com/rapidsai/cudf/issues/4607 @@ -967,9 +980,6 @@ def scatter_by_map( result = partitioned._split(output_offsets, keep_index=keep_index) - for frame in result: - frame._copy_type_metadata(self, include_index=keep_index) - if map_size: result += [ self._empty_like(keep_index) @@ -1095,6 +1105,8 @@ def fillna( elif method == "backfill": method = "bfill" + # TODO: This logic should be handled in different subclasses since + # different Frames support different types of values. if isinstance(value, cudf.Series): value = value.reindex(self._data.names) elif isinstance(value, cudf.DataFrame): @@ -1129,7 +1141,13 @@ def fillna( filled_data[col_name] = col.copy(deep=True) return self._mimic_inplace( - self._from_data(data=filled_data), + self._from_data( + data=ColumnAccessor._create_unsafe( + data=filled_data, + multiindex=self._data.multiindex, + level_names=self._data.level_names, + ) + ), inplace=inplace, ) @@ -1209,6 +1227,11 @@ def interpolate( some or all ``NaN`` values """ + if isinstance(self, cudf.BaseIndex): + warnings.warn( + "Index.interpolate is deprecated and will be removed.", + FutureWarning, + ) if method in {"pad", "ffill"} and limit_direction != "forward": raise ValueError( @@ -1261,100 +1284,27 @@ def _quantiles( libcudf.types.NullOrder[key] for key in null_precedence ] - result = self.__class__._from_data( - *libcudf.quantiles.quantiles( - self, + return self._from_columns_like_self( + libcudf.quantiles.quantiles( + [*self._columns], q, interpolation, is_sorted, column_order, null_precedence, - ) - ) - - result._copy_type_metadata(self) - return result - - @_cudf_nvtx_annotate - def rank( - self, - axis=0, - method="average", - numeric_only=None, - na_option="keep", - ascending=True, - pct=False, - ): - """ - Compute numerical data ranks (1 through n) along axis. - By default, equal values are assigned a rank that is the average of the - ranks of those values. - - Parameters - ---------- - axis : {0 or 'index'}, default 0 - Index to direct ranking. - method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' - How to rank the group of records that have the same value - (i.e. ties): - * average: average rank of the group - * min: lowest rank in the group - * max: highest rank in the group - * first: ranks assigned in order they appear in the array - * dense: like 'min', but rank always increases by 1 between groups. - numeric_only : bool, optional - For DataFrame objects, rank only numeric columns if set to True. 
- na_option : {'keep', 'top', 'bottom'}, default 'keep' - How to rank NaN values: - * keep: assign NaN rank to NaN values - * top: assign smallest rank to NaN values if ascending - * bottom: assign highest rank to NaN values if ascending. - ascending : bool, default True - Whether or not the elements should be ranked in ascending order. - pct : bool, default False - Whether or not to display the returned rankings in percentile - form. - - Returns - ------- - same type as caller - Return a Series or DataFrame with data ranks as values. - """ - if method not in {"average", "min", "max", "first", "dense"}: - raise KeyError(method) - - method_enum = libcudf.sort.RankMethod[method.upper()] - if na_option not in {"keep", "top", "bottom"}: - raise ValueError( - "na_option must be one of 'keep', 'top', or 'bottom'" - ) - - if axis not in (0, "index"): - raise NotImplementedError( - f"axis must be `0`/`index`, " - f"axis={axis} is not yet supported in rank" - ) - - source = self - if numeric_only: - numeric_cols = ( - name - for name in self._data.names - if _is_non_decimal_numeric_dtype(self._data[name]) - ) - source = self._get_columns_by_label(numeric_cols) - if source.empty: - return source.astype("float64") - - data, index = libcudf.sort.rank_columns( - source, method_enum, na_option, ascending, pct + ), + column_names=self._column_names, ) - return self._from_data(data, index).astype(np.float64) - @_cudf_nvtx_annotate def shift(self, periods=1, freq=None, axis=0, fill_value=None): """Shift values by `periods` positions.""" + if isinstance(self, cudf.BaseIndex): + warnings.warn( + "Index.shift is deprecated and will be removed.", + FutureWarning, + ) + axis = self._get_axis_from_axis_arg(axis) if axis != 0: raise ValueError("Only axis=0 is supported.") @@ -1441,30 +1391,33 @@ def from_arrow(cls, data): dict_indices_table = pa.table(dict_indices) data = data.drop(dict_indices_table.column_names) - cudf_indices_frame, _ = libcudf.interop.from_arrow( - dict_indices_table, dict_indices_table.column_names - ) + indices_columns = libcudf.interop.from_arrow(dict_indices_table) # as dictionary size can vary, it can't be a single table cudf_dictionaries_columns = { name: ColumnBase.from_arrow(dict_dictionaries[name]) for name in dict_dictionaries.keys() } - for name, codes in cudf_indices_frame.items(): - cudf_category_frame[name] = build_categorical_column( + cudf_category_frame = { + name: build_categorical_column( cudf_dictionaries_columns[name], codes, mask=codes.base_mask, size=codes.size, ordered=dict_ordered[name], ) + for name, codes in zip( + dict_indices_table.column_names, indices_columns + ) + } # Handle non-dict arrays - cudf_non_category_frame = ( - {} - if data.num_columns == 0 - else libcudf.interop.from_arrow(data, data.column_names)[0] - ) + cudf_non_category_frame = { + name: col + for name, col in zip( + data.column_names, libcudf.interop.from_arrow(data) + ) + } result = {**cudf_non_category_frame, **cudf_category_frame} @@ -1747,6 +1700,12 @@ def replace( 3 3 8 d 4 4 9 e """ + if isinstance(self, cudf.BaseIndex): + warnings.warn( + "Index.replace is deprecated and will be removed.", + FutureWarning, + ) + if limit is not None: raise NotImplementedError("limit parameter is not implemented yet") @@ -1996,76 +1955,6 @@ def notnull(self): # Alias for notnull notna = notnull - @_cudf_nvtx_annotate - def interleave_columns(self): - """ - Interleave Series columns of a table into a single column. - - Converts the column major table `cols` into a row major column. 
-
- Parameters
- ----------
- cols : input Table containing columns to interleave.
-
- Examples
- --------
- >>> df = DataFrame([['A1', 'A2', 'A3'], ['B1', 'B2', 'B3']])
- >>> df
- 0 [A1, A2, A3]
- 1 [B1, B2, B3]
- >>> df.interleave_columns()
- 0 A1
- 1 B1
- 2 A2
- 3 B2
- 4 A3
- 5 B3
-
- Returns
- -------
- The interleaved columns as a single column
- """
- if ("category" == self.dtypes).any():
- raise ValueError(
- "interleave_columns does not support 'category' dtype."
- )
-
- result = self._constructor_sliced(
- libcudf.reshape.interleave_columns(self)
- )
-
- return result
-
- @_cudf_nvtx_annotate
- def tile(self, count):
- """
- Repeats the rows from `self` DataFrame `count` times to form a
- new DataFrame.
-
- Parameters
- ----------
- self : input Table containing columns to interleave.
- count : Number of times to tile "rows". Must be non-negative.
-
- Examples
- --------
- >>> df = Dataframe([[8, 4, 7], [5, 2, 3]])
- >>> count = 2
- >>> df.tile(df, count)
- 0 1 2
- 0 8 4 7
- 1 5 2 3
- 0 8 4 7
- 1 5 2 3
-
- Returns
- -------
- The table containing the tiled "rows".
- """
- result = self.__class__._from_data(*libcudf.reshape.tile(self, count))
- result._copy_type_metadata(self)
- return result
-
 @_cudf_nvtx_annotate
 def searchsorted(
 self, values, side="left", ascending=True, na_position="last"
@@ -2135,12 +2024,24 @@ def searchsorted(
 scalar_flag = True

 if not isinstance(values, Frame):
- values = as_column(values)
- if values.dtype != self.dtype:
- self = self.astype(values.dtype)
- values = values.as_frame()
+ values = [as_column(values)]
+ else:
+ values = [*values._columns]
+ if len(values) != len(self._data):
+ raise ValueError("Mismatched number of columns to search for.")
+
+ sources = [
+ col
+ if is_dtype_equal(col.dtype, val.dtype)
+ else col.astype(val.dtype)
+ for col, val in zip(self._columns, values)
+ ]
 outcol = libcudf.search.search_sorted(
- self, values, side, ascending=ascending, na_position=na_position
+ sources,
+ values,
+ side,
+ ascending=ascending,
+ na_position=na_position,
 )

 # Return the result as a cupy array if values is non-scalar
@@ -2242,15 +2143,17 @@ def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):

 # Get an int64 column consisting of the indices required to sort self
 # according to the columns specified in by.

- to_sort = (
- self
- if by is None
- else self._get_columns_by_label(list(by), downcast=False)
- )
+ to_sort = [
+ *(
+ self
+ if by is None
+ else self._get_columns_by_label(list(by), downcast=False)
+ )._columns
+ ]

 # If given a scalar need to construct a sequence of length
 # of columns
 if np.isscalar(ascending):
- ascending = [ascending] * to_sort._num_columns
+ ascending = [ascending] * len(to_sort)

 return libcudf.sort.order_by(to_sort, ascending, na_position)

@@ -2309,6 +2212,12 @@ def scale(self):
 4 0.043478
 dtype: float64
 """
+ if isinstance(self, cudf.BaseIndex):
+ warnings.warn(
+ "Index.scale is deprecated and will be removed.",
+ FutureWarning,
+ )
+
 vmin = self.min()
 vmax = self.max()
 scaled = (self - vmin) / (vmax - vmin)
@@ -2404,8 +2313,22 @@ def _is_sorted(self, ascending=None, null_position=None):

 Returns True, if sorted as expected by ``ascending`` and
 ``null_position``, False otherwise.
""" + if ascending is not None and not cudf.api.types.is_list_like( + ascending + ): + raise TypeError( + f"Expected a list-like or None for `ascending`, got " + f"{type(ascending)}" + ) + if null_position is not None and not cudf.api.types.is_list_like( + null_position + ): + raise TypeError( + f"Expected a list-like or None for `null_position`, got " + f"{type(null_position)}" + ) return libcudf.sort.is_sorted( - self, ascending=ascending, null_position=null_position + [*self._columns], ascending=ascending, null_position=null_position ) @_cudf_nvtx_annotate @@ -2425,10 +2348,8 @@ def _split(self, splits): @_cudf_nvtx_annotate def _encode(self): - data, index, indices = libcudf.transform.table_encode(self) - for name, col in data.items(): - data[name] = col._with_type_metadata(self._data[name].dtype) - keys = self.__class__._from_data(data, index) + columns, indices = libcudf.transform.table_encode([*self._columns]) + keys = self._from_columns_like_self(columns) return keys, indices @_cudf_nvtx_annotate @@ -3365,6 +3286,12 @@ def _scan(self, op, axis=None, skipna=True): 2 6 24 3 10 34 """ + if isinstance(self, cudf.BaseIndex): + warnings.warn( + f"Index.{op} is deprecated and will be removed.", + FutureWarning, + ) + cast_to_int = op in ("cumsum", "cumprod") skipna = True if skipna is None else skipna @@ -3402,7 +3329,7 @@ def _scan(self, op, axis=None, skipna=True): # TODO: This will work for Index because it's passing self._index # (which is None), but eventually we may want to remove that parameter # for Index._from_data and simplify. - return self._from_data(results, index=self._index) + return self._from_data(results, self._index) @_cudf_nvtx_annotate @ioutils.doc_to_json() @@ -3429,7 +3356,7 @@ def to_dlpack(self): @_cudf_nvtx_annotate def to_string(self): - """ + r""" Convert to string cuDF uses Pandas internals for efficient string formatting. @@ -3446,9 +3373,9 @@ def to_string(self): >>> df['key'] = [0, 1, 2] >>> df['val'] = [float(i + 10) for i in range(3)] >>> df.to_string() - ' key val\\n0 0 10.0\\n1 1 11.0\\n2 2 12.0' + ' key val\n0 0 10.0\n1 1 11.0\n2 2 12.0' """ - return self.__repr__() + return repr(self) def __str__(self): return self.to_string() @@ -3735,9 +3662,9 @@ def _get_replacement_values_for_columns( values_columns = { col: [value] if _is_non_decimal_numeric_dtype(columns_dtype_map[col]) - else cudf.utils.utils.scalar_broadcast_to( + else full( + len(to_replace), value, - (len(to_replace),), cudf.dtype(type(value)), ) for col in columns_dtype_map diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 6b98e82d553..013ae7ad033 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1,9 +1,9 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. -import collections import itertools import pickle import warnings +from collections import abc from functools import cached_property from typing import Any, Iterable, List, Tuple, Union @@ -207,6 +207,30 @@ def cumcount(self): .reset_index(drop=True) ) + def rank( + self, + method="average", + ascending=True, + na_option="keep", + pct=False, + axis=0, + ): + """ + Return the rank of values within each group. 
+ """ + if not axis == 0: + raise NotImplementedError("Only axis=0 is supported.") + + def rank(x): + return getattr(x, "rank")( + method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + + return self.agg(rank) + @cached_property def _groupby(self): return libgroupby.GroupBy( @@ -516,7 +540,7 @@ def pipe(self, func, *args, **kwargs): """ return cudf.core.common.pipe(self, func, *args, **kwargs) - def apply(self, function): + def apply(self, function, *args): """Apply a python transformation function over the grouped chunk. Parameters @@ -566,19 +590,20 @@ def mult(df): .. code-block:: >>> df = pd.DataFrame({ - 'a': [1, 1, 2, 2], - 'b': [1, 2, 1, 2], - 'c': [1, 2, 3, 4]}) + ... 'a': [1, 1, 2, 2], + ... 'b': [1, 2, 1, 2], + ... 'c': [1, 2, 3, 4], + ... }) >>> gdf = cudf.from_pandas(df) >>> df.groupby('a').apply(lambda x: x.iloc[[0]]) - a b c - a - 1 0 1 1 1 - 2 2 2 1 3 + a b c + a + 1 0 1 1 1 + 2 2 2 1 3 >>> gdf.groupby('a').apply(lambda x: x.iloc[[0]]) - a b c - 0 1 1 1 - 2 2 1 3 + a b c + 0 1 1 1 + 2 2 1 3 """ if not callable(function): raise TypeError(f"type {type(function)} is not callable") @@ -594,8 +619,7 @@ def mult(df): chunks = [ grouped_values[s:e] for s, e in zip(offsets[:-1], offsets[1:]) ] - chunk_results = [function(chk) for chk in chunks] - + chunk_results = [function(chk, *args) for chk in chunks] if not len(chunk_results): return self.obj.head(0) @@ -603,8 +627,11 @@ def mult(df): result = cudf.Series(chunk_results, index=group_names) result.index.names = self.grouping.names elif isinstance(chunk_results[0], cudf.Series): - result = cudf.concat(chunk_results, axis=1).T - result.index.names = self.grouping.names + if isinstance(self.obj, cudf.DataFrame): + result = cudf.concat(chunk_results, axis=1).T + result.index.names = self.grouping.names + else: + result = cudf.concat(chunk_results) else: result = cudf.concat(chunk_results) @@ -1104,16 +1131,11 @@ def _cov_or_corr(self, func, method_name): for i in range(0, len(cols_list), num_cols) ] - def combine_columns(gb_cov_corr, ys): - list_of_columns = [gb_cov_corr._data[y] for y in ys] - frame = cudf.core.frame.Frame._from_columns(list_of_columns, ys) - return interleave_columns(frame) - # interleave: combines the correlation or covariance results for each # column-pair into a single column res = cudf.DataFrame._from_data( { - x: combine_columns(gb_cov_corr, ys) + x: interleave_columns([gb_cov_corr._data[y] for y in ys]) for ys, x in zip(cols_split, column_names) } ) @@ -1581,8 +1603,8 @@ def agg(self, func): return result - def apply(self, func): - result = super().apply(func) + def apply(self, func, *args): + result = super().apply(func, *args) # apply Series name to result result.name = self.obj.name @@ -1637,7 +1659,7 @@ def _handle_by_or_level(self, by=None, level=None): self._handle_series(by) elif isinstance(by, cudf.BaseIndex): self._handle_index(by) - elif isinstance(by, collections.abc.Mapping): + elif isinstance(by, abc.Mapping): self._handle_mapping(by) elif isinstance(by, Grouper): self._handle_grouper(by) @@ -1756,7 +1778,7 @@ def _is_multi_agg(aggs): Returns True if more than one aggregation is performed on any of the columns as specified in `aggs`. 
""" - if isinstance(aggs, collections.abc.Mapping): + if isinstance(aggs, abc.Mapping): return any(is_list_like(agg) for agg in aggs.values()) if is_list_like(aggs): return True diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index aff13025e72..1ed530ae22b 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -76,10 +76,10 @@ def _lexsorted_equal_range( sort_inds = None sort_vals = idx lower_bound = search_sorted( - sort_vals, key_as_table, side="left" + [*sort_vals._data.columns], [*key_as_table._columns], side="left" ).element_indexing(0) upper_bound = search_sorted( - sort_vals, key_as_table, side="right" + [*sort_vals._data.columns], [*key_as_table._columns], side="right" ).element_indexing(0) return lower_bound, upper_bound, sort_inds @@ -1116,7 +1116,7 @@ def __repr__(self): # related issue : https://github.com/pandas-dev/pandas/issues/35389 if isinstance(preprocess, CategoricalIndex): if preprocess.categories.dtype.kind == "f": - output = ( + output = repr( preprocess.astype("str") .to_pandas() .astype( @@ -1127,18 +1127,17 @@ def __repr__(self): ordered=preprocess.dtype.ordered, ) ) - .__repr__() ) break_idx = output.find("ordered=") output = ( output[:break_idx].replace("'", "") + output[break_idx:] ) else: - output = preprocess.to_pandas().__repr__() + output = repr(preprocess.to_pandas()) output = output.replace("nan", cudf._NA_REP) elif preprocess._values.nullable: - output = self._clean_nulls_from_index().to_pandas().__repr__() + output = repr(self._clean_nulls_from_index().to_pandas()) if not isinstance(self, StringIndex): # We should remove all the single quotes @@ -1150,7 +1149,7 @@ def __repr__(self): # of StringIndex and it is valid to have them. output = output.replace("'", "") else: - output = preprocess.to_pandas().__repr__() + output = repr(preprocess.to_pandas()) # Fix and correct the class name of the output # string by finding first occurrence of "(" in the output diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 10736948b57..1361fc56fa0 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -818,7 +818,8 @@ def hash_values(self, method="murmur3"): # calculation, necessitating the unfortunate circular reference to the # child class here. 
return cudf.Series._from_data(
- {None: libcudf.hash.hash(self, method)}, index=self.index
+ {None: libcudf.hash.hash([*self._columns], method)},
+ index=self.index,
 )

 def _gather(
@@ -991,6 +992,7 @@ def add_prefix(self, prefix):
 Examples
 --------
 **Series**
+
 >>> s = cudf.Series([1, 2, 3, 4])
 >>> s
 0 1
@@ -1006,6 +1008,7 @@ def add_prefix(self, prefix):
 dtype: int64

 **DataFrame**
+
 >>> df = cudf.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
 >>> df
 A B
@@ -2204,7 +2207,7 @@ def _make_operands_and_index_for_binop(
 Optional[cudf.BaseIndex],
 ]:
 raise NotImplementedError(
- "Binary operations are not supported for {self.__class__}"
+ f"Binary operations are not supported for {self.__class__}"
 )

 def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
@@ -2688,21 +2691,52 @@ def _explode(self, explode_column: Any, ignore_index: bool):
 if not ignore_index and self._index is not None:
 explode_column_num += self._index.nlevels

- data, index = libcudf.lists.explode_outer(
- self, explode_column_num, ignore_index
+ exploded = libcudf.lists.explode_outer(
+ [
+ *(self._index._data.columns if not ignore_index else ()),
+ *self._columns,
+ ],
+ explode_column_num,
 )
- res = self.__class__._from_data(
- ColumnAccessor(
- data,
- multiindex=self._data.multiindex,
- level_names=self._data._level_names,
- ),
- index=index,
+
+ return self._from_columns_like_self(
+ exploded,
+ self._column_names,
+ self._index_names if not ignore_index else None,
 )
- if not ignore_index and self._index is not None:
- res.index.names = self._index.names
- return res
+
+ @_cudf_nvtx_annotate
+ def tile(self, count):
+ """Repeats the rows `count` times to form a new Frame.
+
+ Parameters
+ ----------
+ count : int
+ Number of times to tile the rows. Must be non-negative.
+
+ Examples
+ --------
+ >>> import cudf
+ >>> df = cudf.DataFrame([[8, 4, 7], [5, 2, 3]])
+ >>> count = 2
+ >>> df.tile(count)
+ 0 1 2
+ 0 8 4 7
+ 1 5 2 3
+ 0 8 4 7
+ 1 5 2 3
+
+ Returns
+ -------
+ The indexed frame containing the tiled "rows".
+ """
+ return self._from_columns_like_self(
+ libcudf.reshape.tile(
+ [*self._index._columns, *self._columns], count
+ ),
+ column_names=self._column_names,
+ index_names=self._index_names,
+ )

 @_cudf_nvtx_annotate
 @docutils.doc_apply(
@@ -3543,6 +3577,93 @@ def ge(
 other=other, op="__ge__", fill_value=fill_value, can_reindex=True
 )

+ @_cudf_nvtx_annotate
+ def rank(
+ self,
+ axis=0,
+ method="average",
+ numeric_only=None,
+ na_option="keep",
+ ascending=True,
+ pct=False,
+ ):
+ """
+ Compute numerical data ranks (1 through n) along axis.
+
+ By default, equal values are assigned a rank that is the average of the
+ ranks of those values.
+
+ Parameters
+ ----------
+ axis : {0 or 'index'}, default 0
+ Index to direct ranking.
+ method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
+ How to rank the group of records that have the same value
+ (i.e. ties):
+ * average: average rank of the group
+ * min: lowest rank in the group
+ * max: highest rank in the group
+ * first: ranks assigned in order they appear in the array
+ * dense: like 'min', but rank always increases by 1 between groups.
+ numeric_only : bool, optional
+ For DataFrame objects, rank only numeric columns if set to True.
+ na_option : {'keep', 'top', 'bottom'}, default 'keep'
+ How to rank NaN values:
+ * keep: assign NaN rank to NaN values
+ * top: assign smallest rank to NaN values if ascending
+ * bottom: assign highest rank to NaN values if ascending.
+ ascending : bool, default True + Whether or not the elements should be ranked in ascending order. + pct : bool, default False + Whether or not to display the returned rankings in percentile + form. + + Returns + ------- + same type as caller + Return a Series or DataFrame with data ranks as values. + """ + if isinstance(self, cudf.BaseIndex): + warnings.warn( + "Index.rank is deprecated and will be removed.", + FutureWarning, + ) + + if method not in {"average", "min", "max", "first", "dense"}: + raise KeyError(method) + + method_enum = libcudf.aggregation.RankMethod[method.upper()] + if na_option not in {"keep", "top", "bottom"}: + raise ValueError( + "na_option must be one of 'keep', 'top', or 'bottom'" + ) + + if axis not in (0, "index"): + raise NotImplementedError( + f"axis must be `0`/`index`, " + f"axis={axis} is not yet supported in rank" + ) + + source = self + if numeric_only: + numeric_cols = ( + name + for name in self._data.names + if _is_non_decimal_numeric_dtype(self._data[name]) + ) + source = self._get_columns_by_label(numeric_cols) + if source.empty: + return source.astype("float64") + + result_columns = libcudf.sort.rank_columns( + [*source._columns], method_enum, na_option, ascending, pct + ) + + return self.__class__._from_data( + dict(zip(source._column_names, result_columns)), + index=source._index, + ).astype(np.float64) + def _check_duplicate_level_names(specified, level_names): """Raise if any of `specified` has duplicates in `level_names`.""" diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py index ead0cd566d9..e1057c3b997 100644 --- a/python/cudf/cudf/core/join/_join_helpers.py +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -2,8 +2,8 @@ from __future__ import annotations -import collections import warnings +from collections import abc from typing import TYPE_CHECKING, Any, Tuple, cast import numpy as np @@ -166,7 +166,7 @@ def _match_categorical_dtypes_both( def _coerce_to_tuple(obj): - if isinstance(obj, collections.abc.Iterable) and not isinstance(obj, str): + if isinstance(obj, abc.Iterable) and not isinstance(obj, str): return tuple(obj) else: return (obj,) diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index c7e46cf0165..6a495ef8d9a 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -177,15 +177,15 @@ def __init__( ) def perform_merge(self) -> Frame: - left_join_cols = {} - right_join_cols = {} + left_join_cols = [] + right_join_cols = [] for left_key, right_key in zip(self._left_keys, self._right_keys): lcol = left_key.get(self.lhs) rcol = right_key.get(self.rhs) lcol_casted, rcol_casted = _match_join_keys(lcol, rcol, self.how) - left_join_cols[left_key.name] = lcol_casted - right_join_cols[left_key.name] = rcol_casted + left_join_cols.append(lcol_casted) + right_join_cols.append(rcol_casted) # Categorical dtypes must be cast back from the underlying codes # type that was returned by _match_join_keys. 
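The tie-breaking methods enumerated in the `rank` docstring above follow pandas semantics, so they can be illustrated without a GPU. A minimal pandas-only sketch (the expected outputs are standard pandas behavior, not taken from this patch):

    import pandas as pd

    s = pd.Series([7, 7, 9, 2])
    print(s.rank(method="average").tolist())  # [2.5, 2.5, 4.0, 1.0]: ties share the mean rank
    print(s.rank(method="min").tolist())      # [2.0, 2.0, 4.0, 1.0]: ties share the lowest rank
    print(s.rank(method="dense").tolist())    # [2.0, 2.0, 3.0, 1.0]: like 'min', but no gap after ties
    print(s.rank(method="first").tolist())    # [2.0, 3.0, 4.0, 1.0]: ties broken by position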
@@ -201,8 +201,8 @@ def perform_merge(self) -> Frame: right_key.set(self.rhs, rcol_casted, validate=False) left_rows, right_rows = self._joiner( - cudf.core.frame.Frame(left_join_cols), - cudf.core.frame.Frame(right_join_cols), + left_join_cols, + right_join_cols, how=self.how, ) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 591ec582a3b..332e8897d3b 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -5,12 +5,12 @@ import itertools import numbers import pickle -from collections.abc import Sequence +from collections import abc from functools import cached_property from numbers import Integral from typing import Any, List, MutableMapping, Tuple, Union -import cupy +import cupy as cp import numpy as np import pandas as pd from pandas._config import get_option @@ -29,11 +29,26 @@ as_index, ) from cudf.utils.docutils import doc_apply -from cudf.utils.utils import ( - NotIterable, - _cudf_nvtx_annotate, - _maybe_indices_to_slice, -) +from cudf.utils.utils import NotIterable, _cudf_nvtx_annotate + + +def _maybe_indices_to_slice(indices: cp.ndarray) -> Union[slice, cp.ndarray]: + """Makes best effort to convert an array of indices into a python slice. + If the conversion is not possible, return input. `indices` are expected + to be valid. + """ + # TODO: improve efficiency by avoiding sync. + if len(indices) == 1: + x = indices[0].item() + return slice(x, x + 1) + if len(indices) == 2: + x1, x2 = indices[0].item(), indices[1].item() + return slice(x1, x2 + 1, x2 - x1) + start, step = indices[0].item(), (indices[1] - indices[0]).item() + stop = start + step * len(indices) + if (indices == cp.arange(start, stop, step)).all(): + return slice(start, stop, step) + return indices class MultiIndex(Frame, BaseIndex, NotIterable): @@ -95,7 +110,7 @@ def __init__( if len(levels) == 0: raise ValueError("Must pass non-zero number of levels/codes") if not isinstance(codes, cudf.DataFrame) and not isinstance( - codes[0], (Sequence, np.ndarray) + codes[0], (abc.Sequence, np.ndarray) ): raise TypeError("Codes is not a Sequence of sequences") @@ -455,7 +470,7 @@ def __repr__(self): else: preprocess = preprocess.to_pandas(nullable=True) - output = preprocess.__repr__() + output = repr(preprocess) output_prefix = self.__class__.__name__ + "(" output = output.lstrip(output_prefix) lines = output.split("\n") @@ -912,7 +927,7 @@ def deserialize(cls, header, frames): def __getitem__(self, index): flatten = isinstance(index, int) - if isinstance(index, (Integral, Sequence)): + if isinstance(index, (Integral, abc.Sequence)): index = np.array(index) elif isinstance(index, slice): start, stop, step = index.indices(len(self)) @@ -1709,7 +1724,7 @@ def get_loc(self, key, method=None, tolerance=None): return true_inds # Not sorted and not unique. Return a boolean mask - mask = cupy.full(self._data.nrows, False) + mask = cp.full(self._data.nrows, False) mask[true_inds] = True return mask diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index a388e2560ee..5977b63777f 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -1,6 +1,7 @@ # Copyright (c) 2018-2022, NVIDIA CORPORATION. 
import itertools +from collections import abc from typing import Dict, Optional import numpy as np @@ -485,14 +486,14 @@ def melt( 1 b B 3 2 c B 5 """ - assert col_level in (None,) + if col_level is not None: + raise NotImplementedError("col_level != None is not supported yet.") # Arg cleaning - import collections # id_vars if id_vars is not None: - if not isinstance(id_vars, collections.abc.Sequence): + if not isinstance(id_vars, abc.Sequence): id_vars = [id_vars] id_vars = list(id_vars) missing = set(id_vars) - set(frame._column_names) @@ -506,7 +507,7 @@ def melt( # value_vars if value_vars is not None: - if not isinstance(value_vars, collections.abc.Sequence): + if not isinstance(value_vars, abc.Sequence): value_vars = [value_vars] value_vars = list(value_vars) missing = set(value_vars) - set(frame._column_names) @@ -771,10 +772,10 @@ def merge_sorted( Parameters ---------- - objs : list of DataFrame, Series, or Index + objs : list of DataFrame or Series keys : list, default None List of Column names to sort by. If None, all columns used - (Ignored if `index=True`) + (Ignored if `by_index=True`) by_index : bool, default False Use index for sorting. `keys` input will be ignored if True ignore_index : bool, default False @@ -805,18 +806,38 @@ def merge_sorted( if by_index and ignore_index: raise ValueError("`by_index` and `ignore_index` cannot both be True") - result = objs[0].__class__._from_data( - *cudf._lib.merge.merge_sorted( - objs, - keys=keys, - by_index=by_index, - ignore_index=ignore_index, + if by_index: + key_columns_indices = list(range(0, objs[0]._index.nlevels)) + else: + if keys is None: + key_columns_indices = list(range(0, objs[0]._num_columns)) + else: + key_columns_indices = [ + objs[0]._column_names.index(key) for key in keys + ] + if not ignore_index: + key_columns_indices = [ + idx + objs[0]._index.nlevels for idx in key_columns_indices + ] + + columns = [ + [ + *(obj._index._data.columns if not ignore_index else ()), + *obj._columns, + ] + for obj in objs + ] + + return objs[0]._from_columns_like_self( + cudf._lib.merge.merge_sorted( + input_columns=columns, + key_columns_indices=key_columns_indices, ascending=ascending, na_position=na_position, - ) + ), + column_names=objs[0]._column_names, + index_names=None if ignore_index else objs[0]._index_names, ) - result._copy_type_metadata(objs[0]) - return result def _pivot(df, index, columns): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 5bf52ed7520..d813db58d1e 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6,7 +6,7 @@ import inspect import pickle import warnings -from collections import abc as abc +from collections import abc from shutil import get_terminal_size from typing import Any, Dict, MutableMapping, Optional, Set, Tuple, Type, Union @@ -14,11 +14,11 @@ import numpy as np import pandas as pd from pandas._config import get_option +from pandas.core.dtypes.common import is_float import cudf from cudf import _lib as libcudf from cudf._lib.scalar import _is_null_host_scalar -from cudf._lib.transform import bools_to_mask from cudf._typing import ColumnLike, DataFrameOrSeries, ScalarLike from cudf.api.types import ( _is_non_decimal_numeric_dtype, @@ -42,7 +42,6 @@ arange, as_column, column, - column_empty_like, full, ) from cudf.core.column.categorical import ( @@ -64,15 +63,16 @@ ) from cudf.core.single_column_frame import SingleColumnFrame from cudf.core.udf.scalar_function import _get_scalar_kernel -from cudf.utils import cudautils, 
docutils +from cudf.utils import docutils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( can_convert_to_column, find_common_type, is_mixed_with_object_dtype, min_scalar_type, + to_cudf_compatible_scalar, ) -from cudf.utils.utils import _cudf_nvtx_annotate, to_cudf_compatible_scalar +from cudf.utils.utils import _cudf_nvtx_annotate def _append_new_row_inplace(col: ColumnLike, value: ScalarLike): @@ -1064,11 +1064,8 @@ def __repr__(self): ) or isinstance( preprocess._column, cudf.core.column.timedelta.TimeDeltaColumn ): - output = ( - preprocess.astype("O") - .fillna(cudf._NA_REP) - .to_pandas() - .__repr__() + output = repr( + preprocess.astype("O").fillna(cudf._NA_REP).to_pandas() ) elif isinstance( preprocess._column, cudf.core.column.CategoricalColumn @@ -1111,7 +1108,7 @@ def __repr__(self): na_rep=cudf._NA_REP, ) else: - output = preprocess.to_pandas().__repr__() + output = repr(preprocess.to_pandas()) lines = output.split("\n") @@ -2021,9 +2018,15 @@ def _return_sentinel_series(): def apply(self, func, convert_dtype=True, args=(), **kwargs): """ Apply a scalar function to the values of a Series. + Similar to ``pandas.Series.apply``. - Similar to `pandas.Series.apply. Applies a user - defined function elementwise over a series. + ``apply`` relies on Numba to JIT compile ``func``. + Thus the allowed operations within ``func`` are limited + to the ones specified + [here](https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html). + For more information, see the cuDF guide to + user defined functions found + [here](https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html). Parameters ---------- @@ -2061,7 +2064,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): 2 4 dtype: int64 - Apply a basic function to a series with nulls + Apply a basic function to a series with nulls: >>> sr = cudf.Series([1,cudf.NA,3]) >>> def f(x): @@ -2073,7 +2076,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): dtype: int64 Use a function that does something conditionally, - based on if the value is or is not null + based on if the value is or is not null: >>> sr = cudf.Series([1,cudf.NA,3]) >>> def f(x): @@ -2091,7 +2094,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): as derived from the UDFs logic. Note that this means the common type will be returned even if such data is passed that would not result in any values of that - dtype. + dtype: >>> sr = cudf.Series([1,cudf.NA,3]) >>> def f(x): @@ -2104,7 +2107,10 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): """ if convert_dtype is not True: raise ValueError("Series.apply only supports convert_dtype=True") - return self._apply(func, _get_scalar_kernel, *args, **kwargs) + + result = self._apply(func, _get_scalar_kernel, *args, **kwargs) + result.name = self.name + return result @_cudf_nvtx_annotate def applymap(self, udf, out_dtype=None): @@ -2963,19 +2969,22 @@ def digitize(self, bins, right=False): @_cudf_nvtx_annotate def diff(self, periods=1): - """Calculate the difference between values at positions i and i - N in - an array and store the output in a new array. + """First discrete difference of element. + + Calculates the difference of a Series element compared with another + element in the Series (default is element in previous row). + + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, + accepts negative values. Returns ------- Series First differences of the Series. 
- Notes - ----- - Diff currently only supports float and integer dtype columns with - no null values. - Examples -------- >>> import cudf @@ -3022,32 +3031,12 @@ def diff(self, periods=1): 5 dtype: int64 """ - if self.has_nulls: - raise AssertionError( - "Diff currently requires columns with no null values" - ) - - if not np.issubdtype(self.dtype, np.number): - raise NotImplementedError( - "Diff currently only supports numeric dtypes" - ) - - # TODO: move this libcudf - input_col = self._column - output_col = column_empty_like(input_col) - output_mask = column_empty_like(input_col, dtype="bool") - if output_col.size > 0: - cudautils.gpu_diff.forall(output_col.size)( - input_col, output_col, output_mask, periods - ) - - output_col = column.build_column( - data=output_col.data, - dtype=output_col.dtype, - mask=bools_to_mask(output_mask), - ) + if not is_integer(periods): + if not (is_float(periods) and periods.is_integer()): + raise ValueError("periods must be an integer") + periods = int(periods) - return Series(output_col, name=self.name, index=self.index) + return self - self.shift(periods=periods) @copy_docstring(SeriesGroupBy) @_cudf_nvtx_annotate @@ -4625,13 +4614,13 @@ def _align_indices(series_list, how="outer", allow_non_unique=False): @_cudf_nvtx_annotate def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): - """Returns a boolean array where two arrays are equal within a tolerance. + r"""Returns a boolean array where two arrays are equal within a tolerance. Two values in ``a`` and ``b`` are considered equal when the following equation is satisfied. .. math:: - |a - b| \\le \\mathrm{atol} + \\mathrm{rtol} |b| + |a - b| \le \mathrm{atol} + \mathrm{rtol} |b| Parameters ---------- diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 003f8ea7fdb..7fa66bd831d 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -52,9 +52,9 @@ def _reduce( if level is not None: raise NotImplementedError("level parameter is not implemented yet") - if numeric_only not in (None, True): + if numeric_only: raise NotImplementedError( - "numeric_only parameter is not implemented yet" + f"Series.{op} does not implement numeric_only" ) try: return getattr(self._column, op)(**kwargs) @@ -81,8 +81,8 @@ def name(self, value): @property # type: ignore @_cudf_nvtx_annotate - def ndim(self): - """Get the dimensionality (always 1 for single-columned frames).""" + def ndim(self): # noqa: D401 + """Number of dimensions of the underlying data, by definition 1.""" return 1 @property # type: ignore diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py index 782b74ef4a6..83cceff5c4c 100644 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ b/python/cudf/cudf/core/subword_tokenizer.py @@ -1,9 +1,9 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. 
from __future__ import annotations +import warnings from typing import Union -from warnings import warn import cupy as cp @@ -186,7 +186,7 @@ def __call__( "When truncation is not True, the behaviour currently differs " "from HuggingFace as cudf always returns overflowing tokens" ) - warn(warning_msg) + warnings.warn(warning_msg) if padding != "max_length": error_msg = ( diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index 7eea7cedaad..0273227010b 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -57,7 +57,7 @@ def to_numeric(arg, errors="raise", downcast=None): otherwise ndarray Notes - ------- + ----- An important difference from pandas is that this function does not accept mixed numeric/non-numeric type sequences. For example ``[1, 'a']``. A ``TypeError`` will be raised when such input is received, regardless of diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index cdb9492c695..ed5fc1d6d23 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -63,7 +63,7 @@ def __hash__(self): Needed so that numba caches type instances with different `value_type` separately. """ - return self.__repr__().__hash__() + return hash(repr(self)) def unify(self, context, other): """ diff --git a/python/cudf/cudf/io/dlpack.py b/python/cudf/cudf/io/dlpack.py index 00a2cb4cee2..644643db83c 100644 --- a/python/cudf/cudf/io/dlpack.py +++ b/python/cudf/cudf/io/dlpack.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. import cudf @@ -34,12 +34,13 @@ def from_dlpack(pycapsule_obj): tensor is row-major, transpose it before passing it to this function. 
""" - data, _ = libdlpack.from_dlpack(pycapsule_obj) + columns = libdlpack.from_dlpack(pycapsule_obj) + column_names = range(len(columns)) - if len(data) == 1: - return cudf.Series._from_data(data) + if len(columns) == 1: + return cudf.Series._from_columns(columns, column_names=column_names) else: - return cudf.DataFrame._from_data(data) + return cudf.DataFrame._from_columns(columns, column_names=column_names) @ioutils.doc_to_dlpack() @@ -91,4 +92,4 @@ def to_dlpack(cudf_obj): ) gdf = gdf.astype(dtype) - return libdlpack.to_dlpack(gdf) + return libdlpack.to_dlpack([*gdf._columns]) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index 4dd9f434097..607d9121630 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -3,7 +3,7 @@ import itertools import re import warnings -from collections.abc import Mapping, Sequence +from collections import abc from contextlib import contextmanager from decimal import Decimal @@ -238,9 +238,9 @@ def _get_args_kwars_for_assert_exceptions(func_args_and_kwargs): else: if len(func_args_and_kwargs) == 1: func_args, func_kwargs = [], {} - if isinstance(func_args_and_kwargs[0], Sequence): + if isinstance(func_args_and_kwargs[0], abc.Sequence): func_args = func_args_and_kwargs[0] - elif isinstance(func_args_and_kwargs[0], Mapping): + elif isinstance(func_args_and_kwargs[0], abc.Mapping): func_kwargs = func_args_and_kwargs[0] else: raise ValueError( @@ -248,12 +248,12 @@ def _get_args_kwars_for_assert_exceptions(func_args_and_kwargs): "either a Sequence or a Mapping" ) elif len(func_args_and_kwargs) == 2: - if not isinstance(func_args_and_kwargs[0], Sequence): + if not isinstance(func_args_and_kwargs[0], abc.Sequence): raise ValueError( "Positional argument at 1st position of " "func_args_and_kwargs should be a sequence." ) - if not isinstance(func_args_and_kwargs[1], Mapping): + if not isinstance(func_args_and_kwargs[1], abc.Mapping): raise ValueError( "Key-word argument at 2nd position of " "func_args_and_kwargs should be a dictionary mapping." @@ -363,6 +363,13 @@ def assert_column_memory_ne( raise AssertionError("lhs and rhs holds the same memory.") +def _create_pandas_series(data=None, index=None, dtype=None, *args, **kwargs): + # Wrapper around pd.Series using a float64 default dtype for empty data. 
+ if dtype is None and (data is None or len(data) == 0): + dtype = "float64" + return pd.Series(data=data, index=index, dtype=dtype, *args, **kwargs) + + parametrize_numeric_dtypes_pairwise = pytest.mark.parametrize( "left_dtype,right_dtype", list(itertools.combinations_with_replacement(NUMERIC_TYPES, 2)), diff --git a/python/cudf/cudf/testing/dataset_generator.py b/python/cudf/cudf/testing/dataset_generator.py index c3e25adad77..8134e307a72 100644 --- a/python/cudf/cudf/testing/dataset_generator.py +++ b/python/cudf/cudf/testing/dataset_generator.py @@ -504,7 +504,11 @@ def rand_dataframe( generator=lambda cardinality=cardinality: [ mimesis.random.random.schoice( string.printable, - meta.get("max_string_length", 1000), + np.random.randint( + low=0, + high=meta.get("max_string_length", 1000), + size=1, + )[0], ) for _ in range(cardinality) ], diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 702136d7c98..b134d2b26e9 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -215,7 +215,7 @@ def assert_column_equal( msg1 = f"{left.ordered}" msg2 = f"{right.ordered}" raise_assert_detail( - "{obj} category", "Orders are different", msg1, msg2 + f"{obj} category", "Orders are different", msg1, msg2 ) if ( diff --git a/python/cudf/cudf/tests/test_api_types.py b/python/cudf/cudf/tests/test_api_types.py index e7cf113f604..c2cd78f88a0 100644 --- a/python/cudf/cudf/tests/test_api_types.py +++ b/python/cudf/cudf/tests/test_api_types.py @@ -3,10 +3,10 @@ import numpy as np import pandas as pd import pytest -from pandas.api import types as ptypes +from pandas.api import types as pd_types import cudf -from cudf.api import types as types +from cudf.api import types @pytest.mark.parametrize( @@ -1035,11 +1035,13 @@ def test_is_decimal_dtype(obj, expect): ), ) def test_pandas_agreement(obj): - assert types.is_categorical_dtype(obj) == ptypes.is_categorical_dtype(obj) - assert types.is_numeric_dtype(obj) == ptypes.is_numeric_dtype(obj) - assert types.is_integer_dtype(obj) == ptypes.is_integer_dtype(obj) - assert types.is_integer(obj) == ptypes.is_integer(obj) - assert types.is_string_dtype(obj) == ptypes.is_string_dtype(obj) + assert types.is_categorical_dtype(obj) == pd_types.is_categorical_dtype( + obj + ) + assert types.is_numeric_dtype(obj) == pd_types.is_numeric_dtype(obj) + assert types.is_integer_dtype(obj) == pd_types.is_integer_dtype(obj) + assert types.is_integer(obj) == pd_types.is_integer(obj) + assert types.is_string_dtype(obj) == pd_types.is_string_dtype(obj) @pytest.mark.parametrize( @@ -1115,7 +1117,7 @@ def test_pandas_agreement(obj): ), ) def test_pandas_agreement_scalar(obj): - assert types.is_scalar(obj) == ptypes.is_scalar(obj) + assert types.is_scalar(obj) == pd_types.is_scalar(obj) # TODO: Add test of interval. 
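The `_create_pandas_series` helper added in the `testing/_utils.py` hunk above exists because constructing an empty `pd.Series` without a dtype changed behavior across pandas versions (newer pandas warns and moves the default toward `object`). A standalone rendition of the same logic, runnable without cuDF; the name and the stability rationale in the comment are my reading of the helper, not stated in the patch:

    import pandas as pd

    def create_pandas_series(data=None, index=None, dtype=None):
        # Pin empty inputs to float64 so test expectations do not drift
        # with the pandas default-dtype deprecation.
        if dtype is None and (data is None or len(data) == 0):
            dtype = "float64"
        return pd.Series(data=data, index=index, dtype=dtype)

    assert create_pandas_series([]).dtype == "float64"
    assert create_pandas_series([1, 2, 3]).dtype == "int64"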
diff --git a/python/cudf/cudf/tests/test_applymap.py b/python/cudf/cudf/tests/test_applymap.py index bd322a28a08..c8a9b5d03f5 100644 --- a/python/cudf/cudf/tests/test_applymap.py +++ b/python/cudf/cudf/tests/test_applymap.py @@ -6,7 +6,7 @@ import numpy as np import pytest -from cudf import Series +from cudf import NA, DataFrame, Series from cudf.testing import _utils as utils @@ -58,3 +58,45 @@ def test_applymap_change_out_dtype(): expect = np.array(data, dtype=float) got = out.to_numpy() np.testing.assert_array_equal(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2, 3], "b": [4, 5, 6]}, + {"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]}, + {"a": [1, 2, 3], "b": [True, False, True]}, + {"a": [1, NA, 2], "b": [NA, 4, NA]}, + ], +) +@pytest.mark.parametrize( + "func", + [ + lambda x: x + 1, + lambda x: x - 0.5, + lambda x: 2 if x is NA else 2 + (x + 1) / 4.1, + lambda x: 42, + ], +) +@pytest.mark.parametrize("na_action", [None, "ignore"]) +def test_applymap_dataframe(data, func, na_action): + gdf = DataFrame(data) + pdf = gdf.to_pandas(nullable=True) + + expect = pdf.applymap(func, na_action=na_action) + got = gdf.applymap(func, na_action=na_action) + + utils.assert_eq(expect, got, check_dtype=False) + + +def test_applymap_raise_cases(): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + def f(x, some_kwarg=0): + return x + some_kwarg + + with pytest.raises(NotImplementedError): + df.applymap(f, some_kwarg=1) + + with pytest.raises(ValueError): + df.applymap(f, na_action="some_invalid_option") diff --git a/python/cudf/cudf/tests/test_array_function.py b/python/cudf/cudf/tests/test_array_function.py index 29654fb9556..b4e07ed67c9 100644 --- a/python/cudf/cudf/tests/test_array_function.py +++ b/python/cudf/cudf/tests/test_array_function.py @@ -1,17 +1,36 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. import numpy as np import pandas as pd import pytest import cudf from cudf.testing._utils import assert_eq -from cudf.utils.utils import IS_NEP18_ACTIVE -missing_arrfunc_cond = not IS_NEP18_ACTIVE -missing_arrfunc_reason = "NEP-18 support is not available in NumPy" -# Test implementation based on dask array test -# https://github.com/dask/dask/blob/master/dask/array/tests/test_array_function.py +# To determine if NEP18 is available in the current version of NumPy we simply +# attempt to concatenate an object with `__array_function__` defined and see if +# NumPy invokes the protocol or not. Taken from dask array +# https://github.com/dask/dask/blob/master/dask/array/utils.py#L352-L363 +# TODO: Unclear if this is still necessary. NEP 18 was introduced as the +# default in 1.17 (https://github.com/numpy/numpy/releases/tag/v1.17.0) almost +# 3 years ago, and it was originally introduced one version before in 1.16 +# (although not enabled by default then). Can we safely assume that testers +# will have a sufficiently new version of numpy to run these tests? 
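To make the probe below concrete: under NEP 18 (on by default since NumPy 1.17, as the comment above notes), functions like `np.concatenate` hand the call to an argument's `__array_function__` instead of coercing it to an ndarray, so the `try`/`except ValueError` in the hunk distinguishes dispatch from a coercion failure. A standalone sketch of the same mechanism (plain NumPy; the class name and return value are illustrative):

    import numpy as np

    class Probe:
        # Any object exposing __array_function__ participates in NEP 18 dispatch.
        def __array_function__(self, func, types, args, kwargs):
            return f"dispatched {func.__name__}"

    # With NEP 18 active, NumPy never tries to coerce the probe to an array,
    # so no ValueError is raised and the sentinel value comes back instead.
    print(np.concatenate([Probe()]))  # -> dispatched concatenate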
+class _Test: + def __array_function__(self, *args, **kwargs): + return True + + +try: + np.concatenate([_Test()]) +except ValueError: + missing_arrfunc_cond = True +else: + missing_arrfunc_cond = False + +del _Test + +missing_arrfunc_reason = "NEP-18 support is not available in NumPy" @pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason) diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 3cc3e4153b1..2017ba06f76 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -646,7 +646,7 @@ def test_concat_two_empty_series(ignore_index, axis): ), ], ) -def test_concat_dataframe_with_multiIndex(df1, df2): +def test_concat_dataframe_with_multiindex(df1, df2): gdf1 = df1 gdf1 = gdf1.set_index(["k1", "k2"]) diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py index f06142f4cc9..15dfa111860 100644 --- a/python/cudf/cudf/tests/test_contains.py +++ b/python/cudf/cudf/tests/test_contains.py @@ -1,4 +1,6 @@ -from datetime import datetime as dt +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + +import datetime import numpy as np import pandas as pd @@ -41,12 +43,12 @@ def get_string_series(): testdata_all = [ ( cudf_date_series("20010101", "20020215", freq="400h"), - dt.strptime("2001-01-01", "%Y-%m-%d"), + datetime.datetime.strptime("2001-01-01", "%Y-%m-%d"), True, ), ( cudf_date_series("20010101", "20020215", freq="400h"), - dt.strptime("2000-01-01", "%Y-%m-%d"), + datetime.datetime.strptime("2000-01-01", "%Y-%m-%d"), False, ), (cudf_date_series("20010101", "20020215", freq="400h"), 20000101, False), diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index a7fad792bd0..d95fe278469 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -13,7 +13,6 @@ from copy import copy import cupy -import cupy as cp import numpy as np import pandas as pd import pyarrow as pa @@ -2468,35 +2467,6 @@ def test_arrow_handle_no_index_name(pdf, gdf): assert_eq(expect, got) -@pytest.mark.parametrize("num_rows", [1, 3, 10, 100]) -@pytest.mark.parametrize("num_bins", [1, 2, 4, 20]) -@pytest.mark.parametrize("right", [True, False]) -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"]) -@pytest.mark.parametrize("series_bins", [True, False]) -def test_series_digitize(num_rows, num_bins, right, dtype, series_bins): - data = np.random.randint(0, 100, num_rows).astype(dtype) - bins = np.unique(np.sort(np.random.randint(2, 95, num_bins).astype(dtype))) - s = cudf.Series(data) - if series_bins: - s_bins = cudf.Series(bins) - indices = s.digitize(s_bins, right) - else: - indices = s.digitize(bins, right) - np.testing.assert_array_equal( - np.digitize(data, bins, right), indices.to_numpy() - ) - - -def test_series_digitize_invalid_bins(): - s = cudf.Series(np.random.randint(0, 30, 80), dtype="int32") - bins = cudf.Series([2, None, None, 50, 90], dtype="int32") - - with pytest.raises( - ValueError, match="`bins` cannot contain null entries." 
- ): - _ = s.digitize(bins) - - def test_pandas_non_contiguious(): arr1 = np.random.sample([5000, 10]) assert arr1.flags["C_CONTIGUOUS"] is True @@ -3692,9 +3662,7 @@ def test_all(data): # Pandas treats `None` in object type columns as True for some reason, so # replacing with `False` if np.array(data).ndim <= 1: - pdata = cudf.utils.utils._create_pandas_series(data=data).replace( - [None], False - ) + pdata = pd.Series(data=data).replace([None], False) gdata = cudf.Series.from_pandas(pdata) else: pdata = pd.DataFrame(data, columns=["a", "b"]).replace([None], False) @@ -3745,7 +3713,7 @@ def test_all(data): @pytest.mark.parametrize("axis", [0, 1]) def test_any(data, axis): if np.array(data).ndim <= 1: - pdata = cudf.utils.utils._create_pandas_series(data=data) + pdata = pd.Series(data=data) gdata = cudf.Series.from_pandas(pdata) if axis == 1: @@ -4215,7 +4183,7 @@ def test_create_dataframe_column(): ], ) def test_series_values_host_property(data): - pds = cudf.utils.utils._create_pandas_series(data=data) + pds = pd.Series(data=data) gds = cudf.Series(data) np.testing.assert_array_equal(pds.values, gds.values_host) @@ -4238,7 +4206,7 @@ def test_series_values_host_property(data): ], ) def test_series_values_property(data): - pds = cudf.utils.utils._create_pandas_series(data=data) + pds = pd.Series(data=data) gds = cudf.Series(data) gds_vals = gds.values assert isinstance(gds_vals, cupy.ndarray) @@ -7332,7 +7300,7 @@ def test_sample_axis_0( @pytest.mark.parametrize("replace", [True, False]) @pytest.mark.parametrize( - "random_state_lib", [cp.random.RandomState, np.random.RandomState] + "random_state_lib", [cupy.random.RandomState, np.random.RandomState] ) def test_sample_reproducibility(replace, random_state_lib): df = cudf.DataFrame({"a": cupy.arange(0, 1024)}) @@ -7384,7 +7352,7 @@ def test_oversample_without_replace(n, frac, axis): ) -@pytest.mark.parametrize("random_state", [None, cp.random.RandomState(42)]) +@pytest.mark.parametrize("random_state", [None, cupy.random.RandomState(42)]) def test_sample_unsupported_arguments(random_state): df = cudf.DataFrame({"float": [0.05, 0.2, 0.3, 0.2, 0.25]}) with pytest.raises( @@ -9098,7 +9066,7 @@ def test_groupby_cov_for_pandas_bug_case(): ], ) @pytest.mark.parametrize("periods", (-5, -1, 0, 1, 5)) -def test_diff_dataframe_numeric_dtypes(data, periods): +def test_diff_numeric_dtypes(data, periods): gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() @@ -9137,7 +9105,7 @@ def test_diff_decimal_dtypes(precision, scale, dtype): ) -def test_diff_dataframe_invalid_axis(): +def test_diff_invalid_axis(): gdf = cudf.DataFrame(np.array([1.123, 2.343, 5.890, 0.0])) with pytest.raises(NotImplementedError, match="Only axis=0 is supported."): gdf.diff(periods=1, axis=1) @@ -9152,16 +9120,30 @@ def test_diff_dataframe_invalid_axis(): "string_col": ["a", "b", "c", "d", "e"], }, ["a", "b", "c", "d", "e"], - [np.nan, None, np.nan, None], ], ) -def test_diff_dataframe_non_numeric_dypes(data): +def test_diff_unsupported_dtypes(data): gdf = cudf.DataFrame(data) with pytest.raises( - NotImplementedError, - match="DataFrame.diff only supports numeric dtypes", + TypeError, + match=r"unsupported operand type\(s\)", ): - gdf.diff(periods=2, axis=0) + gdf.diff() + + +def test_diff_many_dtypes(): + pdf = pd.DataFrame( + { + "dates": pd.date_range("2020-01-01", "2020-01-06", freq="D"), + "bools": [True, True, True, False, True, True], + "floats": [1.0, 2.0, 3.5, np.nan, 5.0, -1.7], + "ints": [1, 2, 3, 3, 4, 5], + "nans_nulls": [np.nan, None, None, np.nan, np.nan, None], + } + 
) + gdf = cudf.from_pandas(pdf) + assert_eq(pdf.diff(), gdf.diff()) + assert_eq(pdf.diff(periods=2), gdf.diff(periods=2)) def test_dataframe_assign_cp_np_array(): @@ -9230,3 +9212,144 @@ def test_dataframe_pct_change(data, periods, fill_method): expected = pdf.pct_change(periods=periods, fill_method=fill_method) assert_eq(expected, actual) + + +def test_mean_timeseries(): + gdf = cudf.datasets.timeseries() + pdf = gdf.to_pandas() + + expected = pdf.mean(numeric_only=True) + actual = gdf.mean(numeric_only=True) + + assert_eq(expected, actual) + + with pytest.raises(TypeError): + gdf.mean() + + +@pytest.mark.parametrize( + "data", + [ + { + "a": [1, 2, 3, 4, 5], + "b": ["a", "b", "c", "d", "e"], + "c": [1.0, 2.0, 3.0, 4.0, 5.0], + } + ], +) +def test_std_different_dtypes(data): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + expected = pdf.std(numeric_only=True) + actual = gdf.std(numeric_only=True) + + assert_eq(expected, actual) + + with pytest.raises(TypeError): + gdf.std() + + +@pytest.mark.parametrize( + "data", + [ + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": ["v", "n", "k", "l", "m", "i", "y", "r", "w"], + "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"], + } + ], +) +def test_empty_numeric_only(data): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + expected = pdf.prod(numeric_only=True) + actual = gdf.prod(numeric_only=True) + assert_eq(expected, actual) + + +@pytest.fixture +def df_eval(): + N = 10 + int_max = 10 + rng = cupy.random.default_rng(0) + return cudf.DataFrame( + { + "a": rng.integers(N, size=int_max), + "b": rng.integers(N, size=int_max), + "c": rng.integers(N, size=int_max), + "d": rng.integers(N, size=int_max), + } + ) + + +# Note that for now expressions do not automatically handle casting, so inputs +# need to be casted appropriately +@pytest.mark.parametrize( + "expr, dtype", + [ + ("a", int), + ("+a", int), + ("a + b", int), + ("a == b", int), + ("a / b", float), + ("a * b", int), + ("a > b", int), + ("a > b > c", int), + ("a > b < c", int), + ("a & b", int), + ("a & b | c", int), + ("sin(a)", float), + ("exp(sin(abs(a)))", float), + ("sqrt(floor(a))", float), + ("ceil(arctanh(a))", float), + ("(a + b) - (c * d)", int), + ("~a", int), + ("(a > b) and (c > d)", int), + ("(a > b) or (c > d)", int), + ("not (a > b)", int), + ("a + 1", int), + ("a + 1.0", float), + ("-a + 1", int), + ("+a + 1", int), + ("e = a + 1", int), + ( + """ + e = log(cos(a)) + 1.0 + f = abs(c) - exp(d) + """, + float, + ), + ("a_b_are_equal = (a == b)", int), + ], +) +def test_dataframe_eval(df_eval, expr, dtype): + df_eval = df_eval.astype(dtype) + expect = df_eval.to_pandas().eval(expr) + got = df_eval.eval(expr) + # In the specific case where the evaluated expression is a unary function + # of a single column with no nesting, pandas will retain the name. This + # level of compatibility is out of scope for now. 
+ assert_eq(expect, got, check_names=False) + + # Test inplace + if re.search("[^=]=[^=]", expr) is not None: + pdf_eval = df_eval.to_pandas() + pdf_eval.eval(expr, inplace=True) + df_eval.eval(expr, inplace=True) + assert_eq(pdf_eval, df_eval) + + +@pytest.mark.parametrize( + "expr", + [ + """ + e = a + b + a == b + """, + "a_b_are_equal = (a == b) = c", + ], +) +def test_dataframe_eval_errors(df_eval, expr): + with pytest.raises(ValueError): + df_eval.eval(expr) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 964ac9e5457..8be338e787a 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1,7 +1,6 @@ # Copyright (c) 2019-2022, NVIDIA CORPORATION. import datetime -import datetime as dt import operator import re @@ -219,8 +218,8 @@ def test_sort_datetime(): def test_issue_165(): df_pandas = pd.DataFrame() - start_date = dt.datetime.strptime("2000-10-21", "%Y-%m-%d") - data = [(start_date + dt.timedelta(days=x)) for x in range(6)] + start_date = datetime.datetime.strptime("2000-10-21", "%Y-%m-%d") + data = [(start_date + datetime.timedelta(days=x)) for x in range(6)] df_pandas["dates"] = data df_pandas["num"] = [1, 2, 3, 4, 5, 6] df_cudf = DataFrame.from_pandas(df_pandas) diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py index 3e7891ba0af..3277e52edb3 100644 --- a/python/cudf/cudf/tests/test_dropna.py +++ b/python/cudf/cudf/tests/test_dropna.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing._utils import _create_pandas_series, assert_eq @pytest.mark.parametrize( @@ -22,7 +22,7 @@ @pytest.mark.parametrize("inplace", [True, False]) def test_dropna_series(data, nulls, inplace): - psr = cudf.utils.utils._create_pandas_series(data=data) + psr = _create_pandas_series(data) if len(data) > 0: if nulls == "one": diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index e8a695570f0..98061f4e977 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -1,6 +1,6 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. -import itertools as it +import itertools import random import numpy as np @@ -9,7 +9,11 @@ import cudf from cudf import concat -from cudf.testing._utils import assert_eq, assert_exceptions_equal +from cudf.testing._utils import ( + _create_pandas_series, + assert_eq, + assert_exceptions_equal, +) # TODO: PANDAS 1.0 support # Revisit drop_duplicates() tests to update parameters like ignore_index. 
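For reference, the `keep` semantics exercised by the drop-duplicates tests in this file match pandas, so they can be sanity-checked on the CPU. A small pandas-only example (outputs are standard pandas behavior, not taken from this patch):

    import pandas as pd

    s = pd.Series([1, 2, 2, 3, 2])
    print(s.drop_duplicates(keep="first").tolist())  # [1, 2, 3]: keep first occurrence
    print(s.drop_duplicates(keep="last").tolist())   # [1, 3, 2]: keep last occurrence
    print(s.drop_duplicates(keep=False).tolist())    # [1, 3]: drop every duplicated value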
@@ -59,7 +63,7 @@ def test_duplicated_with_misspelled_column_name(subset): ], ) def test_drop_duplicates_series(data, keep): - pds = cudf.utils.utils._create_pandas_series(data) + pds = _create_pandas_series(data) gds = cudf.from_pandas(pds) assert_df(pds.drop_duplicates(keep=keep), gds.drop_duplicates(keep=keep)) @@ -280,7 +284,7 @@ def test_drop_duplicates_empty(df): @pytest.mark.parametrize("num_columns", [3, 4, 5]) def test_dataframe_drop_duplicates_numeric_method(num_columns): - comb = list(it.permutations(range(num_columns), num_columns)) + comb = list(itertools.permutations(range(num_columns), num_columns)) shuf = list(comb) random.Random(num_columns).shuffle(shuf) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index eba37c1f5af..9e87fdbd3be 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -292,6 +292,40 @@ def foo(df): assert_groupby_results_equal(expect, got) +def create_test_groupby_apply_args_params(): + def f1(df, k): + df["out"] = df["val1"] + df["val2"] + k + return df + + def f2(df, k, L): + df["out"] = df["val1"] - df["val2"] + (k / L) + return df + + def f3(df, k, L, m): + df["out"] = ((k * df["val1"]) + (L * df["val2"])) / m + return df + + return [(f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] + + +@pytest.mark.parametrize("func,args", create_test_groupby_apply_args_params()) +def test_groupby_apply_args(func, args): + np.random.seed(0) + df = DataFrame() + nelem = 20 + df["key1"] = np.random.randint(0, 3, nelem) + df["key2"] = np.random.randint(0, 2, nelem) + df["val1"] = np.random.random(nelem) + df["val2"] = np.random.random(nelem) + + expect_grpby = df.to_pandas().groupby(["key1", "key2"], as_index=False) + got_grpby = df.groupby(["key1", "key2"]) + + expect = expect_grpby.apply(func, *args) + got = got_grpby.apply(func, *args) + assert_groupby_results_equal(expect, got) + + def test_groupby_apply_grouped(): np.random.seed(0) df = DataFrame() @@ -1595,7 +1629,38 @@ def test_groupby_pipe(): assert_groupby_results_equal(expected, actual) -def test_groupby_apply_return_scalars(): +def create_test_groupby_apply_return_scalars_params(): + def f0(x): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = ticker / 10 + return full + + def f1(x, k): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = ticker / k + return full + + def f2(x, k, L): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = L * (ticker / k) + return full + + def f3(x, k, L, m): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = L * (ticker / k) % m + return full + + return [(f0, ()), (f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] + + +@pytest.mark.parametrize( + "func,args", create_test_groupby_apply_return_scalars_params() +) +def test_groupby_apply_return_scalars(func, args): pdf = pd.DataFrame( { "A": [1, 1, 2, 2, 3, 3, 4, 4, 5, 5], @@ -1615,30 +1680,52 @@ def test_groupby_apply_return_scalars(): ) gdf = cudf.from_pandas(pdf) - def custom_map_func(x): - x = x[~x["B"].isna()] - ticker = x.shape[0] - full = ticker / 10 - return full - - expected = pdf.groupby("A").apply(lambda x: custom_map_func(x)) - actual = gdf.groupby("A").apply(lambda x: custom_map_func(x)) + expected = pdf.groupby("A").apply(func, *args) + actual = gdf.groupby("A").apply(func, *args) assert_groupby_results_equal(expected, actual) +def create_test_groupby_apply_return_series_dataframe_params(): + def f0(x): + return x - x.max() + + def f1(x): + return x.min() - x.max() + + def f2(x): + return x.min() + + def 
f3(x, k): + return x - x.max() + k + + def f4(x, k, L): + return x.min() - x.max() + (k / L) + + def f5(x, k, L, m): + return m * x.min() + (k / L) + + return [ + (f0, ()), + (f1, ()), + (f2, ()), + (f3, (42,)), + (f4, (42, 119)), + (f5, (41, 119, 212.1)), + ] + + @pytest.mark.parametrize( - "cust_func", - [lambda x: x - x.max(), lambda x: x.min() - x.max(), lambda x: x.min()], + "func,args", create_test_groupby_apply_return_series_dataframe_params() ) -def test_groupby_apply_return_series_dataframe(cust_func): +def test_groupby_apply_return_series_dataframe(func, args): pdf = pd.DataFrame( {"key": [0, 0, 1, 1, 2, 2, 2], "val": [0, 1, 2, 3, 4, 5, 6]} ) gdf = cudf.from_pandas(pdf) - expected = pdf.groupby(["key"]).apply(cust_func) - actual = gdf.groupby(["key"]).apply(cust_func) + expected = pdf.groupby(["key"]).apply(func, *args) + actual = gdf.groupby(["key"]).apply(func, *args) assert_groupby_results_equal(expected, actual) @@ -1724,6 +1811,50 @@ def test_groupby_2keys_scan(nelem, func): assert_groupby_results_equal(got_df, expect_df, check_dtype=check_dtype) +@pytest.mark.parametrize("nelem", [100, 1000]) +@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) +@pytest.mark.parametrize("pct", [False, True]) +def test_groupby_2keys_rank(nelem, method, ascending, na_option, pct): + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + ], + rows=nelem, + use_threads=False, + ) + pdf = t.to_pandas() + pdf.columns = ["x", "y", "z"] + gdf = cudf.from_pandas(pdf) + expect_df = pdf.groupby(["x", "y"], sort=True).rank( + method=method, ascending=ascending, na_option=na_option, pct=pct + ) + got_df = gdf.groupby(["x", "y"], sort=True).rank( + method=method, ascending=ascending, na_option=na_option, pct=pct + ) + + assert_groupby_results_equal(got_df, expect_df, check_dtype=False) + + +def test_groupby_rank_fails(): + gdf = cudf.DataFrame( + {"x": [1, 2, 3, 4], "y": [1, 2, 3, 4], "z": [1, 2, 3, 4]} + ) + with pytest.raises(NotImplementedError): + gdf.groupby(["x", "y"]).rank(method="min", axis=1) + gdf = cudf.DataFrame( + { + "a": [1, 1, 1, 2, 2, 2], + "b": [[1, 2], [3, None, 5], None, [], [7, 8], [9]], + } + ) + with pytest.raises(NotImplementedError): + gdf.groupby(["a"]).rank(method="min", axis=1) + + def test_groupby_mix_agg_scan(): err_msg = "Cannot perform both aggregation and scan in one operation" func = ["cumsum", "sum"] @@ -2213,6 +2344,22 @@ def foo(x): assert_groupby_results_equal(expect, got) +@pytest.mark.parametrize( + "func,args", + [ + (lambda x, k: x + k, (42,)), + (lambda x, k, L: x + k - L, (42, 191)), + (lambda x, k, L, m: (x + k) / (L * m), (42, 191, 99.9)), + ], +) +def test_groupby_apply_series_args(func, args): + + got = make_frame(DataFrame, 100).groupby("x").y.apply(func, *args) + expect = make_frame(pd.DataFrame, 100).groupby("x").y.apply(func, *args) + + assert_groupby_results_equal(expect, got) + + @pytest.mark.parametrize("label", [None, "left", "right"]) @pytest.mark.parametrize("closed", [None, "left", "right"]) def test_groupby_freq_week(label, closed): diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/test_hdfs.py index de4303a34a8..8730cb187b5 100644 --- a/python/cudf/cudf/tests/test_hdfs.py +++ 
b/python/cudf/cudf/tests/test_hdfs.py @@ -3,12 +3,12 @@ import os from io import BytesIO -import fastavro as fa +import fastavro import numpy as np import pandas as pd import pyarrow as pa import pytest -from pyarrow import orc as orc +from pyarrow import orc import cudf from cudf.testing._utils import assert_eq @@ -253,7 +253,7 @@ def test_read_avro(datadir, hdfs, test_url): got = cudf.read_avro(hd_fpath) with open(fname, mode="rb") as f: - expect = pd.DataFrame.from_records(fa.reader(f)) + expect = pd.DataFrame.from_records(fastavro.reader(f)) for col in expect.columns: expect[col] = expect[col].astype(got[col].dtype) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 37286c65341..05830f79880 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -28,6 +28,7 @@ SIGNED_INTEGER_TYPES, SIGNED_TYPES, UNSIGNED_TYPES, + _create_pandas_series, assert_column_memory_eq, assert_column_memory_ne, assert_eq, @@ -973,9 +974,7 @@ def test_index_equal_misc(data, other): actual = gd_data.equals(np.array(gd_other)) assert_eq(expected, actual) - expected = pd_data.equals( - cudf.utils.utils._create_pandas_series(data=pd_other) - ) + expected = pd_data.equals(_create_pandas_series(pd_other)) actual = gd_data.equals(cudf.Series(gd_other)) assert_eq(expected, actual) @@ -2433,7 +2432,7 @@ def test_index_nan_as_null(data, nan_idx, NA_idx, nan_as_null): ], ) def test_isin_index(data, values): - psr = cudf.utils.utils._create_pandas_series(data=data) + psr = _create_pandas_series(data) gsr = cudf.Series.from_pandas(psr) got = gsr.index.isin(values) diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 740c32a8a26..225aa0cd6bc 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -726,7 +726,7 @@ def test_dataframe_take(ntake): @pytest.mark.parametrize("ntake", [1, 2, 8, 9]) -def test_dataframe_take_with_multiIndex(ntake): +def test_dataframe_take_with_multiindex(ntake): np.random.seed(0) df = cudf.DataFrame( index=cudf.MultiIndex( diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index cf53a3525ef..09eee3520e5 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -11,6 +11,7 @@ import cudf from cudf import NA from cudf._lib.copying import get_element +from cudf.api.types import is_scalar from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, @@ -425,7 +426,7 @@ def test_contains_invalid(data, scalar): @pytest.mark.parametrize( - "data, scalar, expect", + "data, search_key, expect", [ ( [[1, 2, 3], [], [3, 4, 5]], @@ -448,6 +449,16 @@ def test_contains_invalid(data, scalar): "y", [3, -1], ), + ( + [["h", "a", None], ["t", "g"]], + ["a", "b"], + [1, -1], + ), + ( + [None, ["h", "i"], ["p", "k", "z"]], + ["x", None, "z"], + [None, None, 2], + ), ( [["d", None, "e"], [None, "f"], []], cudf.Scalar(cudf.NA, "O"), @@ -460,15 +471,21 @@ def test_contains_invalid(data, scalar): ), ], ) -def test_index(data, scalar, expect): +def test_index(data, search_key, expect): sr = cudf.Series(data) expect = cudf.Series(expect, dtype="int32") - got = sr.list.index(cudf.Scalar(scalar, sr.dtype.element_type)) + if is_scalar(search_key): + got = sr.list.index(cudf.Scalar(search_key, sr.dtype.element_type)) + else: + got = sr.list.index( + cudf.Series(search_key, dtype=sr.dtype.element_type) + ) + assert_eq(expect, got) @pytest.mark.parametrize( - "data, scalar", + "data, 
search_key", [ ( [[9, None, 8], [], [7, 6, 5]], @@ -478,16 +495,42 @@ def test_index(data, scalar, expect): [["a", "b", "c"], None, [None, "d"]], 2, ), + ( + [["e", "s"], ["t", "w"]], + [5, 6], + ), ], ) -def test_index_invalid(data, scalar): +def test_index_invalid_type(data, search_key): sr = cudf.Series(data) with pytest.raises( TypeError, match="Type/Scale of search key does not " "match list column element type.", ): - sr.list.index(scalar) + sr.list.index(search_key) + + +@pytest.mark.parametrize( + "data, search_key", + [ + ( + [[5, 8], [2, 6]], + [8, 2, 4], + ), + ( + [["h", "j"], ["p", None], ["t", "z"]], + ["j", "a"], + ), + ], +) +def test_index_invalid_length(data, search_key): + sr = cudf.Series(data) + with pytest.raises( + RuntimeError, + match="Number of search keys must match list column size.", + ): + sr.list.index(search_key) @pytest.mark.parametrize( @@ -759,3 +802,15 @@ def test_listcol_setitem_retain_dtype(): # prior to this fix: https://github.com/rapidsai/cudf/pull/10151/ df2 = df1.copy() assert df2["a"].dtype == df["a"].dtype + + +def test_list_astype(): + s = cudf.Series([[1, 2], [3, 4]]) + s2 = s.list.astype("float64") + assert s2.dtype == cudf.ListDtype("float64") + assert_eq(s.list.leaves.astype("float64"), s2.list.leaves) + + s = cudf.Series([[[1, 2], [3]], [[5, 6], None]]) + s2 = s.list.astype("string") + assert s2.dtype == cudf.ListDtype(cudf.ListDtype("string")) + assert_eq(s.list.leaves.astype("string"), s2.list.leaves) diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index eaef002f37d..38225b3efb9 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. 
""" Test related to MultiIndex @@ -688,7 +688,7 @@ def test_multiindex_copy_sem(data, levels, codes, names): # Test same behavior when used on DataFrame gdf.index = gmi_copy pdf.index = pmi_copy - assert gdf.__repr__() == pdf.__repr__() + assert repr(gdf) == repr(pdf) @pytest.mark.parametrize( @@ -1131,7 +1131,7 @@ def test_multiindex_values_host(): ), ], ) -def test_multiIndex_fillna(gdi, fill_value, expected): +def test_multiindex_fillna(gdi, fill_value, expected): assert_eq(expected, gdi.fillna(fill_value)) @@ -1173,7 +1173,7 @@ def test_multiIndex_fillna(gdi, fill_value, expected): ), ], ) -def test_multiIndex_empty(pdi): +def test_multiindex_empty(pdi): gdi = cudf.from_pandas(pdi) assert_eq(pdi.empty, gdi.empty) @@ -1217,7 +1217,7 @@ def test_multiIndex_empty(pdi): ), ], ) -def test_multiIndex_size(pdi): +def test_multiindex_size(pdi): gdi = cudf.from_pandas(pdi) assert_eq(pdi.size, gdi.size) @@ -1375,7 +1375,7 @@ def test_multiindex_sort_values(pmidx, ascending, return_indexer): ], ) @pytest.mark.parametrize("ascending", [True, False]) -def test_multiIndex_argsort(pdi, ascending): +def test_multiindex_argsort(pdi, ascending): gdi = cudf.from_pandas(pdi) if not ascending: @@ -1562,7 +1562,7 @@ def test_multiindex_indexing(key): assert_eq(gi[key], pi[key], exact=False) -def test_multiIndex_duplicate_names(): +def test_multiindex_duplicate_names(): gi = cudf.MultiIndex( levels=[["a", "b"], ["b", "a"]], codes=[[0, 0], [0, 1]], @@ -1699,7 +1699,7 @@ def test_intersection_mulitIndex(idx1, idx2, sort): None, ], ) -def test_pickle_roundtrip_multiIndex(names): +def test_pickle_roundtrip_multiindex(names): df = cudf.DataFrame( { "one": [1, 2, 3], @@ -1745,7 +1745,7 @@ def test_pickle_roundtrip_multiIndex(names): "is_interval", ], ) -def test_multiIndex_type_methods(pidx, func): +def test_multiindex_type_methods(pidx, func): gidx = cudf.from_pandas(pidx) expected = getattr(pidx, func)() diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 5082fb08b92..c3969bf6c14 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -11,7 +11,7 @@ import pandas as pd import pyarrow as pa import pyarrow.orc -import pyorc as po +import pyorc import pytest import cudf @@ -307,7 +307,7 @@ def test_orc_read_skiprows(tmpdir): {"a": [1, 0, 1, 0, None, 1, 1, 1, 0, None, 0, 0, 1, 1, 1, 1]}, dtype=pd.BooleanDtype(), ) - writer = po.Writer(buff, po.Struct(a=po.Boolean())) + writer = pyorc.Writer(buff, pyorc.Struct(a=pyorc.Boolean())) tuples = list( map( lambda x: (None,) if x[0] is pd.NA else x, @@ -931,29 +931,35 @@ def generate_list_struct_buff(size=100_000): buff = BytesIO() schema = { - "lvl3_list": po.Array(po.Array(po.Array(po.BigInt()))), - "lvl1_list": po.Array(po.BigInt()), - "lvl1_struct": po.Struct(**{"a": po.BigInt(), "b": po.BigInt()}), - "lvl2_struct": po.Struct( + "lvl3_list": pyorc.Array(pyorc.Array(pyorc.Array(pyorc.BigInt()))), + "lvl1_list": pyorc.Array(pyorc.BigInt()), + "lvl1_struct": pyorc.Struct( + **{"a": pyorc.BigInt(), "b": pyorc.BigInt()} + ), + "lvl2_struct": pyorc.Struct( **{ - "a": po.BigInt(), - "lvl1_struct": po.Struct( - **{"c": po.BigInt(), "d": po.BigInt()} + "a": pyorc.BigInt(), + "lvl1_struct": pyorc.Struct( + **{"c": pyorc.BigInt(), "d": pyorc.BigInt()} ), } ), - "list_nests_struct": po.Array( - po.Array(po.Struct(**{"a": po.BigInt(), "b": po.BigInt()})) + "list_nests_struct": pyorc.Array( + pyorc.Array( + pyorc.Struct(**{"a": pyorc.BigInt(), "b": pyorc.BigInt()}) + ) ), - "struct_nests_list": po.Struct( + 
"struct_nests_list": pyorc.Struct( **{ - "struct": po.Struct(**{"a": po.BigInt(), "b": po.BigInt()}), - "list": po.Array(po.BigInt()), + "struct": pyorc.Struct( + **{"a": pyorc.BigInt(), "b": pyorc.BigInt()} + ), + "list": pyorc.Array(pyorc.BigInt()), } ), } - schema = po.Struct(**schema) + schema = pyorc.Struct(**schema) lvl3_list = [ rd.choice( @@ -1019,7 +1025,7 @@ def generate_list_struct_buff(size=100_000): } ) - writer = po.Writer(buff, schema, stripe_size=1024) + writer = pyorc.Writer(buff, schema, stripe_size=1024) tuples = list( map( lambda x: (None,) if x[0] is pd.NA else x, @@ -1101,15 +1107,17 @@ def gen_map_buff(size=10000): buff = BytesIO() schema = { - "lvl1_map": po.Map(key=po.String(), value=po.BigInt()), - "lvl2_map": po.Map(key=po.String(), value=po.Array(po.BigInt())), - "lvl2_struct_map": po.Map( - key=po.String(), - value=po.Struct(**{"a": po.BigInt(), "b": po.BigInt()}), + "lvl1_map": pyorc.Map(key=pyorc.String(), value=pyorc.BigInt()), + "lvl2_map": pyorc.Map( + key=pyorc.String(), value=pyorc.Array(pyorc.BigInt()) + ), + "lvl2_struct_map": pyorc.Map( + key=pyorc.String(), + value=pyorc.Struct(**{"a": pyorc.BigInt(), "b": pyorc.BigInt()}), ), } - schema = po.Struct(**schema) + schema = pyorc.Struct(**schema) lvl1_map = [ rd.choice( @@ -1186,8 +1194,8 @@ def gen_map_buff(size=10000): "lvl2_struct_map": lvl2_struct_map, } ) - writer = po.Writer( - buff, schema, stripe_size=1024, compression=po.CompressionKind.NONE + writer = pyorc.Writer( + buff, schema, stripe_size=1024, compression=pyorc.CompressionKind.NONE ) tuples = list( map( @@ -1479,8 +1487,9 @@ def test_statistics_sum_overflow(): minint64 = np.iinfo(np.int64).min buff = BytesIO() - with po.Writer( - buff, po.Struct(a=po.BigInt(), b=po.BigInt(), c=po.BigInt()) + with pyorc.Writer( + buff, + pyorc.Struct(a=pyorc.BigInt(), b=pyorc.BigInt(), c=pyorc.BigInt()), ) as writer: writer.write((maxint64, minint64, minint64)) writer.write((1, -1, 1)) @@ -1497,20 +1506,20 @@ def test_statistics_sum_overflow(): def test_empty_statistics(): buff = BytesIO() - orc_schema = po.Struct( - a=po.BigInt(), - b=po.Double(), - c=po.String(), - d=po.Decimal(11, 2), - e=po.Date(), - f=po.Timestamp(), - g=po.Boolean(), - h=po.Binary(), - i=po.BigInt(), + orc_schema = pyorc.Struct( + a=pyorc.BigInt(), + b=pyorc.Double(), + c=pyorc.String(), + d=pyorc.Decimal(11, 2), + e=pyorc.Date(), + f=pyorc.Timestamp(), + g=pyorc.Boolean(), + h=pyorc.Binary(), + i=pyorc.BigInt(), # One column with non null value, else cudf/pyorc readers crash ) data = tuple([None] * (len(orc_schema.fields) - 1) + [1]) - with po.Writer(buff, orc_schema) as writer: + with pyorc.Writer(buff, orc_schema) as writer: writer.write(data) got = cudf.io.orc.read_orc_statistics([buff]) diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index 14e81d6ad30..08311f89148 100644 --- a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -1371,3 +1371,17 @@ def test_replace_nulls(gsr, old, new, expected): expected.sort_values().reset_index(drop=True), actual.sort_values().reset_index(drop=True), ) + + +def test_fillna_columns_multiindex(): + columns = pd.MultiIndex.from_tuples([("a", "b"), ("d", "e")]) + pdf = pd.DataFrame( + {"0": [1, 2, None, 3, None], "1": [None, None, None, None, 4]} + ) + pdf.columns = columns + gdf = cudf.from_pandas(pdf) + + expected = pdf.fillna(10) + actual = gdf.fillna(10) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_repr.py 
b/python/cudf/cudf/tests/test_repr.py index 8f2e4811e36..c4985639173 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -41,7 +41,7 @@ def test_null_series(nrows, dtype): ps = sr.to_pandas() pd.options.display.max_rows = int(nrows) - psrepr = ps.__repr__() + psrepr = repr(ps) psrepr = psrepr.replace("NaN", "") psrepr = psrepr.replace("NaT", "") psrepr = psrepr.replace("None", "") @@ -49,7 +49,7 @@ def test_null_series(nrows, dtype): psrepr = psrepr.replace("UInt", "uint") elif "Int" in psrepr: psrepr = psrepr.replace("Int", "int") - assert psrepr.split() == sr.__repr__().split() + assert psrepr.split() == repr(sr).split() pd.reset_option("display.max_rows") @@ -72,11 +72,13 @@ def test_null_dataframe(ncols): gdf[dtype] = sr pdf = gdf.to_pandas() pd.options.display.max_columns = int(ncols) - pdfrepr = pdf.__repr__() - pdfrepr = pdfrepr.replace("NaN", "") - pdfrepr = pdfrepr.replace("NaT", "") - pdfrepr = pdfrepr.replace("None", "") - assert pdfrepr.split() == gdf.__repr__().split() + pdf_repr = ( + repr(pdf) + .replace("NaN", "") + .replace("NaT", "") + .replace("None", "") + ) + assert pdf_repr.split() == repr(gdf).split() pd.reset_option("display.max_columns") @@ -87,7 +89,7 @@ def test_full_series(nrows, dtype): ps = pd.Series(np.random.randint(0, 100, size)).astype(dtype) sr = cudf.from_pandas(ps) pd.options.display.max_rows = nrows - assert ps.__repr__() == sr.__repr__() + assert repr(ps) == repr(sr) pd.reset_option("display.max_rows") @@ -121,8 +123,8 @@ def test_integer_dataframe(x): gdf = cudf.DataFrame({"x": x}) pdf = gdf.to_pandas() pd.options.display.max_columns = 1 - assert gdf.__repr__() == pdf.__repr__() - assert gdf.T.__repr__() == pdf.T.__repr__() + assert repr(gdf) == repr(pdf) + assert repr(gdf.T) == repr(pdf.T) pd.reset_option("display.max_columns") @@ -134,9 +136,9 @@ def test_integer_dataframe(x): @settings(deadline=None) def test_integer_series(x): sr = cudf.Series(x) - ps = cudf.utils.utils._create_pandas_series(data=x) + ps = pd.Series(data=x) - assert sr.__repr__() == ps.__repr__() + assert repr(sr) == repr(ps) @given(st.lists(st.floats())) @@ -144,15 +146,15 @@ def test_integer_series(x): def test_float_dataframe(x): gdf = cudf.DataFrame({"x": cudf.Series(x, nan_as_null=False)}) pdf = gdf.to_pandas() - assert gdf.__repr__() == pdf.__repr__() + assert repr(gdf) == repr(pdf) @given(st.lists(st.floats())) @settings(deadline=None) def test_float_series(x): sr = cudf.Series(x, nan_as_null=False) - ps = cudf.utils.utils._create_pandas_series(data=x) - assert sr.__repr__() == ps.__repr__() + ps = pd.Series(data=x) + assert repr(sr) == repr(ps) @pytest.fixture @@ -176,12 +178,12 @@ def mixed_gdf(mixed_pdf): def test_mixed_dataframe(mixed_pdf, mixed_gdf): - assert mixed_gdf.__repr__() == mixed_pdf.__repr__() + assert repr(mixed_gdf) == repr(mixed_pdf) def test_mixed_series(mixed_pdf, mixed_gdf): for col in mixed_gdf.columns: - assert mixed_gdf[col].__repr__() == mixed_pdf[col].__repr__() + assert repr(mixed_gdf[col]) == repr(mixed_pdf[col]) def test_MI(): @@ -204,11 +206,9 @@ def test_MI(): pd.options.display.max_columns = 0 gdf = gdf.set_index(cudf.MultiIndex(levels=levels, codes=codes)) pdf = gdf.to_pandas() - gdfT = gdf.T - pdfT = pdf.T - assert gdf.__repr__() == pdf.__repr__() - assert gdf.index.__repr__() == pdf.index.__repr__() - assert gdfT.__repr__() == pdfT.__repr__() + assert repr(gdf) == repr(pdf) + assert repr(gdf.index) == repr(pdf.index) + assert repr(gdf.T) == repr(pdf.T) pd.reset_option("display.max_rows") 
pd.reset_option("display.max_columns") @@ -224,9 +224,9 @@ def test_groupby_MI(nrows, ncols): pdg = pdf.groupby(["a", "b"], sort=True).count() pd.options.display.max_rows = nrows pd.options.display.max_columns = ncols - assert gdg.__repr__() == pdg.__repr__() - assert gdg.index.__repr__() == pdg.index.__repr__() - assert gdg.T.__repr__() == pdg.T.__repr__() + assert repr(gdg) == repr(pdg) + assert repr(gdg.index) == repr(pdg.index) + assert repr(gdg.T) == repr(pdg.T) pd.reset_option("display.max_rows") pd.reset_option("display.max_columns") @@ -241,7 +241,7 @@ def test_generic_index(length, dtype): ) gsr = cudf.Series.from_pandas(psr) - assert psr.index.__repr__() == gsr.index.__repr__() + assert repr(psr.index) == repr(gsr.index) @pytest.mark.parametrize( @@ -290,8 +290,8 @@ def test_dataframe_sliced(gdf, slice, max_seq_items, max_rows): sliced_gdf = gdf[slice] sliced_pdf = pdf[slice] - expected_repr = sliced_pdf.__repr__().replace("None", "") - actual_repr = sliced_gdf.__repr__() + expected_repr = repr(sliced_pdf).replace("None", "") + actual_repr = repr(sliced_gdf) assert expected_repr == actual_repr pd.reset_option("display.max_rows") @@ -392,7 +392,7 @@ def test_dataframe_sliced(gdf, slice, max_seq_items, max_rows): ) def test_generic_index_null(index, expected_repr): - actual_repr = index.__repr__() + actual_repr = repr(index) assert expected_repr == actual_repr @@ -475,19 +475,19 @@ def test_dataframe_null_index_repr(df, pandas_special_case): gdf = cudf.from_pandas(pdf) expected_repr = ( - pdf.__repr__() + repr(pdf) .replace("NaN", "") .replace("NaT", "") .replace("None", "") ) - actual_repr = gdf.__repr__() + actual_repr = repr(gdf) if pandas_special_case: # Pandas inconsistently print StringIndex null values # as `None` at some places and `NaN` at few other places # Whereas cudf is consistent with strings `null` values # to be printed as `None` everywhere. - actual_repr = gdf.__repr__().replace("None", "") + actual_repr = repr(gdf).replace("None", "") assert expected_repr.split() == actual_repr.split() @@ -554,19 +554,19 @@ def test_series_null_index_repr(sr, pandas_special_case): gsr = cudf.from_pandas(psr) expected_repr = ( - psr.__repr__() + repr(psr) .replace("NaN", "") .replace("NaT", "") .replace("None", "") ) - actual_repr = gsr.__repr__() + actual_repr = repr(gsr) if pandas_special_case: # Pandas inconsistently print StringIndex null values # as `None` at some places and `NaN` at few other places # Whereas cudf is consistent with strings `null` values # to be printed as `None` everywhere. 
- actual_repr = gsr.__repr__().replace("None", "") + actual_repr = repr(gsr).replace("None", "") assert expected_repr.split() == actual_repr.split() @@ -607,9 +607,9 @@ def test_timedelta_series_s_us_repr(data, dtype): psr = sr.to_pandas() expected = ( - psr.__repr__().replace("timedelta64[ns]", dtype).replace("NaT", "") + repr(psr).replace("timedelta64[ns]", dtype).replace("NaT", "") ) - actual = sr.__repr__() + actual = repr(sr) assert expected.split() == actual.split() @@ -886,7 +886,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ) def test_timedelta_series_ns_ms_repr(ser, expected_repr): expected = expected_repr - actual = ser.__repr__() + actual = repr(ser) assert expected.split() == actual.split() @@ -1042,7 +1042,7 @@ def test_timedelta_series_ns_ms_repr(ser, expected_repr): ], ) def test_timedelta_dataframe_repr(df, expected_repr): - actual_repr = df.__repr__() + actual_repr = repr(df) assert actual_repr.split() == expected_repr.split() @@ -1105,7 +1105,7 @@ def test_timedelta_dataframe_repr(df, expected_repr): def test_timedelta_index_repr(index, expected_repr): if not PANDAS_GE_110: pytest.xfail(reason="pandas >= 1.1 requried") - actual_repr = index.__repr__() + actual_repr = repr(index) assert actual_repr.split() == expected_repr.split() @@ -1132,11 +1132,11 @@ def test_timedelta_index_repr(index, expected_repr): ], ) @pytest.mark.parametrize("max_seq_items", [None, 1, 2, 5, 10, 100]) -def test_multiIndex_repr(pmi, max_seq_items): +def test_multiindex_repr(pmi, max_seq_items): pd.set_option("display.max_seq_items", max_seq_items) gmi = cudf.from_pandas(pmi) - assert gmi.__repr__() == pmi.__repr__() + assert repr(gmi) == repr(pmi) pd.reset_option("display.max_seq_items") @@ -1377,8 +1377,8 @@ def test_multiIndex_repr(pmi, max_seq_items): ), ], ) -def test_multiIndex_null_repr(gdi, expected_repr): - actual_repr = gdi.__repr__() +def test_multiindex_null_repr(gdi, expected_repr): + actual_repr = repr(gdi) assert actual_repr.split() == expected_repr.split() @@ -1401,7 +1401,7 @@ def test_categorical_series_with_nan_repr(): """ ) - assert series.__repr__().split() == expected_repr.split() + assert repr(series).split() == expected_repr.split() sliced_expected_repr = textwrap.dedent( """ @@ -1414,7 +1414,7 @@ def test_categorical_series_with_nan_repr(): """ ) - assert series[2:].__repr__().split() == sliced_expected_repr.split() + assert repr(series[2:]).split() == sliced_expected_repr.split() def test_categorical_dataframe_with_nan_repr(): @@ -1434,7 +1434,7 @@ def test_categorical_dataframe_with_nan_repr(): """ ) - assert df.__repr__().split() == expected_repr.split() + assert repr(df).split() == expected_repr.split() def test_categorical_index_with_nan_repr(): @@ -1449,21 +1449,21 @@ def test_categorical_index_with_nan_repr(): "categories=[1.0, 2.0, 10.0, NaN], ordered=False, dtype='category')" ) - assert cat_index.__repr__() == expected_repr + assert repr(cat_index) == expected_repr sliced_expected_repr = ( "CategoricalIndex([NaN, 10.0, NaN, ], " "categories=[1.0, 2.0, 10.0, NaN], ordered=False, dtype='category')" ) - assert cat_index[2:].__repr__() == sliced_expected_repr + assert repr(cat_index[2:]) == sliced_expected_repr def test_empty_series_name(): ps = pd.Series([], name="abc", dtype="int") gs = cudf.from_pandas(ps) - assert ps.__repr__() == gs.__repr__() + assert repr(ps) == repr(gs) def test_repr_struct_after_concat(): @@ -1493,4 +1493,4 @@ def test_repr_struct_after_concat(): ) pdf = df.to_pandas() - assert df.__repr__() == pdf.__repr__() + assert repr(df) == 
repr(pdf) diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py index 87d1faf33ca..397d7f1c277 100644 --- a/python/cudf/cudf/tests/test_rolling.py +++ b/python/cudf/cudf/tests/test_rolling.py @@ -9,7 +9,7 @@ import cudf from cudf.core._compat import PANDAS_GE_110 -from cudf.testing._utils import assert_eq +from cudf.testing._utils import _create_pandas_series, assert_eq from cudf.testing.dataset_generator import rand_dataframe @@ -58,7 +58,7 @@ def test_rolling_series_basic(data, index, agg, nulls, center): elif nulls == "all": data = [np.nan] * len(data) - psr = cudf.utils.utils._create_pandas_series(data=data, index=index) + psr = _create_pandas_series(data, index=index) gsr = cudf.Series(psr) for window_size in range(1, len(data) + 1): for min_periods in range(1, window_size + 1): @@ -322,7 +322,7 @@ def test_rolling_getitem_window(): @pytest.mark.parametrize("center", [True, False]) def test_rollling_series_numba_udf_basic(data, index, center): - psr = cudf.utils.utils._create_pandas_series(data=data, index=index) + psr = _create_pandas_series(data, index=index) gsr = cudf.from_pandas(psr) def some_func(A): diff --git a/python/cudf/cudf/tests/test_scalar.py b/python/cudf/cudf/tests/test_scalar.py index e8382681820..0aceb07bd35 100644 --- a/python/cudf/cudf/tests/test_scalar.py +++ b/python/cudf/cudf/tests/test_scalar.py @@ -1,7 +1,6 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. import datetime -import datetime as dt import re from decimal import Decimal @@ -11,7 +10,6 @@ import pytest import cudf -from cudf import Scalar as pycudf_scalar from cudf._lib.copying import get_element from cudf.testing._utils import ( ALL_TYPES, @@ -297,9 +295,9 @@ def test_date_duration_scalars(value): actual = s.value - if isinstance(value, dt.datetime): + if isinstance(value, datetime.datetime): expected = np.datetime64(value) - elif isinstance(value, dt.timedelta): + elif isinstance(value, datetime.timedelta): expected = np.timedelta64(value) elif isinstance(value, pd.Timestamp): expected = value.to_datetime64() @@ -344,7 +342,7 @@ def test_scalar_invalid_implicit_conversion(cls, dtype): cls(pd.NA) except TypeError as e: with pytest.raises(TypeError, match=re.escape(str(e))): - slr = pycudf_scalar(None, dtype=dtype) + slr = cudf.Scalar(None, dtype=dtype) cls(slr) @@ -354,7 +352,7 @@ def test_scalar_invalid_implicit_conversion(cls, dtype): [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype], ) def test_device_scalar_direct_construction(value, decimal_type): - value = cudf.utils.utils.to_cudf_compatible_scalar(value) + value = cudf.utils.dtypes.to_cudf_compatible_scalar(value) dtype = ( value.dtype @@ -378,7 +376,7 @@ def test_device_scalar_direct_construction(value, decimal_type): @pytest.mark.parametrize("value", SCALAR_VALUES + DECIMAL_VALUES) def test_construct_from_scalar(value): - value = cudf.utils.utils.to_cudf_compatible_scalar(value) + value = cudf.utils.dtypes.to_cudf_compatible_scalar(value) x = cudf.Scalar( value, value.dtype if not isinstance(value, Decimal) else None ) diff --git a/python/cudf/cudf/tests/test_search.py b/python/cudf/cudf/tests/test_search.py index cd029d02d79..d3433a589a7 100644 --- a/python/cudf/cudf/tests/test_search.py +++ b/python/cudf/cudf/tests/test_search.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. 
import cupy import numpy as np import pandas as pd @@ -73,6 +73,14 @@ def test_searchsorted_dataframe(side, multiindex): assert result == [2, 0, 4, 1] +def test_search_sorted_dataframe_unequal_number_of_columns(): + values = cudf.DataFrame({"a": [1, 0, 5, 1]}) + base = cudf.DataFrame({"a": [1, 0, 5, 1], "b": ["x", "z", "w", "a"]}) + + with pytest.raises(ValueError, match="Mismatch number of columns"): + base.searchsorted(values) + + @pytest.mark.parametrize("side", ["left", "right"]) def test_searchsorted_categorical(side): diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 6f0f77f0aa2..d755ed58724 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -16,8 +16,10 @@ from cudf.testing._utils import ( NUMERIC_TYPES, TIMEDELTA_TYPES, + _create_pandas_series, assert_eq, assert_exceptions_equal, + gen_rand, ) @@ -380,7 +382,7 @@ def test_series_tolist(data): [[], [None, None], ["a"], ["a", "b", "c"] * 500, [1.0, 2.0, 0.3] * 57], ) def test_series_size(data): - psr = cudf.utils.utils._create_pandas_series(data=data) + psr = _create_pandas_series(data) gsr = cudf.Series(data) assert_eq(psr.size, gsr.size) @@ -1593,7 +1595,7 @@ def test_series_nunique_index(data): ) def test_isin_numeric(data, values): index = np.random.randint(0, 100, len(data)) - psr = cudf.utils.utils._create_pandas_series(data=data, index=index) + psr = _create_pandas_series(data, index=index) gsr = cudf.Series.from_pandas(psr, nan_as_null=False) expected = psr.isin(values) @@ -1653,7 +1655,7 @@ def test_fill_new_category(): ], ) def test_isin_datetime(data, values): - psr = cudf.utils.utils._create_pandas_series(data=data) + psr = _create_pandas_series(data) gsr = cudf.Series.from_pandas(psr) got = gsr.isin(values) @@ -1689,7 +1691,7 @@ def test_isin_datetime(data, values): ], ) def test_isin_string(data, values): - psr = cudf.utils.utils._create_pandas_series(data=data) + psr = _create_pandas_series(data) gsr = cudf.Series.from_pandas(psr) got = gsr.isin(values) @@ -1718,9 +1720,95 @@ def test_isin_string(data, values): ], ) def test_isin_categorical(data, values): - psr = cudf.utils.utils._create_pandas_series(data=data) + psr = _create_pandas_series(data) gsr = cudf.Series.from_pandas(psr) got = gsr.isin(values) expected = psr.isin(values) assert_eq(got, expected) + + +@pytest.mark.parametrize("dtype", NUMERIC_TYPES) +@pytest.mark.parametrize("period", [-1, -5, -10, -20, 0, 1, 5, 10, 20]) +@pytest.mark.parametrize("data_empty", [False, True]) +def test_diff(dtype, period, data_empty): + if data_empty: + data = None + else: + if dtype == np.int8: + # to keep data in range + data = gen_rand(dtype, 100000, low=-2, high=2) + else: + data = gen_rand(dtype, 100000) + + gs = cudf.Series(data, dtype=dtype) + ps = pd.Series(data, dtype=dtype) + + expected_outcome = ps.diff(period) + diffed_outcome = gs.diff(period).astype(expected_outcome.dtype) + + if data_empty: + assert_eq(diffed_outcome, expected_outcome, check_index_type=False) + else: + assert_eq(diffed_outcome, expected_outcome) + + +@pytest.mark.parametrize( + "data", + [ + ["a", "b", "c", "d", "e"], + ], +) +def test_diff_unsupported_dtypes(data): + gs = cudf.Series(data) + with pytest.raises( + TypeError, + match=r"unsupported operand type\(s\)", + ): + gs.diff() + + +@pytest.mark.parametrize( + "data", + [ + pd.date_range("2020-01-01", "2020-01-06", freq="D"), + [True, True, True, False, True, True], + [1.0, 2.0, 3.5, 4.0, 5.0, -1.7], + [1, 2, 3, 3, 4, 5], + [np.nan, None, None, 
np.nan, np.nan, None], + ], +) +def test_diff_many_dtypes(data): + ps = pd.Series(data) + gs = cudf.from_pandas(ps) + assert_eq(ps.diff(), gs.diff()) + assert_eq(ps.diff(periods=2), gs.diff(periods=2)) + + +@pytest.mark.parametrize("num_rows", [1, 100]) +@pytest.mark.parametrize("num_bins", [1, 10]) +@pytest.mark.parametrize("right", [True, False]) +@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"]) +@pytest.mark.parametrize("series_bins", [True, False]) +def test_series_digitize(num_rows, num_bins, right, dtype, series_bins): + data = np.random.randint(0, 100, num_rows).astype(dtype) + bins = np.unique(np.sort(np.random.randint(2, 95, num_bins).astype(dtype))) + s = cudf.Series(data) + if series_bins: + s_bins = cudf.Series(bins) + indices = s.digitize(s_bins, right) + else: + indices = s.digitize(bins, right) + np.testing.assert_array_equal( + np.digitize(data, bins, right), indices.to_numpy() + ) + + +def test_series_digitize_invalid_bins(): + s = cudf.Series(np.random.randint(0, 30, 80), dtype="int32") + bins = cudf.Series([2, None, None, 50, 90], dtype="int32") + + with pytest.raises( + ValueError, match="`bins` cannot contain null entries." + ): + _ = s.digitize(bins) diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index 977a01952db..f134849663d 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -9,7 +9,11 @@ import cudf from cudf.datasets import randomdata -from cudf.testing._utils import assert_eq, assert_exceptions_equal +from cudf.testing._utils import ( + _create_pandas_series, + assert_eq, + assert_exceptions_equal, +) params_dtypes = [np.int32, np.uint32, np.float32, np.float64] methods = ["min", "max", "sum", "mean", "var", "std"] @@ -217,7 +221,7 @@ def test_approx_quantiles_int(): ) def test_misc_quantiles(data, q): - pdf_series = cudf.utils.utils._create_pandas_series(data=data) + pdf_series = _create_pandas_series(data) gdf_series = cudf.Series(data) expected = pdf_series.quantile(q.get() if isinstance(q, cp.ndarray) else q) @@ -239,13 +243,10 @@ def test_misc_quantiles(data, q): cudf.Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]), cudf.Series([]), cudf.Series([-3]), - randomdata( - nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} - ), ], ) @pytest.mark.parametrize("null_flag", [False, True]) -def test_kurtosis(data, null_flag): +def test_kurtosis_series(data, null_flag): pdata = data.to_pandas() if null_flag and len(data) > 2: @@ -262,8 +263,13 @@ def test_kurtosis(data, null_flag): expected = pdata.kurt() np.testing.assert_array_almost_equal(got, expected) + got = data.kurt(numeric_only=False) + got = got if np.isscalar(got) else got.to_numpy() + expected = pdata.kurt(numeric_only=False) + np.testing.assert_array_almost_equal(got, expected) + with pytest.raises(NotImplementedError): - data.kurt(numeric_only=False) + data.kurt(numeric_only=True) @pytest.mark.parametrize( @@ -280,13 +286,10 @@ def test_kurtosis(data, null_flag): cudf.Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]), cudf.Series([]), cudf.Series([-3]), - randomdata( - nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} - ), ], ) @pytest.mark.parametrize("null_flag", [False, True]) -def test_skew(data, null_flag): +def test_skew_series(data, null_flag): pdata = data.to_pandas() if null_flag and len(data) > 2: @@ -298,8 +301,13 @@ def test_skew(data, null_flag): got = got if np.isscalar(got) else got.to_numpy() np.testing.assert_array_almost_equal(got, 
expected) + got = data.skew(numeric_only=False) + expected = pdata.skew(numeric_only=False) + got = got if np.isscalar(got) else got.to_numpy() + np.testing.assert_array_almost_equal(got, expected) + with pytest.raises(NotImplementedError): - data.skew(numeric_only=False) + data.skew(numeric_only=True) @pytest.mark.parametrize("dtype", params_dtypes) @@ -480,14 +488,14 @@ def test_df_corr(method): ) @pytest.mark.parametrize("skipna", [True, False, None]) def test_nans_stats(data, ops, skipna): - psr = cudf.utils.utils._create_pandas_series(data=data) + psr = _create_pandas_series(data) gsr = cudf.Series(data, nan_as_null=False) assert_eq( getattr(psr, ops)(skipna=skipna), getattr(gsr, ops)(skipna=skipna) ) - psr = cudf.utils.utils._create_pandas_series(data=data) + psr = _create_pandas_series(data) gsr = cudf.Series(data, nan_as_null=False) # Since there is no concept of `nan_as_null` in pandas, # nulls will be returned in the operations. So only @@ -541,3 +549,62 @@ def test_cov_corr_invalid_dtypes(gsr): rfunc_args_and_kwargs=([gsr],), compare_error_message=False, ) + + +@pytest.mark.parametrize( + "data", + [ + randomdata( + nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} + ), + ], +) +@pytest.mark.parametrize("null_flag", [False, True]) +def test_kurtosis_df(data, null_flag): + pdata = data.to_pandas() + + if null_flag and len(data) > 2: + data.iloc[[0, 2]] = None + pdata.iloc[[0, 2]] = None + + got = data.kurtosis() + got = got if np.isscalar(got) else got.to_numpy() + expected = pdata.kurtosis() + np.testing.assert_array_almost_equal(got, expected) + + got = data.kurt() + got = got if np.isscalar(got) else got.to_numpy() + expected = pdata.kurt() + np.testing.assert_array_almost_equal(got, expected) + + got = data.kurt(numeric_only=True) + got = got if np.isscalar(got) else got.to_numpy() + expected = pdata.kurt(numeric_only=True) + np.testing.assert_array_almost_equal(got, expected) + + +@pytest.mark.parametrize( + "data", + [ + randomdata( + nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} + ), + ], +) +@pytest.mark.parametrize("null_flag", [False, True]) +def test_skew_df(data, null_flag): + pdata = data.to_pandas() + + if null_flag and len(data) > 2: + data.iloc[[0, 2]] = None + pdata.iloc[[0, 2]] = None + + got = data.skew() + expected = pdata.skew() + got = got if np.isscalar(got) else got.to_numpy() + np.testing.assert_array_almost_equal(got, expected) + + got = data.skew(numeric_only=True) + expected = pdata.skew(numeric_only=True) + got = got if np.isscalar(got) else got.to_numpy() + np.testing.assert_array_almost_equal(got, expected) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index b68a7562b6b..438f46d4266 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -367,9 +367,12 @@ def func(row): @pytest.mark.parametrize( - "data", [cudf.Series([1, 2, 3]), cudf.Series([1, cudf.NA, 3])] + "data,name", + [([1, 2, 3], None), ([1, cudf.NA, 3], None), ([1, 2, 3], "test_name")], ) -def test_series_apply_basic(data): +def test_series_apply_basic(data, name): + data = cudf.Series(data, name=name) + def func(x): return x + 1 diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index 4796402f14d..fb6e35f4f58 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -14,27 +14,6 @@ # -@cuda.jit -def gpu_diff(in_col, out_col, out_mask, N): - """Calculate the difference 
between values at positions i and i - N in an - array and store the output in a new array. - """ - i = cuda.grid(1) - - if N > 0: - if i < in_col.size: - out_col[i] = in_col[i] - in_col[i - N] - out_mask[i] = True - if i < N: - out_mask[i] = False - else: - if i <= (in_col.size + N): - out_col[i] = in_col[i] - in_col[i - N] - out_mask[i] = True - if i >= (in_col.size + N) and i < in_col.size: - out_mask[i] = False - - # Find segments diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 4cd1738996f..35c6fdc73f8 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -1,6 +1,6 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. -import datetime as dt +import datetime from collections import namedtuple from decimal import Decimal @@ -259,9 +259,9 @@ def to_cudf_compatible_scalar(val, dtype=None): ) or cudf.api.types.is_string_dtype(dtype): dtype = "str" - if isinstance(val, dt.datetime): + if isinstance(val, datetime.datetime): val = np.datetime64(val) - elif isinstance(val, dt.timedelta): + elif isinstance(val, datetime.timedelta): val = np.timedelta64(val) elif isinstance(val, pd.Timestamp): val = val.to_datetime64() diff --git a/python/cudf/cudf/utils/gpu_utils.py b/python/cudf/cudf/utils/gpu_utils.py index a722d350ef4..ab3adc1651a 100644 --- a/python/cudf/cudf/utils/gpu_utils.py +++ b/python/cudf/cudf/utils/gpu_utils.py @@ -55,6 +55,12 @@ def validate_setup(): raise e # If there is no GPU detected, set `gpus_count` to -1 gpus_count = -1 + except RuntimeError as e: + # getDeviceCount() can raise a RuntimeError + # when ``libcuda.so`` is missing. + # We don't want this to propagate up to the user. + warnings.warn(str(e)) + return if gpus_count > 0: # Cupy throws RunTimeException to get GPU count, diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py index cdaaff6b2af..25b3d517e1c 100644 --- a/python/cudf/cudf/utils/queryutils.py +++ b/python/cudf/cudf/utils/queryutils.py @@ -1,7 +1,7 @@ # Copyright (c) 2018-2022, NVIDIA CORPORATION. import ast -import datetime as dt +import datetime from typing import Any, Dict import numpy as np @@ -232,7 +232,7 @@ def query_execute(df, expr, callenv): name = name[len(ENVREF_PREFIX) :] try: val = envdict[name] - if isinstance(val, dt.datetime): + if isinstance(val, datetime.datetime): val = np.datetime64(val) except KeyError: msg = "{!r} not defined in the calling environment" diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 1de6a1a01ec..70273f1d949 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -1,6 +1,5 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. -import decimal import functools import hashlib import os @@ -8,9 +7,7 @@ from functools import partial from typing import FrozenSet, Set, Union -import cupy as cp import numpy as np -import pandas as pd from nvtx import annotate import rmm @@ -18,7 +15,6 @@ import cudf from cudf.core import column from cudf.core.buffer import Buffer -from cudf.utils.dtypes import to_cudf_compatible_scalar # The size of the mask in bytes mask_dtype = cudf.dtype(np.int32) @@ -176,41 +172,6 @@ def wrapper(*args, **kwargs): return wrapper -# TODO: We should evaluate whether calls to this could be more easily replaced -# with column.full, which appears to be significantly faster in simple cases. 
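For context on the TODO above, here is a minimal sketch of the `column.full` pattern it recommends, reusing the same internal call that appears in the removed `scalar_broadcast_to` helper below. The import path and keyword names are taken from this codebase's internals, not a public API:

    from cudf.core import column

    # Broadcast the scalar 1.5 to a 4-row column in one call, the
    # replacement the TODO suggests for scalar_broadcast_to.
    col = column.full(size=4, fill_value=1.5, dtype="float64")
    assert col.size == 4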
-def scalar_broadcast_to(scalar, size, dtype=None): - - if isinstance(size, (tuple, list)): - size = size[0] - - if cudf._lib.scalar._is_null_host_scalar(scalar): - if dtype is None: - dtype = "object" - return column.column_empty(size, dtype=dtype, masked=True) - - if isinstance(scalar, pd.Categorical): - if dtype is None: - return _categorical_scalar_broadcast_to(scalar, size) - else: - return scalar_broadcast_to(scalar.categories[0], size).astype( - dtype - ) - - if isinstance(scalar, decimal.Decimal): - if dtype is None: - dtype = cudf.Decimal128Dtype._from_decimal(scalar) - - out_col = column.column_empty(size, dtype=dtype) - if out_col.size != 0: - out_col[:] = scalar - return out_col - - scalar = to_cudf_compatible_scalar(scalar, dtype=dtype) - dtype = scalar.dtype - - return cudf.core.column.full(size=size, fill_value=scalar, dtype=dtype) - - def initfunc(f): """ Decorator for initialization functions that should @@ -228,19 +189,6 @@ def wrapper(*args, **kwargs): return wrapper -# taken from dask array -# https://github.com/dask/dask/blob/master/dask/array/utils.py#L352-L363 -def _is_nep18_active(): - class A: - def __array_function__(self, *args, **kwargs): - return True - - try: - return np.concatenate([A()]) - except ValueError: - return False - - @initfunc def set_allocator( allocator="default", @@ -274,9 +222,6 @@ def set_allocator( ) -IS_NEP18_ACTIVE = _is_nep18_active() - - class GetAttrGetItemMixin: """This mixin changes `__getattr__` to attempt a `__getitem__` call. @@ -406,92 +351,6 @@ def search_range(start, stop, x, step=1, side="left"): return max(min(length, i), 0) -def _categorical_scalar_broadcast_to(cat_scalar, size): - if isinstance(cat_scalar, (cudf.Series, pd.Series)): - cats = cat_scalar.cat.categories - code = cat_scalar.cat.codes[0] - ordered = cat_scalar.cat.ordered - else: - # handles pd.Categorical, cudf.categorical.CategoricalColumn - cats = cat_scalar.categories - code = cat_scalar.codes[0] - ordered = cat_scalar.ordered - - cats = column.as_column(cats) - codes = scalar_broadcast_to(code, size) - - return column.build_categorical_column( - categories=cats, - codes=codes, - mask=codes.base_mask, - size=codes.size, - offset=codes.offset, - ordered=ordered, - ) - - -def _create_pandas_series( - data=None, index=None, dtype=None, name=None, copy=False, fastpath=False -): - """ - Wrapper to create a Pandas Series. If the length of data is 0 and - dtype is not passed, this wrapper defaults the dtype to `float64`. - - Parameters - ---------- - data : array-like, Iterable, dict, or scalar value - Contains data stored in Series. If data is a dict, argument - order is maintained. - index : array-like or Index (1d) - Values must be hashable and have the same length as data. - Non-unique index values are allowed. Will default to - RangeIndex (0, 1, 2, …, n) if not provided. - If data is dict-like and index is None, then the keys - in the data are used as the index. If the index is not None, - the resulting Series is reindexed with the index values. - dtype : str, numpy.dtype, or ExtensionDtype, optional - Data type for the output Series. If not specified, this - will be inferred from data. See the user guide for more usages. - name : str, optional - The name to give to the Series. - copy : bool, default False - Copy input data. 
- - Returns - ------- - pd.Series - """ - if (data is None or len(data) == 0) and dtype is None: - dtype = "float64" - return pd.Series( - data=data, - index=index, - dtype=dtype, - name=name, - copy=copy, - fastpath=fastpath, - ) - - -def _maybe_indices_to_slice(indices: cp.ndarray) -> Union[slice, cp.ndarray]: - """Makes best effort to convert an array of indices into a python slice. - If the conversion is not possible, return input. `indices` are expected - to be valid. - """ - # TODO: improve efficiency by avoiding sync. - if len(indices) == 1: - x = indices[0].item() - return slice(x, x + 1) - if len(indices) == 2: - x1, x2 = indices[0].item(), indices[1].item() - return slice(x1, x2 + 1, x2 - x1) - start, step = indices[0].item(), (indices[1] - indices[0]).item() - stop = start + step * len(indices) - if (indices == cp.arange(start, stop, step)).all(): - return slice(start, stop, step) - return indices - - def _get_color_for_nvtx(name): m = hashlib.sha256() m.update(name.encode()) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 2ec9909dd6f..a447fcfe027 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -47,7 +47,8 @@ "pytest", "pytest-benchmark", "pytest-xdist", - "hypothesis" "mimesis", + "hypothesis", + "mimesis", "fastavro>=0.22.9", "python-snappy>=0.6.0", "pyorc", diff --git a/python/dask_cudf/dask_cudf/io/orc.py b/python/dask_cudf/dask_cudf/io/orc.py index 3c11fe3ffbb..f5df0e261c9 100644 --- a/python/dask_cudf/dask_cudf/io/orc.py +++ b/python/dask_cudf/dask_cudf/io/orc.py @@ -79,7 +79,7 @@ def read_orc(path, columns=None, filters=None, storage_options=None, **kwargs): ex = set(columns) - set(schema) if ex: raise ValueError( - "Requested columns ({ex}) not in schema ({set(schema)})" + f"Requested columns ({ex}) not in schema ({set(schema)})" ) else: columns = list(schema) diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 042759f68cf..b201626becf 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -177,65 +177,98 @@ def read_partition( strings_to_cats = kwargs.get("strings_to_categorical", False) read_kwargs = kwargs.get("read", {}) read_kwargs.update(open_file_options or {}) - - # Assume multi-piece read - paths = [] - rgs = [] - last_partition_keys = None - dfs = [] - - for i, piece in enumerate(pieces): - - (path, row_group, partition_keys) = piece - row_group = None if row_group == [None] else row_group - - if i > 0 and partition_keys != last_partition_keys: - dfs.append( - cls._read_paths( - paths, - fs, - columns=read_columns, - row_groups=rgs if rgs else None, - strings_to_categorical=strings_to_cats, - partitions=partitions, - partitioning=partitioning, - partition_keys=last_partition_keys, - **read_kwargs, + check_file_size = read_kwargs.pop("check_file_size", None) + + # Wrap reading logic in a `try` block so that we can + # inform the user that the `read_parquet` partition + # size is too large for the available memory + try: + + # Assume multi-piece read + paths = [] + rgs = [] + last_partition_keys = None + dfs = [] + + for i, piece in enumerate(pieces): + + (path, row_group, partition_keys) = piece + row_group = None if row_group == [None] else row_group + + # File-size check to help "protect" users from change + # to up-stream `split_row_groups` default. 
We only
+            # check the file size if this partition corresponds
+            # to a full file, and `check_file_size` is defined
+            if check_file_size and len(pieces) == 1 and row_group is None:
+                file_size = fs.size(path)
+                if file_size > check_file_size:
+                    warnings.warn(
+                        f"A large parquet file ({file_size}B) is being "
+                        f"used to create a DataFrame partition in "
+                        f"read_parquet. This may cause out of memory "
+                        f"exceptions in operations downstream. See the "
+                        f"notes on split_row_groups in the read_parquet "
+                        f"documentation. Setting split_row_groups "
+                        f"explicitly will silence this warning."
+                    )
+
+            if i > 0 and partition_keys != last_partition_keys:
+                dfs.append(
+                    cls._read_paths(
+                        paths,
+                        fs,
+                        columns=read_columns,
+                        row_groups=rgs if rgs else None,
+                        strings_to_categorical=strings_to_cats,
+                        partitions=partitions,
+                        partitioning=partitioning,
+                        partition_keys=last_partition_keys,
+                        **read_kwargs,
+                    )
                 )
+                paths = rgs = []
+                last_partition_keys = None
+            paths.append(path)
+            rgs.append(
+                [row_group]
+                if not isinstance(row_group, list)
+                and row_group is not None
+                else row_group
             )
-            paths = rgs = []
-            last_partition_keys = None
-        paths.append(path)
-        rgs.append(
-            [row_group]
-            if not isinstance(row_group, list) and row_group is not None
-            else row_group
-        )
-        last_partition_keys = partition_keys
+            last_partition_keys = partition_keys

-        dfs.append(
-            cls._read_paths(
-                paths,
-                fs,
-                columns=read_columns,
-                row_groups=rgs if rgs else None,
-                strings_to_categorical=strings_to_cats,
-                partitions=partitions,
-                partitioning=partitioning,
-                partition_keys=last_partition_keys,
-                **read_kwargs,
+            dfs.append(
+                cls._read_paths(
+                    paths,
+                    fs,
+                    columns=read_columns,
+                    row_groups=rgs if rgs else None,
+                    strings_to_categorical=strings_to_cats,
+                    partitions=partitions,
+                    partitioning=partitioning,
+                    partition_keys=last_partition_keys,
+                    **read_kwargs,
+                )
             )
-        )

-        df = cudf.concat(dfs) if len(dfs) > 1 else dfs[0]
-
-        # Re-set "object" dtypes align with pa schema
-        set_object_dtypes_from_pa_schema(df, schema)
+            df = cudf.concat(dfs) if len(dfs) > 1 else dfs[0]

-        if index and (index[0] in df.columns):
-            df = df.set_index(index[0])
-        elif index is False and df.index.names != (None,):
-            # If index=False, we shouldn't have a named index
-            df.reset_index(inplace=True)
+            # Re-set "object" dtypes to align with the pa schema
+            set_object_dtypes_from_pa_schema(df, schema)
+
+            if index and (index[0] in df.columns):
+                df = df.set_index(index[0])
+            elif index is False and df.index.names != (None,):
+                # If index=False, we shouldn't have a named index
+                df.reset_index(inplace=True)
+
+        except MemoryError as err:
+            raise MemoryError(
+                "Parquet data was larger than the available GPU memory!\n\n"
+                "See the notes on split_row_groups in the read_parquet "
+                "documentation.\n\n"
+                "Original Error: " + str(err)
+            ) from err

         return df

@@ -349,25 +382,34 @@ def set_object_dtypes_from_pa_schema(df, schema):
             df._data[col_name] = col.astype(typ)


-def read_parquet(
-    path,
-    columns=None,
-    split_row_groups=None,
-    row_groups_per_part=None,
-    **kwargs,
-):
+def read_parquet(path, columns=None, **kwargs):
     """Read parquet files into a Dask DataFrame

-    Calls ``dask.dataframe.read_parquet`` to cordinate the execution of
-    ``cudf.read_parquet``, and ultimately read multiple partitions into
-    a single Dask dataframe. The Dask version must supply an
-    ``ArrowDatasetEngine`` class to support full functionality.
-    See ``cudf.read_parquet`` and Dask documentation for further details.
+    Calls ``dask.dataframe.read_parquet`` with ``engine=CudfEngine``
+    to coordinate the execution of ``cudf.read_parquet``, and to
+    ultimately create a ``dask_cudf.DataFrame`` collection.
+
+    See the ``dask.dataframe.read_parquet`` documentation for
+    all available options.

     Examples
     --------
-    >>> import dask_cudf
-    >>> df = dask_cudf.read_parquet("/path/to/dataset/")  # doctest: +SKIP
+    >>> from dask_cudf import read_parquet
+    >>> df = read_parquet("/path/to/dataset/")  # doctest: +SKIP
+
+    When dealing with one or more large parquet files having an
+    in-memory footprint greater than ~15% of device memory, the
+    ``split_row_groups`` argument should be used to map Parquet
+    **row-groups** to DataFrame partitions (instead of **files** to
+    partitions). For example, the following code will map each
+    row-group to a distinct partition:
+
+    >>> df = read_parquet(..., split_row_groups=True)  # doctest: +SKIP
+
+    To map **multiple** row-groups to each partition, an integer can be
+    passed to ``split_row_groups`` to specify the **maximum** number of
+    row-groups allowed in each output partition:
+
+    >>> df = read_parquet(..., split_row_groups=10)  # doctest: +SKIP

     See Also
     --------
@@ -376,22 +418,24 @@
     if isinstance(columns, str):
         columns = [columns]

-    if row_groups_per_part:
-        warnings.warn(
-            "row_groups_per_part is deprecated. "
-            "Pass an integer value to split_row_groups instead.",
-            FutureWarning,
-        )
-        if split_row_groups is None:
-            split_row_groups = row_groups_per_part
-
-    return dd.read_parquet(
-        path,
-        columns=columns,
-        split_row_groups=split_row_groups,
-        engine=CudfEngine,
-        **kwargs,
-    )
+    # Set "check_file_size" option to determine whether we
+    # should check the parquet-file size. This check is meant
+    # to "protect" users from `split_row_groups` default changes
+    check_file_size = kwargs.pop("check_file_size", 500_000_000)
+    if (
+        check_file_size
+        and ("split_row_groups" not in kwargs)
+        and ("chunksize" not in kwargs)
+    ):
+        # User is not specifying `split_row_groups` or `chunksize`,
+        # so we should warn them if/when a file is ~>0.5GB on disk.
+        # They can set `split_row_groups` explicitly to silence/skip
+        # this check
+        if "read" not in kwargs:
+            kwargs["read"] = {}
+        kwargs["read"]["check_file_size"] = check_file_size
+
+    return dd.read_parquet(path, columns=columns, engine=CudfEngine, **kwargs)


 to_parquet = partial(dd.to_parquet, engine=CudfEngine)
diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py
index d9b8ee4595a..ef5741b0539 100644
--- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py
+++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py
@@ -36,42 +36,55 @@
 ddf = dd.from_pandas(df, npartitions=npartitions)


-@pytest.mark.parametrize("stats", [True, False])
-def test_roundtrip_from_dask(tmpdir, stats):
+# Helper function to make it easier to handle the
+# upcoming deprecation of `gather_statistics`.
+# See: https://github.com/dask/dask/issues/8937 +# TODO: This function should be used to switch to +# the "new" `calculate_divisions` kwarg (for newer +# Dask versions) once it is introduced +def _divisions(setting): + return {"gather_statistics": setting} + + +@pytest.mark.parametrize("write_metadata_file", [True, False]) +@pytest.mark.parametrize("divisions", [True, False]) +def test_roundtrip_from_dask(tmpdir, divisions, write_metadata_file): tmpdir = str(tmpdir) - ddf.to_parquet(tmpdir, engine="pyarrow") + ddf.to_parquet( + tmpdir, write_metadata_file=write_metadata_file, engine="pyarrow" + ) files = sorted( (os.path.join(tmpdir, f) for f in os.listdir(tmpdir)), key=natural_sort_key, ) # Read list of parquet files - ddf2 = dask_cudf.read_parquet(files, gather_statistics=stats) - dd.assert_eq(ddf, ddf2, check_divisions=stats) + ddf2 = dask_cudf.read_parquet(files, **_divisions(divisions)) + dd.assert_eq(ddf, ddf2, check_divisions=divisions) # Specify columns=['x'] ddf2 = dask_cudf.read_parquet( - files, columns=["x"], gather_statistics=stats + files, columns=["x"], **_divisions(divisions) ) - dd.assert_eq(ddf[["x"]], ddf2, check_divisions=stats) + dd.assert_eq(ddf[["x"]], ddf2, check_divisions=divisions) # Specify columns='y' - ddf2 = dask_cudf.read_parquet(files, columns="y", gather_statistics=stats) - dd.assert_eq(ddf[["y"]], ddf2, check_divisions=stats) + ddf2 = dask_cudf.read_parquet(files, columns="y", **_divisions(divisions)) + dd.assert_eq(ddf[["y"]], ddf2, check_divisions=divisions) # Now include metadata - ddf2 = dask_cudf.read_parquet(tmpdir, gather_statistics=stats) - dd.assert_eq(ddf, ddf2, check_divisions=stats) + ddf2 = dask_cudf.read_parquet(tmpdir, **_divisions(divisions)) + dd.assert_eq(ddf, ddf2, check_divisions=divisions) # Specify columns=['x'] (with metadata) ddf2 = dask_cudf.read_parquet( - tmpdir, columns=["x"], gather_statistics=stats + tmpdir, columns=["x"], **_divisions(divisions) ) - dd.assert_eq(ddf[["x"]], ddf2, check_divisions=stats) + dd.assert_eq(ddf[["x"]], ddf2, check_divisions=divisions) # Specify columns='y' (with metadata) - ddf2 = dask_cudf.read_parquet(tmpdir, columns="y", gather_statistics=stats) - dd.assert_eq(ddf[["y"]], ddf2, check_divisions=stats) + ddf2 = dask_cudf.read_parquet(tmpdir, columns="y", **_divisions(divisions)) + dd.assert_eq(ddf[["y"]], ddf2, check_divisions=divisions) def test_roundtrip_from_dask_index_false(tmpdir): @@ -99,8 +112,8 @@ def test_roundtrip_from_dask_cudf(tmpdir, write_meta): gddf = dask_cudf.from_dask_dataframe(ddf) gddf.to_parquet(tmpdir, write_metadata_file=write_meta) - gddf2 = dask_cudf.read_parquet(tmpdir) - dd.assert_eq(gddf, gddf2, check_divisions=write_meta) + gddf2 = dask_cudf.read_parquet(tmpdir, **_divisions(True)) + dd.assert_eq(gddf, gddf2) def test_roundtrip_none_rangeindex(tmpdir): @@ -161,21 +174,21 @@ def test_dask_timeseries_from_pandas(tmpdir): @pytest.mark.parametrize("index", [False, None]) -@pytest.mark.parametrize("stats", [False, True]) -def test_dask_timeseries_from_dask(tmpdir, index, stats): +@pytest.mark.parametrize("divisions", [False, True]) +def test_dask_timeseries_from_dask(tmpdir, index, divisions): fn = str(tmpdir) ddf2 = dask.datasets.timeseries(freq="D") ddf2.to_parquet(fn, engine="pyarrow", write_index=index) - read_df = dask_cudf.read_parquet(fn, index=index, gather_statistics=stats) + read_df = dask_cudf.read_parquet(fn, index=index, **_divisions(divisions)) dd.assert_eq( - ddf2, read_df, check_divisions=(stats and index), check_index=index + ddf2, read_df, 
+ ddf2, read_df, check_divisions=(divisions and index), check_index=index ) @pytest.mark.parametrize("index", [False, None]) -@pytest.mark.parametrize("stats", [False, True]) -def test_dask_timeseries_from_daskcudf(tmpdir, index, stats): +@pytest.mark.parametrize("divisions", [False, True]) +def test_dask_timeseries_from_daskcudf(tmpdir, index, divisions): fn = str(tmpdir) ddf2 = dask_cudf.from_cudf( @@ -183,9 +196,9 @@ ) ddf2.name = ddf2.name.astype("object") ddf2.to_parquet(fn, write_index=index) - read_df = dask_cudf.read_parquet(fn, index=index, gather_statistics=stats) + read_df = dask_cudf.read_parquet(fn, index=index, **_divisions(divisions)) dd.assert_eq( - ddf2, read_df, check_divisions=(stats and index), check_index=index + ddf2, read_df, check_divisions=(divisions and index), check_index=index ) @@ -212,17 +225,23 @@ def test_filters(tmpdir): ddf.to_parquet(tmp_path, engine="pyarrow") - a = dask_cudf.read_parquet(tmp_path, filters=[("x", ">", 4)]) + a = dask_cudf.read_parquet( + tmp_path, filters=[("x", ">", 4)], split_row_groups=True + ) assert a.npartitions == 3 assert (a.x > 3).all().compute() - b = dask_cudf.read_parquet(tmp_path, filters=[("y", "==", "c")]) + b = dask_cudf.read_parquet( + tmp_path, filters=[("y", "==", "c")], split_row_groups=True + ) assert b.npartitions == 1 b = b.compute().to_pandas() assert (b.y == "c").all() c = dask_cudf.read_parquet( - tmp_path, filters=[("y", "==", "c"), ("x", ">", 6)] + tmp_path, + filters=[("y", "==", "c"), ("x", ">", 6)], + split_row_groups=True, ) assert c.npartitions <= 1 assert not len(c) @@ -237,13 +256,17 @@ def test_filters_at_row_group_level(tmpdir): ddf.to_parquet(tmp_path, engine="pyarrow", row_group_size=10 / 5) - a = dask_cudf.read_parquet(tmp_path, filters=[("x", "==", 1)]) + a = dask_cudf.read_parquet( + tmp_path, filters=[("x", "==", 1)], split_row_groups=True + ) assert a.npartitions == 1 assert (a.shape[0] == 2).compute() ddf.to_parquet(tmp_path, engine="pyarrow", row_group_size=1) - b = dask_cudf.read_parquet(tmp_path, filters=[("x", "==", 1)]) + b = dask_cudf.read_parquet( + tmp_path, filters=[("x", "==", 1)], split_row_groups=True + ) assert b.npartitions == 1 assert (b.shape[0] == 1).compute() @@ -341,7 +364,7 @@ def test_chunksize(tmpdir, chunksize, metadata): path, chunksize=chunksize, split_row_groups=True, - gather_statistics=True, + **_divisions(True), ) ddf2.compute(scheduler="synchronous") @@ -360,8 +383,8 @@ def test_chunksize(tmpdir, chunksize, metadata): path, chunksize=chunksize, split_row_groups=True, - gather_statistics=True, aggregate_files=True, + **_divisions(True), ) dd.assert_eq(ddf1, ddf3, check_divisions=False) @@ -382,7 +405,7 @@ @pytest.mark.parametrize("row_groups", [1, 3, 10, 12]) @pytest.mark.parametrize("index", [False, True]) -def test_row_groups_per_part(tmpdir, row_groups, index): +def test_split_row_groups(tmpdir, row_groups, index): nparts = 2 df_size = 100 row_group_size = 5 @@ -410,7 +433,7 @@ ddf2 = dask_cudf.read_parquet( str(tmpdir), - row_groups_per_part=row_groups, + split_row_groups=row_groups, ) dd.assert_eq(ddf1, ddf2, check_divisions=False) @@ -448,9 +471,9 @@ def test_create_metadata_file(tmpdir, partition_on): # with the _metadata file present ddf2 = dask_cudf.read_parquet( tmpdir, - gather_statistics=True, split_row_groups=False, index="myindex", + **_divisions(True), ) if partition_on: ddf1 = df1.sort_values("b") @@ -481,7 +504,7 @@ def test_create_metadata_file_inconsistent_schema(tmpdir):
# New pyarrow-dataset base can handle an inconsistent # schema (even without a _metadata file), but computing # and dtype validation may fail - ddf1 = dask_cudf.read_parquet(str(tmpdir), gather_statistics=True) + ddf1 = dask_cudf.read_parquet(str(tmpdir), **_divisions(True)) # Add global metadata file. # Dask-CuDF can do this without requiring schema @@ -490,7 +513,7 @@ # Check that we can still read the ddf # with the _metadata file present - ddf2 = dask_cudf.read_parquet(str(tmpdir), gather_statistics=True) + ddf2 = dask_cudf.read_parquet(str(tmpdir), **_divisions(True)) # Check that the result is the same with and # without the _metadata file. Note that we must @@ -538,3 +561,12 @@ def test_cudf_list_struct_write(tmpdir): ddf.to_parquet(temp_file) new_ddf = dask_cudf.read_parquet(temp_file) dd.assert_eq(df, new_ddf) + + +def test_check_file_size(tmpdir): + # Test simple file-size check to help warn users + # of upstream change to `split_row_groups` default + fn = str(tmpdir.join("test.parquet")) + cudf.DataFrame({"a": np.arange(1000)}).to_parquet(fn) + with pytest.warns(match="large parquet file"): + dask_cudf.read_parquet(fn, check_file_size=1).compute() diff --git a/python/dask_cudf/dask_cudf/tests/test_applymap.py b/python/dask_cudf/dask_cudf/tests/test_applymap.py new file mode 100644 index 00000000000..929f00ec296 --- /dev/null +++ b/python/dask_cudf/dask_cudf/tests/test_applymap.py @@ -0,0 +1,29 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +import pytest +from pandas import NA + +from dask import dataframe as dd + +from dask_cudf.tests.utils import _make_random_frame + + +@pytest.mark.parametrize( + "func", + [ + lambda x: x + 1, + lambda x: x - 0.5, + lambda x: 2 if x is NA else 2 + (x + 1) / 4.1, + lambda x: 42, + ], +) +@pytest.mark.parametrize("has_na", [True, False]) +def test_applymap_basic(func, has_na): + size = 2000 + pdf, dgdf = _make_random_frame(size, include_na=has_na) + + dpdf = dd.from_pandas(pdf, npartitions=dgdf.npartitions) + + expect = dpdf.applymap(func) + got = dgdf.applymap(func) + dd.assert_eq(expect, got, check_dtype=False) diff --git a/python/dask_cudf/dask_cudf/tests/test_binops.py b/python/dask_cudf/dask_cudf/tests/test_binops.py index 64b7cc85971..87bd401accd 100644 --- a/python/dask_cudf/dask_cudf/tests/test_binops.py +++ b/python/dask_cudf/dask_cudf/tests/test_binops.py @@ -1,3 +1,5 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + import operator import numpy as np @@ -8,6 +10,8 @@ import cudf + +from dask_cudf.tests.utils import _make_random_frame + def _make_empty_frame(npartitions=2): df = pd.DataFrame({"x": [], "y": []}) @@ -16,15 +20,6 @@ return dgf -def _make_random_frame(nelem, npartitions=2): - df = pd.DataFrame( - {"x": np.random.random(size=nelem), "y": np.random.random(size=nelem)} - ) - gdf = cudf.DataFrame.from_pandas(df) - dgf = dd.from_pandas(gdf, npartitions=npartitions) - return df, dgf - - def _make_random_frame_float(nelem, npartitions=2): df = pd.DataFrame( { diff --git a/python/dask_cudf/dask_cudf/tests/utils.py b/python/dask_cudf/dask_cudf/tests/utils.py new file mode 100644 index 00000000000..88a2116fb0a --- /dev/null +++ b/python/dask_cudf/dask_cudf/tests/utils.py @@ -0,0 +1,21 @@ +# Copyright (c) 2022, NVIDIA CORPORATION.
+ +import numpy as np +import pandas as pd + +import dask.dataframe as dd + +import cudf + + +def _make_random_frame(nelem, npartitions=2, include_na=False): + df = pd.DataFrame( + {"x": np.random.random(size=nelem), "y": np.random.random(size=nelem)} + ) + + if include_na: + df["x"][::2] = pd.NA + + gdf = cudf.DataFrame.from_pandas(df) + dgf = dd.from_pandas(gdf, npartitions=npartitions) + return df, dgf
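The `_divisions` helper added to test_parquet.py above hard-codes the `gather_statistics` kwarg for now; its TODO anticipates switching to a `calculate_divisions` kwarg once newer Dask releases expose it. A minimal sketch of what that switch could look like follows; the cutover version is a placeholder assumption, not a confirmed release.

```python
# Hypothetical sketch of the version-gated helper anticipated by the
# TODO in test_parquet.py. "2022.6.0" is a placeholder cutover; the
# real threshold depends on the Dask release that replaces
# `gather_statistics` with `calculate_divisions` (see dask/dask#8937).
from packaging.version import Version

import dask


def _divisions(setting):
    if Version(dask.__version__) >= Version("2022.6.0"):
        # Newer Dask: read_parquet takes `calculate_divisions` instead.
        return {"calculate_divisions": setting}
    return {"gather_statistics": setting}
```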
\n", - " Comm: tcp://127.0.0.1:41341\n", + " Comm: tcp://127.0.0.1:44033\n", " \n", " Total threads: 1\n", @@ -6166,31 +6201,31 @@ "
\n", - " Dashboard: http://127.0.0.1:39963/status\n", + " Dashboard: http://127.0.0.1:45225/status\n", " \n", - " Memory: 22.89 GiB\n", + " Memory: 62.82 GiB\n", "
\n", - " Nanny: tcp://127.0.0.1:33675\n", + " Nanny: tcp://127.0.0.1:46529\n", "
\n", - " Local directory: /home/ashwin/workspace/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-phx0wjv_\n", + " Local directory: /home/mmccarty/sandbox/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-zlsacw8_\n", "
\n", - " GPU: Quadro GV100\n", + " GPU: NVIDIA RTX A6000\n", " \n", - " GPU memory: 31.74 GiB\n", + " GPU memory: 47.54 GiB\n", "