diff --git a/build.sh b/build.sh index bee66d819b4..7cbd0fceb5a 100755 --- a/build.sh +++ b/build.sh @@ -300,8 +300,7 @@ if buildAll || hasArg libcudf; then # Record build times if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then echo "Formatting build metrics" - python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml - MSG="

" + MSG="" # get some sccache stats after the compile if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }') @@ -318,7 +317,9 @@ if buildAll || hasArg libcudf; then BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIB_BUILD_DIR}"} echo "Metrics output dir: [$BMR_DIR]" mkdir -p ${BMR_DIR} - python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${BMR_DIR}/ninja_log.html + MSG_OUTFILE="$(mktemp)" + echo "$MSG" > "${MSG_OUTFILE}" + python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/ninja_log.html cp ${LIB_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log fi diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index b68c2bdbef6..bc27e7d76b0 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -14,29 +14,3 @@ rapids-logger "Begin cpp build" rapids-mamba-retry mambabuild conda/recipes/libcudf rapids-upload-conda-to-s3 cpp - -echo "++++++++++++++++++++++++++++++++++++++++++++" - -if [[ -d $RAPIDS_ARTIFACTS_DIR ]]; then - ls -l ${RAPIDS_ARTIFACTS_DIR} -fi - -echo "++++++++++++++++++++++++++++++++++++++++++++" - -FILE=${RAPIDS_ARTIFACTS_DIR}/ninja.log -if [[ -f $FILE ]]; then - echo -e "\x1B[33;1m\x1B[48;5;240m Ninja log for this build available at the following link \x1B[0m" - UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).ninja.log - rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}" -fi - -echo "++++++++++++++++++++++++++++++++++++++++++++" - -FILE=${RAPIDS_ARTIFACTS_DIR}/ninja_log.html -if [[ -f $FILE ]]; then - echo -e "\x1B[33;1m\x1B[48;5;240m Build Metrics Report for this build available at the following link \x1B[0m" - UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).BuildMetricsReport.html - rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}" -fi - -echo "++++++++++++++++++++++++++++++++++++++++++++" diff --git 
a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh index 9d9758f1f15..0c55c4b9141 100755 --- a/ci/release/apply_wheel_modifications.sh +++ b/ci/release/apply_wheel_modifications.sh @@ -6,12 +6,6 @@ VERSION=${1} CUDA_SUFFIX=${2} -# __init__.py versions -sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf/cudf/__init__.py -sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/dask_cudf/dask_cudf/__init__.py -sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf_kafka/cudf_kafka/__init__.py -sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/custreamz/custreamz/__init__.py - # pyproject.toml versions sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf/pyproject.toml sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/dask_cudf/pyproject.toml diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index caa807bd7ec..0b2fc71aacd 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -92,6 +92,7 @@ outputs: - test -f $PREFIX/include/cudf/concatenate.hpp - test -f $PREFIX/include/cudf/copying.hpp - test -f $PREFIX/include/cudf/datetime.hpp + - test -f $PREFIX/include/cudf/timezone.hpp - test -f $PREFIX/include/cudf/detail/aggregation/aggregation.hpp - test -f $PREFIX/include/cudf/detail/aggregation/result_cache.hpp - test -f $PREFIX/include/cudf/detail/binaryop.hpp @@ -128,6 +129,8 @@ outputs: - test -f $PREFIX/include/cudf/detail/stream_compaction.hpp - test -f $PREFIX/include/cudf/detail/structs/utilities.hpp - test -f $PREFIX/include/cudf/detail/tdigest/tdigest.hpp + - test -f $PREFIX/include/cudf/detail/timezone.cuh + - test -f $PREFIX/include/cudf/detail/timezone.hpp - test -f $PREFIX/include/cudf/detail/transform.hpp - test -f $PREFIX/include/cudf/detail/transpose.hpp - test -f $PREFIX/include/cudf/detail/unary.hpp diff --git a/conda/recipes/libcudf/post-link.sh b/conda/recipes/libcudf/post-link.sh 
index 64e0b1ad305..8ae2349f791 100644 --- a/conda/recipes/libcudf/post-link.sh +++ b/conda/recipes/libcudf/post-link.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Only add the license notice to libcudf and not our examples / tests if [[ "$PKG_NAME" == "libcudf" ]]; then - cat ./nvlink.txt >> $PREFIX/.messages.txt + cat ./nvcomp.txt >> $PREFIX/.messages.txt fi diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0fcd1895972..13583378134 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -369,7 +369,7 @@ add_library( src/io/orc/stripe_data.cu src/io/orc/stripe_enc.cu src/io/orc/stripe_init.cu - src/io/orc/timezone.cpp + src/datetime/timezone.cpp src/io/orc/writer_impl.cu src/io/parquet/compact_protocol_reader.cpp src/io/parquet/compact_protocol_writer.cpp @@ -890,31 +890,20 @@ install( EXPORT cudf-exports ) -install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test - ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} -) - -if(CUDF_BUILD_TESTUTIL) +set(_components_export_string) +if(TARGET cudftestutil) install( TARGETS cudftest_default_stream cudftestutil DESTINATION ${lib_dir} EXPORT cudf-testing-exports ) - - install( - EXPORT cudf-testing-exports - FILE cudf-testing-targets.cmake - NAMESPACE cudf:: - DESTINATION "${lib_dir}/cmake/cudf" - ) - - include("${rapids-cmake-dir}/export/write_dependencies.cmake") - rapids_export_write_dependencies( - INSTALL cudf-testing-exports - "${PROJECT_BINARY_DIR}/rapids-cmake/cudf/export/cudf-testing-dependencies.cmake" - ) + set(_components_export_string COMPONENTS testing COMPONENTS_EXPORT_SET cudf-testing-exports) endif() +install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test + ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) + if(CUDF_BUILD_STREAMS_TEST_UTIL) install(TARGETS cudf_identify_stream_usage_mode_cudf DESTINATION 
${lib_dir}) install(TARGETS cudf_identify_stream_usage_mode_testing DESTINATION ${lib_dir}) @@ -976,12 +965,6 @@ string( [=[ if(testing IN_LIST cudf_FIND_COMPONENTS) enable_language(CUDA) - if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake") - endif() - if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") - endif() endif() ]=] ) @@ -989,8 +972,8 @@ string(APPEND install_code_string "${common_code_string}") rapids_export( INSTALL cudf - EXPORT_SET cudf-exports - GLOBAL_TARGETS cudf + EXPORT_SET cudf-exports ${_components_export_string} + GLOBAL_TARGETS cudf cudftestutil NAMESPACE cudf:: DOCUMENTATION doc_string FINAL_CODE_BLOCK install_code_string @@ -1013,23 +996,13 @@ string(APPEND build_code_string "${common_code_string}") rapids_export( BUILD cudf - EXPORT_SET cudf-exports - GLOBAL_TARGETS cudf + EXPORT_SET cudf-exports ${_components_export_string} + GLOBAL_TARGETS cudf cudftestutil NAMESPACE cudf:: DOCUMENTATION doc_string FINAL_CODE_BLOCK build_code_string ) -if(CUDF_BUILD_TESTUTIL) - export( - EXPORT cudf-testing-exports - FILE ${CUDF_BINARY_DIR}/cudf-testing-targets.cmake - NAMESPACE cudf:: - ) - rapids_export_write_dependencies( - BUILD cudf-testing-exports "${CUDF_BINARY_DIR}/cudf-testing-dependencies.cmake" - ) -endif() # ################################################################################################## # * make documentation ---------------------------------------------------------------------------- diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index 545028260b8..762e9640d12 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -430,8 +430,12 @@ std::unique_ptr create_random_column(data_profile const& profile, null_mask.begin()); } - auto [result_bitmask, null_count] = 
cudf::detail::valid_if( - null_mask.begin(), null_mask.end(), thrust::identity{}, cudf::get_default_stream()); + auto [result_bitmask, null_count] = + cudf::detail::valid_if(null_mask.begin(), + null_mask.end(), + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); return std::make_unique( dtype, @@ -509,8 +513,12 @@ std::unique_ptr create_random_utf8_string_column(data_profile cons thrust::make_zip_iterator(offsets.begin(), offsets.begin() + 1), num_rows, string_generator{chars.data(), engine}); - auto [result_bitmask, null_count] = cudf::detail::valid_if( - null_mask.begin(), null_mask.end() - 1, thrust::identity{}, cudf::get_default_stream()); + auto [result_bitmask, null_count] = + cudf::detail::valid_if(null_mask.begin(), + null_mask.end() - 1, + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); return cudf::make_strings_column( num_rows, std::move(offsets), @@ -628,8 +636,11 @@ std::unique_ptr create_random_column(data_profi auto [null_mask, null_count] = [&]() { if (profile.get_null_probability().has_value()) { auto valids = valid_dist(engine, num_rows); - return cudf::detail::valid_if( - valids.begin(), valids.end(), thrust::identity{}, cudf::get_default_stream()); + return cudf::detail::valid_if(valids.begin(), + valids.end(), + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); } return std::pair{}; }(); @@ -712,9 +723,12 @@ std::unique_ptr create_random_column(data_profile auto offsets_column = std::make_unique( cudf::data_type{cudf::type_id::INT32}, num_rows + 1, offsets.release()); - auto [null_mask, null_count] = cudf::detail::valid_if( - valids.begin(), valids.end(), thrust::identity{}, cudf::get_default_stream()); - list_column = cudf::make_lists_column( + auto [null_mask, null_count] = cudf::detail::valid_if(valids.begin(), + valids.end(), + thrust::identity{}, + cudf::get_default_stream(), + 
rmm::mr::get_current_device_resource()); + list_column = cudf::make_lists_column( num_rows, std::move(offsets_column), std::move(current_child_column), @@ -840,7 +854,8 @@ std::pair create_random_null_mask( return cudf::detail::valid_if(thrust::make_counting_iterator(0), thrust::make_counting_iterator(size), bool_generator{seed, 1.0 - *null_probability}, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); } } diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu index 73060200d00..1b1cf9b7e9d 100644 --- a/cpp/benchmarks/iterator/iterator.cu +++ b/cpp/benchmarks/iterator/iterator.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -140,8 +140,8 @@ void BM_iterator(benchmark::State& state) cudf::column_view hasnull_F = wrap_hasnull_F; // Initialize dev_result to false - auto dev_result = - cudf::detail::make_zeroed_device_uvector_sync(1, cudf::get_default_stream()); + auto dev_result = cudf::detail::make_zeroed_device_uvector_sync( + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 if (cub_or_thrust) { @@ -210,7 +210,7 @@ void BM_pair_iterator(benchmark::State& state) // Initialize dev_result to false auto dev_result = cudf::detail::make_zeroed_device_uvector_sync>( - 1, cudf::get_default_stream()); + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 if (cub_or_thrust) { diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index e37a4ca1193..70036a95377 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ 
b/cpp/benchmarks/join/join_common.hpp @@ -104,8 +104,11 @@ void BM_join(state_type& state, Join JoinFunc) // roughly 75% nulls auto validity = thrust::make_transform_iterator(thrust::make_counting_iterator(0), null75_generator{}); - return cudf::detail::valid_if( - validity, validity + size, thrust::identity{}, cudf::get_default_stream()) + return cudf::detail::valid_if(validity, + validity + size, + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()) .first; }; diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index a88f621095c..91c3dccfdc6 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -121,8 +121,8 @@ recommend watching Sean Parent's [C++ Seasoning talk](https://www.youtube.com/wa and we try to follow his rules: "No raw loops. No raw pointers. No raw synchronization primitives." * Prefer algorithms from STL and Thrust to raw loops. - * Prefer libcudf and RMM [owning data structures and views](#libcudf-data-structures) to raw pointers - and raw memory allocation. + * Prefer libcudf and RMM [owning data structures and views](#libcudf-data-structures) to raw + pointers and raw memory allocation. * libcudf doesn't have a lot of CPU-thread concurrency, but there is some. And currently libcudf does use raw synchronization primitives. So we should revisit Parent's third rule and improve here. @@ -146,8 +146,8 @@ The following guidelines apply to organizing `#include` lines. * Separate groups by a blank line. * Order the groups from "nearest" to "farthest". In other words, local includes, then includes from other RAPIDS libraries, then includes from related libraries, like ``, then - includes from dependencies installed with cuDF, and then standard headers (for example ``, - ``). + includes from dependencies installed with cuDF, and then standard headers (for example + ``, ``). 
* Use `<>` instead of `""` unless the header is in the same directory as the source file. * Tools like `clangd` often auto-insert includes when they can, but they usually get the grouping and brackets wrong. @@ -271,10 +271,12 @@ A *mutable*, non-owning view of a table. ## cudf::size_type -The `cudf::size_type` is the type used for the number of elements in a column, offsets to elements within a column, indices to address specific elements, segments for subsets of column elements, etc. +The `cudf::size_type` is the type used for the number of elements in a column, offsets to elements +within a column, indices to address specific elements, segments for subsets of column elements, etc. It is equivalent to a signed, 32-bit integer type and therefore has a maximum value of 2147483647. -Some APIs also accept negative index values and those functions support a minimum value of -2147483648. -This fundamental type also influences output values not just for column size limits but for counting elements as well. +Some APIs also accept negative index values and those functions support a minimum value of +-2147483648. This fundamental type also influences output values not just for column size limits +but for counting elements as well. ## Spans @@ -343,8 +345,8 @@ auto s1 = static_cast(s.get()); ``` ### Passing to device -Each scalar type, except `list_scalar`, has a corresponding non-owning device view class which allows -access to the value and its validity from the device. This can be obtained using the function +Each scalar type, except `list_scalar`, has a corresponding non-owning device view class which +allows access to the value and its validity from the device. This can be obtained using the function `get_scalar_device_view(ScalarType s)`. Note that a device view is not provided for a base scalar object, only for the derived typed scalar class objects. 
@@ -355,68 +357,84 @@ data, a specialized device view for list columns can be constructed via # libcudf Policies and Design Principles -`libcudf` is designed to provide thread-safe, single-GPU accelerated algorithm primitives for solving a wide variety of problems that arise in data science. -APIs are written to execute on the default GPU, which can be controlled by the caller through standard CUDA device APIs or environment variables like `CUDA_VISIBLE_DEVICES`. -Our goal is to enable diverse use cases like Spark or Pandas to benefit from the performance of GPUs, and libcudf relies on these higher-level layers like Spark or Dask to orchestrate multi-GPU tasks. +`libcudf` is designed to provide thread-safe, single-GPU accelerated algorithm primitives for +solving a wide variety of problems that arise in data science. APIs are written to execute on the +default GPU, which can be controlled by the caller through standard CUDA device APIs or environment +variables like `CUDA_VISIBLE_DEVICES`. Our goal is to enable diverse use cases like Spark or Pandas +to benefit from the performance of GPUs, and libcudf relies on these higher-level layers like Spark +or Dask to orchestrate multi-GPU tasks. -To best satisfy these use-cases, libcudf prioritizes performance and flexibility, which sometimes may come at the cost of convenience. -While we welcome users to use libcudf directly, we design with the expectation that most users will be consuming libcudf through higher-level layers like Spark or cuDF Python that handle some of details that direct users of libcudf must handle on their own. -We document these policies and the reasons behind them here. +To best satisfy these use-cases, libcudf prioritizes performance and flexibility, which sometimes +may come at the cost of convenience. 
While we welcome users to use libcudf directly, we design with +the expectation that most users will be consuming libcudf through higher-level layers like Spark or +cuDF Python that handle some of details that direct users of libcudf must handle on their own. We +document these policies and the reasons behind them here. ## libcudf does not introspect data libcudf APIs generally do not perform deep introspection and validation of input data. There are numerous reasons for this: 1. It violates the single responsibility principle: validation is separate from execution. -2. Since libcudf data structures store data on the GPU, any validation incurs _at minimum_ the overhead of a kernel launch, and may in general be prohibitively expensive. +2. Since libcudf data structures store data on the GPU, any validation incurs _at minimum_ the + overhead of a kernel launch, and may in general be prohibitively expensive. 3. API promises around data introspection often significantly complicate implementation. Users are therefore responsible for passing valid data into such APIs. _Note that this policy does not mean that libcudf performs no validation whatsoever_. libcudf APIs should still perform any validation that does not require introspection. -To give some idea of what should or should not be validated, here are (non-exhaustive) lists of examples. +To give some idea of what should or should not be validated, here are (non-exhaustive) lists of +examples. 
**Things that libcudf should validate**: - Input column/table sizes or data types **Things that libcudf should not validate**: - Integer overflow -- Ensuring that outputs will not exceed the [2GB size](#cudfsize_type) limit for a given set of inputs +- Ensuring that outputs will not exceed the [2GB size](#cudfsize_type) limit for a given set of + inputs ## libcudf expects nested types to have sanitized null masks -Various libcudf APIs accepting columns of nested data types (such as `LIST` or `STRUCT`) may assume that these columns have been sanitized. -In this context, sanitization refers to ensuring that the null elements in a column with a nested dtype are compatible with the elements of nested columns. +Various libcudf APIs accepting columns of nested data types (such as `LIST` or `STRUCT`) may assume +that these columns have been sanitized. In this context, sanitization refers to ensuring that the +null elements in a column with a nested dtype are compatible with the elements of nested columns. Specifically: -- Null elements of list columns should also be empty. The starting offset of a null element should be equal to the ending offset. +- Null elements of list columns should also be empty. The starting offset of a null element should + be equal to the ending offset. - Null elements of struct columns should also be null elements in the underlying structs. -- For compound columns, nulls should only be present at the level of the parent column. Child columns should not contain nulls. +- For compound columns, nulls should only be present at the level of the parent column. Child + columns should not contain nulls. - Slice operations on nested columns do not propagate offsets to child columns. -libcudf APIs _should_ promise to never return "dirty" columns, i.e. columns containing unsanitized data. -Therefore, the only problem is if users construct input columns that are not correctly sanitized and then pass those into libcudf APIs. 
+libcudf APIs _should_ promise to never return "dirty" columns, i.e. columns containing unsanitized +data. Therefore, the only problem is if users construct input columns that are not correctly +sanitized and then pass those into libcudf APIs. ## Treat libcudf APIs as if they were asynchronous libcudf APIs called on the host do not guarantee that the stream is synchronized before returning. -Work in libcudf occurs on `cudf::get_default_stream().value`, which defaults to the CUDA default stream (stream 0). -Note that the stream 0 behavior differs if [per-thread default stream is enabled](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) via `CUDF_USE_PER_THREAD_DEFAULT_STREAM`. -Any data provided to or returned by libcudf that uses a separate non-blocking stream requires synchronization with the default libcudf stream to ensure stream safety. +Work in libcudf occurs on `cudf::get_default_stream().value`, which defaults to the CUDA default +stream (stream 0). Note that the stream 0 behavior differs if [per-thread default stream is +enabled](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) via +`CUDF_USE_PER_THREAD_DEFAULT_STREAM`. Any data provided to or returned by libcudf that uses a +separate non-blocking stream requires synchronization with the default libcudf stream to ensure +stream safety. ## libcudf generally does not make ordering guarantees -Functions like merge or groupby in libcudf make no guarantees about the order of entries in the output. -Promising deterministic ordering is not, in general, conducive to fast parallel algorithms. +Functions like merge or groupby in libcudf make no guarantees about the order of entries in the +output. Promising deterministic ordering is not, in general, conducive to fast parallel algorithms. Calling code is responsible for performing sorts after the fact if sorted outputs are needed. 
## libcudf does not promise specific exception messages -libcudf documents the exceptions that will be thrown by an API for different kinds of invalid inputs. -The types of those exceptions (e.g. `cudf::logic_error`) are part of the public API. -However, the explanatory string returned by the `what` method of those exceptions is not part of the API and is subject to change. -Calling code should not rely on the contents of libcudf error messages to determine the nature of the error. -For information on the types of exceptions that libcudf throws under different circumstances, see the [section on error handling](#errors). +libcudf documents the exceptions that will be thrown by an API for different kinds of invalid +inputs. The types of those exceptions (e.g. `cudf::logic_error`) are part of the public API. +However, the explanatory string returned by the `what` method of those exceptions is not part of the +API and is subject to change. Calling code should not rely on the contents of libcudf error +messages to determine the nature of the error. For information on the types of exceptions that +libcudf throws under different circumstances, see the [section on error handling](#errors). # libcudf API and Implementation @@ -475,14 +493,6 @@ asynchrony if and when we add an asynchronous API to libcudf. **Note:** `cudaDeviceSynchronize()` should *never* be used. This limits the ability to do any multi-stream/multi-threaded work with libcudf APIs. - ### NVTX Ranges - -In order to aid in performance optimization and debugging, all compute intensive libcudf functions -should have a corresponding NVTX range. In libcudf, we have a convenience macro `CUDF_FUNC_RANGE()` -that will automatically annotate the lifetime of the enclosing function and use the function's name -as the name of the NVTX range. For more information about NVTX, see -[here](https://github.com/NVIDIA/NVTX/tree/dev/c). 
- ### Stream Creation There may be times in implementing libcudf features where it would be advantageous to use streams @@ -494,8 +504,8 @@ should avoid creating streams (even if it is slightly less efficient). It is a g ## Memory Allocation -Device [memory resources](#rmmdevice_memory_resource) are used in libcudf to abstract and control how device -memory is allocated. +Device [memory resources](#rmmdevice_memory_resource) are used in libcudf to abstract and control +how device memory is allocated. ### Output Memory @@ -515,6 +525,12 @@ std::unique_ptr returns_output_memory( void does_not_allocate_output_memory(...); ``` +This rule automatically applies to all detail APIs that allocates memory. Any detail API may be +called by any public API, and therefore could be allocating memory that is returned to the user. +To support such uses cases, all detail APIs allocating memory resources should accept an `mr` +parameter. Callers are responsible for either passing through a provided `mr` or +`rmm::mr::get_current_device_resource()` as needed. + ### Temporary Memory Not all memory allocated within a libcudf API is returned to the caller. Often algorithms must @@ -535,7 +551,7 @@ rmm::device_buffer some_function( ### Memory Management libcudf code generally eschews raw pointers and direct memory allocation. Use RMM classes built to -use `device_memory_resource`(*)s for device memory allocation with automated lifetime management. +use `device_memory_resource`s for device memory allocation with automated lifetime management. #### rmm::device_buffer Allocates a specified number of bytes of untyped, uninitialized device memory using a @@ -617,6 +633,32 @@ rmm::mr::device_memory_resource * mr = new my_custom_resource{...}; rmm::device_uvector v2{100, s, mr}; ``` +## Default Parameters + +While public libcudf APIs are free to include default function parameters, detail functions should +not. 
Default memory resource parameters make it easy for developers to accidentally allocate memory +using the incorrect resource. Avoiding default memory resources forces developers to consider each +memory allocation carefully. + +While streams are not currently exposed in libcudf's API, we plan to do so eventually. As a result, +the same reasons for memory resources also apply to streams. Public APIs default to using +`cudf::get_default_stream()`. However, including the same default in detail APIs opens the door for +developers to forget to pass in a user-provided stream if one is passed to a public API. Forcing +every detail API call to explicitly pass a stream is intended to prevent such mistakes. + +The memory resources (and eventually, the stream) are the final parameters for essentially all +public APIs. For API consistency, the same is true throughout libcudf's internals. Therefore, a +consequence of not allowing default streams or MRs is that no parameters in detail APIs may have +defaults. + +## NVTX Ranges + +In order to aid in performance optimization and debugging, all compute intensive libcudf functions +should have a corresponding NVTX range. libcudf has a convenience macro `CUDF_FUNC_RANGE()` that +automatically annotates the lifetime of the enclosing function and uses the function's name as +the name of the NVTX range. For more information about NVTX, see +[here](https://github.com/NVIDIA/NVTX/tree/dev/c). + ## Input/Output Style The preferred style for how inputs are passed in and outputs are returned is the following: @@ -886,9 +928,9 @@ CUDF_FAIL("This code path should not be reached."); ### CUDA Error Checking -Use the `CUDF_CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. This -macro throws a `cudf::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. The -thrown exception includes a description of the CUDA error code in its `what()` message. 
+Use the `CUDF_CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. +This macro throws a `cudf::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. +The thrown exception includes a description of the CUDA error code in its `what()` message. Example: @@ -1111,8 +1153,8 @@ For list columns, the parent column's type is `LIST` and contains no data, but i the number of lists in the column, and its null mask represents the validity of each list element. The parent has two children. -1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the beginning of each list - in a dense column of elements. +1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the + beginning of each list in a dense column of elements. 2. A column containing the actual data and optional null mask for all elements of all the lists packed together. @@ -1271,9 +1313,9 @@ libcudf provides view types for nested column types as well as for the data elem `cudf::strings_column_view` is a view of a strings column, like `cudf::column_view` is a view of any `cudf::column`. `cudf::string_view` is a view of a single string, and therefore `cudf::string_view` is the data type of a `cudf::column` of type `STRING` just like `int32_t` is the -data type for a `cudf::column` of type [`size_type`](#cudfsize_type). As its name implies, this is a read-only object -instance that points to device memory inside the strings column. It's lifespan is the same (or less) -as the column it views. +data type for a `cudf::column` of type [`size_type`](#cudfsize_type). As its name implies, this is a +read-only object instance that points to device memory inside the strings column. It's lifespan is +the same (or less) as the column it views. Use the `column_device_view::element` method to access an individual row element. Like any other column, do not call `element()` on a row that is null. 
diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index ac2865c05c5..5460a0e5a76 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -583,10 +583,12 @@ void gather_bitmask(table_view const& source, std::transform(target.begin(), target.end(), target_masks.begin(), [](auto const& col) { return col->mutable_view().null_mask(); }); - auto d_target_masks = make_device_uvector_async(target_masks, stream); + auto d_target_masks = + make_device_uvector_async(target_masks, stream, rmm::mr::get_current_device_resource()); auto const device_source = table_device_view::create(source, stream); - auto d_valid_counts = make_zeroed_device_uvector_async(target.size(), stream); + auto d_valid_counts = make_zeroed_device_uvector_async( + target.size(), stream, rmm::mr::get_current_device_resource()); // Dispatch operation enum to get implementation auto const impl = [op]() { diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index cb9ced6fc28..ce2619d767e 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -426,7 +426,8 @@ std::vector segmented_count_bits(bitmask_type const* bitmask, // Construct a contiguous host buffer of indices and copy to device. auto const h_indices = std::vector(indices_begin, indices_end); - auto const d_indices = make_device_uvector_async(h_indices, stream); + auto const d_indices = + make_device_uvector_async(h_indices, stream, rmm::mr::get_current_device_resource()); // Compute the bit counts over each segment. auto first_bit_indices_begin = thrust::make_transform_iterator( diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp index ccffcbc61df..5ab53690a23 100644 --- a/cpp/include/cudf/detail/reshape.hpp +++ b/cpp/include/cudf/detail/reshape.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,21 +30,19 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches */ -std::unique_ptr tile( - table_view const& input, - size_type count, - rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
tile(table_view const& input, + size_type count, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::interleave_columns * * @param stream CUDA stream used for device memory operations and kernel launches */ -std::unique_ptr interleave_columns( - table_view const& input, - rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr interleave_columns(table_view const& input, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/round.hpp b/cpp/include/cudf/detail/round.hpp index 1e5612919f4..cdfc7caef37 100644 --- a/cpp/include/cudf/detail/round.hpp +++ b/cpp/include/cudf/detail/round.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,12 +31,11 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr round( - column_view const& input, - int32_t decimal_places, - rounding_method method, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr round(column_view const& input, + int32_t decimal_places, + rounding_method method, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index c8b17e22df2..dbf7bfa9527 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -386,13 +386,12 @@ struct column_scatterer_impl { * @return Result of scattering values from source to target */ template -std::unique_ptr
scatter( - table_view const& source, - MapIterator scatter_map_begin, - MapIterator scatter_map_end, - table_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr
scatter(table_view const& source, + MapIterator scatter_map_begin, + MapIterator scatter_map_end, + table_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp index 7c4b04537ea..39ae4fe1944 100644 --- a/cpp/include/cudf/detail/scatter.hpp +++ b/cpp/include/cudf/detail/scatter.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,12 +59,11 @@ namespace detail { * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target */ -std::unique_ptr
scatter( - table_view const& source, - column_view const& scatter_map, - table_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
scatter(table_view const& source, + column_view const& scatter_map, + table_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::detail::scatter(table_view const&,column_view const&,table_view @@ -72,12 +71,11 @@ std::unique_ptr
scatter( * * @throws cudf::logic_error if `scatter_map` span size is larger than max of `size_type`. */ -std::unique_ptr
scatter( - table_view const& source, - device_span const scatter_map, - table_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
scatter(table_view const& source, + device_span const scatter_map, + table_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Scatters a row of scalar values into a copy of the target table @@ -108,12 +106,11 @@ std::unique_ptr
scatter( * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target */ -std::unique_ptr
scatter( - std::vector> const& source, - column_view const& indices, - table_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
scatter(std::vector> const& source, + column_view const& indices, + table_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::boolean_mask_scatter( @@ -123,12 +120,11 @@ std::unique_ptr
scatter( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr
boolean_mask_scatter( - table_view const& source, - table_view const& target, - column_view const& boolean_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr
boolean_mask_scatter(table_view const& source, + table_view const& target, + column_view const& boolean_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::boolean_mask_scatter( @@ -144,7 +140,7 @@ std::unique_ptr
boolean_mask_scatter( table_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp index 56d41fd635c..4c4ad7834f4 100644 --- a/cpp/include/cudf/detail/search.hpp +++ b/cpp/include/cudf/detail/search.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,12 +89,11 @@ std::unique_ptr contains(column_view const& haystack, * @param mr Device memory resource used to allocate the returned vector * @return A vector of bools indicating if each row in `needles` has matching rows in `haystack` */ -rmm::device_uvector contains( - table_view const& haystack, - table_view const& needles, - null_equality compare_nulls, - nan_equality compare_nans, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +rmm::device_uvector contains(table_view const& haystack, + table_view const& needles, + null_equality compare_nulls, + nan_equality compare_nans, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/sequence.hpp b/cpp/include/cudf/detail/sequence.hpp index 4a9bf5c74e1..3c3d1d0ed9e 100644 --- a/cpp/include/cudf/detail/sequence.hpp +++ b/cpp/include/cudf/detail/sequence.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,12 +32,11 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr sequence( - size_type size, - scalar const& init, - scalar const& step, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr sequence(size_type size, + scalar const& init, + scalar const& step, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::sequence(size_type size, scalar const& init, @@ -46,11 +45,10 @@ std::unique_ptr sequence( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr sequence( - size_type size, - scalar const& init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr sequence(size_type size, + scalar const& init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::calendrical_month_sequence(size_type size, @@ -60,12 +58,11 @@ std::unique_ptr sequence( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr calendrical_month_sequence( - size_type size, - scalar const& init, - size_type months, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr calendrical_month_sequence(size_type size, + scalar const& init, + size_type months, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/timezone.cuh b/cpp/include/cudf/detail/timezone.cuh new file mode 100644 index 00000000000..830ee1a7fa6 --- /dev/null +++ b/cpp/include/cudf/detail/timezone.cuh @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace cudf::detail { + +/** + * @brief Returns the UT offset for a given date and given timezone table. + * + * @param transition_times Transition times; trailing `solar_cycle_entry_count` entries are used for + * all times beyond the one covered by the TZif file + * @param offsets Time offsets in specific intervals; trailing `solar_cycle_entry_count` entries are + * used for all times beyond the one covered by the TZif file + * @param ts ORC timestamp + * + * @return offset from UT, in seconds + */ +inline __device__ duration_s get_ut_offset(table_device_view tz_table, timestamp_s ts) +{ + if (tz_table.num_rows() == 0) { return duration_s{0}; } + + cudf::device_span transition_times(tz_table.column(0).head(), + static_cast(tz_table.num_rows())); + + auto const ts_ttime_it = [&]() { + auto last_less_equal = [](auto begin, auto end, auto value) { + auto const first_larger = thrust::upper_bound(thrust::seq, begin, end, value); + // Return start of the range if all elements are larger than the value + if (first_larger == begin) return begin; + // Element before the first larger element is the last one less or equal + return first_larger - 1; + }; + + auto const file_entry_end = + transition_times.begin() + (transition_times.size() - solar_cycle_entry_count); + + if (ts <= *(file_entry_end - 1)) { + // Search the file entries 
if the timestamp is in range + return last_less_equal(transition_times.begin(), file_entry_end, ts); + } else { + auto project_to_cycle = [](timestamp_s ts) { + // Years divisible by four are leap years + // Exceptions are years divisible by 100, but not divisible by 400 + static constexpr int32_t num_leap_years_in_cycle = + solar_cycle_years / 4 - (solar_cycle_years / 100 - solar_cycle_years / 400); + static constexpr duration_s cycle_s = cuda::std::chrono::duration_cast( + duration_D{365 * solar_cycle_years + num_leap_years_in_cycle}); + return timestamp_s{(ts.time_since_epoch() + cycle_s) % cycle_s}; + }; + // Search the 400-year cycle if outside of the file entries range + return last_less_equal(file_entry_end, transition_times.end(), project_to_cycle(ts)); + } + }(); + + return tz_table.column(1).element(ts_ttime_it - transition_times.begin()); +} + +} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/timezone.hpp b/cpp/include/cudf/detail/timezone.hpp new file mode 100644 index 00000000000..f7f97c0a7c2 --- /dev/null +++ b/cpp/include/cudf/detail/timezone.hpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +#include + +namespace cudf::detail { + +/** + * @copydoc cudf::make_timezone_transition_table(std::optional, std::string_view, + * rmm::mr::device_memory_resource*) + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr
make_timezone_transition_table( + std::optional tzif_dir, + std::string_view timezone_name, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp index 8e19ebb8da7..5b64f61f11a 100644 --- a/cpp/include/cudf/detail/transform.hpp +++ b/cpp/include/cudf/detail/transform.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,24 +29,22 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr transform( - column_view const& input, - std::string const& unary_udf, - data_type output_type, - bool is_ptx, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr transform(column_view const& input, + std::string const& unary_udf, + data_type output_type, + bool is_ptx, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::compute_column * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr compute_column( - table_view const table, - ast::operation const& expr, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr compute_column(table_view const table, + ast::operation const& expr, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::nans_to_nulls @@ -54,9 +52,7 @@ std::unique_ptr compute_column( * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::pair, size_type> nans_to_nulls( - column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::bools_to_mask @@ -64,9 +60,7 @@ std::pair, size_type> nans_to_nulls( * @param stream CUDA stream used for device memory operations and kernel launches. */ std::pair, cudf::size_type> bools_to_mask( - column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::encode @@ -74,42 +68,37 @@ std::pair, cudf::size_type> bools_to_mask( * @param stream CUDA stream used for device memory operations and kernel launches. */ std::pair, std::unique_ptr> encode( - cudf::table_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::table_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::one_hot_encode * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::pair, table_view> one_hot_encode( - column_view const& input, - column_view const& categories, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::pair, table_view> one_hot_encode(column_view const& input, + column_view const& categories, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::mask_to_bools * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr mask_to_bools( - bitmask_type const* null_mask, - size_type begin_bit, - size_type end_bit, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr mask_to_bools(bitmask_type const* null_mask, + size_type begin_bit, + size_type end_bit, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::row_bit_count * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr row_bit_count( - table_view const& t, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr row_bit_count(table_view const& t, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/transpose.hpp b/cpp/include/cudf/detail/transpose.hpp index 0470d625edc..d0be51860b2 100644 --- a/cpp/include/cudf/detail/transpose.hpp +++ b/cpp/include/cudf/detail/transpose.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,10 +28,9 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::pair, table_view> transpose( - table_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::pair, table_view> transpose(table_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp index b7ecedc1489..3fbdf4a5a8f 100644 --- a/cpp/include/cudf/detail/unary.hpp +++ b/cpp/include/cudf/detail/unary.hpp @@ -45,13 +45,12 @@ namespace detail { */ template -std::unique_ptr true_if( - InputIterator begin, - InputIterator end, - size_type size, - Predicate p, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr true_if(InputIterator begin, + InputIterator end, + size_type size, + Predicate p, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto output = make_numeric_column(data_type(type_id::BOOL8), size, mask_state::UNALLOCATED, stream, mr); @@ -68,52 +67,47 @@ std::unique_ptr true_if( * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr unary_operation( - cudf::column_view const& input, - cudf::unary_operator op, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr unary_operation(cudf::column_view const& input, + cudf::unary_operator op, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::is_valid * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr is_valid( - cudf::column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr is_valid(cudf::column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::cast * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr cast( - column_view const& input, - data_type type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr cast(column_view const& input, + data_type type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::is_nan * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr is_nan( - cudf::column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr is_nan(cudf::column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::is_not_nan * * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr is_not_nan( - cudf::column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr is_not_nan(cudf::column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 75df0d92d0a..c446a7b5148 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -48,10 +48,9 @@ namespace detail { * @return A device_uvector containing zeros */ template -rmm::device_uvector make_zeroed_device_uvector_async( - std::size_t size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_zeroed_device_uvector_async(std::size_t size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { rmm::device_uvector ret(size, stream, mr); CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); @@ -70,10 +69,9 @@ rmm::device_uvector make_zeroed_device_uvector_async( * @return A device_uvector containing zeros */ template -rmm::device_uvector make_zeroed_device_uvector_sync( - std::size_t size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_zeroed_device_uvector_sync(std::size_t size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { rmm::device_uvector ret(size, stream, mr); CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); @@ -94,10 +92,9 @@ rmm::device_uvector make_zeroed_device_uvector_sync( * @return A device_uvector containing the copied data */ template -rmm::device_uvector make_device_uvector_async( - host_span source_data, - 
rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_device_uvector_async(host_span source_data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { rmm::device_uvector ret(source_data.size(), stream, mr); CUDF_CUDA_TRY(cudaMemcpyAsync(ret.data(), @@ -126,9 +123,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_async( - Container const& c, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return make_device_uvector_async(host_span{c}, stream, mr); } @@ -146,10 +141,9 @@ rmm::device_uvector make_device_uvector_async( * @return A device_uvector containing the copied data */ template -rmm::device_uvector make_device_uvector_async( - device_span source_data, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_device_uvector_async(device_span source_data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { rmm::device_uvector ret(source_data.size(), stream, mr); CUDF_CUDA_TRY(cudaMemcpyAsync(ret.data(), @@ -178,9 +172,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_async( - Container const& c, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return make_device_uvector_async( device_span{c}, stream, mr); @@ -199,10 +191,9 @@ rmm::device_uvector make_device_uvector_async( * @return A device_uvector containing the copied data */ template -rmm::device_uvector make_device_uvector_sync( - host_span source_data, - rmm::cuda_stream_view stream, - 
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_device_uvector_sync(host_span source_data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto ret = make_device_uvector_async(source_data, stream, mr); stream.synchronize(); @@ -227,9 +218,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_sync( - Container const& c, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return make_device_uvector_sync(host_span{c}, stream, mr); } @@ -247,10 +236,9 @@ rmm::device_uvector make_device_uvector_sync( * @return A device_uvector containing the copied data */ template -rmm::device_uvector make_device_uvector_sync( - device_span source_data, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +rmm::device_uvector make_device_uvector_sync(device_span source_data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto ret = make_device_uvector_async(source_data, stream, mr); stream.synchronize(); @@ -275,9 +263,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_sync( - Container const& c, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return make_device_uvector_sync(device_span{c}, stream, mr); } diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index 04c78bed17d..76d6fd719a4 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -86,12 +86,11 @@ __global__ void valid_if_kernel( * null count */ template -std::pair valid_if( - InputIterator begin, - InputIterator end, - Predicate p, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::pair valid_if(InputIterator begin, + InputIterator end, + Predicate p, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(begin <= end, "Invalid range."); diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp index 90d730338fc..9fdc7a47fb9 100644 --- a/cpp/include/cudf/io/detail/csv.hpp +++ b/cpp/include/cudf/io/detail/csv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -56,7 +56,7 @@ void write_csv(data_sink* sink, host_span column_names, csv_writer_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr); } // namespace csv } // namespace detail diff --git a/cpp/include/cudf/io/detail/tokenize_json.hpp b/cpp/include/cudf/io/detail/tokenize_json.hpp index b03dbd4fb70..4914f434c98 100644 --- a/cpp/include/cudf/io/detail/tokenize_json.hpp +++ b/cpp/include/cudf/io/detail/tokenize_json.hpp @@ -131,7 +131,7 @@ std::pair, rmm::device_uvector> ge device_span json_in, cudf::io::json_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr); } // namespace detail diff --git a/cpp/include/cudf/io/text/detail/tile_state.hpp b/cpp/include/cudf/io/text/detail/tile_state.hpp index bf833d4720c..6ae399fbe75 100644 --- a/cpp/include/cudf/io/text/detail/tile_state.hpp +++ b/cpp/include/cudf/io/text/detail/tile_state.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -82,7 +82,7 @@ struct scan_tile_state { scan_tile_state(cudf::size_type num_tiles, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr) : tile_status(rmm::device_uvector>( num_tiles, stream, mr)), tile_state_partial(rmm::device_uvector(num_tiles, stream, mr)), diff --git a/cpp/include/cudf/io/text/detail/trie.hpp b/cpp/include/cudf/io/text/detail/trie.hpp index a908a9fa227..7bb2e4e2ece 100644 --- a/cpp/include/cudf/io/text/detail/trie.hpp +++ b/cpp/include/cudf/io/text/detail/trie.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -165,7 +165,7 @@ struct trie { */ static trie create(std::string const& pattern, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr) { return create(std::vector{pattern}, stream, mr); @@ -181,7 +181,7 @@ struct trie { */ static trie create(std::vector const& patterns, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr) { std::vector tokens; std::vector transitions; diff --git a/cpp/include/cudf/lists/detail/combine.hpp b/cpp/include/cudf/lists/detail/combine.hpp index 9f28074173a..4bc45e48a9f 100644 --- a/cpp/include/cudf/lists/detail/combine.hpp +++ b/cpp/include/cudf/lists/detail/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,22 +27,20 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr concatenate_rows( - table_view const& input, - concatenate_null_policy null_policy, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr concatenate_rows(table_view const& input, + concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::concatenate_list_elements * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr concatenate_list_elements( - column_view const& input, - concatenate_null_policy null_policy, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr concatenate_list_elements(column_view const& input, + concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/lists/detail/contains.hpp b/cpp/include/cudf/lists/detail/contains.hpp index 24318e72e98..58ec18cb9ef 100644 --- a/cpp/include/cudf/lists/detail/contains.hpp +++ b/cpp/include/cudf/lists/detail/contains.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,12 +29,11 @@ namespace detail { * rmm::mr::device_memory_resource*) * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr index_of( - cudf::lists_column_view const& lists, - cudf::scalar const& search_key, - cudf::lists::duplicate_find_option find_option, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr index_of(cudf::lists_column_view const& lists, + cudf::scalar const& search_key, + cudf::lists::duplicate_find_option find_option, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::index_of(cudf::lists_column_view const&, @@ -43,12 +42,11 @@ std::unique_ptr index_of( * rmm::mr::device_memory_resource*) * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr index_of( - cudf::lists_column_view const& lists, - cudf::column_view const& search_keys, - cudf::lists::duplicate_find_option find_option, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr index_of(cudf::lists_column_view const& lists, + cudf::column_view const& search_keys, + cudf::lists::duplicate_find_option find_option, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::contains(cudf::lists_column_view const&, @@ -56,11 +54,10 @@ std::unique_ptr index_of( * rmm::mr::device_memory_resource*) * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr contains( - cudf::lists_column_view const& lists, - cudf::scalar const& search_key, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr contains(cudf::lists_column_view const& lists, + cudf::scalar const& search_key, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::lists::contains(cudf::lists_column_view const&, @@ -68,11 +65,10 @@ std::unique_ptr contains( * rmm::mr::device_memory_resource*) * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr contains( - cudf::lists_column_view const& lists, - cudf::column_view const& search_keys, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr contains(cudf::lists_column_view const& lists, + cudf::column_view const& search_keys, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists } // namespace cudf diff --git a/cpp/include/cudf/lists/detail/set_operations.hpp b/cpp/include/cudf/lists/detail/set_operations.hpp index ef4255de430..1411c65448e 100644 --- a/cpp/include/cudf/lists/detail/set_operations.hpp +++ b/cpp/include/cudf/lists/detail/set_operations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,52 +30,48 @@ namespace cudf::lists::detail { * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr have_overlap( - lists_column_view const& lhs, - lists_column_view const& rhs, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr have_overlap(lists_column_view const& lhs, + lists_column_view const& rhs, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::list::intersect_distinct * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr intersect_distinct( - lists_column_view const& lhs, - lists_column_view const& rhs, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr intersect_distinct(lists_column_view const& lhs, + lists_column_view const& rhs, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::list::union_distinct * * @param stream CUDA stream used for device memory operations and kernel launches. */ -std::unique_ptr union_distinct( - lists_column_view const& lhs, - lists_column_view const& rhs, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr union_distinct(lists_column_view const& lhs, + lists_column_view const& rhs, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc cudf::list::difference_distinct * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::unique_ptr difference_distinct( - lists_column_view const& lhs, - lists_column_view const& rhs, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr difference_distinct(lists_column_view const& lhs, + lists_column_view const& rhs, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** @} */ // end of group } // namespace cudf::lists::detail diff --git a/cpp/include/cudf/lists/lists_column_factories.hpp b/cpp/include/cudf/lists/lists_column_factories.hpp index a6eacb97e91..fea1118748c 100644 --- a/cpp/include/cudf/lists/lists_column_factories.hpp +++ b/cpp/include/cudf/lists/lists_column_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,11 +35,10 @@ namespace detail { * @param[in] stream CUDA stream used for device memory operations and kernel launches. * @param[in] mr Device memory resource used to allocate the returned column's device memory. 
*/ -std::unique_ptr make_lists_column_from_scalar( - list_scalar const& value, - size_type size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr make_lists_column_from_scalar(list_scalar const& value, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace lists diff --git a/cpp/include/cudf/structs/detail/concatenate.hpp b/cpp/include/cudf/structs/detail/concatenate.hpp index a098703e4b0..82ccca188e2 100644 --- a/cpp/include/cudf/structs/detail/concatenate.hpp +++ b/cpp/include/cudf/structs/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,10 +48,9 @@ namespace detail { * @param mr Device memory resource used to allocate the returned column's device memory. * @return New column with concatenated results. */ -std::unique_ptr concatenate( - host_span columns, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr concatenate(host_span columns, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace structs diff --git a/cpp/include/cudf/timezone.hpp b/cpp/include/cudf/timezone.hpp new file mode 100644 index 00000000000..56678c73811 --- /dev/null +++ b/cpp/include/cudf/timezone.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include +#include + +namespace cudf { +class table; + +// Cycle in which the time offsets repeat in Gregorian calendar +static constexpr int32_t solar_cycle_years = 400; +// Number of future entries in the timezone transition table: +// Two entries per year, over the length of the Gregorian calendar's solar cycle +static constexpr uint32_t solar_cycle_entry_count = 2 * solar_cycle_years; + +/** + * @brief Creates a transition table to convert ORC timestamps to UTC. + * + * Uses system's TZif files. Assumes little-endian platform when parsing these files. + * The transition table starts with the entries from the TZif file. For timestamps after the file's + * last transition, the table includes entries that form a `solar_cycle_years`-year cycle (future + * entries). This portion of the table has `solar_cycle_entry_count` elements, as it assumes two + * transitions per year from Daylight Saving Time. If the timezone does not have DST, the table will + * still include the future entries, which will all have the same offset. + * + * @param tzif_dir The directory where the TZif files are located + * @param timezone_name standard timezone name (for example, "America/Los_Angeles") + * @param mr Device memory resource used to allocate the returned table's device memory. + * + * @return The transition table for the given timezone + */ +std::unique_ptr
make_timezone_transition_table( + std::optional tzif_dir, + std::string_view timezone_name, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace cudf diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index 4fa712fe7c3..b925fc8ae92 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,4 +36,15 @@ namespace cudf { */ bool column_types_equal(column_view const& lhs, column_view const& rhs); +/** + * @brief Compare the type IDs of two `column_view`s + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is ignored. + * + * @param lhs The first `column_view` to compare + * @param rhs The second `column_view` to compare + * @return true if column types match + */ +bool column_types_equivalent(column_view const& lhs, column_view const& rhs); + } // namespace cudf diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 91773b2c3f1..6341e2e10b0 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -732,9 +732,11 @@ class strings_column_wrapper : public detail::column_wrapper { { auto all_valid = thrust::make_constant_iterator(true); auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, all_valid); - auto d_chars = cudf::detail::make_device_uvector_sync(chars, cudf::get_default_stream()); - auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, cudf::get_default_stream()); - wrapped = cudf::make_strings_column(d_chars, d_offsets); + auto d_chars = cudf::detail::make_device_uvector_sync( + chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_offsets = cudf::detail::make_device_uvector_sync( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + wrapped = cudf::make_strings_column(d_chars, d_offsets); } /** @@ -772,10 +774,13 @@ class strings_column_wrapper : public detail::column_wrapper { size_type num_strings = std::distance(begin, end); auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v); auto null_mask = detail::make_null_mask_vector(v, v + num_strings); - auto d_chars = cudf::detail::make_device_uvector_sync(chars, cudf::get_default_stream()); - auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, cudf::get_default_stream()); - auto d_bitmask = cudf::detail::make_device_uvector_sync(null_mask, cudf::get_default_stream()); - wrapped = cudf::make_strings_column(d_chars, d_offsets, d_bitmask); + auto d_chars = cudf::detail::make_device_uvector_sync( + chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_offsets = cudf::detail::make_device_uvector_sync( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_bitmask = cudf::detail::make_device_uvector_sync( + null_mask, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + wrapped = cudf::make_strings_column(d_chars, d_offsets, d_bitmask); } /** diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh 
b/cpp/include/cudf_test/tdigest_utilities.cuh index ce45ad91be1..d23d7f29a6c 100644 --- a/cpp/include/cudf_test/tdigest_utilities.cuh +++ b/cpp/include/cudf_test/tdigest_utilities.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -168,7 +168,8 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv, // verify min/max thrust::host_vector> h_spans; h_spans.push_back({input_values.begin(), static_cast(input_values.size())}); - auto spans = cudf::detail::make_device_uvector_async(h_spans, cudf::get_default_stream()); + auto spans = cudf::detail::make_device_uvector_async( + h_spans, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto expected_min = cudf::make_fixed_width_column( data_type{type_id::FLOAT64}, spans.size(), mask_state::UNALLOCATED); diff --git a/cpp/scripts/sort_ninja_log.py b/cpp/scripts/sort_ninja_log.py index 9cb8afbff9f..3fe503f749e 100755 --- a/cpp/scripts/sort_ninja_log.py +++ b/cpp/scripts/sort_ninja_log.py @@ -1,10 +1,11 @@ # -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. 
# import argparse import os import sys import xml.etree.ElementTree as ET +from pathlib import Path from xml.dom import minidom parser = argparse.ArgumentParser() @@ -22,52 +23,50 @@ "--msg", type=str, default=None, - help="optional message to include in html output", + help="optional text file to include at the top of the html output", +) +parser.add_argument( + "--cmp_log", + type=str, + default=None, + help="optional baseline ninja_log to compare results", ) args = parser.parse_args() log_file = args.log_file -log_path = os.path.dirname(os.path.abspath(log_file)) - output_fmt = args.fmt +cmp_file = args.cmp_log # build a map of the log entries -entries = {} -with open(log_file) as log: - last = 0 - files = {} - for line in log: - entry = line.split() - if len(entry) > 4: - obj_file = entry[3] - file_size = ( - os.path.getsize(os.path.join(log_path, obj_file)) - if os.path.exists(obj_file) - else 0 - ) - start = int(entry[0]) - end = int(entry[1]) - # logic based on ninjatracing - if end < last: - files = {} - last = end - files.setdefault(entry[4], (entry[3], start, end, file_size)) - - # build entries from files dict - for entry in files.values(): - entries[entry[0]] = (entry[1], entry[2], entry[3]) - -# check file could be loaded and we have entries to report -if len(entries) == 0: - print("Could not parse", log_file) - exit() +def build_log_map(log_file): + entries = {} + log_path = os.path.dirname(os.path.abspath(log_file)) + with open(log_file) as log: + last = 0 + files = {} + for line in log: + entry = line.split() + if len(entry) > 4: + obj_file = entry[3] + file_size = ( + os.path.getsize(os.path.join(log_path, obj_file)) + if os.path.exists(obj_file) + else 0 + ) + start = int(entry[0]) + end = int(entry[1]) + # logic based on ninjatracing + if end < last: + files = {} + last = end + files.setdefault(entry[4], (entry[3], start, end, file_size)) + + # build entries from files dict + for entry in files.values(): + entries[entry[0]] = (entry[1], entry[2], 
entry[3]) + + return entries -# sort the entries by build-time (descending order) -sorted_list = sorted( - list(entries.keys()), - key=lambda k: entries[k][1] - entries[k][0], - reverse=True, -) # output results in XML format def output_xml(entries, sorted_list, args): @@ -147,14 +146,46 @@ def assign_entries_to_threads(entries): return (results, end_time) -# output chart results in HTML format -def output_html(entries, sorted_list, args): +# format the build-time +def format_build_time(input_time): + build_time = abs(input_time) + build_time_str = str(build_time) + " ms" + if build_time > 120000: # 2 minutes + minutes = int(build_time / 60000) + seconds = int(((build_time / 60000) - minutes) * 60) + build_time_str = "{:d}:{:02d} min".format(minutes, seconds) + elif build_time > 1000: + build_time_str = "{:.3f} s".format(build_time / 1000) + if input_time < 0: + build_time_str = "-" + build_time_str + return build_time_str + + +# format file size +def format_file_size(input_size): + file_size = abs(input_size) + file_size_str = "" + if file_size > 1000000: + file_size_str = "{:.3f} MB".format(file_size / 1000000) + elif file_size > 1000: + file_size_str = "{:.3f} KB".format(file_size / 1000) + elif file_size > 0: + file_size_str = str(file_size) + " bytes" + if input_size < 0: + file_size_str = "-" + file_size_str + return file_size_str + + +# Output chart results in HTML format +# Builds a standalone html file with no javascript or styles +def output_html(entries, sorted_list, cmp_entries, args): print("Build Metrics Report") - # Note: Jenkins does not support javascript nor style defined in the html - # https://www.jenkins.io/doc/book/security/configuring-content-security-policy/ print("") if args.msg is not None: - print("

", args.msg, "

") + msg_file = Path(args.msg) + if msg_file.is_file(): + msg = msg_file.read_text() + print("

", msg, "

") # map entries to threads # the end_time is used to scale all the entries to a fixed output width @@ -201,15 +232,8 @@ def output_html(entries, sorted_list, args): # adjust for the cellspacing prev_end = end + int(end_time / 500) - # format the build-time build_time = end - start - build_time_str = str(build_time) + " ms" - if build_time > 120000: # 2 minutes - minutes = int(build_time / 60000) - seconds = int(((build_time / 60000) - minutes) * 60) - build_time_str = "{:d}:{:02d} min".format(minutes, seconds) - elif build_time > 1000: - build_time_str = "{:.3f} s".format(build_time / 1000) + build_time_str = format_build_time(build_time) # assign color and accumulate legend values color = white @@ -248,7 +272,7 @@ def output_html(entries, sorted_list, args): # done with this entry print("") # update the entry with just the computed output info - entries[name] = (build_time_str, color, entry[2]) + entries[name] = (build_time, color, entry[2]) # add a filler column at the end of each row print("
") @@ -259,30 +283,53 @@ def output_html(entries, sorted_list, args): # output detail table in build-time descending order print("") print( - "", - "", - "", - sep="", + "", "", "", sep="" ) + if cmp_entries: + print("", sep="") + print("") + for name in sorted_list: entry = entries[name] - build_time_str = entry[0] + build_time = entry[0] color = entry[1] file_size = entry[2] - # format file size - file_size_str = "" - if file_size > 1000000: - file_size_str = "{:.3f} MB".format(file_size / 1000000) - elif file_size > 1000: - file_size_str = "{:.3f} KB".format(file_size / 1000) - elif file_size > 0: - file_size_str = str(file_size) + " bytes" + build_time_str = format_build_time(build_time) + file_size_str = format_file_size(file_size) # output entry row print("", sep="", end="") print("", sep="", end="") - print("", sep="") + print("", sep="", end="") + # output diff column + cmp_entry = ( + cmp_entries[name] if cmp_entries and name in cmp_entries else None + ) + if cmp_entry: + diff_time = build_time - (cmp_entry[1] - cmp_entry[0]) + diff_time_str = format_build_time(diff_time) + diff_color = white + diff_percent = int((diff_time / build_time) * 100) + if build_time > 60000: + if diff_percent > 20: + diff_color = red + diff_time_str = "" + diff_time_str + "" + elif diff_percent < -20: + diff_color = green + diff_time_str = "" + diff_time_str + "" + elif diff_percent > 0: + diff_color = yellow + print( + "", + sep="", + end="", + ) + print("") print("
FileCompile timeSize
FileCompile timeSizet-cmp
", name, "", build_time_str, "", file_size_str, "
", file_size_str, "", + diff_time_str, + "

") @@ -296,22 +343,62 @@ def output_html(entries, sorted_list, args): print("", summary["green"], "") print("time < 1 second") print("", summary["white"], "") - print("") + print("") + + if cmp_entries: + print("") + print("time increase > 20%") + print("time increase > 0") + print("time decrease > 20%") + print( + "time change < 20%% or build time < 1 minute", + ) + print("
") + + print("") # output results in CSV format -def output_csv(entries, sorted_list, args): - print("time,size,file") +def output_csv(entries, sorted_list, cmp_entries, args): + print("time,size,file", end="") + if cmp_entries: + print(",diff", end="") + print() for name in sorted_list: entry = entries[name] build_time = entry[1] - entry[0] file_size = entry[2] - print(build_time, file_size, name, sep=",") + cmp_entry = ( + cmp_entries[name] if cmp_entries and name in cmp_entries else None + ) + print(build_time, file_size, name, sep=",", end="") + if cmp_entry: + diff_time = build_time - (cmp_entry[1] - cmp_entry[0]) + print(",", diff_time, sep="", end="") + print() + + +# parse log file into map +entries = build_log_map(log_file) +if len(entries) == 0: + print("Could not parse", log_file) + exit() + +# sort the entries by build-time (descending order) +sorted_list = sorted( + list(entries.keys()), + key=lambda k: entries[k][1] - entries[k][0], + reverse=True, +) +# load the comparison build log if available +cmp_entries = build_log_map(cmp_file) if cmp_file else None if output_fmt == "xml": output_xml(entries, sorted_list, args) elif output_fmt == "html": - output_html(entries, sorted_list, args) + output_html(entries, sorted_list, cmp_entries, args) else: - output_csv(entries, sorted_list, args) + output_csv(entries, sorted_list, cmp_entries, args) diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 5d36d70696c..6d6ef9fd7b0 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -76,7 +76,8 @@ auto create_device_views(host_span views, rmm::cuda_stream_vi std::back_inserter(device_views), [](auto const& col) { return *col; }); - auto d_views = make_device_uvector_async(device_views, stream); + auto d_views = + make_device_uvector_async(device_views, stream, rmm::mr::get_current_device_resource()); // Compute the partition offsets auto offsets = thrust::host_vector(views.size() + 1); @@ -87,7 +88,8 @@ 
auto create_device_views(host_span views, rmm::cuda_stream_vi std::next(offsets.begin()), [](auto const& col) { return col.size(); }, thrust::plus{}); - auto d_offsets = make_device_uvector_async(offsets, stream); + auto d_offsets = + make_device_uvector_async(offsets, stream, rmm::mr::get_current_device_resource()); auto const output_size = offsets.back(); return std::make_tuple( diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 5e76b4adbbe..cc12aaa1382 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,11 +37,10 @@ namespace { struct get_element_functor { template () && !is_fixed_point()>* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto s = make_fixed_width_scalar(data_type(type_to_id()), stream, mr); @@ -61,11 +60,10 @@ struct get_element_functor { } template >* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto device_col = column_device_view::create(input, stream); @@ -86,11 +84,10 @@ struct get_element_functor { } template >* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, 
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto dict_view = dictionary_column_view(input); auto indices_iter = detail::indexalator_factory::make_input_iterator(dict_view.indices()); @@ -122,11 +119,10 @@ struct get_element_functor { } template >* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { bool valid = is_element_valid_sync(input, index, stream); auto const child_col_idx = lists_column_view::child_column_index; @@ -147,11 +143,10 @@ struct get_element_functor { } template ()>* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { using Type = typename T::rep; @@ -178,11 +173,10 @@ struct get_element_functor { } template >* p = nullptr> - std::unique_ptr operator()( - column_view const& input, - size_type index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(column_view const& input, + size_type index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { bool valid = is_element_valid_sync(input, index, stream); auto row_contents = diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu index 5bdf10c8af6..20a8ce986aa 100644 --- 
a/cpp/src/copying/purge_nonempty_nulls.cu +++ b/cpp/src/copying/purge_nonempty_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,6 +38,8 @@ bool has_nonempty_null_rows(cudf::column_view const& input, rmm::cuda_stream_vie { if (not input.has_nulls()) { return false; } // No nulls => no dirty rows. + if ((input.size() == input.null_count()) && (input.num_children() == 0)) { return false; } + // Cross-reference nullmask and offsets. auto const type = input.type().id(); auto const offsets = (type == type_id::STRING) ? (strings_column_view{input}).offsets() diff --git a/cpp/src/io/orc/timezone.cpp b/cpp/src/datetime/timezone.cpp similarity index 79% rename from cpp/src/io/orc/timezone.cpp rename to cpp/src/datetime/timezone.cpp index 810dfe87320..55d68fe4a1a 100644 --- a/cpp/src/io/orc/timezone.cpp +++ b/cpp/src/datetime/timezone.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,22 +13,24 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "timezone.cuh" +#include +#include +#include #include +#include #include +#include #include namespace cudf { -namespace io { + +namespace { constexpr uint32_t tzif_magic = ('T' << 0) | ('Z' << 8) | ('i' << 16) | ('f' << 24); std::string const tzif_system_directory = "/usr/share/zoneinfo/"; -// Seconds from Jan 1st, 1970 to Jan 1st, 2015 -constexpr int64_t orc_utc_offset = 1420070400; - #pragma pack(push, 1) /** * @brief 32-bit TZif header @@ -127,12 +129,13 @@ struct timezone_file { "Number of transition times is larger than the file size."); } - timezone_file(std::string const& timezone_name) + timezone_file(std::optional tzif_dir, std::string_view timezone_name) { using std::ios_base; // Open the input file - std::string const tz_filename = tzif_system_directory + timezone_name; + auto const tz_filename = + std::filesystem::path{tzif_dir.value_or(tzif_system_directory)} / timezone_name; std::ifstream fin; fin.open(tz_filename, ios_base::in | ios_base::binary | ios_base::ate); CUDF_EXPECTS(fin, "Failed to open the timezone file."); @@ -373,45 +376,62 @@ static int64_t get_transition_time(dst_transition_s const& trans, int year) return trans.time + cuda::std::chrono::duration_cast(duration_D{day}).count(); } -timezone_table build_timezone_transition_table(std::string const& timezone_name, - rmm::cuda_stream_view stream) +} // namespace + +std::unique_ptr make_timezone_transition_table(std::optional tzif_dir, + std::string_view timezone_name, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::make_timezone_transition_table( + tzif_dir, timezone_name, cudf::get_default_stream(), mr); +} + +namespace detail { + +std::unique_ptr
make_timezone_transition_table(std::optional tzif_dir, + std::string_view timezone_name, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { if (timezone_name == "UTC" || timezone_name.empty()) { // Return an empty table for UTC - return {}; + return std::make_unique(); } - timezone_file const tzf(timezone_name); + timezone_file const tzf(tzif_dir, timezone_name); - std::vector ttimes(1); - std::vector offsets(1); + std::vector transition_times(1); + std::vector offsets(1); // One ancient rule entry, one per TZ file entry, 2 entries per year in the future cycle - ttimes.reserve(1 + tzf.timecnt() + cycle_entry_cnt); - offsets.reserve(1 + tzf.timecnt() + cycle_entry_cnt); + transition_times.reserve(1 + tzf.timecnt() + solar_cycle_entry_count); + offsets.reserve(1 + tzf.timecnt() + solar_cycle_entry_count); size_t earliest_std_idx = 0; for (size_t t = 0; t < tzf.timecnt(); t++) { auto const ttime = tzf.transition_times[t]; auto const idx = tzf.ttime_idx[t]; CUDF_EXPECTS(idx < tzf.typecnt(), "Out-of-range type index"); auto const utcoff = tzf.ttype[idx].utcoff; - ttimes.push_back(ttime); + transition_times.push_back(ttime); offsets.push_back(utcoff); - if (!earliest_std_idx && !tzf.ttype[idx].isdst) { earliest_std_idx = ttimes.size() - 1; } + if (!earliest_std_idx && !tzf.ttype[idx].isdst) { + earliest_std_idx = transition_times.size() - 1; + } } if (tzf.timecnt() != 0) { if (!earliest_std_idx) { earliest_std_idx = 1; } - ttimes[0] = ttimes[earliest_std_idx]; - offsets[0] = offsets[earliest_std_idx]; + transition_times[0] = transition_times[earliest_std_idx]; + offsets[0] = offsets[earliest_std_idx]; } else { if (tzf.typecnt() == 0 || tzf.ttype[0].utcoff == 0) { // No transitions, offset is zero; Table would be a no-op. // Return an empty table to speed up parsing. 
- return {}; + return std::make_unique(); } // No transitions to use for the time/offset - use the first offset and apply to all timestamps - ttimes[0] = std::numeric_limits::max(); - offsets[0] = tzf.ttype[0].utcoff; + transition_times[0] = std::numeric_limits::max(); + offsets[0] = tzf.ttype[0].utcoff; } // Generate entries for times after the last transition @@ -440,19 +460,19 @@ timezone_table build_timezone_transition_table(std::string const& timezone_name, // Add entries to fill the transition cycle int64_t year_timestamp = 0; - for (int32_t year = 1970; year < 1970 + cycle_years; ++year) { + for (int32_t year = 1970; year < 1970 + solar_cycle_years; ++year) { auto const dst_start_time = get_transition_time(dst_start, year); auto const dst_end_time = get_transition_time(dst_end, year); // Two entries per year, since there are two transitions - ttimes.push_back(year_timestamp + dst_start_time - future_std_offset); + transition_times.push_back(year_timestamp + dst_start_time - future_std_offset); offsets.push_back(future_dst_offset); - ttimes.push_back(year_timestamp + dst_end_time - future_dst_offset); + transition_times.push_back(year_timestamp + dst_end_time - future_dst_offset); offsets.push_back(future_std_offset); // Swap the newly added transitions if in descending order - if (ttimes.rbegin()[1] > ttimes.rbegin()[0]) { - std::swap(ttimes.rbegin()[0], ttimes.rbegin()[1]); + if (transition_times.rbegin()[1] > transition_times.rbegin()[0]) { + std::swap(transition_times.rbegin()[0], transition_times.rbegin()[1]); std::swap(offsets.rbegin()[0], offsets.rbegin()[1]); } @@ -461,13 +481,33 @@ timezone_table build_timezone_transition_table(std::string const& timezone_name, .count(); } - rmm::device_uvector d_ttimes = cudf::detail::make_device_uvector_async(ttimes, stream); - rmm::device_uvector d_offsets = cudf::detail::make_device_uvector_async(offsets, stream); - auto const gmt_offset = get_gmt_offset(ttimes, offsets, orc_utc_offset); + 
CUDF_EXPECTS(transition_times.size() == offsets.size(), + "Error reading TZif file for timezone " + std::string{timezone_name}); + + std::vector ttimes_typed; + ttimes_typed.reserve(transition_times.size()); + std::transform(transition_times.cbegin(), + transition_times.cend(), + std::back_inserter(ttimes_typed), + [](auto ts) { return timestamp_s{duration_s{ts}}; }); + std::vector offsets_typed; + offsets_typed.reserve(offsets.size()); + std::transform(offsets.cbegin(), offsets.cend(), std::back_inserter(offsets_typed), [](auto ts) { + return duration_s{ts}; + }); + + auto d_ttimes = cudf::detail::make_device_uvector_async(ttimes_typed, stream, mr); + auto d_offsets = cudf::detail::make_device_uvector_async(offsets_typed, stream, mr); + + std::vector> tz_table_columns; + tz_table_columns.emplace_back(std::make_unique(std::move(d_ttimes))); + tz_table_columns.emplace_back(std::make_unique(std::move(d_offsets))); + + // Need to finish copies before transition_times and offsets go out of scope stream.synchronize(); - return {gmt_offset, std::move(d_ttimes), std::move(d_offsets)}; + return std::make_unique(std::move(tz_table_columns)); } -} // namespace io +} // namespace detail } // namespace cudf diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index d4f3a9ca495..bc54f65bbd3 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -114,7 +114,8 @@ struct compute_children_offsets_fn { [](auto lhs, auto rhs) { return offsets_pair{lhs.first + rhs.first, lhs.second + rhs.second}; }); - return cudf::detail::make_device_uvector_sync(offsets, stream); + return cudf::detail::make_device_uvector_sync( + offsets, stream, rmm::mr::get_current_device_resource()); } private: diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index ecd66f1b0c9..a747cc195ae 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,11 +104,10 @@ struct out_of_place_fill_range_dispatch { template () or cudf::is_fixed_point())> - std::unique_ptr operator()( - cudf::size_type begin, - cudf::size_type end, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::unique_ptr operator()(cudf::size_type begin, + cudf::size_type end, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(input.type() == value.type(), "Data type mismatch."); auto p_ret = std::make_unique(input, stream, mr); diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index 284e7c46347..b4bab369c61 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -134,11 +134,10 @@ std::unique_ptr sequence(size_type size, return type_dispatcher(init.type(), sequence_functor{}, size, init, step, stream, mr); } -std::unique_ptr sequence( - size_type size, - scalar const& init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr sequence(size_type size, + scalar const& init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(size >= 0, "size must be >= 0"); CUDF_EXPECTS(is_numeric(init.type()), "init scalar type must be numeric"); diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 72ac6255549..f8203218760 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -481,7 +481,8 @@ void compute_single_pass_aggs(table_view const& keys, // prepare to launch kernel to do the actual aggregation auto d_sparse_table = mutable_table_device_view::create(sparse_table, stream); auto d_values = table_device_view::create(flattened_values, stream); - auto const d_aggs = cudf::detail::make_device_uvector_async(agg_kinds, stream); + auto const d_aggs = cudf::detail::make_device_uvector_async( + agg_kinds, stream, rmm::mr::get_current_device_resource()); auto const skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; diff --git a/cpp/src/groupby/sort/group_quantiles.cu b/cpp/src/groupby/sort/group_quantiles.cu index 90ca5a5c90e..a9edcfecbf7 100644 --- a/cpp/src/groupby/sort/group_quantiles.cu +++ b/cpp/src/groupby/sort/group_quantiles.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -156,7 +156,8 @@ std::unique_ptr group_quantiles(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto dv_quantiles = cudf::detail::make_device_uvector_async(quantiles, stream); + auto dv_quantiles = cudf::detail::make_device_uvector_async( + quantiles, stream, rmm::mr::get_current_device_resource()); auto values_type = cudf::is_dictionary(values.type()) ? dictionary_column_view(values).keys().type() diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 1aaa06750db..820dc8a3077 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -129,8 +129,10 @@ void scan_result_functor::operator()(aggregation const& agg) auto const group_labels_view = column_view(cudf::device_span(group_labels)); auto const gather_map = [&]() { if (is_presorted()) { // assumes both keys and values are sorted, Spark does this. - return cudf::detail::sequence( - group_labels.size(), *cudf::make_fixed_width_scalar(size_type{0}, stream), stream); + return cudf::detail::sequence(group_labels.size(), + *cudf::make_fixed_width_scalar(size_type{0}, stream), + stream, + rmm::mr::get_current_device_resource()); } else { auto sort_order = (rank_agg._method == rank_method::FIRST ? cudf::detail::stable_sorted_order : cudf::detail::sorted_order); diff --git a/cpp/src/hash/unordered_multiset.cuh b/cpp/src/hash/unordered_multiset.cuh index c017fd43079..55036bec6a6 100644 --- a/cpp/src/hash/unordered_multiset.cuh +++ b/cpp/src/hash/unordered_multiset.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -84,10 +84,10 @@ class unordered_multiset { auto d_column = column_device_view::create(col, stream); auto d_col = *d_column; - auto hash_bins_start = - cudf::detail::make_zeroed_device_uvector_async(2 * d_col.size() + 1, stream); - auto hash_bins_end = - cudf::detail::make_zeroed_device_uvector_async(2 * d_col.size() + 1, stream); + auto hash_bins_start = cudf::detail::make_zeroed_device_uvector_async( + 2 * d_col.size() + 1, stream, rmm::mr::get_current_device_resource()); + auto hash_bins_end = cudf::detail::make_zeroed_device_uvector_async( + 2 * d_col.size() + 1, stream, rmm::mr::get_current_device_resource()); auto hash_data = rmm::device_uvector(d_col.size(), stream); Hasher hasher; diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 861b5b0fba4..7f88019beb2 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -215,7 +215,7 @@ std::shared_ptr dispatch_to_arrow::operator()(column_view in arrow::MemoryPool* ar_mr, rmm::cuda_stream_view stream) { - auto bitmask = bools_to_mask(input, stream); + auto bitmask = bools_to_mask(input, stream, rmm::mr::get_current_device_resource()); auto data_buffer = allocate_arrow_buffer(static_cast(bitmask.first->size()), ar_mr); diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index cd557ff166a..60a1b4263b2 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -444,7 +444,8 @@ std::vector decode_data(metadata& meta, } } - auto block_list = cudf::detail::make_device_uvector_async(meta.block_list, stream); + auto block_list = cudf::detail::make_device_uvector_async( + meta.block_list, stream, rmm::mr::get_current_device_resource()); schema_desc.host_to_device(stream); @@ -574,8 +575,10 @@ table_with_metadata read_avro(std::unique_ptr&& source, } } - d_global_dict = cudf::detail::make_device_uvector_async(h_global_dict, stream); - d_global_dict_data = cudf::detail::make_device_uvector_async(h_global_dict_data, stream); + 
d_global_dict = cudf::detail::make_device_uvector_async( + h_global_dict, stream, rmm::mr::get_current_device_resource()); + d_global_dict_data = cudf::detail::make_device_uvector_async( + h_global_dict_data, stream, rmm::mr::get_current_device_resource()); stream.synchronize(); } diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index 6778ddead28..008c7215cca 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -509,9 +509,10 @@ size_t decompress_zstd(host_span src, rmm::cuda_stream_view stream) { // Init device span of spans (source) - auto const d_src = cudf::detail::make_device_uvector_async(src, stream); - auto hd_srcs = hostdevice_vector>(1, stream); - hd_srcs[0] = d_src; + auto const d_src = + cudf::detail::make_device_uvector_async(src, stream, rmm::mr::get_current_device_resource()); + auto hd_srcs = hostdevice_vector>(1, stream); + hd_srcs[0] = d_src; hd_srcs.host_to_device(stream); // Init device span of spans (temporary destination) diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 4f6f8162246..51e3783bac5 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -794,8 +794,8 @@ std::vector detect_column_types( const int block_size = csvparse_block_dim; const int grid_size = (row_starts.size() + block_size - 1) / block_size; - auto d_stats = - detail::make_zeroed_device_uvector_async(num_active_columns, stream); + auto d_stats = detail::make_zeroed_device_uvector_async( + num_active_columns, stream, rmm::mr::get_current_device_resource()); data_type_detection<<>>( options, data, column_flags, row_starts, d_stats); diff --git a/cpp/src/io/csv/durations.hpp b/cpp/src/io/csv/durations.hpp index d42ddf3817c..ac925011c58 100644 --- a/cpp/src/io/csv/durations.hpp +++ b/cpp/src/io/csv/durations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,10 +28,9 @@ namespace io { namespace detail { namespace csv { -std::unique_ptr pandas_format_durations( - column_view const& durations, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr pandas_format_durations(column_view const& durations, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace csv } // namespace detail diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 2e38ea7f4ab..9c1ff67d97c 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -523,13 +523,13 @@ void infer_column_types(parse_options const& parse_opts, }); if (num_inferred_columns == 0) { return; } - auto const column_stats = - cudf::io::csv::gpu::detect_column_types(parse_opts.view(), - data, - make_device_uvector_async(column_flags, stream), - row_offsets, - num_inferred_columns, - stream); + auto const column_stats = cudf::io::csv::gpu::detect_column_types( + parse_opts.view(), + data, + make_device_uvector_async(column_flags, stream, 
rmm::mr::get_current_device_resource()), + row_offsets, + num_inferred_columns, + stream); stream.synchronize(); auto inf_col_idx = 0; @@ -595,14 +595,15 @@ std::vector decode_data(parse_options const& parse_opts, h_valid[i] = out_buffers[i].null_mask(); } - cudf::io::csv::gpu::decode_row_column_data(parse_opts.view(), - data, - make_device_uvector_async(column_flags, stream), - row_offsets, - make_device_uvector_async(column_types, stream), - make_device_uvector_async(h_data, stream), - make_device_uvector_async(h_valid, stream), - stream); + cudf::io::csv::gpu::decode_row_column_data( + parse_opts.view(), + data, + make_device_uvector_async(column_flags, stream, rmm::mr::get_current_device_resource()), + row_offsets, + make_device_uvector_async(column_types, stream, rmm::mr::get_current_device_resource()), + make_device_uvector_async(h_data, stream, rmm::mr::get_current_device_resource()), + make_device_uvector_async(h_valid, stream, rmm::mr::get_current_device_resource()), + stream); return out_buffers; } diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp index 70a0b66ebc6..c18b15708ab 100644 --- a/cpp/src/io/json/experimental/read_json.cpp +++ b/cpp/src/io/json/experimental/read_json.cpp @@ -80,7 +80,8 @@ rmm::device_uvector ingest_raw_input(host_span auto const uncomp_data = decompress(compression, buffer); return cudf::detail::make_device_uvector_sync( host_span{reinterpret_cast(uncomp_data.data()), uncomp_data.size()}, - stream); + stream, + rmm::mr::get_current_device_resource()); } } diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index d174cc8aca3..c937315969c 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -602,8 +602,10 @@ void make_device_json_column(device_span input, col.validity.data()}; } - auto d_ignore_vals = cudf::detail::make_device_uvector_async(ignore_vals, stream); - auto d_columns_data = 
cudf::detail::make_device_uvector_async(columns_data, stream); + auto d_ignore_vals = cudf::detail::make_device_uvector_async( + ignore_vals, stream, rmm::mr::get_current_device_resource()); + auto d_columns_data = cudf::detail::make_device_uvector_async( + columns_data, stream, rmm::mr::get_current_device_resource()); // 3. scatter string offsets to respective columns, set validity bits thrust::for_each_n( @@ -891,7 +893,8 @@ table_with_metadata device_parse_nested_json(device_span d_input, auto gpu_tree = [&]() { // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = get_token_stream(d_input, options, stream); + const auto [tokens_gpu, token_indices_gpu] = + get_token_stream(d_input, options, stream, rmm::mr::get_current_device_resource()); // gpu tree generation return get_tree_representation( tokens_gpu, token_indices_gpu, stream, rmm::mr::get_current_device_resource()); diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 8b6c0f9d528..d1711db0484 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -578,7 +578,7 @@ std::vector detect_data_types( return d_column_infos; } else { return cudf::detail::make_zeroed_device_uvector_async( - num_columns, stream); + num_columns, stream, rmm::mr::get_current_device_resource()); } }(); diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 30b3911089f..77749b42781 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -1597,9 +1597,11 @@ std::pair, std::vector> json_column_to // Move string_offsets and string_lengths to GPU rmm::device_uvector d_string_offsets = - cudf::detail::make_device_uvector_async(json_col.string_offsets, stream); + cudf::detail::make_device_uvector_async( + json_col.string_offsets, stream, rmm::mr::get_current_device_resource()); rmm::device_uvector d_string_lengths = - cudf::detail::make_device_uvector_async(json_col.string_lengths, stream); + cudf::detail::make_device_uvector_async( + json_col.string_lengths, stream, rmm::mr::get_current_device_resource()); // Prepare iterator that returns (string_offset, string_length)-tuples auto offset_length_it = diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu index 6e1089796de..7ae8deb8055 100644 --- a/cpp/src/io/json/reader_impl.cu +++ b/cpp/src/io/json/reader_impl.cu @@ -340,8 +340,8 @@ rmm::device_uvector upload_data_to_device(json_reader_options const& reade "Error finding the record within the specified byte range.\n"); // Upload the raw data that is within the rows of interest - return cudf::detail::make_device_uvector_async(h_data.subspan(start_offset, bytes_to_upload), - stream); + return cudf::detail::make_device_uvector_async( + h_data.subspan(start_offset, bytes_to_upload), stream, rmm::mr::get_current_device_resource()); } std::pair, col_map_ptr_type> get_column_names_and_map( @@ -512,11 +512,14 @@ table_with_metadata convert_data_to_table(parse_options_view const& parse_opts, h_valid[i] = out_buffers[i].null_mask(); } - auto d_dtypes = 
cudf::detail::make_device_uvector_async(h_dtypes, stream); - auto d_data = cudf::detail::make_device_uvector_async(h_data, stream); - auto d_valid = cudf::detail::make_device_uvector_async(h_valid, stream); - auto d_valid_counts = - cudf::detail::make_zeroed_device_uvector_async(num_columns, stream); + auto d_dtypes = cudf::detail::make_device_uvector_async( + h_dtypes, stream, rmm::mr::get_current_device_resource()); + auto d_data = cudf::detail::make_device_uvector_async( + h_data, stream, rmm::mr::get_current_device_resource()); + auto d_valid = cudf::detail::make_device_uvector_async( + h_valid, stream, rmm::mr::get_current_device_resource()); + auto d_valid_counts = cudf::detail::make_zeroed_device_uvector_async( + num_columns, stream, rmm::mr::get_current_device_resource()); cudf::io::json::gpu::convert_json_to_columns( parse_opts, data, rec_starts, d_dtypes, column_map, d_data, d_valid, d_valid_counts, stream); @@ -530,13 +533,18 @@ table_with_metadata convert_data_to_table(parse_options_view const& parse_opts, auto repl_chars = std::vector{'"', '\\', '\t', '\r', '\b'}; auto repl_offsets = std::vector{0, 1, 2, 3, 4, 5}; - auto target = make_strings_column(cudf::detail::make_device_uvector_async(target_chars, stream), - cudf::detail::make_device_uvector_async(target_offsets, stream), - {}, - 0, - stream); - auto repl = make_strings_column(cudf::detail::make_device_uvector_async(repl_chars, stream), - cudf::detail::make_device_uvector_async(repl_offsets, stream), + auto target = + make_strings_column(cudf::detail::make_device_uvector_async( + target_chars, stream, rmm::mr::get_current_device_resource()), + cudf::detail::make_device_uvector_async( + target_offsets, stream, rmm::mr::get_current_device_resource()), + {}, + 0, + stream); + auto repl = make_strings_column(cudf::detail::make_device_uvector_async( + repl_chars, stream, rmm::mr::get_current_device_resource()), + cudf::detail::make_device_uvector_async( + repl_offsets, stream, 
rmm::mr::get_current_device_resource()), {}, 0, stream); @@ -617,7 +625,8 @@ table_with_metadata read_json(std::vector>& sources, auto d_data = rmm::device_uvector(0, stream); if (should_load_whole_source(reader_opts)) { - d_data = cudf::detail::make_device_uvector_async(h_data, stream); + d_data = cudf::detail::make_device_uvector_async( + h_data, stream, rmm::mr::get_current_device_resource()); } auto rec_starts = find_record_starts(reader_opts, h_data, d_data, stream); diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index b4bcb5548de..9e56b20114c 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -552,14 +552,16 @@ std::unique_ptr make_strings_column_from_host(host_span offsets(host_strings.size() + 1, 0); std::transform_inclusive_scan(host_strings.begin(), host_strings.end(), offsets.begin() + 1, std::plus{}, [](auto& str) { return str.size(); }); - auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, stream); + auto d_offsets = + cudf::detail::make_device_uvector_sync(offsets, stream, rmm::mr::get_current_device_resource()); return cudf::make_strings_column( host_strings.size(), std::move(d_offsets), std::move(d_chars), {}, 0); } diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp index d30c3823080..21fc04a69ec 100644 --- a/cpp/src/io/orc/orc.hpp +++ b/cpp/src/io/orc/orc.hpp @@ -38,6 +38,8 @@ namespace io { namespace orc { static constexpr uint32_t block_header_size = 3; +// Seconds from January 1st, 1970 to January 1st, 2015 +static constexpr int64_t orc_utc_epoch = 1420070400; struct PostScript { uint64_t footerLength = 0; // the length of the footer section in bytes diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp index 43f0565845c..05560a3ca62 100644 --- a/cpp/src/io/orc/orc_gpu.hpp +++ b/cpp/src/io/orc/orc_gpu.hpp @@ -16,7 +16,7 @@ #pragma once -#include "timezone.cuh" +#include #include "orc.hpp" @@ -294,7 +294,7 @@ void DecodeOrcColumnData(ColumnDesc* 
chunks, uint32_t num_columns, uint32_t num_stripes, size_t first_row, - timezone_table_view tz_table, + table_device_view tz_table, uint32_t num_rowgroups, uint32_t rowidx_stride, size_t level, diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 96eb20e1e66..bcf53159676 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -23,13 +23,13 @@ #include "orc_gpu.hpp" #include "reader_impl.hpp" -#include "timezone.cuh" #include #include #include #include +#include #include #include #include @@ -576,8 +576,8 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const& prefix_sums_to_update.emplace_back(col_idx, prefix_sums[col_idx]); } } - auto const d_prefix_sums_to_update = - cudf::detail::make_device_uvector_async(prefix_sums_to_update, stream); + auto const d_prefix_sums_to_update = cudf::detail::make_device_uvector_async( + prefix_sums_to_update, stream, rmm::mr::get_current_device_resource()); thrust::for_each(rmm::exec_policy(stream), d_prefix_sums_to_update.begin(), @@ -603,7 +603,7 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const& void reader::impl::decode_stream_data(cudf::detail::hostdevice_2dvector& chunks, size_t num_dicts, size_t skip_rows, - timezone_table_view tz_table, + table_device_view tz_table, cudf::detail::hostdevice_2dvector& row_groups, size_t row_index_stride, std::vector& out_buffers, @@ -915,11 +915,11 @@ reader::impl::impl(std::vector>&& sources, decimal128_columns = options.get_decimal128_columns(); } -timezone_table reader::impl::compute_timezone_table( +std::unique_ptr
reader::impl::compute_timezone_table( const std::vector& selected_stripes, rmm::cuda_stream_view stream) { - if (selected_stripes.empty()) return {}; + if (selected_stripes.empty()) return std::make_unique(); auto const has_timestamp_column = std::any_of( selected_columns.levels.cbegin(), selected_columns.levels.cend(), [&](auto& col_lvl) { @@ -927,10 +927,10 @@ timezone_table reader::impl::compute_timezone_table( return _metadata.get_col_type(col_meta.id).kind == TypeKind::TIMESTAMP; }); }); - if (not has_timestamp_column) return {}; + if (not has_timestamp_column) return std::make_unique(); - return build_timezone_transition_table(selected_stripes[0].stripe_info[0].second->writerTimezone, - stream); + return cudf::detail::make_timezone_transition_table( + {}, selected_stripes[0].stripe_info[0].second->writerTimezone, stream); } table_with_metadata reader::impl::read(size_type skip_rows, @@ -1038,7 +1038,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, selected_columns.levels[level].size(), [&]() { return cudf::detail::make_zeroed_device_uvector_async( - total_num_stripes, stream); + total_num_stripes, stream, rmm::mr::get_current_device_resource()); }); // Tracker for eventually deallocating compressed and uncompressed data @@ -1238,10 +1238,11 @@ table_with_metadata reader::impl::read(size_type skip_rows, } if (not is_level_data_empty) { + auto const tz_table_dview = table_device_view::create(tz_table->view(), stream); decode_stream_data(chunks, num_dict_entries, skip_rows, - tz_table.view(), + *tz_table_dview, row_groups, _metadata.get_row_index_stride(), out_buffers[level], @@ -1270,7 +1271,8 @@ table_with_metadata reader::impl::read(size_type skip_rows, }); if (buff_data.size()) { - auto const dev_buff_data = cudf::detail::make_device_uvector_async(buff_data, stream); + auto const dev_buff_data = cudf::detail::make_device_uvector_async( + buff_data, stream, rmm::mr::get_current_device_resource()); generate_offsets_for_list(dev_buff_data, 
stream); } } diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 96492e4c2b2..94b0fdc09d2 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -145,7 +145,7 @@ class reader::impl { void decode_stream_data(cudf::detail::hostdevice_2dvector& chunks, size_t num_dicts, size_t skip_rows, - timezone_table_view tz_table, + table_device_view tz_table, cudf::detail::hostdevice_2dvector& row_groups, size_t row_index_stride, std::vector& out_buffers, @@ -210,7 +210,7 @@ class reader::impl { * * @return Timezone table with timestamp offsets */ - timezone_table compute_timezone_table( + std::unique_ptr
compute_timezone_table( const std::vector& selected_stripes, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index d0d077d2611..8e698dd9dff 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,9 +43,6 @@ inline __device__ uint8_t is_rlev1(uint8_t encoding_mode) { return encoding_mode inline __device__ uint8_t is_dictionary(uint8_t encoding_mode) { return encoding_mode & 1; } -static __device__ __constant__ int64_t kORCTimeToUTC = - 1420070400; // Seconds from January 1st, 1970 to January 1st, 2015 - struct orc_bytestream_s { const uint8_t* base; uint32_t pos; @@ -101,7 +98,7 @@ struct orc_datadec_state_s { uint32_t max_vals; // max # of non-zero values to decode in this batch uint32_t nrows; // # of rows in current batch (up to block_size) uint32_t buffered_count; // number of buffered values in the secondary data stream - int64_t utc_epoch; // kORCTimeToUTC - gmtOffset + duration_s tz_epoch; // orc_ut_epoch - ut_offset RowGroup index; }; @@ -1374,7 +1371,7 @@ template __global__ void __launch_bounds__(block_size) gpuDecodeOrcColumnData(ColumnDesc* chunks, DictionaryEntry* global_dictionary, - timezone_table_view tz_table, + table_device_view tz_table, device_2dspan row_groups, size_t first_row, uint32_t rowidx_stride, @@ -1446,7 +1443,8 @@ __global__ void __launch_bounds__(block_size) } if (!is_dictionary(s->chunk.encoding_kind)) { s->chunk.dictionary_start = 0; } - s->top.data.utc_epoch = kORCTimeToUTC - tz_table.gmt_offset; + static constexpr duration_s d_orc_utc_epoch = duration_s{orc_utc_epoch}; + s->top.data.tz_epoch = d_orc_utc_epoch - get_ut_offset(tz_table, timestamp_s{d_orc_utc_epoch}); bytestream_init(&s->bs, 
s->chunk.streams[CI_DATA], s->chunk.strm_len[CI_DATA]); bytestream_init(&s->bs2, s->chunk.streams[CI_DATA2], s->chunk.strm_len[CI_DATA2]); @@ -1769,37 +1767,33 @@ __global__ void __launch_bounds__(block_size) break; } case TIMESTAMP: { - int64_t seconds = s->vals.i64[t + vals_skipped] + s->top.data.utc_epoch; - int64_t nanos = secondary_val; - nanos = (nanos >> 3) * kTimestampNanoScale[nanos & 7]; - if (!tz_table.ttimes.empty()) { - seconds += get_gmt_offset(tz_table.ttimes, tz_table.offsets, seconds); - } + auto seconds = s->top.data.tz_epoch + duration_s{s->vals.i64[t + vals_skipped]}; + // Convert to UTC + seconds += get_ut_offset(tz_table, timestamp_s{seconds}); + + duration_ns nanos = duration_ns{(static_cast(secondary_val) >> 3) * + kTimestampNanoScale[secondary_val & 7]}; + // Adjust seconds only for negative timestamps with positive nanoseconds. // Alternative way to represent negative timestamps is with negative nanoseconds // in which case the adjustment in not needed. // Comparing with 999999 instead of zero to match the apache writer. 
- if (seconds < 0 and nanos > 999999) { seconds -= 1; } - - duration_ns d_ns{nanos}; - duration_s d_s{seconds}; + if (seconds.count() < 0 and nanos.count() > 999999) { seconds -= duration_s{1}; } static_cast(data_out)[row] = [&]() { using cuda::std::chrono::duration_cast; switch (s->chunk.timestamp_type_id) { case type_id::TIMESTAMP_SECONDS: - return d_s.count() + duration_cast(d_ns).count(); + return (seconds + duration_cast(nanos)).count(); case type_id::TIMESTAMP_MILLISECONDS: - return duration_cast(d_s).count() + - duration_cast(d_ns).count(); + return (seconds + duration_cast(nanos)).count(); case type_id::TIMESTAMP_MICROSECONDS: - return duration_cast(d_s).count() + - duration_cast(d_ns).count(); + return (seconds + duration_cast(nanos)).count(); case type_id::TIMESTAMP_NANOSECONDS: default: - return duration_cast(d_s).count() + - d_ns.count(); // nanoseconds as output in case of `type_id::EMPTY` and - // `type_id::TIMESTAMP_NANOSECONDS` + // nanoseconds as output in case of `type_id::EMPTY` and + // `type_id::TIMESTAMP_NANOSECONDS` + return (seconds + nanos).count(); } }(); @@ -1887,7 +1881,7 @@ void __host__ DecodeOrcColumnData(ColumnDesc* chunks, uint32_t num_columns, uint32_t num_stripes, size_t first_row, - timezone_table_view tz_table, + table_device_view tz_table, uint32_t num_rowgroups, uint32_t rowidx_stride, size_t level, diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 9032e3d2502..427167e2d0f 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -49,9 +49,6 @@ constexpr int scratch_buffer_size = 512 * 4; // Workaround replaces zero-length patch lists by a dummy zero patch constexpr bool zero_pll_war = true; -static __device__ __constant__ int64_t kORCTimeToUTC = - 1420070400; // Seconds from January 1st, 1970 to January 1st, 2015 - struct byterle_enc_state_s { uint32_t literal_run; uint32_t repeat_run; @@ -814,7 +811,7 @@ __global__ void __launch_bounds__(block_size) int32_t ts_scale = powers_of_ten[9 - min(s->chunk.scale, 9)]; int64_t seconds = ts / ts_scale; int64_t nanos = (ts - seconds * ts_scale); - s->vals.i64[nz_idx] = seconds - kORCTimeToUTC; + s->vals.i64[nz_idx] = seconds - orc_utc_epoch; if (nanos != 0) { // Trailing zeroes are encoded in the lower 3-bits uint32_t zeroes = 0; diff --git a/cpp/src/io/orc/timezone.cuh b/cpp/src/io/orc/timezone.cuh deleted file mode 100644 index 52736d6451a..00000000000 --- a/cpp/src/io/orc/timezone.cuh +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -namespace cudf { -namespace io { - -struct timezone_table_view { - int32_t gmt_offset = 0; - cudf::device_span ttimes; - cudf::device_span offsets; -}; - -// Cycle in which the time offsets repeat -static constexpr int32_t cycle_years = 400; -// Number of seconds in 400 years -static constexpr int64_t cycle_seconds = - cuda::std::chrono::duration_cast(duration_D{365 * cycle_years + (100 - 3)}).count(); -// Two entries per year, over the length of the cycle -static constexpr uint32_t cycle_entry_cnt = 2 * cycle_years; - -/** - * @brief Returns the GMT offset for a given date and given timezone table. - * - * @param ttimes Transition times; trailing `cycle_entry_cnt` entries are used for all times - * beyond the one covered by the TZif file - * @param offsets Time offsets in specific intervals; trailing `cycle_entry_cnt` entries are used - * for all times beyond the one covered by the TZif file - * @param count Number of elements in @p ttimes and @p offsets - * @param ts ORC timestamp - * - * @return GMT offset - */ -CUDF_HOST_DEVICE inline int32_t get_gmt_offset_impl(int64_t const* ttimes, - int32_t const* offsets, - size_t count, - int64_t ts) -{ - // Returns start of the range if all elements are larger than the input timestamp - auto last_less_equal_ttime_idx = [&](long begin_idx, long end_idx, int64_t ts) { - auto const first_larger_ttime = - thrust::upper_bound(thrust::seq, ttimes + begin_idx, ttimes + end_idx, ts); - // Element before the first larger element is the last one less of equal - return std::max(first_larger_ttime - ttimes - 1, begin_idx); - }; - - auto const file_entry_cnt = count - cycle_entry_cnt; - // Search in the file entries if the timestamp is in range - if (ts <= ttimes[file_entry_cnt - 1]) { - return offsets[last_less_equal_ttime_idx(0, file_entry_cnt, ts)]; - } else { - // Search in the 400-year cycle 
if outside of the file entries range - return offsets[last_less_equal_ttime_idx( - file_entry_cnt, count, (ts + cycle_seconds) % cycle_seconds)]; - } -} - -/** - * @brief Host `get_gmt_offset` interface. - * - * Implemented in `get_gmt_offset_impl`. - */ -inline __host__ int32_t get_gmt_offset(cudf::host_span ttimes, - cudf::host_span offsets, - int64_t ts) -{ - CUDF_EXPECTS(ttimes.size() == offsets.size(), - "transition times and offsets must have the same length"); - return get_gmt_offset_impl(ttimes.begin(), offsets.begin(), ttimes.size(), ts); -} - -/** - * @brief Device `get_gmt_offset` interface. - * - * Implemented in `get_gmt_offset_impl`. - */ -inline __device__ int32_t get_gmt_offset(cudf::device_span ttimes, - cudf::device_span offsets, - int64_t ts) -{ - return get_gmt_offset_impl(ttimes.begin(), offsets.begin(), ttimes.size(), ts); -} - -class timezone_table { - int32_t gmt_offset = 0; - rmm::device_uvector ttimes; - rmm::device_uvector offsets; - - public: - // Safe to use the default stream, device_uvectors will not change after they are created empty - timezone_table() : ttimes{0, cudf::get_default_stream()}, offsets{0, cudf::get_default_stream()} - { - } - timezone_table(int32_t gmt_offset, - rmm::device_uvector&& ttimes, - rmm::device_uvector&& offsets) - : gmt_offset{gmt_offset}, ttimes{std::move(ttimes)}, offsets{std::move(offsets)} - { - } - [[nodiscard]] timezone_table_view view() const { return {gmt_offset, ttimes, offsets}; } -}; - -/** - * @brief Creates a transition table to convert ORC timestamps to UTC. - * - * Uses system's TZif files. Assumes little-endian platform when parsing these files. 
- * - * @param timezone_name standard timezone name (for example, "US/Pacific") - * @param stream CUDA stream used for device memory operations and kernel launches - * - * @return The transition table for the given timezone - */ -timezone_table build_timezone_transition_table(std::string const& timezone_name, - rmm::cuda_stream_view stream); - -} // namespace io -} // namespace cudf diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 00b5c5428b1..d3bb0a45c12 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -332,6 +333,136 @@ size_type orc_table_view::num_rows() const noexcept return columns.empty() ? 0 : columns.front().size(); } +orc_streams::orc_stream_offsets orc_streams::compute_offsets( + host_span columns, size_t num_rowgroups) const +{ + std::vector strm_offsets(streams.size()); + size_t non_rle_data_size = 0; + size_t rle_data_size = 0; + for (size_t i = 0; i < streams.size(); ++i) { + const auto& stream = streams[i]; + + auto const is_rle_data = [&]() { + // First stream is an index stream, don't check types, etc. 
+ if (!stream.column_index().has_value()) return true; + + auto const& column = columns[stream.column_index().value()]; + // Dictionary encoded string column - dictionary characters or + // directly encoded string - column characters + if (column.orc_kind() == TypeKind::STRING && + ((stream.kind == DICTIONARY_DATA && column.orc_encoding() == DICTIONARY_V2) || + (stream.kind == DATA && column.orc_encoding() == DIRECT_V2))) + return false; + // Decimal data + if (column.orc_kind() == TypeKind::DECIMAL && stream.kind == DATA) return false; + + // Everything else uses RLE + return true; + }(); + // non-RLE and RLE streams are separated in the buffer that stores encoded data + // The computed offsets do not take the streams of the other type into account + if (is_rle_data) { + strm_offsets[i] = rle_data_size; + rle_data_size += (stream.length + 7) & ~7; + } else { + strm_offsets[i] = non_rle_data_size; + non_rle_data_size += stream.length; + } + } + non_rle_data_size = (non_rle_data_size + 7) & ~7; + + return {std::move(strm_offsets), non_rle_data_size, rle_data_size}; +} + +namespace { +struct string_length_functor { + __device__ inline size_type operator()(int const i) const + { + // we translate from 0 -> num_chunks * 2 because each statistic has a min and max + // string and we need to calculate lengths for both. + if (i >= num_chunks * 2) return 0; + + // min strings are even values, max strings are odd values of i + auto const should_copy_min = i % 2 == 0; + // index of the chunk + auto const idx = i / 2; + auto& str_val = should_copy_min ? stripe_stat_chunks[idx].min_value.str_val + : stripe_stat_chunks[idx].max_value.str_val; + auto const str = stripe_stat_merge[idx].stats_dtype == dtype_string; + return str ? 
str_val.length : 0; + } + + int const num_chunks; + statistics_chunk const* stripe_stat_chunks; + statistics_merge_group const* stripe_stat_merge; +}; + +__global__ void copy_string_data(char* string_pool, + size_type* offsets, + statistics_chunk* chunks, + statistics_merge_group const* groups) +{ + auto const idx = blockIdx.x / 2; + if (groups[idx].stats_dtype == dtype_string) { + // min strings are even values, max strings are odd values of i + auto const should_copy_min = blockIdx.x % 2 == 0; + auto& str_val = should_copy_min ? chunks[idx].min_value.str_val : chunks[idx].max_value.str_val; + auto dst = &string_pool[offsets[blockIdx.x]]; + auto src = str_val.ptr; + + for (int i = threadIdx.x; i < str_val.length; i += blockDim.x) { + dst[i] = src[i]; + } + if (threadIdx.x == 0) { str_val.ptr = dst; } + } +} + +} // namespace + +void persisted_statistics::persist(int num_table_rows, + bool single_write_mode, + intermediate_statistics& intermediate_stats, + rmm::cuda_stream_view stream) +{ + if (not single_write_mode) { + // persist the strings in the chunks into a string pool and update pointers + auto const num_chunks = static_cast(intermediate_stats.stripe_stat_chunks.size()); + // min offset and max offset + 1 for total size + rmm::device_uvector offsets((num_chunks * 2) + 1, stream); + + auto iter = cudf::detail::make_counting_transform_iterator( + 0, + string_length_functor{num_chunks, + intermediate_stats.stripe_stat_chunks.data(), + intermediate_stats.stripe_stat_merge.device_ptr()}); + thrust::exclusive_scan(rmm::exec_policy(stream), iter, iter + offsets.size(), offsets.begin()); + + // pull size back to host + auto const total_string_pool_size = offsets.element(num_chunks * 2, stream); + if (total_string_pool_size > 0) { + rmm::device_uvector string_pool(total_string_pool_size, stream); + + // offsets describes where in the string pool each string goes. 
Going with the simple + // approach for now, but it is possible something fancier with breaking up each thread into + // copying x bytes instead of a single string is the better method since we are dealing in + // min/max strings they almost certainly will not be uniform length. + copy_string_data<<>>( + string_pool.data(), + offsets.data(), + intermediate_stats.stripe_stat_chunks.data(), + intermediate_stats.stripe_stat_merge.device_ptr()); + string_pools.emplace_back(std::move(string_pool)); + } + } + + stripe_stat_chunks.emplace_back(std::move(intermediate_stats.stripe_stat_chunks)); + stripe_stat_merge.emplace_back(std::move(intermediate_stats.stripe_stat_merge)); + stats_dtypes = std::move(intermediate_stats.stats_dtypes); + col_types = std::move(intermediate_stats.col_types); + num_rows = num_table_rows; +} + +namespace { /** * @brief Gathers stripe information. * @@ -418,7 +549,7 @@ void init_dictionaries(orc_table_view& orc_table, [&](auto& col_idx) { auto& str_column = orc_table.column(col_idx); return cudf::detail::make_zeroed_device_uvector_async( - str_column.size(), stream); + str_column.size(), stream, rmm::mr::get_current_device_resource()); }); // Create views of the temporary buffers in device memory @@ -428,7 +559,8 @@ void init_dictionaries(orc_table_view& orc_table, dict_indices.begin(), dict_indices.end(), std::back_inserter(dict_indices_views), [](auto& di) { return device_span{di}; }); - auto d_dict_indices_views = cudf::detail::make_device_uvector_async(dict_indices_views, stream); + auto d_dict_indices_views = cudf::detail::make_device_uvector_async( + dict_indices_views, stream, rmm::mr::get_current_device_resource()); gpu::InitDictionaryIndices(orc_table.d_columns, *dict, @@ -441,12 +573,26 @@ void init_dictionaries(orc_table_view& orc_table, dict->device_to_host(stream, true); } -void writer::impl::build_dictionaries(orc_table_view& orc_table, - host_span stripe_bounds, - hostdevice_2dvector const& dict, - host_span> dict_index, - 
host_span dictionary_enabled, - hostdevice_2dvector& stripe_dict) +/** + * @brief Builds up per-stripe dictionaries for string columns. + * + * @param orc_table Non-owning view of a cuDF table w/ ORC-related info + * @param stripe_bounds List of stripe boundaries + * @param dict List of dictionary chunks [rowgroup][column] + * @param dict_index List of dictionary indices + * @param dictionary_enabled Whether dictionary encoding is enabled for a given column + * @param stripe_dict List of stripe dictionaries + * @param enable_dictionary Whether dictionary is enabled + * @param stream CUDA stream used for device memory operations and kernel launches + */ +void build_dictionaries(orc_table_view& orc_table, + host_span stripe_bounds, + hostdevice_2dvector const& dict, + host_span> dict_index, + host_span dictionary_enabled, + hostdevice_2dvector& stripe_dict, + bool enable_dictionary, + rmm::cuda_stream_view stream) { const auto num_rowgroups = dict.size().first; @@ -470,7 +616,7 @@ void writer::impl::build_dictionaries(orc_table_view& orc_table, sd.leaf_column = dict[0][dict_idx].leaf_column; } - if (enable_dictionary_) { + if (enable_dictionary) { struct string_column_cost { size_t direct = 0; size_t dictionary = 0; @@ -554,9 +700,20 @@ auto comp_block_alignment(CompressionKind compression_kind) return 1u << nvcomp::compress_output_alignment_bits(to_nvcomp_compression_type(compression_kind)); } -orc_streams writer::impl::create_streams(host_span columns, - file_segmentation const& segmentation, - std::map const& decimal_column_sizes) +/** + * @brief Builds up per-column streams. 
+ * + * @param[in,out] columns List of columns + * @param[in] segmentation stripe and rowgroup ranges + * @param[in] decimal_column_sizes Sizes of encoded decimal columns + * @return List of stream descriptors + */ +orc_streams create_streams(host_span columns, + file_segmentation const& segmentation, + std::map const& decimal_column_sizes, + bool enable_dictionary, + CompressionKind compression_kind, + bool single_write_mode) { // 'column 0' row index stream std::vector streams{{ROW_INDEX, 0}}; // TODO: Separate index and data streams? @@ -599,7 +756,7 @@ orc_streams writer::impl::create_streams(host_span columns, auto add_stream = [&](gpu::StreamIndexType index_type, StreamKind kind, TypeKind type_kind, size_t size) { - auto const max_alignment_padding = uncomp_block_alignment(compression_kind_) - 1; + auto const max_alignment_padding = uncomp_block_alignment(compression_kind) - 1; const auto base = column.index() * gpu::CI_NUM_STREAMS; ids[base + index_type] = streams.size(); streams.push_back(orc::Stream{ @@ -636,7 +793,7 @@ orc_streams writer::impl::create_streams(host_span columns, column.set_orc_encoding(DIRECT); break; case TypeKind::STRING: { - bool enable_dict = enable_dictionary_; + bool enable_dict = enable_dictionary; size_t dict_data_size = 0; size_t dict_strings = 0; size_t dict_lengths_div512 = 0; @@ -711,47 +868,6 @@ orc_streams writer::impl::create_streams(host_span columns, return {std::move(streams), std::move(ids), std::move(types)}; } -orc_streams::orc_stream_offsets orc_streams::compute_offsets( - host_span columns, size_t num_rowgroups) const -{ - std::vector strm_offsets(streams.size()); - size_t non_rle_data_size = 0; - size_t rle_data_size = 0; - for (size_t i = 0; i < streams.size(); ++i) { - const auto& stream = streams[i]; - - auto const is_rle_data = [&]() { - // First stream is an index stream, don't check types, etc. 
- if (!stream.column_index().has_value()) return true; - - auto const& column = columns[stream.column_index().value()]; - // Dictionary encoded string column - dictionary characters or - // directly encoded string - column characters - if (column.orc_kind() == TypeKind::STRING && - ((stream.kind == DICTIONARY_DATA && column.orc_encoding() == DICTIONARY_V2) || - (stream.kind == DATA && column.orc_encoding() == DIRECT_V2))) - return false; - // Decimal data - if (column.orc_kind() == TypeKind::DECIMAL && stream.kind == DATA) return false; - - // Everything else uses RLE - return true; - }(); - // non-RLE and RLE streams are separated in the buffer that stores encoded data - // The computed offsets do not take the streams of the other type into account - if (is_rle_data) { - strm_offsets[i] = rle_data_size; - rle_data_size += (stream.length + 7) & ~7; - } else { - strm_offsets[i] = non_rle_data_size; - non_rle_data_size += stream.length; - } - } - non_rle_data_size = (non_rle_data_size + 7) & ~7; - - return {std::move(strm_offsets), non_rle_data_size, rle_data_size}; -} - std::vector> calculate_aligned_rowgroup_bounds( orc_table_view const& orc_table, file_segmentation const& segmentation, @@ -772,7 +888,8 @@ std::vector> calculate_aligned_rowgroup_bounds( aligned_rgs.count() * sizeof(rowgroup_rows), cudaMemcpyDefault, stream.value())); - auto const d_stripes = cudf::detail::make_device_uvector_async(segmentation.stripes, stream); + auto const d_stripes = cudf::detail::make_device_uvector_async( + segmentation.stripes, stream, rmm::mr::get_current_device_resource()); // One thread per column, per stripe thrust::for_each_n( @@ -1091,11 +1208,23 @@ encoded_data encode_columns(orc_table_view const& orc_table, return {std::move(encoded_data), std::move(chunk_streams)}; } -std::vector writer::impl::gather_stripes( +/** + * @brief Returns stripe information after compacting columns' individual data + * chunks into contiguous data streams. 
+ * + * @param[in] num_index_streams Total number of index streams + * @param[in] segmentation stripe and rowgroup ranges + * @param[in,out] enc_streams List of encoder chunk streams [column][rowgroup] + * @param[in,out] strm_desc List of stream descriptors [stripe][data_stream] + * @param[in] stream CUDA stream used for device memory operations and kernel launches + * @return The stripes' information + */ +std::vector gather_stripes( size_t num_index_streams, file_segmentation const& segmentation, hostdevice_2dvector* enc_streams, - hostdevice_2dvector* strm_desc) + hostdevice_2dvector* strm_desc, + rmm::cuda_stream_view stream) { if (segmentation.num_stripes() == 0) { return {}; } std::vector stripes(segmentation.num_stripes()); @@ -1163,16 +1292,25 @@ hostdevice_vector allocate_and_encode_blobs( return blobs; } -writer::impl::intermediate_statistics writer::impl::gather_statistic_blobs( - statistics_freq const stats_freq, - orc_table_view const& orc_table, - file_segmentation const& segmentation) +/** + * @brief Returns column statistics in an intermediate format. 
+ * + * @param statistics_freq Frequency of statistics to be included in the output file + * @param orc_table Table information to be written + * @param segmentation stripe and rowgroup ranges + * @param stream CUDA stream used for device memory operations and kernel launches + * @return The statistic information + */ +intermediate_statistics gather_statistic_blobs(statistics_freq const stats_freq, + orc_table_view const& orc_table, + file_segmentation const& segmentation, + rmm::cuda_stream_view stream) { auto const num_rowgroup_blobs = segmentation.rowgroups.count(); auto const num_stripe_blobs = segmentation.num_stripes() * orc_table.num_columns(); auto const are_statistics_enabled = stats_freq != statistics_freq::STATISTICS_NONE; if (not are_statistics_enabled or num_rowgroup_blobs + num_stripe_blobs == 0) { - return writer::impl::intermediate_statistics{stream}; + return intermediate_statistics{stream}; } hostdevice_vector stat_desc(orc_table.num_columns(), stream); @@ -1290,8 +1428,17 @@ writer::impl::intermediate_statistics writer::impl::gather_statistic_blobs( std::move(col_types)}; } -writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs( - int num_stripes, writer::impl::persisted_statistics& per_chunk_stats) +/** + * @brief Returns column statistics encoded in ORC protobuf format stored in the footer. 
+ * + * @param num_stripes number of stripes in the data + * @param incoming_stats intermediate statistics returned from `gather_statistic_blobs` + * @param stream CUDA stream used for device memory operations and kernel launches + * @return The encoded statistic blobs + */ +encoded_footer_statistics finish_statistic_blobs(int num_stripes, + persisted_statistics& per_chunk_stats, + rmm::cuda_stream_view stream) { auto stripe_size_iter = thrust::make_transform_iterator(per_chunk_stats.stripe_stat_merge.begin(), [](auto const& i) { return i.size(); }); @@ -1381,16 +1528,36 @@ writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs( return {std::move(stripe_blobs), std::move(file_blobs)}; } -void writer::impl::write_index_stream(int32_t stripe_id, - int32_t stream_id, - host_span columns, - file_segmentation const& segmentation, - host_2dspan enc_streams, - host_2dspan strm_desc, - host_span comp_res, - std::vector const& rg_stats, - StripeInformation* stripe, - orc_streams* streams) +/** + * @brief Writes the specified column's row index stream. 
+ * + * @param[in] stripe_id Stripe's identifier + * @param[in] stream_id Stream identifier (column id + 1) + * @param[in] columns List of columns + * @param[in] segmentation stripe and rowgroup ranges + * @param[in] enc_streams List of encoder chunk streams [column][rowgroup] + * @param[in] strm_desc List of stream descriptors + * @param[in] comp_res Output status for compressed streams + * @param[in] rg_stats row group level statistics + * @param[in,out] stripe Stream's parent stripe + * @param[in,out] streams List of all streams + * @param[in] compression_kind The compression kind + * @param[in] compression_blocksize The block size used for compression + * @param[in] out_sink Sink for writing data + */ +void write_index_stream(int32_t stripe_id, + int32_t stream_id, + host_span columns, + file_segmentation const& segmentation, + host_2dspan enc_streams, + host_2dspan strm_desc, + host_span comp_res, + std::vector const& rg_stats, + StripeInformation* stripe, + orc_streams* streams, + CompressionKind compression_kind, + size_t compression_blocksize, + std::unique_ptr const& out_sink) { row_group_index_info present; row_group_index_info data; @@ -1402,7 +1569,7 @@ void writer::impl::write_index_stream(int32_t stripe_id, row_group_index_info record; if (stream.ids[type] > 0) { record.pos = 0; - if (compression_kind_ != NONE) { + if (compression_kind != NONE) { auto const& ss = strm_desc[stripe_id][stream.ids[type] - (columns.size() + 1)]; record.blk_pos = ss.first_block; record.comp_pos = 0; @@ -1417,10 +1584,10 @@ void writer::impl::write_index_stream(int32_t stripe_id, if (record.pos >= 0) { record.pos += stream.lengths[type]; while ((record.pos >= 0) && (record.blk_pos >= 0) && - (static_cast(record.pos) >= compression_blocksize_) && + (static_cast(record.pos) >= compression_blocksize) && (record.comp_pos + block_header_size + comp_res[record.blk_pos].bytes_written < static_cast(record.comp_size))) { - record.pos -= compression_blocksize_; + record.pos -= 
compression_blocksize; record.comp_pos += block_header_size + comp_res[record.blk_pos].bytes_written; record.blk_pos += 1; } @@ -1442,7 +1609,7 @@ void writer::impl::write_index_stream(int32_t stripe_id, } } - ProtobufWriter pbw((compression_kind_ != NONE) ? 3 : 0); + ProtobufWriter pbw((compression_kind != NONE) ? 3 : 0); // Add row index entries auto const& rowgroups_range = segmentation.stripes[stripe_id]; @@ -1467,22 +1634,39 @@ void writer::impl::write_index_stream(int32_t stripe_id, }); (*streams)[stream_id].length = pbw.size(); - if (compression_kind_ != NONE) { + if (compression_kind != NONE) { uint32_t uncomp_ix_len = (uint32_t)((*streams)[stream_id].length - 3) * 2 + 1; pbw.buffer()[0] = static_cast(uncomp_ix_len >> 0); pbw.buffer()[1] = static_cast(uncomp_ix_len >> 8); pbw.buffer()[2] = static_cast(uncomp_ix_len >> 16); } - out_sink_->host_write(pbw.data(), pbw.size()); + out_sink->host_write(pbw.data(), pbw.size()); stripe->indexLength += pbw.size(); } -std::future writer::impl::write_data_stream(gpu::StripeStream const& strm_desc, - gpu::encoder_chunk_streams const& enc_stream, - uint8_t const* compressed_data, - uint8_t* stream_out, - StripeInformation* stripe, - orc_streams* streams) +/** + * @brief Write the specified column's data streams + * + * @param[in] strm_desc Stream's descriptor + * @param[in] enc_stream Chunk's streams + * @param[in] compressed_data Compressed stream data + * @param[in,out] stream_out Temporary host output buffer + * @param[in,out] stripe Stream's parent stripe + * @param[in,out] streams List of all streams + * @param[in] compression_kind The compression kind + * @param[in] out_sink Sink for writing data + * @param[in] stream CUDA stream used for device memory operations and kernel launches + * @return An std::future that should be synchronized to ensure the writing is complete + */ +std::future write_data_stream(gpu::StripeStream const& strm_desc, + gpu::encoder_chunk_streams const& enc_stream, + uint8_t const* 
compressed_data, + uint8_t* stream_out, + StripeInformation* stripe, + orc_streams* streams, + CompressionKind compression_kind, + std::unique_ptr const& out_sink, + rmm::cuda_stream_view stream) { const auto length = strm_desc.stream_size; (*streams)[enc_stream.ids[strm_desc.stream_type]].length = length; @@ -1490,18 +1674,18 @@ std::future writer::impl::write_data_stream(gpu::StripeStream const& strm_ return std::async(std::launch::deferred, [] {}); } - const auto* stream_in = (compression_kind_ == NONE) ? enc_stream.data_ptrs[strm_desc.stream_type] - : (compressed_data + strm_desc.bfr_offset); + const auto* stream_in = (compression_kind == NONE) ? enc_stream.data_ptrs[strm_desc.stream_type] + : (compressed_data + strm_desc.bfr_offset); auto write_task = [&]() { - if (out_sink_->is_device_write_preferred(length)) { - return out_sink_->device_write_async(stream_in, length, stream); + if (out_sink->is_device_write_preferred(length)) { + return out_sink->device_write_async(stream_in, length, stream); } else { CUDF_CUDA_TRY( cudaMemcpyAsync(stream_out, stream_in, length, cudaMemcpyDefault, stream.value())); stream.synchronize(); - out_sink_->host_write(stream_out, length); + out_sink->host_write(stream_out, length); return std::async(std::launch::deferred, [] {}); } }(); @@ -1509,18 +1693,27 @@ std::future writer::impl::write_data_stream(gpu::StripeStream const& strm_ return write_task; } -void writer::impl::add_uncompressed_block_headers(std::vector& v) +/** + * @brief Insert 3-byte uncompressed block headers in a byte vector + * + * @param compression_kind The compression kind + * @param compression_blocksize The block size used for compression + * @param v The destitation byte vector to write, which must include initial 3-byte header + */ +void add_uncompressed_block_headers(CompressionKind compression_kind, + size_t compression_blocksize, + std::vector& v) { - if (compression_kind_ != NONE) { + if (compression_kind != NONE) { size_t uncomp_len = v.size() - 3, pos 
= 0, block_len; - while (uncomp_len > compression_blocksize_) { - block_len = compression_blocksize_ * 2 + 1; + while (uncomp_len > compression_blocksize) { + block_len = compression_blocksize * 2 + 1; v[pos + 0] = static_cast(block_len >> 0); v[pos + 1] = static_cast(block_len >> 8); v[pos + 2] = static_cast(block_len >> 16); - pos += 3 + compression_blocksize_; + pos += 3 + compression_blocksize; v.insert(v.begin() + pos, 3, 0); - uncomp_len -= compression_blocksize_; + uncomp_len -= compression_blocksize; } block_len = uncomp_len * 2 + 1; v[pos + 0] = static_cast(block_len >> 0); @@ -1529,58 +1722,6 @@ void writer::impl::add_uncompressed_block_headers(std::vector& v) } } -writer::impl::impl(std::unique_ptr sink, - orc_writer_options const& options, - SingleWriteMode mode, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : _mr(mr), - stream(stream), - max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, - row_index_stride{options.get_row_index_stride()}, - compression_kind_(to_orc_compression(options.get_compression())), - compression_blocksize_(compression_block_size(compression_kind_)), - stats_freq_(options.get_statistics_freq()), - single_write_mode(mode == SingleWriteMode::YES), - kv_meta(options.get_key_value_metadata()), - out_sink_(std::move(sink)) -{ - if (options.get_metadata()) { - table_meta = std::make_unique(*options.get_metadata()); - } - init_state(); -} - -writer::impl::impl(std::unique_ptr sink, - chunked_orc_writer_options const& options, - SingleWriteMode mode, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : _mr(mr), - stream(stream), - max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, - row_index_stride{options.get_row_index_stride()}, - compression_kind_(to_orc_compression(options.get_compression())), - compression_blocksize_(compression_block_size(compression_kind_)), - stats_freq_(options.get_statistics_freq()), - 
single_write_mode(mode == SingleWriteMode::YES), - kv_meta(options.get_key_value_metadata()), - out_sink_(std::move(sink)) -{ - if (options.get_metadata()) { - table_meta = std::make_unique(*options.get_metadata()); - } - init_state(); -} - -writer::impl::~impl() { close(); } - -void writer::impl::init_state() -{ - // Write file header - out_sink_->host_write(MAGIC, std::strlen(MAGIC)); -} - void pushdown_lists_null_mask(orc_column_view const& col, device_span d_columns, bitmask_type const* parent_pd_mask, @@ -1675,7 +1816,8 @@ pushdown_null_masks init_pushdown_null_masks(orc_table_view& orc_table, } // Attach null masks to device column views (async) - auto const d_mask_ptrs = cudf::detail::make_device_uvector_async(mask_ptrs, stream); + auto const d_mask_ptrs = cudf::detail::make_device_uvector_async( + mask_ptrs, stream, rmm::mr::get_current_device_resource()); thrust::for_each_n( rmm::exec_policy(stream), thrust::make_counting_iterator(0ul), @@ -1765,7 +1907,8 @@ orc_table_view make_orc_table_view(table_view const& table, orc_columns.cbegin(), orc_columns.cend(), std::back_inserter(type_kinds), [](auto& orc_column) { return orc_column.orc_kind(); }); - auto const d_type_kinds = cudf::detail::make_device_uvector_async(type_kinds, stream); + auto const d_type_kinds = cudf::detail::make_device_uvector_async( + type_kinds, stream, rmm::mr::get_current_device_resource()); rmm::device_uvector d_orc_columns(orc_columns.size(), stream); using stack_value_type = thrust::pair>; @@ -1815,7 +1958,8 @@ orc_table_view make_orc_table_view(table_view const& table, return {std::move(orc_columns), std::move(d_orc_columns), str_col_indexes, - cudf::detail::make_device_uvector_sync(str_col_indexes, stream)}; + cudf::detail::make_device_uvector_sync( + str_col_indexes, stream, rmm::mr::get_current_device_resource())}; } hostdevice_2dvector calculate_rowgroup_bounds(orc_table_view const& orc_table, @@ -1983,7 +2127,7 @@ string_dictionaries allocate_dictionaries(orc_table_view const& 
orc_table, std::back_inserter(data), [&](auto& idx) { return cudf::detail::make_zeroed_device_uvector_async( - orc_table.columns[idx].size(), stream); + orc_table.columns[idx].size(), stream, rmm::mr::get_current_device_resource()); }); std::vector> index; std::transform(orc_table.string_column_indices.begin(), @@ -1991,7 +2135,7 @@ string_dictionaries allocate_dictionaries(orc_table_view const& orc_table, std::back_inserter(index), [&](auto& idx) { return cudf::detail::make_zeroed_device_uvector_async( - orc_table.columns[idx].size(), stream); + orc_table.columns[idx].size(), stream, rmm::mr::get_current_device_resource()); }); stream.synchronize(); @@ -2006,53 +2150,13 @@ string_dictionaries allocate_dictionaries(orc_table_view const& orc_table, return {std::move(data), std::move(index), - cudf::detail::make_device_uvector_sync(data_ptrs, stream), - cudf::detail::make_device_uvector_sync(index_ptrs, stream), + cudf::detail::make_device_uvector_sync( + data_ptrs, stream, rmm::mr::get_current_device_resource()), + cudf::detail::make_device_uvector_sync( + index_ptrs, stream, rmm::mr::get_current_device_resource()), std::move(is_dict_enabled)}; } -struct string_length_functor { - __device__ inline size_type operator()(int const i) const - { - // we translate from 0 -> num_chunks * 2 because each statistic has a min and max - // string and we need to calculate lengths for both. - if (i >= num_chunks * 2) return 0; - - // min strings are even values, max strings are odd values of i - auto const should_copy_min = i % 2 == 0; - // index of the chunk - auto const idx = i / 2; - auto& str_val = should_copy_min ? stripe_stat_chunks[idx].min_value.str_val - : stripe_stat_chunks[idx].max_value.str_val; - auto const str = stripe_stat_merge[idx].stats_dtype == dtype_string; - return str ? 
str_val.length : 0; - } - - int const num_chunks; - statistics_chunk const* stripe_stat_chunks; - statistics_merge_group const* stripe_stat_merge; -}; - -__global__ void copy_string_data(char* string_pool, - size_type* offsets, - statistics_chunk* chunks, - statistics_merge_group const* groups) -{ - auto const idx = blockIdx.x / 2; - if (groups[idx].stats_dtype == dtype_string) { - // min strings are even values, max strings are odd values of i - auto const should_copy_min = blockIdx.x % 2 == 0; - auto& str_val = should_copy_min ? chunks[idx].min_value.str_val : chunks[idx].max_value.str_val; - auto dst = &string_pool[offsets[blockIdx.x]]; - auto src = str_val.ptr; - - for (int i = threadIdx.x; i < str_val.length; i += blockDim.x) { - dst[i] = src[i]; - } - if (threadIdx.x == 0) { str_val.ptr = dst; } - } -} - size_t max_compression_output_size(CompressionKind compression_kind, uint32_t compression_blocksize) { if (compression_kind == NONE) return 0; @@ -2061,60 +2165,14 @@ size_t max_compression_output_size(CompressionKind compression_kind, uint32_t co compression_blocksize); } -void writer::impl::persisted_statistics::persist(int num_table_rows, - bool single_write_mode, - intermediate_statistics& intermediate_stats, - rmm::cuda_stream_view stream) -{ - if (not single_write_mode) { - // persist the strings in the chunks into a string pool and update pointers - auto const num_chunks = static_cast(intermediate_stats.stripe_stat_chunks.size()); - // min offset and max offset + 1 for total size - rmm::device_uvector offsets((num_chunks * 2) + 1, stream); - - auto iter = cudf::detail::make_counting_transform_iterator( - 0, - string_length_functor{num_chunks, - intermediate_stats.stripe_stat_chunks.data(), - intermediate_stats.stripe_stat_merge.device_ptr()}); - thrust::exclusive_scan(rmm::exec_policy(stream), iter, iter + offsets.size(), offsets.begin()); - - // pull size back to host - auto const total_string_pool_size = offsets.element(num_chunks * 2, stream); - if 
(total_string_pool_size > 0) { - rmm::device_uvector string_pool(total_string_pool_size, stream); - - // offsets describes where in the string pool each string goes. Going with the simple - // approach for now, but it is possible something fancier with breaking up each thread into - // copying x bytes instead of a single string is the better method since we are dealing in - // min/max strings they almost certainly will not be uniform length. - copy_string_data<<>>( - string_pool.data(), - offsets.data(), - intermediate_stats.stripe_stat_chunks.data(), - intermediate_stats.stripe_stat_merge.device_ptr()); - string_pools.emplace_back(std::move(string_pool)); - } - } - - stripe_stat_chunks.emplace_back(std::move(intermediate_stats.stripe_stat_chunks)); - stripe_stat_merge.emplace_back(std::move(intermediate_stats.stripe_stat_merge)); - stats_dtypes = std::move(intermediate_stats.stats_dtypes); - col_types = std::move(intermediate_stats.col_types); - num_rows = num_table_rows; -} - -void writer::impl::write(table_view const& table) +std::unique_ptr make_table_meta(table_view const& input) { - CUDF_EXPECTS(not closed, "Data has already been flushed to out and closed"); - auto const num_rows = table.num_rows(); - - if (not table_meta) { table_meta = std::make_unique(table); } + auto table_meta = std::make_unique(input); // Fill unnamed columns' names in table_meta std::function add_default_name = [&](column_in_metadata& col_meta, std::string default_name) { - if (col_meta.get_name().empty()) col_meta.set_name(default_name); + if (col_meta.get_name().empty()) { col_meta.set_name(default_name); } for (size_type i = 0; i < col_meta.num_children(); ++i) { add_default_name(col_meta.child(i), std::to_string(i)); } @@ -2123,9 +2181,51 @@ void writer::impl::write(table_view const& table) add_default_name(table_meta->column_metadata[i], "_col" + std::to_string(i)); } - auto const d_table = table_device_view::create(table, stream); + return table_meta; +} + +/** + * @brief Perform 
the processing steps needed to convert the input table into the output ORC data + * for writing, such as compression and ORC encoding. + * + * @param input The input table + * @param table_meta The table metadata + * @param max_stripe_size Maximum size of stripes in the output file + * @param row_index_stride The row index stride + * @param enable_dictionary Whether dictionary is enabled + * @param compression_kind The compression kind + * @param compression_blocksize The block size used for compression + * @param stats_freq Column statistics granularity type for parquet/orc writers + * @param single_write_mode Flag to indicate if there is only a single table write + * @param out_sink Sink for writing data + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of the intermediate results containing the processed data + */ +std::tuple, + hostdevice_2dvector, + encoded_data, + file_segmentation, + std::vector, + orc_table_view, + rmm::device_buffer, + intermediate_statistics, + pinned_buffer> +convert_table_to_orc_data(table_view const& input, + table_input_metadata const& table_meta, + stripe_size_limits max_stripe_size, + size_type row_index_stride, + bool enable_dictionary, + CompressionKind compression_kind, + size_t compression_blocksize, + statistics_freq stats_freq, + bool single_write_mode, + data_sink const& out_sink, + rmm::cuda_stream_view stream) +{ + auto const input_tview = table_device_view::create(input, stream); - auto orc_table = make_orc_table_view(table, *d_table, *table_meta, stream); + auto orc_table = make_orc_table_view(input, *input_tview, table_meta, stream); auto const pd_masks = init_pushdown_null_masks(orc_table, stream); @@ -2145,7 +2245,7 @@ void writer::impl::write(table_view const& table) } // Decide stripe boundaries based on rowgroups and dict chunks - auto const segmentation = + auto segmentation = calculate_segmentation(orc_table.columns, std::move(rowgroup_bounds), 
max_stripe_size); // Build stripe-level dictionaries @@ -2157,15 +2257,22 @@ void writer::impl::write(table_view const& table) dict, dictionaries.index, dictionaries.dictionary_enabled, - stripe_dict); + stripe_dict, + enable_dictionary, + stream); } auto dec_chunk_sizes = decimal_chunk_sizes(orc_table, segmentation, stream); - auto const uncompressed_block_align = uncomp_block_alignment(compression_kind_); - auto const compressed_block_align = comp_block_alignment(compression_kind_); - auto streams = - create_streams(orc_table.columns, segmentation, decimal_column_sizes(dec_chunk_sizes.rg_sizes)); + auto const uncompressed_block_align = uncomp_block_alignment(compression_kind); + auto const compressed_block_align = comp_block_alignment(compression_kind); + + auto streams = create_streams(orc_table.columns, + segmentation, + decimal_column_sizes(dec_chunk_sizes.rg_sizes), + enable_dictionary, + compression_kind, + single_write_mode); auto enc_data = encode_columns(orc_table, std::move(dictionaries), std::move(dec_chunk_sizes), @@ -2174,152 +2281,314 @@ void writer::impl::write(table_view const& table) uncompressed_block_align, stream); + auto const num_rows = input.num_rows(); + // Assemble individual disparate column chunks into contiguous data streams size_type const num_index_streams = (orc_table.num_columns() + 1); const auto num_data_streams = streams.size() - num_index_streams; hostdevice_2dvector strm_descs( segmentation.num_stripes(), num_data_streams, stream); - auto stripes = gather_stripes(num_index_streams, segmentation, &enc_data.streams, &strm_descs); - - if (num_rows > 0) { - // Allocate intermediate output stream buffer - size_t compressed_bfr_size = 0; - size_t num_compressed_blocks = 0; - - auto const max_compressed_block_size = - max_compression_output_size(compression_kind_, compression_blocksize_); - auto const padded_max_compressed_block_size = - util::round_up_unsafe(max_compressed_block_size, compressed_block_align); - auto const 
padded_block_header_size = - util::round_up_unsafe(block_header_size, compressed_block_align); - - auto stream_output = [&]() { - size_t max_stream_size = 0; - bool all_device_write = true; - - for (auto& ss : strm_descs.host_view().flat_view()) { - if (!out_sink_->is_device_write_preferred(ss.stream_size)) { all_device_write = false; } - size_t stream_size = ss.stream_size; - if (compression_kind_ != NONE) { - ss.first_block = num_compressed_blocks; - ss.bfr_offset = compressed_bfr_size; - - auto num_blocks = std::max( - (stream_size + compression_blocksize_ - 1) / compression_blocksize_, 1); - stream_size += num_blocks * block_header_size; - num_compressed_blocks += num_blocks; - compressed_bfr_size += - (padded_block_header_size + padded_max_compressed_block_size) * num_blocks; - } - max_stream_size = std::max(max_stream_size, stream_size); - } + auto stripes = + gather_stripes(num_index_streams, segmentation, &enc_data.streams, &strm_descs, stream); + + if (num_rows == 0) { + return {std::move(streams), + hostdevice_vector{}, // comp_results + std::move(strm_descs), + std::move(enc_data), + std::move(segmentation), + std::move(stripes), + std::move(orc_table), + rmm::device_buffer{}, // compressed_data + intermediate_statistics{stream}, + pinned_buffer{nullptr, cudaFreeHost}}; + } - if (all_device_write) { - return pinned_buffer{nullptr, cudaFreeHost}; - } else { - return pinned_buffer{[](size_t size) { - uint8_t* ptr = nullptr; - CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); - return ptr; - }(max_stream_size), - cudaFreeHost}; + // Allocate intermediate output stream buffer + size_t compressed_bfr_size = 0; + size_t num_compressed_blocks = 0; + + auto const max_compressed_block_size = + max_compression_output_size(compression_kind, compression_blocksize); + auto const padded_max_compressed_block_size = + util::round_up_unsafe(max_compressed_block_size, compressed_block_align); + auto const padded_block_header_size = + util::round_up_unsafe(block_header_size, 
compressed_block_align); + + auto stream_output = [&]() { + size_t max_stream_size = 0; + bool all_device_write = true; + + for (auto& ss : strm_descs.host_view().flat_view()) { + if (!out_sink.is_device_write_preferred(ss.stream_size)) { all_device_write = false; } + size_t stream_size = ss.stream_size; + if (compression_kind != NONE) { + ss.first_block = num_compressed_blocks; + ss.bfr_offset = compressed_bfr_size; + + auto num_blocks = + std::max((stream_size + compression_blocksize - 1) / compression_blocksize, 1); + stream_size += num_blocks * block_header_size; + num_compressed_blocks += num_blocks; + compressed_bfr_size += + (padded_block_header_size + padded_max_compressed_block_size) * num_blocks; } - }(); + max_stream_size = std::max(max_stream_size, stream_size); + } - // Compress the data streams - rmm::device_buffer compressed_data(compressed_bfr_size, stream); - hostdevice_vector comp_results(num_compressed_blocks, stream); - thrust::fill(rmm::exec_policy(stream), - comp_results.d_begin(), - comp_results.d_end(), - compression_result{0, compression_status::FAILURE}); - if (compression_kind_ != NONE) { - strm_descs.host_to_device(stream); - gpu::CompressOrcDataStreams(static_cast(compressed_data.data()), - num_compressed_blocks, - compression_kind_, - compression_blocksize_, - max_compressed_block_size, - compressed_block_align, - strm_descs, - enc_data.streams, - comp_results, - stream); - - // deallocate encoded data as it is not needed anymore - enc_data.data = rmm::device_uvector{0, stream}; - - strm_descs.device_to_host(stream); - comp_results.device_to_host(stream, true); + if (all_device_write) { + return pinned_buffer{nullptr, cudaFreeHost}; + } else { + return pinned_buffer{[](size_t size) { + uint8_t* ptr = nullptr; + CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); + return ptr; + }(max_stream_size), + cudaFreeHost}; } + }(); + + // Compress the data streams + rmm::device_buffer compressed_data(compressed_bfr_size, stream); + hostdevice_vector 
comp_results(num_compressed_blocks, stream); + thrust::fill(rmm::exec_policy(stream), + comp_results.d_begin(), + comp_results.d_end(), + compression_result{0, compression_status::FAILURE}); + if (compression_kind != NONE) { + strm_descs.host_to_device(stream); + gpu::CompressOrcDataStreams(static_cast(compressed_data.data()), + num_compressed_blocks, + compression_kind, + compression_blocksize, + max_compressed_block_size, + compressed_block_align, + strm_descs, + enc_data.streams, + comp_results, + stream); + + // deallocate encoded data as it is not needed anymore + enc_data.data = rmm::device_uvector{0, stream}; + + strm_descs.device_to_host(stream); + comp_results.device_to_host(stream, true); + } - auto intermediate_stats = gather_statistic_blobs(stats_freq_, orc_table, segmentation); + auto intermediate_stats = gather_statistic_blobs(stats_freq, orc_table, segmentation, stream); + + return {std::move(streams), + std::move(comp_results), + std::move(strm_descs), + std::move(enc_data), + std::move(segmentation), + std::move(stripes), + std::move(orc_table), + std::move(compressed_data), + std::move(intermediate_stats), + std::move(stream_output)}; +} - if (intermediate_stats.stripe_stat_chunks.size() > 0) { - persisted_stripe_statistics.persist( - orc_table.num_rows(), single_write_mode, intermediate_stats, stream); +} // namespace + +writer::impl::impl(std::unique_ptr sink, + orc_writer_options const& options, + SingleWriteMode mode, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : _mr(mr), + stream(stream), + max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, + row_index_stride{options.get_row_index_stride()}, + compression_kind_(to_orc_compression(options.get_compression())), + compression_blocksize_(compression_block_size(compression_kind_)), + stats_freq_(options.get_statistics_freq()), + single_write_mode(mode == SingleWriteMode::YES), + kv_meta(options.get_key_value_metadata()), + 
out_sink_(std::move(sink)) +{ + if (options.get_metadata()) { + table_meta = std::make_unique(*options.get_metadata()); + } + init_state(); +} + +writer::impl::impl(std::unique_ptr sink, + chunked_orc_writer_options const& options, + SingleWriteMode mode, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : _mr(mr), + stream(stream), + max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, + row_index_stride{options.get_row_index_stride()}, + compression_kind_(to_orc_compression(options.get_compression())), + compression_blocksize_(compression_block_size(compression_kind_)), + stats_freq_(options.get_statistics_freq()), + single_write_mode(mode == SingleWriteMode::YES), + kv_meta(options.get_key_value_metadata()), + out_sink_(std::move(sink)) +{ + if (options.get_metadata()) { + table_meta = std::make_unique(*options.get_metadata()); + } + init_state(); +} + +writer::impl::~impl() { close(); } + +void writer::impl::init_state() +{ + // Write file header + out_sink_->host_write(MAGIC, std::strlen(MAGIC)); +} + +void writer::impl::write(table_view const& input) +{ + CUDF_EXPECTS(not closed, "Data has already been flushed to out and closed"); + + if (not table_meta) { table_meta = make_table_meta(input); } + + // All kinds of memory allocation and data compressions/encoding are performed here. + // If any error occurs, such as out-of-memory exception, the internal state of the current writer + // is still intact. + // Note that `out_sink_` is intentionally passed by const reference to prevent accidentally + // writing anything to it. 
+ auto [streams, + comp_results, + strm_descs, + enc_data, + segmentation, + stripes, + orc_table, + compressed_data, + intermediate_stats, + stream_output] = [&] { + try { + return convert_table_to_orc_data(input, + *table_meta, + max_stripe_size, + row_index_stride, + enable_dictionary_, + compression_kind_, + compression_blocksize_, + stats_freq_, + single_write_mode, + *out_sink_, + stream); + } catch (...) { // catch any exception type + CUDF_LOG_ERROR( + "ORC writer encountered exception during processing. " + "No data has been written to the sink."); + throw; // this throws the same exception } + }(); - // Write stripes - std::vector> write_tasks; - for (size_t stripe_id = 0; stripe_id < stripes.size(); ++stripe_id) { - auto& stripe = stripes[stripe_id]; - - stripe.offset = out_sink_->bytes_written(); - - // Column (skippable) index streams appear at the start of the stripe - for (size_type stream_id = 0; stream_id < num_index_streams; ++stream_id) { - write_index_stream(stripe_id, - stream_id, - orc_table.columns, - segmentation, - enc_data.streams, - strm_descs, - comp_results, - intermediate_stats.rowgroup_blobs, - &stripe, - &streams); - } + // Compression/encoding were all successful. Now write the intermediate results. + write_orc_data_to_sink(streams, + comp_results, + strm_descs, + enc_data, + segmentation, + stripes, + orc_table, + compressed_data, + intermediate_stats, + stream_output.get()); + + // Update data into the footer. This needs to be called even when num_rows==0. 
+ add_table_to_footer_data(orc_table, stripes); +} - // Column data consisting one or more separate streams - for (auto const& strm_desc : strm_descs[stripe_id]) { - write_tasks.push_back(write_data_stream( - strm_desc, - enc_data.streams[strm_desc.column_id][segmentation.stripes[stripe_id].first], - static_cast(compressed_data.data()), - stream_output.get(), - &stripe, - &streams)); - } +void writer::impl::write_orc_data_to_sink(orc_streams& streams, + hostdevice_vector const& comp_results, + hostdevice_2dvector const& strm_descs, + encoded_data const& enc_data, + file_segmentation const& segmentation, + std::vector& stripes, + orc_table_view const& orc_table, + rmm::device_buffer const& compressed_data, + intermediate_statistics& intermediate_stats, + uint8_t* stream_output) +{ + if (orc_table.num_rows() == 0) { return; } - // Write stripefooter consisting of stream information - StripeFooter sf; - sf.streams = streams; - sf.columns.resize(orc_table.num_columns() + 1); - sf.columns[0].kind = DIRECT; - for (size_t i = 1; i < sf.columns.size(); ++i) { - sf.columns[i].kind = orc_table.column(i - 1).orc_encoding(); - sf.columns[i].dictionarySize = - (sf.columns[i].kind == DICTIONARY_V2) - ? orc_table.column(i - 1).host_stripe_dict(stripe_id)->num_strings - : 0; - if (orc_table.column(i - 1).orc_kind() == TIMESTAMP) { sf.writerTimezone = "UTC"; } - } - ProtobufWriter pbw((compression_kind_ != NONE) ? 
3 : 0); - pbw.write(sf); - stripe.footerLength = pbw.size(); - if (compression_kind_ != NONE) { - uint32_t uncomp_sf_len = (stripe.footerLength - 3) * 2 + 1; - pbw.buffer()[0] = static_cast(uncomp_sf_len >> 0); - pbw.buffer()[1] = static_cast(uncomp_sf_len >> 8); - pbw.buffer()[2] = static_cast(uncomp_sf_len >> 16); - } - out_sink_->host_write(pbw.data(), pbw.size()); + if (intermediate_stats.stripe_stat_chunks.size() > 0) { + persisted_stripe_statistics.persist( + orc_table.num_rows(), single_write_mode, intermediate_stats, stream); + } + + // Write stripes + std::vector> write_tasks; + for (size_t stripe_id = 0; stripe_id < stripes.size(); ++stripe_id) { + auto& stripe = stripes[stripe_id]; + + stripe.offset = out_sink_->bytes_written(); + + // Column (skippable) index streams appear at the start of the stripe + size_type const num_index_streams = (orc_table.num_columns() + 1); + for (size_type stream_id = 0; stream_id < num_index_streams; ++stream_id) { + write_index_stream(stripe_id, + stream_id, + orc_table.columns, + segmentation, + enc_data.streams, + strm_descs, + comp_results, + intermediate_stats.rowgroup_blobs, + &stripe, + &streams, + compression_kind_, + compression_blocksize_, + out_sink_); } - for (auto const& task : write_tasks) { - task.wait(); + + // Column data consisting one or more separate streams + for (auto const& strm_desc : strm_descs[stripe_id]) { + write_tasks.push_back(write_data_stream( + strm_desc, + enc_data.streams[strm_desc.column_id][segmentation.stripes[stripe_id].first], + static_cast(compressed_data.data()), + stream_output, + &stripe, + &streams, + compression_kind_, + out_sink_, + stream)); } + + // Write stripefooter consisting of stream information + StripeFooter sf; + sf.streams = streams; + sf.columns.resize(orc_table.num_columns() + 1); + sf.columns[0].kind = DIRECT; + for (size_t i = 1; i < sf.columns.size(); ++i) { + sf.columns[i].kind = orc_table.column(i - 1).orc_encoding(); + sf.columns[i].dictionarySize = + 
(sf.columns[i].kind == DICTIONARY_V2) + ? orc_table.column(i - 1).host_stripe_dict(stripe_id)->num_strings + : 0; + if (orc_table.column(i - 1).orc_kind() == TIMESTAMP) { sf.writerTimezone = "UTC"; } + } + ProtobufWriter pbw((compression_kind_ != NONE) ? 3 : 0); + pbw.write(sf); + stripe.footerLength = pbw.size(); + if (compression_kind_ != NONE) { + uint32_t uncomp_sf_len = (stripe.footerLength - 3) * 2 + 1; + pbw.buffer()[0] = static_cast(uncomp_sf_len >> 0); + pbw.buffer()[1] = static_cast(uncomp_sf_len >> 8); + pbw.buffer()[2] = static_cast(uncomp_sf_len >> 16); + } + out_sink_->host_write(pbw.data(), pbw.size()); + } + for (auto const& task : write_tasks) { + task.wait(); } +} + +void writer::impl::add_table_to_footer_data(orc_table_view const& orc_table, + std::vector& stripes) +{ if (ff.headerLength == 0) { // First call ff.headerLength = std::strlen(MAGIC); @@ -2365,7 +2634,7 @@ void writer::impl::write(table_view const& table) ff.stripes.insert(ff.stripes.end(), std::make_move_iterator(stripes.begin()), std::make_move_iterator(stripes.end())); - ff.numberOfRows += num_rows; + ff.numberOfRows += orc_table.num_rows(); } void writer::impl::close() @@ -2374,7 +2643,8 @@ void writer::impl::close() closed = true; PostScript ps; - auto const statistics = finish_statistic_blobs(ff.stripes.size(), persisted_stripe_statistics); + auto const statistics = + finish_statistic_blobs(ff.stripes.size(), persisted_stripe_statistics, stream); // File-level statistics if (not statistics.file_level.empty()) { @@ -2418,7 +2688,7 @@ void writer::impl::close() if (md.stripeStats.size() != 0) { ProtobufWriter pbw((compression_kind_ != NONE) ? 
3 : 0); pbw.write(md); - add_uncompressed_block_headers(pbw.buffer()); + add_uncompressed_block_headers(compression_kind_, compression_blocksize_, pbw.buffer()); ps.metadataLength = pbw.size(); out_sink_->host_write(pbw.data(), pbw.size()); } else { @@ -2426,7 +2696,7 @@ void writer::impl::close() } ProtobufWriter pbw((compression_kind_ != NONE) ? 3 : 0); pbw.write(ff); - add_uncompressed_block_headers(pbw.buffer()); + add_uncompressed_block_headers(compression_kind_, compression_blocksize_, pbw.buffer()); // Write postscript metadata ps.footerLength = pbw.size(); diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 691fba6bac2..27d74e45b46 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -176,6 +176,72 @@ struct stripe_size_limits { size_type rows; }; +/** + * @brief Statistics data stored between calls to write for chunked writes + * + */ +struct intermediate_statistics { + explicit intermediate_statistics(rmm::cuda_stream_view stream) : stripe_stat_chunks(0, stream) {} + + intermediate_statistics(std::vector rb, + rmm::device_uvector sc, + hostdevice_vector smg, + std::vector sdt, + std::vector sct) + : rowgroup_blobs(std::move(rb)), + stripe_stat_chunks(std::move(sc)), + stripe_stat_merge(std::move(smg)), + stats_dtypes(std::move(sdt)), + col_types(std::move(sct)) + { + } + + // blobs for the rowgroups. Not persisted + std::vector rowgroup_blobs; + + rmm::device_uvector stripe_stat_chunks; + hostdevice_vector stripe_stat_merge; + std::vector stats_dtypes; + std::vector col_types; +}; + +/** + * @brief used for chunked writes to persist data between calls to write. 
+ * + */ +struct persisted_statistics { + void clear() + { + stripe_stat_chunks.clear(); + stripe_stat_merge.clear(); + string_pools.clear(); + stats_dtypes.clear(); + col_types.clear(); + num_rows = 0; + } + + void persist(int num_table_rows, + bool single_write_mode, + intermediate_statistics& intermediate_stats, + rmm::cuda_stream_view stream); + + std::vector> stripe_stat_chunks; + std::vector> stripe_stat_merge; + std::vector> string_pools; + std::vector stats_dtypes; + std::vector col_types; + int num_rows = 0; +}; + +/** + * @brief Protobuf encoded statistics created at file close + * + */ +struct encoded_footer_statistics { + std::vector stripe_level; + std::vector file_level; +}; + /** * @brief Implementation for ORC writer */ @@ -227,7 +293,7 @@ class writer::impl { /** * @brief Writes a single subtable as part of a larger ORC file/table write. * - * @param[in] table The table information to be written + * @param table The table information to be written */ void write(table_view const& table); @@ -238,186 +304,41 @@ class writer::impl { private: /** - * @brief Builds up per-stripe dictionaries for string columns. - * - * @param orc_table Non-owning view of a cuDF table w/ ORC-related info - * @param stripe_bounds List of stripe boundaries - * @param dict List of dictionary chunks [rowgroup][column] - * @param dict_index List of dictionary indices - * @param dictionary_enabled Whether dictionary encoding is enabled for a given column - * @param stripe_dict List of stripe dictionaries - */ - void build_dictionaries(orc_table_view& orc_table, - host_span stripe_bounds, - hostdevice_2dvector const& dict, - host_span> dict_index, - host_span dictionary_enabled, - hostdevice_2dvector& stripe_dict); - - /** - * @brief Builds up per-column streams. 
- * - * @param[in,out] columns List of columns - * @param[in] segmentation stripe and rowgroup ranges - * @param[in] decimal_column_sizes Sizes of encoded decimal columns - * @return List of stream descriptors - */ - orc_streams create_streams(host_span columns, - file_segmentation const& segmentation, - std::map const& decimal_column_sizes); - - /** - * @brief Returns stripe information after compacting columns' individual data - * chunks into contiguous data streams. - * - * @param[in] num_index_streams Total number of index streams - * @param[in] segmentation stripe and rowgroup ranges - * @param[in,out] enc_streams List of encoder chunk streams [column][rowgroup] - * @param[in,out] strm_desc List of stream descriptors [stripe][data_stream] + * @brief Write the intermediate ORC data into the data sink. * - * @return The stripes' information - */ - std::vector gather_stripes( - size_t num_index_streams, - file_segmentation const& segmentation, - hostdevice_2dvector* enc_streams, - hostdevice_2dvector* strm_desc); - - /** - * @brief Statistics data stored between calls to write for chunked writes - * - */ - struct intermediate_statistics { - explicit intermediate_statistics(rmm::cuda_stream_view stream) - : stripe_stat_chunks(0, stream){}; - intermediate_statistics(std::vector rb, - rmm::device_uvector sc, - hostdevice_vector smg, - std::vector sdt, - std::vector sct) - : rowgroup_blobs(std::move(rb)), - stripe_stat_chunks(std::move(sc)), - stripe_stat_merge(std::move(smg)), - stats_dtypes(std::move(sdt)), - col_types(std::move(sct)){}; - - // blobs for the rowgroups. Not persisted - std::vector rowgroup_blobs; - - rmm::device_uvector stripe_stat_chunks; - hostdevice_vector stripe_stat_merge; - std::vector stats_dtypes; - std::vector col_types; - }; - - /** - * @brief used for chunked writes to persist data between calls to write. 
- * - */ - struct persisted_statistics { - void clear() - { - stripe_stat_chunks.clear(); - stripe_stat_merge.clear(); - string_pools.clear(); - stats_dtypes.clear(); - col_types.clear(); - num_rows = 0; - } - - void persist(int num_table_rows, - bool single_write_mode, - intermediate_statistics& intermediate_stats, - rmm::cuda_stream_view stream); - - std::vector> stripe_stat_chunks; - std::vector> stripe_stat_merge; - std::vector> string_pools; - std::vector stats_dtypes; - std::vector col_types; - int num_rows = 0; - }; - - /** - * @brief Protobuf encoded statistics created at file close - * - */ - struct encoded_footer_statistics { - std::vector stripe_level; - std::vector file_level; - }; - - /** - * @brief Returns column statistics in an intermediate format. - * - * @param statistics_freq Frequency of statistics to be included in the output file - * @param orc_table Table information to be written - * @param segmentation stripe and rowgroup ranges - * @return The statistic information - */ - intermediate_statistics gather_statistic_blobs(statistics_freq const statistics_freq, - orc_table_view const& orc_table, - file_segmentation const& segmentation); - - /** - * @brief Returns column statistics encoded in ORC protobuf format stored in the footer. - * - * @param num_stripes number of stripes in the data - * @param incoming_stats intermediate statistics returned from `gather_statistic_blobs` - * @return The encoded statistic blobs - */ - encoded_footer_statistics finish_statistic_blobs( - int num_stripes, writer::impl::persisted_statistics& incoming_stats); - - /** - * @brief Writes the specified column's row index stream. 
- * - * @param[in] stripe_id Stripe's identifier - * @param[in] stream_id Stream identifier (column id + 1) - * @param[in] columns List of columns - * @param[in] segmentation stripe and rowgroup ranges - * @param[in] enc_streams List of encoder chunk streams [column][rowgroup] - * @param[in] strm_desc List of stream descriptors - * @param[in] comp_out Output status for compressed streams - * @param[in] rg_stats row group level statistics - * @param[in,out] stripe Stream's parent stripe - * @param[in,out] streams List of all streams - */ - void write_index_stream(int32_t stripe_id, - int32_t stream_id, - host_span columns, - file_segmentation const& segmentation, - host_2dspan enc_streams, - host_2dspan strm_desc, - host_span comp_out, - std::vector const& rg_stats, - StripeInformation* stripe, - orc_streams* streams); - - /** - * @brief Write the specified column's data streams + * The intermediate data is generated from processing (compressing/encoding) an cuDF input table + * by `process_for_write` called in the `write()` function. 
* - * @param[in] strm_desc Stream's descriptor - * @param[in] enc_stream Chunk's streams - * @param[in] compressed_data Compressed stream data - * @param[in,out] stream_out Temporary host output buffer - * @param[in,out] stripe Stream's parent stripe - * @param[in,out] streams List of all streams - * @return An std::future that should be synchronized to ensure the writing is complete + * @param streams List of stream descriptors + * @param comp_results Status of data compression + * @param strm_descs List of stream descriptors + * @param enc_data ORC per-chunk streams of encoded data + * @param segmentation Description of how the ORC file is segmented into stripes and rowgroups + * @param stripes List of stripe description + * @param orc_table Non-owning view of a cuDF table that includes ORC-related information + * @param compressed_data Compressed stream data + * @param intermediate_stats Statistics data stored between calls to write + * @param stream_output Temporary host output buffer */ - std::future write_data_stream(gpu::StripeStream const& strm_desc, - gpu::encoder_chunk_streams const& enc_stream, - uint8_t const* compressed_data, - uint8_t* stream_out, - StripeInformation* stripe, - orc_streams* streams); + void write_orc_data_to_sink(orc_streams& streams, + hostdevice_vector const& comp_results, + hostdevice_2dvector const& strm_descs, + encoded_data const& enc_data, + file_segmentation const& segmentation, + std::vector& stripes, + orc_table_view const& orc_table, + rmm::device_buffer const& compressed_data, + intermediate_statistics& intermediate_stats, + uint8_t* stream_output); /** - * @brief Insert 3-byte uncompressed block headers in a byte vector + * @brief Add the processed table data into the internal file footer. 
* - * @param byte_vector Raw data (must include initial 3-byte header) + * @param orc_table Non-owning view of a cuDF table that includes ORC-related information + * @param stripes List of stripe description */ - void add_uncompressed_block_headers(std::vector& byte_vector); + void add_table_to_footer_data(orc_table_view const& orc_table, + std::vector& stripes); private: rmm::mr::device_memory_resource* _mr = nullptr; diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 6b5d4ba3640..e5c2b7aa842 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -468,10 +468,12 @@ void decode_page_headers(hostdevice_vector& chunks, host_span const> comp_in_view{comp_in.data() + start_pos, codec.num_pages}; - auto const d_comp_in = cudf::detail::make_device_uvector_async(comp_in_view, stream); + auto const d_comp_in = cudf::detail::make_device_uvector_async( + comp_in_view, stream, rmm::mr::get_current_device_resource()); host_span const> comp_out_view(comp_out.data() + start_pos, codec.num_pages); - auto const d_comp_out = cudf::detail::make_device_uvector_async(comp_out_view, stream); + auto const d_comp_out = cudf::detail::make_device_uvector_async( + comp_out_view, stream, rmm::mr::get_current_device_resource()); device_span d_comp_res_view(comp_res.data() + start_pos, codec.num_pages); switch (codec.compression_type) { @@ -523,8 +525,10 @@ void decode_page_headers(hostdevice_vector& chunks, // now copy the uncompressed V2 def and rep level data if (not copy_in.empty()) { - auto const d_copy_in = cudf::detail::make_device_uvector_async(copy_in, stream); - auto const d_copy_out = cudf::detail::make_device_uvector_async(copy_out, stream); + auto const d_copy_in = cudf::detail::make_device_uvector_async( + copy_in, stream, rmm::mr::get_current_device_resource()); + auto const d_copy_out = cudf::detail::make_device_uvector_async( + copy_out, stream, 
rmm::mr::get_current_device_resource()); gpu_copy_uncompressed_blocks(d_copy_in, d_copy_out, stream); stream.synchronize(); @@ -1489,8 +1493,8 @@ void reader::impl::preprocess_pages(size_t skip_rows, // Build index for string dictionaries since they can't be indexed // directly due to variable-sized elements _chunk_itm_data.str_dict_index = - cudf::detail::make_zeroed_device_uvector_async(total_str_dict_indexes, - _stream); + cudf::detail::make_zeroed_device_uvector_async( + total_str_dict_indexes, _stream, rmm::mr::get_current_device_resource()); // Update chunks with pointers to string dict indices for (size_t c = 0, page_count = 0, str_ofs = 0; c < chunks.size(); c++) { diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 5f407b5e774..e6e14908f36 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -853,7 +853,8 @@ parquet_column_view::parquet_column_view(schema_tree_node const& schema_node, _nullability = std::vector(r_nullability.crbegin(), r_nullability.crend()); // TODO(cp): Explore doing this for all columns in a single go outside this ctor. Maybe using // hostdevice_vector. Currently this involves a cudaMemcpyAsync for each column. 
- _d_nullability = cudf::detail::make_device_uvector_async(_nullability, stream); + _d_nullability = cudf::detail::make_device_uvector_async( + _nullability, stream, rmm::mr::get_current_device_resource()); _is_list = (_max_rep_level > 0); @@ -928,7 +929,8 @@ void writer::impl::init_row_group_fragments( device_span part_frag_offset, uint32_t fragment_size) { - auto d_partitions = cudf::detail::make_device_uvector_async(partitions, stream); + auto d_partitions = cudf::detail::make_device_uvector_async( + partitions, stream, rmm::mr::get_current_device_resource()); gpu::InitRowGroupFragments(frag, col_desc, d_partitions, part_frag_offset, fragment_size, stream); frag.device_to_host(stream, true); } @@ -936,7 +938,8 @@ void writer::impl::init_row_group_fragments( void writer::impl::calculate_page_fragments(device_span frag, host_span frag_sizes) { - auto d_frag_sz = cudf::detail::make_device_uvector_async(frag_sizes, stream); + auto d_frag_sz = cudf::detail::make_device_uvector_async( + frag_sizes, stream, rmm::mr::get_current_device_resource()); gpu::CalculatePageFragments(frag, d_frag_sz, stream); } @@ -1507,7 +1510,8 @@ void writer::impl::write(table_view const& table, std::vector co num_frag_in_part.begin(), num_frag_in_part.end(), std::back_inserter(part_frag_offset), 0); part_frag_offset.push_back(part_frag_offset.back() + num_frag_in_part.back()); - auto d_part_frag_offset = cudf::detail::make_device_uvector_async(part_frag_offset, stream); + auto d_part_frag_offset = cudf::detail::make_device_uvector_async( + part_frag_offset, stream, rmm::mr::get_current_device_resource()); cudf::detail::hostdevice_2dvector row_group_fragments( num_columns, num_fragments, stream); diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index a0ba3e3ee35..afa260e215a 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -379,9 +379,11 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source // must 
be at least 32 when using warp-reduce on partials // must be at least 1 more than max possible concurrent tiles // best when at least 32 more than max possible concurrent tiles, due to rolling `invalid`s - auto num_tile_states = std::max(32, TILES_PER_CHUNK * concurrency + 32); - auto tile_multistates = scan_tile_state(num_tile_states, stream); - auto tile_offsets = scan_tile_state(num_tile_states, stream); + auto num_tile_states = std::max(32, TILES_PER_CHUNK * concurrency + 32); + auto tile_multistates = + scan_tile_state(num_tile_states, stream, rmm::mr::get_current_device_resource()); + auto tile_offsets = + scan_tile_state(num_tile_states, stream, rmm::mr::get_current_device_resource()); multibyte_split_init_kernel<< #include +#include + namespace cudf { namespace io { namespace detail { @@ -43,7 +45,8 @@ void column_buffer::create(size_type _size, // make_zeroed_device_uvector_async here and instead let it use the // default rmm memory resource. _strings = std::make_unique>( - cudf::detail::make_zeroed_device_uvector_async(size, stream)); + cudf::detail::make_zeroed_device_uvector_async( + size, stream, rmm::mr::get_current_device_resource())); break; // list columns store a buffer of int32's as offsets to represent diff --git a/cpp/src/io/utilities/parsing_utils.cu b/cpp/src/io/utilities/parsing_utils.cu index a03789464cc..5c5cbd1c01d 100644 --- a/cpp/src/io/utilities/parsing_utils.cu +++ b/cpp/src/io/utilities/parsing_utils.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -126,7 +126,8 @@ cudf::size_type find_all_from_set(device_span data, cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, count_and_set_positions)); const int grid_size = divCeil(data.size(), (size_t)block_size); - auto d_count = cudf::detail::make_zeroed_device_uvector_async(1, stream); + auto d_count = cudf::detail::make_zeroed_device_uvector_async( + 1, stream, rmm::mr::get_current_device_resource()); for (char key : keys) { count_and_set_positions<<>>( data.data(), data.size(), result_offset, key, d_count.data(), positions); @@ -143,7 +144,8 @@ cudf::size_type find_all_from_set(host_span data, rmm::cuda_stream_view stream) { rmm::device_buffer d_chunk(std::min(max_chunk_bytes, data.size()), stream); - auto d_count = cudf::detail::make_zeroed_device_uvector_async(1, stream); + auto d_count = cudf::detail::make_zeroed_device_uvector_async( + 1, stream, rmm::mr::get_current_device_resource()); int block_size = 0; // suggested thread count to use int min_grid_size = 0; // minimum block count required diff --git a/cpp/src/io/utilities/trie.cu b/cpp/src/io/utilities/trie.cu index bf03d6a6a89..e2ace7258f7 100644 --- a/cpp/src/io/utilities/trie.cu +++ b/cpp/src/io/utilities/trie.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -101,7 +101,8 @@ rmm::device_uvector create_serialized_trie(const std::vector> left_semi_anti_join( // Previously, the gather map was generated directly without this array but by calling to // `map.contains` inside the `thrust::copy_if` kernel. However, that led to increasing register // usage and reducing performance, as reported here: https://github.com/rapidsai/cudf/pull/10511. 
- auto const flagged = - cudf::detail::contains(right_keys, left_keys, compare_nulls, nan_equality::ALL_EQUAL, stream); + auto const flagged = cudf::detail::contains(right_keys, + left_keys, + compare_nulls, + nan_equality::ALL_EQUAL, + stream, + rmm::mr::get_current_device_resource()); auto const left_num_rows = left_keys.num_rows(); auto gather_map = diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu index 8b006548391..b890a0c82a2 100644 --- a/cpp/src/lists/combine/concatenate_rows.cu +++ b/cpp/src/lists/combine/concatenate_rows.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -246,7 +246,8 @@ std::unique_ptr concatenate_rows(table_view const& input, auto const row_index = i % num_rows; return row_null_counts[row_index] != num_columns; }, - stream); + stream, + rmm::mr::get_current_device_resource()); } // NULLIFY_OUTPUT_ROW. 
Output row is nullfied if any input row is null return cudf::detail::valid_if( @@ -257,7 +258,8 @@ std::unique_ptr concatenate_rows(table_view const& input, auto const row_index = i % num_rows; return row_null_counts[row_index] == 0; }, - stream); + stream, + rmm::mr::get_current_device_resource()); }(); concat->set_null_mask(std::move(null_mask), null_count); } diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu index c96a21df905..5136cc8cd37 100644 --- a/cpp/src/lists/dremel.cu +++ b/cpp/src/lists/dremel.cu @@ -266,7 +266,8 @@ dremel_data get_encoding(column_view h_col, max_vals_size += column_ends[l] - column_offsets[l]; } - auto d_nullability = cudf::detail::make_device_uvector_async(nullability, stream); + auto d_nullability = cudf::detail::make_device_uvector_async( + nullability, stream, rmm::mr::get_current_device_resource()); rmm::device_uvector rep_level(max_vals_size, stream); rmm::device_uvector def_level(max_vals_size, stream); diff --git a/cpp/src/lists/extract.cu b/cpp/src/lists/extract.cu index bff63871e29..5d4a20d1cb8 100644 --- a/cpp/src/lists/extract.cu +++ b/cpp/src/lists/extract.cu @@ -101,8 +101,10 @@ std::unique_ptr make_index_child(size_type index, */ std::unique_ptr make_index_offsets(size_type num_lists, rmm::cuda_stream_view stream) { - return cudf::detail::sequence( - num_lists + 1, cudf::scalar_type_t(0, true, stream), stream); + return cudf::detail::sequence(num_lists + 1, + cudf::scalar_type_t(0, true, stream), + stream, + rmm::mr::get_current_device_resource()); } } // namespace diff --git a/cpp/src/lists/set_operations.cu b/cpp/src/lists/set_operations.cu index 8df99153d74..c05ef2fd644 100644 --- a/cpp/src/lists/set_operations.cu +++ b/cpp/src/lists/set_operations.cu @@ -83,8 +83,8 @@ std::unique_ptr have_overlap(lists_column_view const& lhs, auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; // Check existence for each row of the rhs_table in lhs_table. 
- auto const contained = - cudf::detail::contains(lhs_table, rhs_table, nulls_equal, nans_equal, stream); + auto const contained = cudf::detail::contains( + lhs_table, rhs_table, nulls_equal, nans_equal, stream, rmm::mr::get_current_device_resource()); auto const num_rows = lhs.size(); @@ -151,8 +151,8 @@ std::unique_ptr intersect_distinct(lists_column_view const& lhs, auto const lhs_table = table_view{{lhs_labels->view(), lhs_child}}; auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; - auto const contained = - cudf::detail::contains(lhs_table, rhs_table, nulls_equal, nans_equal, stream); + auto const contained = cudf::detail::contains( + lhs_table, rhs_table, nulls_equal, nans_equal, stream, rmm::mr::get_current_device_resource()); auto const intersect_table = cudf::detail::copy_if( rhs_table, @@ -195,8 +195,11 @@ std::unique_ptr union_distinct(lists_column_view const& lhs, // Algorithm: `return distinct(concatenate_rows(lhs, rhs))`. - auto const union_col = lists::detail::concatenate_rows( - table_view{{lhs.parent(), rhs.parent()}}, concatenate_null_policy::NULLIFY_OUTPUT_ROW, stream); + auto const union_col = + lists::detail::concatenate_rows(table_view{{lhs.parent(), rhs.parent()}}, + concatenate_null_policy::NULLIFY_OUTPUT_ROW, + stream, + rmm::mr::get_current_device_resource()); return cudf::lists::detail::distinct( lists_column_view{union_col->view()}, nulls_equal, nans_equal, stream, mr); @@ -228,8 +231,8 @@ std::unique_ptr difference_distinct(lists_column_view const& lhs, auto const lhs_table = table_view{{lhs_labels->view(), lhs_child}}; auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; - auto const contained = - cudf::detail::contains(rhs_table, lhs_table, nulls_equal, nans_equal, stream); + auto const contained = cudf::detail::contains( + rhs_table, lhs_table, nulls_equal, nans_equal, stream, rmm::mr::get_current_device_resource()); auto const difference_table = cudf::detail::copy_if( lhs_table, diff --git 
a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index ec0cc5af44d..83ee6793efb 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -187,10 +187,12 @@ index_vector generate_merged_indices(table_view const& left_table, auto lhs_device_view = table_device_view::create(left_table, stream); auto rhs_device_view = table_device_view::create(right_table, stream); - auto d_column_order = cudf::detail::make_device_uvector_async(column_order, stream); + auto d_column_order = cudf::detail::make_device_uvector_async( + column_order, stream, rmm::mr::get_current_device_resource()); if (nullable) { - auto d_null_precedence = cudf::detail::make_device_uvector_async(null_precedence, stream); + auto d_null_precedence = cudf::detail::make_device_uvector_async( + null_precedence, stream, rmm::mr::get_current_device_resource()); auto ineq_op = detail::row_lexicographic_tagged_comparator( *lhs_device_view, *rhs_device_view, d_column_order.data(), d_null_precedence.data()); diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 54dffc85aca..13f46195392 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -493,11 +493,11 @@ std::pair, std::vector> hash_partition_table( rmm::device_uvector(grid_size * num_partitions, stream); // Holds the total number of rows in each partition - auto global_partition_sizes = - cudf::detail::make_zeroed_device_uvector_async(num_partitions, stream); + auto global_partition_sizes = cudf::detail::make_zeroed_device_uvector_async( + num_partitions, stream, rmm::mr::get_current_device_resource()); - auto row_partition_offset = - cudf::detail::make_zeroed_device_uvector_async(num_rows, stream); + auto row_partition_offset = cudf::detail::make_zeroed_device_uvector_async( + num_rows, stream, rmm::mr::get_current_device_resource()); auto const row_hasher = experimental::row::hash::row_hasher(table_to_hash, stream); auto const hasher = diff --git 
a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 785aa839956..4a9c2e3a902 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -85,7 +85,8 @@ struct quantile_functor { auto d_input = column_device_view::create(input, stream); auto d_output = mutable_column_device_view::create(output->mutable_view(), stream); - auto q_device = cudf::detail::make_device_uvector_sync(q, stream); + auto q_device = + cudf::detail::make_device_uvector_sync(q, stream, rmm::mr::get_current_device_resource()); if (!cudf::is_dictionary(input.type())) { auto sorted_data = diff --git a/cpp/src/quantiles/quantiles.cu b/cpp/src/quantiles/quantiles.cu index e71508bab09..c6760e77403 100644 --- a/cpp/src/quantiles/quantiles.cu +++ b/cpp/src/quantiles/quantiles.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,7 +49,8 @@ std::unique_ptr
quantiles(table_view const& input, return detail::select_quantile(selector, size, q, interp); }; - auto const q_device = cudf::detail::make_device_uvector_async(q, stream); + auto const q_device = + cudf::detail::make_device_uvector_async(q, stream, rmm::mr::get_current_device_resource()); auto quantile_idx_iter = thrust::make_transform_iterator(q_device.begin(), quantile_idx_lookup); diff --git a/cpp/src/reductions/struct_minmax_util.cuh b/cpp/src/reductions/struct_minmax_util.cuh index 796d10a3477..b2106066ff2 100644 --- a/cpp/src/reductions/struct_minmax_util.cuh +++ b/cpp/src/reductions/struct_minmax_util.cuh @@ -118,7 +118,8 @@ class comparison_binop_generator { // level structs column (which is stored at the first position in the null_orders array) to // achieve this purpose. if (input.has_nulls()) { null_orders.front() = cudf::null_order::AFTER; } - null_orders_dvec = cudf::detail::make_device_uvector_async(null_orders, stream); + null_orders_dvec = cudf::detail::make_device_uvector_async( + null_orders, stream, rmm::mr::get_current_device_resource()); } // else: Don't need to generate nulls order to copy to device memory if we have all null orders // are BEFORE (that happens when we have is_min_op == false). diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index 2b4b6373c35..b208e7cd980 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -467,8 +467,10 @@ get_null_bounds_for_orderby_column(column_view const& orderby_column, cudf::device_span(group_offsets.data(), num_groups); // When there are no nulls, just copy the input group offsets to the output. 
- return std::make_tuple(cudf::detail::make_device_uvector_async(group_offsets_span, stream), - cudf::detail::make_device_uvector_async(group_offsets_span, stream)); + return std::make_tuple(cudf::detail::make_device_uvector_async( + group_offsets_span, stream, rmm::mr::get_current_device_resource()), + cudf::detail::make_device_uvector_async( + group_offsets_span, stream, rmm::mr::get_current_device_resource())); } } diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index 177fcab03f9..8d273eff4bb 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -160,7 +160,8 @@ struct format_compiler { } // copy format_items to device memory - d_items = cudf::detail::make_device_uvector_async(items, stream); + d_items = cudf::detail::make_device_uvector_async( + items, stream, rmm::mr::get_current_device_resource()); } device_span format_items() { return device_span(d_items); } diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index e3ee59c631f..92b71d128e1 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -85,7 +85,8 @@ auto create_strings_device_views(host_span views, rmm::cuda_s return static_cast(col.size()); }); thrust::inclusive_scan(thrust::host, offset_it, input_offsets.end(), offset_it); - auto d_input_offsets = cudf::detail::make_device_uvector_async(input_offsets, stream); + auto d_input_offsets = cudf::detail::make_device_uvector_async( + input_offsets, stream, rmm::mr::get_current_device_resource()); auto const output_size = input_offsets.back(); // Compute the partition offsets and size of chars column diff --git a/cpp/src/strings/filter_chars.cu b/cpp/src/strings/filter_chars.cu index 8a6a4d44b1e..3e38b5fa775 100644 --- a/cpp/src/strings/filter_chars.cu +++ b/cpp/src/strings/filter_chars.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -132,7 +132,8 @@ std::unique_ptr filter_characters( characters_to_filter.begin(), characters_to_filter.end(), htable.begin(), [](auto entry) { return char_range{entry.first, entry.second}; }); - rmm::device_uvector table = cudf::detail::make_device_uvector_async(htable, stream); + rmm::device_uvector table = + cudf::detail::make_device_uvector_async(htable, stream, rmm::mr::get_current_device_resource()); auto d_strings = column_device_view::create(strings.parent(), stream); diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index c6ea47ec0f3..128d450cbe8 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -673,11 +673,10 @@ std::pair>, int> build_comma } while (op.type != path_operator_type::END); auto const is_empty = h_operators.size() == 1 && h_operators[0].type == path_operator_type::END; - return is_empty - ? std::pair(thrust::nullopt, 0) - : std::pair( - thrust::make_optional(cudf::detail::make_device_uvector_sync(h_operators, stream)), - max_stack_depth); + return is_empty ? std::pair(thrust::nullopt, 0) + : std::pair(thrust::make_optional(cudf::detail::make_device_uvector_sync( + h_operators, stream, rmm::mr::get_current_device_resource())), + max_stack_depth); } #define PARSE_TRY(_x) \ diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu index 383337c9088..d25af8c8931 100644 --- a/cpp/src/strings/replace/backref_re.cu +++ b/cpp/src/strings/replace/backref_re.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -118,9 +118,9 @@ std::unique_ptr replace_with_backrefs(strings_column_view const& input, // parse the repl string for back-ref indicators auto group_count = std::min(99, d_prog->group_counts()); // group count should NOT exceed 99 - auto const parse_result = parse_backrefs(replacement, group_count); - rmm::device_uvector backrefs = - cudf::detail::make_device_uvector_async(parse_result.second, stream); + auto const parse_result = parse_backrefs(replacement, group_count); + rmm::device_uvector backrefs = cudf::detail::make_device_uvector_async( + parse_result.second, stream, rmm::mr::get_current_device_resource()); string_scalar repl_scalar(parse_result.first, true, stream); string_view const d_repl_template = repl_scalar.value(); diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index f3bc7fc82ec..50b2dc27671 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -169,7 +169,8 @@ std::unique_ptr replace_re(strings_column_view const& input, prog->set_working_memory(d_buffer, size); return *prog; }); - auto d_progs = cudf::detail::make_device_uvector_async(progs, stream); + auto d_progs = + cudf::detail::make_device_uvector_async(progs, stream, rmm::mr::get_current_device_resource()); auto const d_strings = column_device_view::create(input.parent(), stream); auto const d_repls = column_device_view::create(replacements.parent(), stream); diff --git a/cpp/src/strings/translate.cu b/cpp/src/strings/translate.cu index 7f134059ded..e7b637c52f3 100644 --- a/cpp/src/strings/translate.cu +++ b/cpp/src/strings/translate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -106,7 +106,7 @@ std::unique_ptr translate(strings_column_view const& strings, }); // copy translate table to device memory rmm::device_uvector table = - cudf::detail::make_device_uvector_async(htable, stream); + cudf::detail::make_device_uvector_async(htable, stream, rmm::mr::get_current_device_resource()); auto d_strings = column_device_view::create(strings.parent(), stream); diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 8a63a6f6411..0c6747f2d12 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -268,7 +268,8 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) dremel_device_views.push_back(dremel_data.back()); } } - auto d_dremel_device_views = detail::make_device_uvector_sync(dremel_device_views, stream); + auto d_dremel_device_views = detail::make_device_uvector_sync( + dremel_device_views, stream, rmm::mr::get_current_device_resource()); return std::make_tuple(std::move(dremel_data), std::move(d_dremel_device_views)); } @@ -333,7 +334,7 @@ void check_shape_compatibility(table_view const& lhs, table_view const& rhs) CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Cannot compare tables with different number of columns"); for (size_type i = 0; i < lhs.num_columns(); ++i) { - CUDF_EXPECTS(column_types_equal(lhs.column(i), rhs.column(i)), + CUDF_EXPECTS(column_types_equivalent(lhs.column(i), rhs.column(i)), "Cannot compare tables with different column types"); } } @@ -355,10 +356,13 @@ std::shared_ptr preprocessed_table::create( auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = decompose_structs(t, column_order, null_precedence); - auto d_t = table_device_view::create(verticalized_lhs, stream); - auto d_column_order = detail::make_device_uvector_async(new_column_order, stream); - auto 
d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); - auto d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); + auto d_t = table_device_view::create(verticalized_lhs, stream); + auto d_column_order = detail::make_device_uvector_async( + new_column_order, stream, rmm::mr::get_current_device_resource()); + auto d_null_precedence = detail::make_device_uvector_async( + new_null_precedence, stream, rmm::mr::get_current_device_resource()); + auto d_depths = detail::make_device_uvector_async( + verticalized_col_depths, stream, rmm::mr::get_current_device_resource()); if (detail::has_nested_columns(t)) { auto [dremel_data, d_dremel_device_view] = list_lex_preprocess(verticalized_lhs, stream); diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index 634fdd70831..b982a010e6e 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -500,7 +500,8 @@ std::unique_ptr row_bit_count(table_view const& t, auto d_cols = contiguous_copy_column_device_views(cols, stream); // move stack info to the gpu - rmm::device_uvector d_info = cudf::detail::make_device_uvector_async(info, stream); + rmm::device_uvector d_info = + cudf::detail::make_device_uvector_async(info, stream, rmm::mr::get_current_device_resource()); // each thread needs to maintain a stack of row spans of size max_branch_depth. 
we will use // shared memory to do this rather than allocating a potentially gigantic temporary buffer diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index d297148de45..d6f5c65593a 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,4 +69,10 @@ bool column_types_equal(column_view const& lhs, column_view const& rhs) return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); } +bool column_types_equivalent(column_view const& lhs, column_view const& rhs) +{ + if (lhs.type().id() != rhs.type().id()) { return false; } + return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); +} + } // namespace cudf diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index 00ec7bd218b..7805828ad55 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -87,7 +87,7 @@ rmm::device_uvector make_mask(cudf::size_type size, bool fil { if (!fill_valid) { return cudf::detail::make_zeroed_device_uvector_sync( - size, cudf::get_default_stream()); + size, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); } else { auto ret = rmm::device_uvector(size, cudf::get_default_stream()); CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), diff --git a/cpp/tests/bitmask/valid_if_tests.cu b/cpp/tests/bitmask/valid_if_tests.cu index cdc453be8e4..cb086cda179 100644 --- a/cpp/tests/bitmask/valid_if_tests.cu +++ b/cpp/tests/bitmask/valid_if_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -43,7 +43,8 @@ TEST_F(ValidIfTest, EmptyRange) auto actual = cudf::detail::valid_if(thrust::make_counting_iterator(0), thrust::make_counting_iterator(0), odds_valid{}, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); auto const& buffer = actual.first; EXPECT_EQ(0u, buffer.size()); EXPECT_EQ(nullptr, buffer.data()); @@ -55,7 +56,8 @@ TEST_F(ValidIfTest, InvalidRange) EXPECT_THROW(cudf::detail::valid_if(thrust::make_counting_iterator(1), thrust::make_counting_iterator(0), odds_valid{}, - cudf::get_default_stream()), + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()), cudf::logic_error); } @@ -66,7 +68,8 @@ TEST_F(ValidIfTest, OddsValid) auto actual = cudf::detail::valid_if(thrust::make_counting_iterator(0), thrust::make_counting_iterator(10000), odds_valid{}, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), expected.size()); EXPECT_EQ(5000, actual.second); } @@ -78,7 +81,8 @@ TEST_F(ValidIfTest, AllValid) auto actual = cudf::detail::valid_if(thrust::make_counting_iterator(0), thrust::make_counting_iterator(10000), all_valid{}, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), expected.size()); EXPECT_EQ(0, actual.second); } @@ -90,7 +94,8 @@ TEST_F(ValidIfTest, AllNull) auto actual = cudf::detail::valid_if(thrust::make_counting_iterator(0), thrust::make_counting_iterator(10000), all_null{}, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), expected.size()); EXPECT_EQ(10000, actual.second); } diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index 1c51eab1f94..a35bbab0176 100644 --- 
a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -812,7 +812,7 @@ TYPED_TEST(StructGetValueTestTyped, mixed_types_valid) // col fields cudf::test::fixed_width_column_wrapper f1{1, 2, 3}; cudf::test::strings_column_wrapper f2{"aa", "bbb", "c"}; - cudf::test::dictionary_column_wrapper f3{42, 42, 24}; + cudf::test::dictionary_column_wrapper f3{42, 42, 24}; LCW f4{LCW{8, 8, 8}, LCW{9, 9}, LCW{10}}; cudf::test::structs_column_wrapper col{f1, f2, f3, f4}; @@ -824,7 +824,7 @@ TYPED_TEST(StructGetValueTestTyped, mixed_types_valid) // expect fields cudf::test::fixed_width_column_wrapper ef1{3}; cudf::test::strings_column_wrapper ef2{"c"}; - cudf::test::dictionary_column_wrapper ef3{24}; + cudf::test::dictionary_column_wrapper ef3{24}; LCW ef4{LCW{10}}; cudf::table_view expect_data{{ef1, ef2, ef3, ef4}}; diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu index 43874b84114..5694513647b 100644 --- a/cpp/tests/device_atomics/device_atomics_test.cu +++ b/cpp/tests/device_atomics/device_atomics_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -141,9 +141,10 @@ struct AtomicsTest : public cudf::test::BaseFixture { result_init[4] = result_init[1]; result_init[5] = result_init[2]; - auto dev_data = cudf::detail::make_device_uvector_sync(v, cudf::get_default_stream()); - auto dev_result = - cudf::detail::make_device_uvector_sync(result_init, cudf::get_default_stream()); + auto dev_data = cudf::detail::make_device_uvector_sync( + v, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto dev_result = cudf::detail::make_device_uvector_sync( + result_init, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); if (block_size == 0) { block_size = vec_size; } @@ -294,8 +295,10 @@ struct AtomicsBitwiseOpTest : public cudf::test::BaseFixture { exact[2] = std::accumulate( v.begin(), v.end(), identity[2], [](T acc, uint64_t i) { return acc ^ T(i); }); - auto dev_result = cudf::detail::make_device_uvector_sync(identity, cudf::get_default_stream()); - auto dev_data = cudf::detail::make_device_uvector_sync(v, cudf::get_default_stream()); + auto dev_result = cudf::detail::make_device_uvector_sync( + identity, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto dev_data = cudf::detail::make_device_uvector_sync( + v, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); if (block_size == 0) { block_size = vec_size; } diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu index ab9970dc370..9631e433a5e 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cu +++ b/cpp/tests/fixed_point/fixed_point_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -83,7 +83,8 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) using decimal32 = fixed_point; std::vector vec1(1000, decimal32{1, scale_type{-2}}); - auto d_vec1 = cudf::detail::make_device_uvector_sync(vec1, cudf::get_default_stream()); + auto d_vec1 = cudf::detail::make_device_uvector_sync( + vec1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const sum = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()), std::cbegin(d_vec1), @@ -96,7 +97,8 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) // change inclusive scan to run on device (avoid copying to host) thrust::inclusive_scan(std::cbegin(vec1), std::cend(vec1), std::begin(vec1)); - d_vec1 = cudf::detail::make_device_uvector_sync(vec1, cudf::get_default_stream()); + d_vec1 = cudf::detail::make_device_uvector_sync( + vec1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); std::vector vec2(1000); std::iota(std::begin(vec2), std::end(vec2), 1); diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index d2b159fc208..3f4d5bcf20f 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -264,7 +264,7 @@ TEST_F(FromArrowTest, DictionaryIndicesType) auto arrow_table = arrow::Table::Make(schema, {array1, array2, array3}); std::vector> columns; - auto col = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1}); + auto col = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1}); columns.emplace_back(std::move(cudf::dictionary::encode(col))); columns.emplace_back(std::move(cudf::dictionary::encode(col))); columns.emplace_back(std::move(cudf::dictionary::encode(col))); diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp index 94a7c8edcf9..0ae0360c4d9 100644 --- a/cpp/tests/io/json_tree.cpp +++ b/cpp/tests/io/json_tree.cpp @@ -586,8 +586,8 @@ TEST_F(JsonTest, TreeRepresentation) cudf::io::json_reader_options const options{}; // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = - cudf::io::json::detail::get_token_stream(d_input, options, stream); + const auto [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); // Get the JSON's tree representation auto gpu_tree = cuio_json::detail::get_tree_representation( @@ -673,8 +673,8 @@ TEST_F(JsonTest, TreeRepresentation2) cudf::io::json_reader_options const options{}; // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = - cudf::io::json::detail::get_token_stream(d_input, options, stream); + const auto [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); // Get the JSON's tree representation auto gpu_tree = cuio_json::detail::get_tree_representation( @@ -747,8 +747,8 @@ TEST_F(JsonTest, TreeRepresentation3) options.enable_lines(true); // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = - cudf::io::json::detail::get_token_stream(d_input, options, stream); + const auto 
[tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); // Get the JSON's tree representation auto gpu_tree = cuio_json::detail::get_tree_representation( @@ -772,8 +772,8 @@ TEST_F(JsonTest, TreeRepresentationError) cudf::io::json_reader_options const options{}; // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = - cudf::io::json::detail::get_token_stream(d_input, options, stream); + const auto [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); // Get the JSON's tree representation // This JSON is invalid and will raise an exception. @@ -855,8 +855,8 @@ TEST_P(JsonTreeTraversalTest, CPUvsGPUTraversal) static_cast(d_scalar.size())}; // Parse the JSON and get the token stream - const auto [tokens_gpu, token_indices_gpu] = - cudf::io::json::detail::get_token_stream(d_input, options, stream); + const auto [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); // host tree generation auto cpu_tree = get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream); bool const is_array_of_arrays = diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp index 3c01bd4de25..5b797a00ca1 100644 --- a/cpp/tests/io/nested_json_test.cpp +++ b/cpp/tests/io/nested_json_test.cpp @@ -262,8 +262,8 @@ TEST_F(JsonTest, TokenStream) cudf::device_span{d_scalar.data(), static_cast(d_scalar.size())}; // Parse the JSON and get the token stream - auto [d_tokens_gpu, d_token_indices_gpu] = - cuio_json::detail::get_token_stream(d_input, default_options, stream); + auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream( + d_input, default_options, stream, rmm::mr::get_current_device_resource()); // Copy back the number of tokens 
that were written thrust::host_vector const tokens_gpu = cudf::detail::make_host_vector_async(d_tokens_gpu, stream); @@ -398,8 +398,8 @@ TEST_F(JsonTest, TokenStream2) cudf::device_span{d_scalar.data(), static_cast(d_scalar.size())}; // Parse the JSON and get the token stream - auto [d_tokens_gpu, d_token_indices_gpu] = - cuio_json::detail::get_token_stream(d_input, default_options, stream); + auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream( + d_input, default_options, stream, rmm::mr::get_current_device_resource()); // Copy back the number of tokens that were written thrust::host_vector const tokens_gpu = cudf::detail::make_host_vector_async(d_tokens_gpu, stream); @@ -470,7 +470,9 @@ TEST_P(JsonParserTest, ExtractColumn) std::string const input = R"( [{"a":0.0, "b":1.0}, {"a":0.1, "b":1.1}, {"a":0.2, "b":1.2}] )"; auto const d_input = cudf::detail::make_device_uvector_async( - cudf::host_span{input.c_str(), input.size()}, stream); + cudf::host_span{input.c_str(), input.size()}, + stream, + rmm::mr::get_current_device_resource()); // Get the JSON's tree representation auto const cudf_table = json_parser(d_input, default_options, stream, mr); @@ -508,7 +510,9 @@ TEST_P(JsonParserTest, UTF_JSON) {"a":1,"b":null,"c":null}, {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "Kaniyan"}}])"; auto const d_ascii_pass = cudf::detail::make_device_uvector_sync( - cudf::host_span{ascii_pass.c_str(), ascii_pass.size()}, stream); + cudf::host_span{ascii_pass.c_str(), ascii_pass.size()}, + stream, + rmm::mr::get_current_device_resource()); CUDF_EXPECT_NO_THROW(json_parser(d_ascii_pass, default_options, stream, mr)); @@ -521,7 +525,9 @@ TEST_P(JsonParserTest, UTF_JSON) {"a":1,"b":null,"c":null}, {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "filip Ê’akotÉ›"}}])"; auto const d_utf_failed = cudf::detail::make_device_uvector_sync( - cudf::host_span{utf_failed.c_str(), utf_failed.size()}, stream); + 
cudf::host_span{utf_failed.c_str(), utf_failed.size()}, + stream, + rmm::mr::get_current_device_resource()); CUDF_EXPECT_NO_THROW(json_parser(d_utf_failed, default_options, stream, mr)); // utf-8 string that passes parsing. @@ -534,7 +540,9 @@ TEST_P(JsonParserTest, UTF_JSON) {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "Kaniyan"}}, {"a":1,"b":NaN,"c":[null, null], "d": {"year": 2, "author": "filip Ê’akotÉ›"}}])"; auto const d_utf_pass = cudf::detail::make_device_uvector_sync( - cudf::host_span{utf_pass.c_str(), utf_pass.size()}, stream); + cudf::host_span{utf_pass.c_str(), utf_pass.size()}, + stream, + rmm::mr::get_current_device_resource()); CUDF_EXPECT_NO_THROW(json_parser(d_utf_pass, default_options, stream, mr)); } @@ -555,7 +563,9 @@ TEST_P(JsonParserTest, ExtractColumnWithQuotes) std::string const input = R"( [{"a":"0.0", "b":1.0}, {"b":1.1}, {"b":2.1, "a":"2.0"}] )"; auto const d_input = cudf::detail::make_device_uvector_async( - cudf::host_span{input.c_str(), input.size()}, stream); + cudf::host_span{input.c_str(), input.size()}, + stream, + rmm::mr::get_current_device_resource()); // Get the JSON's tree representation auto const cudf_table = json_parser(d_input, options, stream, mr); @@ -599,14 +609,18 @@ TEST_P(JsonParserTest, ExpectFailMixStructAndList) // libcudf does not currently support a mix of lists and structs. 
for (auto const& input : inputs_fail) { auto const d_input = cudf::detail::make_device_uvector_async( - cudf::host_span{input.c_str(), input.size()}, stream); + cudf::host_span{input.c_str(), input.size()}, + stream, + rmm::mr::get_current_device_resource()); EXPECT_THROW(auto const cudf_table = json_parser(d_input, options, stream, mr), cudf::logic_error); } for (auto const& input : inputs_succeed) { auto const d_input = cudf::detail::make_device_uvector_async( - cudf::host_span{input.c_str(), input.size()}, stream); + cudf::host_span{input.c_str(), input.size()}, + stream, + rmm::mr::get_current_device_resource()); CUDF_EXPECT_NO_THROW(auto const cudf_table = json_parser(d_input, options, stream, mr)); } } @@ -626,8 +640,10 @@ TEST_P(JsonParserTest, EmptyString) cudf::io::json_reader_options default_options{}; std::string const input = R"([])"; - auto const d_input = cudf::detail::make_device_uvector_sync( - cudf::host_span{input.c_str(), input.size()}, stream); + auto const d_input = + cudf::detail::make_device_uvector_sync(cudf::host_span{input.c_str(), input.size()}, + stream, + rmm::mr::get_current_device_resource()); // Get the JSON's tree representation auto const cudf_table = json_parser(d_input, default_options, stream, mr); diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index e82b0c670b8..8a16fd9a05a 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -1128,7 +1128,8 @@ TEST_F(ParquetWriterTest, BufferSource) auto const d_input = cudf::detail::make_device_uvector_sync( cudf::host_span{reinterpret_cast(out_buffer.data()), out_buffer.size()}, - cudf::get_default_stream()); + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); auto const d_buffer = cudf::device_span( reinterpret_cast(d_input.data()), d_input.size()); cudf::io::parquet_reader_options in_opts = diff --git a/cpp/tests/io/type_inference_test.cu b/cpp/tests/io/type_inference_test.cu index ea6eb9b93ef..81c6563cd2d 100644 
--- a/cpp/tests/io/type_inference_test.cu +++ b/cpp/tests/io/type_inference_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,12 +51,12 @@ TEST_F(TypeInference, Basic) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 4, 7}; - auto const string_length = std::vector{2, 2, 1}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 4, 7}; + auto const string_length = std::vector{2, 2, 1}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -84,12 +84,12 @@ TEST_F(TypeInference, Null) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 1, 4}; - auto const string_length = std::vector{0, 2, 1}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 1, 4}; + auto const string_length = std::vector{0, 2, 1}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + 
string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -117,12 +117,12 @@ TEST_F(TypeInference, AllNull) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 1, 1}; - auto const string_length = std::vector{0, 0, 4}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 1, 1}; + auto const string_length = std::vector{0, 0, 4}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -150,12 +150,12 @@ TEST_F(TypeInference, String) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 8, 12}; - auto const string_length = std::vector{6, 3, 4}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 8, 12}; + auto const string_length = std::vector{6, 3, 4}; + auto const d_string_offset = 
cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -183,12 +183,12 @@ TEST_F(TypeInference, Bool) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 6, 12}; - auto const string_length = std::vector{4, 5, 5}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 6, 12}; + auto const string_length = std::vector{4, 5, 5}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -216,12 +216,12 @@ TEST_F(TypeInference, Timestamp) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 10}; - auto const string_length = std::vector{8, 9}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 10}; + auto const string_length = std::vector{8, 9}; + 
auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -250,12 +250,12 @@ TEST_F(TypeInference, InvalidInput) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - auto const string_offset = std::vector{1, 3, 5, 7, 9}; - auto const string_length = std::vector{1, 1, 1, 1, 1}; - auto const d_string_offset = - cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); - auto const d_string_length = - cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); + auto const string_offset = std::vector{1, 3, 5, 7, 9}; + auto const string_length = std::vector{1, 1, 1, 1, 1}; + auto const d_string_offset = cudf::detail::make_device_uvector_async( + string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto const d_string_length = cudf::detail::make_device_uvector_async( + string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); diff --git a/cpp/tests/iterator/iterator_tests.cuh b/cpp/tests/iterator/iterator_tests.cuh index 894e117ba40..882de994e67 100644 --- a/cpp/tests/iterator/iterator_tests.cuh +++ b/cpp/tests/iterator/iterator_tests.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -87,8 +87,8 @@ struct IteratorTest : public cudf::test::BaseFixture { { InputIterator d_in_last = d_in + num_items; EXPECT_EQ(thrust::distance(d_in, d_in_last), num_items); - auto dev_expected = - cudf::detail::make_device_uvector_sync(expected, cudf::get_default_stream()); + auto dev_expected = cudf::detail::make_device_uvector_sync( + expected, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // using a temporary vector and calling transform and all_of separately is // equivalent to thrust::equal but compiles ~3x faster diff --git a/cpp/tests/iterator/value_iterator_test.cuh b/cpp/tests/iterator/value_iterator_test.cuh index fa931d34a0e..8252ce88f39 100644 --- a/cpp/tests/iterator/value_iterator_test.cuh +++ b/cpp/tests/iterator/value_iterator_test.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,8 @@ template void non_null_iterator(IteratorTest& testFixture) { auto host_array = cudf::test::make_type_param_vector({0, 6, 0, -14, 13, 64, -13, -20, 45}); - auto dev_array = cudf::detail::make_device_uvector_sync(host_array, cudf::get_default_stream()); + auto dev_array = cudf::detail::make_device_uvector_sync( + host_array, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // calculate the expected value by CPU. thrust::host_vector replaced_array(host_array); diff --git a/cpp/tests/iterator/value_iterator_test_strings.cu b/cpp/tests/iterator/value_iterator_test_strings.cu index 8b4080fa493..d0e62c09a03 100644 --- a/cpp/tests/iterator/value_iterator_test_strings.cu +++ b/cpp/tests/iterator/value_iterator_test_strings.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,8 @@ auto strings_to_string_views(std::vector& input_strings) std::vector offsets; std::tie(chars, offsets) = cudf::test::detail::make_chars_and_offsets( input_strings.begin(), input_strings.end(), all_valid); - auto dev_chars = cudf::detail::make_device_uvector_sync(chars, cudf::get_default_stream()); + auto dev_chars = cudf::detail::make_device_uvector_sync( + chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); // calculate the expected value by CPU. (but contains device pointers) thrust::host_vector replaced_array(input_strings.size()); @@ -51,8 +52,9 @@ TEST_F(StringIteratorTest, string_view_null_iterator) using T = cudf::string_view; std::string zero("zero"); // the char data has to be in GPU - auto initmsg = cudf::detail::make_device_uvector_sync(zero, cudf::get_default_stream()); - T init = T{initmsg.data(), int(initmsg.size())}; + auto initmsg = cudf::detail::make_device_uvector_sync( + zero, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + T init = T{initmsg.data(), int(initmsg.size())}; // data and valid arrays std::vector host_values( @@ -86,8 +88,9 @@ TEST_F(StringIteratorTest, string_view_no_null_iterator) // T init = T{"", 0}; std::string zero("zero"); // the char data has to be in GPU - auto initmsg = cudf::detail::make_device_uvector_sync(zero, cudf::get_default_stream()); - T init = T{initmsg.data(), int(initmsg.size())}; + auto initmsg = cudf::detail::make_device_uvector_sync( + zero, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + T init = T{initmsg.data(), int(initmsg.size())}; // data array std::vector host_values( @@ -110,8 +113,9 @@ TEST_F(StringIteratorTest, string_scalar_iterator) // T init = T{"", 0}; std::string zero("zero"); // the char data has to be in GPU - auto initmsg = cudf::detail::make_device_uvector_sync(zero, 
cudf::get_default_stream()); - T init = T{initmsg.data(), int(initmsg.size())}; + auto initmsg = cudf::detail::make_device_uvector_sync( + zero, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + T init = T{initmsg.data(), int(initmsg.size())}; // data array std::vector host_values(100, zero); diff --git a/cpp/tests/partitioning/hash_partition_test.cpp b/cpp/tests/partitioning/hash_partition_test.cpp index 9d206c5397d..a1508b5b973 100644 --- a/cpp/tests/partitioning/hash_partition_test.cpp +++ b/cpp/tests/partitioning/hash_partition_test.cpp @@ -308,8 +308,8 @@ void run_fixed_width_test(size_t cols, // Make a table view of the partition numbers constexpr cudf::data_type dtype{cudf::type_id::INT32}; - auto d_partitions = - cudf::detail::make_device_uvector_sync(partitions, cudf::get_default_stream()); + auto d_partitions = cudf::detail::make_device_uvector_sync( + partitions, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); cudf::column_view partitions_col(dtype, rows, d_partitions.data()); cudf::table_view partitions_table({partitions_col}); diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 47bcbb874cf..40b0d268580 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -49,9 +49,9 @@ TYPED_TEST(SegmentedReductionTest, SumExcludeNulls) // output nullmask: {1, 1, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = 
cudf::test::fixed_width_column_wrapper{{6, 4, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -97,9 +97,9 @@ TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls) // output nullmask: {1, 1, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{15, 15, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -147,9 +147,9 @@ TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls) // output nullmask: {1, 1, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -195,9 +195,9 @@ TYPED_TEST(SegmentedReductionTest, MinExcludeNulls) // output nullmask: {1, 1, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto 
const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -244,9 +244,9 @@ TYPED_TEST(SegmentedReductionTest, AnyExcludeNulls) auto const input = cudf::test::fixed_width_column_wrapper{ {0, 0, 0, 0, XXX, 0, 0, 1, 0, 1, XXX, 0, 0, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{ {false, false, true, true, bool{XXX}, false, true, bool{XXX}, bool{XXX}}, {true, true, true, true, false, true, true, false, false}}; @@ -284,9 +284,9 @@ TYPED_TEST(SegmentedReductionTest, AllExcludeNulls) auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX, 1, 0, 3, 1, XXX, 0, 0}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}}; - auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{ {true, true, bool{XXX}, true, bool{XXX}, bool{XXX}, false, false, false}, {true, true, false, true, false, false, true, true, true}}; @@ -335,9 +335,9 @@ TYPED_TEST(SegmentedReductionTest, SumIncludeNulls) // output 
nullmask: {1, 0, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{6, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -386,9 +386,9 @@ TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls) // output nullmask: {1, 0, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{15, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -439,9 +439,9 @@ TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls) // output nullmask: {1, 0, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = 
cudf::test::fixed_width_column_wrapper{{3, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -490,9 +490,9 @@ TYPED_TEST(SegmentedReductionTest, MinIncludeNulls) // output nullmask: {1, 0, 1, 0, 0, 0} auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{1, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -542,9 +542,9 @@ TYPED_TEST(SegmentedReductionTest, AnyIncludeNulls) auto const input = cudf::test::fixed_width_column_wrapper{ {0, 0, 0, 0, XXX, 0, 0, 1, 0, 1, XXX, 0, 0, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{ {false, bool{XXX}, true, bool{XXX}, bool{XXX}, false, true, bool{XXX}, bool{XXX}}, {true, false, true, false, false, true, true, false, false}}; @@ -605,9 +605,9 @@ TYPED_TEST(SegmentedReductionTest, AllIncludeNulls) auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX, 1, 0, 3, 1, XXX, 0, 0}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}}; - auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; - auto const d_offsets = - 
cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{ {true, bool{XXX}, bool{XXX}, true, bool{XXX}, bool{XXX}, false, bool{XXX}, false}, {true, false, false, true, false, false, true, false, true}}; @@ -670,9 +670,9 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction) auto const input = cudf::test::fixed_width_column_wrapper{ {1, 2, 3, 4, 5, 6, 7}, {true, true, true, true, true, true, true}}; - auto const offsets = std::vector{1, 3, 4}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{1, 3, 4}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{5, 4}, {true, true}}; auto res = @@ -720,10 +720,10 @@ TEST_F(SegmentedReductionTestUntyped, NonNullableInput) // outputs: {1, 5, 4} // output nullmask: {1, 1, 1} - auto const input = cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7}; - auto const offsets = std::vector{0, 1, 1, 3, 7}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const input = cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7}; + auto const offsets = std::vector{0, 1, 1, 3, 7}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{1, XXX, 5, 22}, {true, false, true, true}}; @@ -767,9 +767,9 @@ TEST_F(SegmentedReductionTestUntyped, Mean) { auto const input = 
cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_mean_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; @@ -786,9 +786,9 @@ TEST_F(SegmentedReductionTestUntyped, MeanNulls) { auto const input = cudf::test::fixed_width_column_wrapper( {10, 20, 30, 40, 50, 60, 0, 80, 90}, {1, 1, 1, 1, 1, 1, 0, 1, 1}); - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_mean_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; @@ -806,9 +806,9 @@ TEST_F(SegmentedReductionTestUntyped, SumOfSquares) { auto const input = cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_sum_of_squares_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::INT32}; @@ -826,9 +826,9 @@ TEST_F(SegmentedReductionTestUntyped, SumOfSquaresNulls) { auto const input = cudf::test::fixed_width_column_wrapper( {10, 20, 
30, 40, 50, 60, 0, 80, 90}, {1, 1, 1, 1, 1, 1, 0, 1, 1}); - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_sum_of_squares_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::INT64}; @@ -848,9 +848,9 @@ TEST_F(SegmentedReductionTestUntyped, StandardDeviation) constexpr float NaN{std::numeric_limits::quiet_NaN()}; auto const input = cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_std_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; @@ -868,9 +868,9 @@ TEST_F(SegmentedReductionTestUntyped, StandardDeviationNulls) constexpr double NaN{std::numeric_limits::quiet_NaN()}; auto const input = cudf::test::fixed_width_column_wrapper( {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_std_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; @@ -890,9 +890,9 @@ 
TEST_F(SegmentedReductionTestUntyped, Variance) constexpr float NaN{std::numeric_limits::quiet_NaN()}; auto const input = cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_variance_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; @@ -910,9 +910,9 @@ TEST_F(SegmentedReductionTestUntyped, VarianceNulls) constexpr double NaN{std::numeric_limits::quiet_NaN()}; auto const input = cudf::test::fixed_width_column_wrapper( {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_variance_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; @@ -931,9 +931,9 @@ TEST_F(SegmentedReductionTestUntyped, Errors) { auto const input = cudf::test::fixed_width_column_wrapper( {10, 0, 20, 30, 54, 63, 0, 72, 81}, {1, 0, 1, 1, 1, 1, 0, 1, 1}); - auto const offsets = std::vector{0, 1, 1, 4, 9}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 1, 1, 4, 9}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const null_policy = cudf::null_policy::EXCLUDE; auto 
const output_type = cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}; auto const str_input = @@ -999,10 +999,10 @@ TEST_F(SegmentedReductionTestUntyped, Errors) TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) { - auto const input = cudf::test::fixed_width_column_wrapper{}; - auto const offsets = std::vector{0}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const input = cudf::test::fixed_width_column_wrapper{}; + auto const offsets = std::vector{0}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{}; auto res = @@ -1036,10 +1036,10 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets) { - auto const input = cudf::test::fixed_width_column_wrapper{}; - auto const offsets = std::vector{0, 0, 0, 0, 0, 0}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const input = cudf::test::fixed_width_column_wrapper{}; + auto const offsets = std::vector{0, 0, 0, 0, 0, 0}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}}; @@ -1087,9 +1087,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxWithNulls) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = 
cudf::make_max_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1115,9 +1115,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinWithNulls) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_min_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1143,9 +1143,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxNonNullableInput) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 4, 4}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 4, 4}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_max_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1168,9 +1168,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinNonNullableInput) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 4, 4}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 4, 4}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_min_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1193,9 +1193,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, Sum) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - 
cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_sum_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1231,9 +1231,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, Product) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 6, 7, 8, 12, 12}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 12, 12}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_product_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1268,9 +1268,9 @@ TYPED_TEST(SegmentedReductionFixedPointTest, SumOfSquares) { using RepType = cudf::device_storage_type_t; - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const agg = cudf::make_sum_of_squares_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1431,10 +1431,10 @@ TEST_F(SegmentedReductionStringTest, MinExcludeNulls) TEST_F(SegmentedReductionStringTest, EmptyInputWithOffsets) { - auto const input = cudf::test::strings_column_wrapper{}; - auto const offsets = std::vector{0, 0, 0, 0}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const input = cudf::test::strings_column_wrapper{}; + auto const offsets = std::vector{0, 0, 0, 0}; + auto const 
d_offsets = cudf::detail::make_device_uvector_async( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const expect = cudf::test::strings_column_wrapper({XXX, XXX, XXX}, {0, 0, 0}); auto result = diff --git a/cpp/tests/scalar/scalar_device_view_test.cu b/cpp/tests/scalar/scalar_device_view_test.cu index c7365d63e1c..9e0f68573a5 100644 --- a/cpp/tests/scalar/scalar_device_view_test.cu +++ b/cpp/tests/scalar/scalar_device_view_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -130,7 +130,8 @@ TEST_F(StringScalarDeviceViewTest, Value) auto scalar_device_view = cudf::get_scalar_device_view(s); rmm::device_scalar result{cudf::get_default_stream()}; - auto value_v = cudf::detail::make_device_uvector_sync(value, cudf::get_default_stream()); + auto value_v = cudf::detail::make_device_uvector_sync( + value, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); test_string_value<<<1, 1, 0, cudf::get_default_stream().value()>>>( scalar_device_view, value_v.data(), value.size(), result.data()); diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp index 5331c4c34d8..316f24e4167 100644 --- a/cpp/tests/strings/contains_tests.cpp +++ b/cpp/tests/strings/contains_tests.cpp @@ -298,9 +298,11 @@ TEST_F(StringsContainsTests, HexTest) std::vector offsets( {thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + count + 1}); - auto d_chars = cudf::detail::make_device_uvector_sync(ascii_chars, cudf::get_default_stream()); - auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, cudf::get_default_stream()); - auto input = cudf::make_strings_column(d_chars, d_offsets); + auto d_chars = cudf::detail::make_device_uvector_sync( + ascii_chars, cudf::get_default_stream(), 
rmm::mr::get_current_device_resource()); + auto d_offsets = cudf::detail::make_device_uvector_sync( + offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto input = cudf::make_strings_column(d_chars, d_offsets); auto strings_view = cudf::strings_column_view(input->view()); for (auto ch : ascii_chars) { diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index e3df8db721d..77857049e7a 100644 --- a/cpp/tests/strings/factories_test.cu +++ b/cpp/tests/strings/factories_test.cu @@ -78,7 +78,8 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair) } h_offsets[idx + 1] = offset; } - auto d_strings = cudf::detail::make_device_uvector_sync(strings, cudf::get_default_stream()); + auto d_strings = cudf::detail::make_device_uvector_sync( + strings, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); CUDF_CUDA_TRY(cudaMemcpy(d_buffer.data(), h_buffer.data(), memsize, cudaMemcpyDefault)); auto column = cudf::make_strings_column(d_strings); EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING}); @@ -143,10 +144,13 @@ TEST_F(StringsFactoriesTest, CreateColumnFromOffsets) } std::vector h_nulls{h_null_mask}; - auto d_buffer = cudf::detail::make_device_uvector_sync(h_buffer, cudf::get_default_stream()); - auto d_offsets = cudf::detail::make_device_uvector_sync(h_offsets, cudf::get_default_stream()); - auto d_nulls = cudf::detail::make_device_uvector_sync(h_nulls, cudf::get_default_stream()); - auto column = cudf::make_strings_column(d_buffer, d_offsets, d_nulls, null_count); + auto d_buffer = cudf::detail::make_device_uvector_sync( + h_buffer, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_offsets = cudf::detail::make_device_uvector_sync( + h_offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_nulls = cudf::detail::make_device_uvector_sync( + h_nulls, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto 
column = cudf::make_strings_column(d_buffer, d_offsets, d_nulls, null_count); EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING}); EXPECT_EQ(column->null_count(), null_count); EXPECT_EQ(2, column->num_children()); @@ -184,8 +188,8 @@ TEST_F(StringsFactoriesTest, CreateScalar) TEST_F(StringsFactoriesTest, EmptyStringsColumn) { rmm::device_uvector d_chars{0, cudf::get_default_stream()}; - auto d_offsets = - cudf::detail::make_zeroed_device_uvector_sync(1, cudf::get_default_stream()); + auto d_offsets = cudf::detail::make_zeroed_device_uvector_sync( + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); rmm::device_uvector d_nulls{0, cudf::get_default_stream()}; auto results = cudf::make_strings_column(d_chars, d_offsets, d_nulls, 0); diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp index 04e6886a08a..79e96ff5121 100644 --- a/cpp/tests/strings/integers_tests.cpp +++ b/cpp/tests/strings/integers_tests.cpp @@ -297,8 +297,9 @@ TYPED_TEST(StringsIntegerConvertTest, FromToInteger) std::iota(h_integers.begin(), h_integers.end(), -(TypeParam)(h_integers.size() / 2)); h_integers.push_back(std::numeric_limits::min()); h_integers.push_back(std::numeric_limits::max()); - auto d_integers = cudf::detail::make_device_uvector_sync(h_integers, cudf::get_default_stream()); - auto integers = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, + auto d_integers = cudf::detail::make_device_uvector_sync( + h_integers, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto integers = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, (cudf::size_type)d_integers.size()); auto integers_view = integers->mutable_view(); CUDF_CUDA_TRY(cudaMemcpy(integers_view.data(), diff --git a/cpp/tests/table/table_view_tests.cu b/cpp/tests/table/table_view_tests.cu index 0542d007ca0..5127f69162f 100644 --- a/cpp/tests/table/table_view_tests.cu +++ b/cpp/tests/table/table_view_tests.cu @@ -1,5 
+1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,8 +47,8 @@ void row_comparison(cudf::table_view input1, auto device_table_1 = cudf::table_device_view::create(input1, stream); auto device_table_2 = cudf::table_device_view::create(input2, stream); - auto d_column_order = - cudf::detail::make_device_uvector_sync(column_order, cudf::get_default_stream()); + auto d_column_order = cudf::detail::make_device_uvector_sync( + column_order, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto comparator = cudf::row_lexicographic_comparator( cudf::nullate::NO{}, *device_table_1, *device_table_2, d_column_order.data()); diff --git a/cpp/tests/types/type_dispatcher_test.cu b/cpp/tests/types/type_dispatcher_test.cu index 911911851f2..a27d8931ee6 100644 --- a/cpp/tests/types/type_dispatcher_test.cu +++ b/cpp/tests/types/type_dispatcher_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -69,7 +69,8 @@ __global__ void dispatch_test_kernel(cudf::type_id id, bool* d_result) TYPED_TEST(TypedDispatcherTest, DeviceDispatch) { - auto result = cudf::detail::make_zeroed_device_uvector_sync(1, cudf::get_default_stream()); + auto result = cudf::detail::make_zeroed_device_uvector_sync( + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>( cudf::type_to_id(), result.data()); CUDF_CUDA_TRY(cudaDeviceSynchronize()); @@ -130,7 +131,8 @@ __global__ void double_dispatch_test_kernel(cudf::type_id id1, cudf::type_id id2 TYPED_TEST(TypedDoubleDispatcherTest, DeviceDoubleDispatch) { - auto result = cudf::detail::make_zeroed_device_uvector_sync(1, cudf::get_default_stream()); + auto result = cudf::detail::make_zeroed_device_uvector_sync( + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); double_dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>( cudf::type_to_id(), cudf::type_to_id(), result.data()); CUDF_CUDA_TRY(cudaDeviceSynchronize()); diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 6c441539621..3a94aac1cc9 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -371,55 +372,56 @@ struct column_property_comparator { } }; +template class corresponding_rows_unequal { public: - corresponding_rows_unequal(table_device_view d_lhs, - table_device_view d_rhs, - column_device_view lhs_row_indices_, + corresponding_rows_unequal(column_device_view lhs_row_indices_, column_device_view rhs_row_indices_, - size_type /*fp_ulps*/) - : comp(cudf::nullate::YES{}, d_lhs, d_rhs, cudf::null_equality::EQUAL), - lhs_row_indices(lhs_row_indices_), - rhs_row_indices(rhs_row_indices_) + 
size_type /*fp_ulps*/, + DeviceComparator comp_, + column_device_view /*lhs*/, + column_device_view /*rhs*/) + : lhs_row_indices(lhs_row_indices_), rhs_row_indices(rhs_row_indices_), comp(comp_) { } - cudf::row_equality_comparator comp; - __device__ bool operator()(size_type index) { - return !comp(lhs_row_indices.element(index), - rhs_row_indices.element(index)); + using cudf::experimental::row::lhs_index_type; + using cudf::experimental::row::rhs_index_type; + + return !comp(lhs_index_type{lhs_row_indices.element(index)}, + rhs_index_type{rhs_row_indices.element(index)}); } column_device_view lhs_row_indices; column_device_view rhs_row_indices; + DeviceComparator comp; }; +template class corresponding_rows_not_equivalent { - table_device_view d_lhs; - table_device_view d_rhs; - column_device_view lhs_row_indices; column_device_view rhs_row_indices; - size_type const fp_ulps; + DeviceComparator comp; + column_device_view lhs; + column_device_view rhs; public: - corresponding_rows_not_equivalent(table_device_view d_lhs, - table_device_view d_rhs, - column_device_view lhs_row_indices_, + corresponding_rows_not_equivalent(column_device_view lhs_row_indices_, column_device_view rhs_row_indices_, - size_type fp_ulps_) - : d_lhs(d_lhs), - d_rhs(d_rhs), - comp(cudf::nullate::YES{}, d_lhs, d_rhs, null_equality::EQUAL), - lhs_row_indices(lhs_row_indices_), + size_type fp_ulps_, + DeviceComparator comp_, + column_device_view lhs_, + column_device_view rhs_) + : lhs_row_indices(lhs_row_indices_), rhs_row_indices(rhs_row_indices_), - fp_ulps(fp_ulps_) + fp_ulps(fp_ulps_), + comp(comp_), + lhs(lhs_), + rhs(rhs_) { - CUDF_EXPECTS(d_lhs.num_columns() == 1 and d_rhs.num_columns() == 1, - "Unsupported number of columns"); } struct typed_element_not_equivalent { @@ -459,23 +461,17 @@ class corresponding_rows_not_equivalent { } }; - cudf::row_equality_comparator comp; - __device__ bool operator()(size_type index) { + using cudf::experimental::row::lhs_index_type; + using 
cudf::experimental::row::rhs_index_type; + auto const lhs_index = lhs_row_indices.element(index); auto const rhs_index = rhs_row_indices.element(index); - if (not comp(lhs_index, rhs_index)) { - auto lhs_col = this->d_lhs.column(0); - auto rhs_col = this->d_rhs.column(0); - return type_dispatcher(lhs_col.type(), - typed_element_not_equivalent{}, - lhs_col, - rhs_col, - lhs_index, - rhs_index, - fp_ulps); + if (not comp(lhs_index_type{lhs_index}, rhs_index_type{rhs_index})) { + return type_dispatcher( + lhs.type(), typed_element_not_equivalent{}, lhs, rhs, lhs_index, rhs_index, fp_ulps); } return false; } @@ -536,25 +532,42 @@ struct column_comparator_impl { size_type fp_ulps, int depth) { - auto d_lhs = cudf::table_device_view::create(table_view{{lhs}}); - auto d_rhs = cudf::table_device_view::create(table_view{{rhs}}); - auto d_lhs_row_indices = cudf::column_device_view::create(lhs_row_indices); auto d_rhs_row_indices = cudf::column_device_view::create(rhs_row_indices); - using ComparatorType = std::conditional_t; + auto d_lhs = cudf::column_device_view::create(lhs); + auto d_rhs = cudf::column_device_view::create(rhs); + + auto lhs_tview = table_view{{lhs}}; + auto rhs_tview = table_view{{rhs}}; + + auto const comparator = cudf::experimental::row::equality::two_table_comparator{ + lhs_tview, rhs_tview, cudf::get_default_stream()}; + auto const has_nulls = cudf::has_nested_nulls(lhs_tview) or cudf::has_nested_nulls(rhs_tview); + + auto const device_comparator = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}); + + using ComparatorType = + std::conditional_t, + corresponding_rows_not_equivalent>; auto differences = rmm::device_uvector( - lhs.size(), cudf::get_default_stream()); // worst case: everything different + lhs_row_indices.size(), cudf::get_default_stream()); // worst case: everything different auto input_iter = thrust::make_counting_iterator(0); - auto diff_iter = thrust::copy_if( + + thrust::transform( rmm::exec_policy(cudf::get_default_stream()), 
input_iter, input_iter + lhs_row_indices.size(), differences.begin(), - ComparatorType(*d_lhs, *d_rhs, *d_lhs_row_indices, *d_rhs_row_indices, fp_ulps)); + ComparatorType( + *d_lhs_row_indices, *d_rhs_row_indices, fp_ulps, device_comparator, *d_lhs, *d_rhs)); + + auto diff_iter = thrust::remove(rmm::exec_policy(cudf::get_default_stream()), + differences.begin(), + differences.end(), + 0); // remove the zero entries differences.resize(thrust::distance(differences.begin(), diff_iter), cudf::get_default_stream()); // shrink back down diff --git a/cpp/tests/utilities/tdigest_utilities.cu b/cpp/tests/utilities/tdigest_utilities.cu index 15998e32bd0..d2e95812894 100644 --- a/cpp/tests/utilities/tdigest_utilities.cu +++ b/cpp/tests/utilities/tdigest_utilities.cu @@ -64,12 +64,12 @@ void tdigest_sample_compare(cudf::tdigest::tdigest_column_view const& tdv, }); } - auto d_expected_src = - cudf::detail::make_device_uvector_async(h_expected_src, cudf::get_default_stream()); - auto d_expected_mean = - cudf::detail::make_device_uvector_async(h_expected_mean, cudf::get_default_stream()); - auto d_expected_weight = - cudf::detail::make_device_uvector_async(h_expected_weight, cudf::get_default_stream()); + auto d_expected_src = cudf::detail::make_device_uvector_async( + h_expected_src, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_expected_mean = cudf::detail::make_device_uvector_async( + h_expected_mean, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto d_expected_weight = cudf::detail::make_device_uvector_async( + h_expected_weight, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto iter = thrust::make_counting_iterator(0); thrust::for_each( diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu index a043e723eda..66f9fbfc0d6 100644 --- a/cpp/tests/utilities_tests/span_tests.cu +++ b/cpp/tests/utilities_tests/span_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -238,8 +238,8 @@ __global__ void simple_device_kernel(device_span result) { result[0] = tru TEST(SpanTest, CanUseDeviceSpan) { - auto d_message = - cudf::detail::make_zeroed_device_uvector_async(1, cudf::get_default_stream()); + auto d_message = cudf::detail::make_zeroed_device_uvector_async( + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_span = device_span(d_message.data(), d_message.size()); diff --git a/cpp/tests/utilities_tests/type_check_tests.cpp b/cpp/tests/utilities_tests/type_check_tests.cpp index 84a2d15d477..f65c3652dc9 100644 --- a/cpp/tests/utilities_tests/type_check_tests.cpp +++ b/cpp/tests/utilities_tests/type_check_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -147,6 +147,7 @@ TEST_F(ColumnTypeCheckTest, DifferentFixedWidth) fixed_point_column_wrapper rhs5({10000}, numeric::scale_type{0}); EXPECT_FALSE(column_types_equal(lhs5, rhs5)); + EXPECT_TRUE(column_types_equivalent(lhs5, rhs5)); // Different rep, same scale fixed_point_column_wrapper lhs6({10000}, numeric::scale_type{-1}); diff --git a/docs/cudf/source/api_docs/general_functions.rst b/docs/cudf/source/api_docs/general_functions.rst index 112df2fdf9f..5c28b4e7e85 100644 --- a/docs/cudf/source/api_docs/general_functions.rst +++ b/docs/cudf/source/api_docs/general_functions.rst @@ -27,6 +27,7 @@ Top-level conversions cudf.to_numeric cudf.from_dlpack + cudf.from_pandas Top-level dealing with datetimelike ----------------------------------- diff --git a/docs/dask_cudf/Makefile b/docs/dask_cudf/Makefile new file mode 100644 index 00000000000..d0c3cbf1020 --- /dev/null +++ b/docs/dask_cudf/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/dask_cudf/make.bat b/docs/dask_cudf/make.bat new file mode 100644 index 00000000000..747ffb7b303 --- /dev/null +++ b/docs/dask_cudf/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. 
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/dask_cudf/source/_static/RAPIDS-logo-purple.png b/docs/dask_cudf/source/_static/RAPIDS-logo-purple.png new file mode 100644 index 00000000000..d884e01374d Binary files /dev/null and b/docs/dask_cudf/source/_static/RAPIDS-logo-purple.png differ diff --git a/docs/dask_cudf/source/api.rst b/docs/dask_cudf/source/api.rst new file mode 100644 index 00000000000..893f5dd7434 --- /dev/null +++ b/docs/dask_cudf/source/api.rst @@ -0,0 +1,79 @@ +=============== + API reference +=============== + +This page provides a list of all publicly accessible modules, methods, +and classes in the ``dask_cudf`` namespace. + + +Creating and storing DataFrames +=============================== + +:doc:`Like Dask `, Dask-cuDF supports creation +of DataFrames from a variety of storage formats. For on-disk data that +are not supported directly in Dask-cuDF, we recommend using Dask's +data reading facilities, followed by calling +:func:`.from_dask_dataframe` to obtain a Dask-cuDF object. + +.. automodule:: dask_cudf + :members: + from_cudf, + from_dask_dataframe, + read_csv, + read_json, + read_orc, + to_orc, + read_text, + read_parquet + +.. warning:: + + FIXME: where should the following live? + + .. autofunction:: dask_cudf.concat + + .. 
autofunction:: dask_cudf.from_delayed + +Grouping +======== + +As discussed in the :doc:`Dask documentation for groupby +`, ``groupby``, ``join``, and ``merge``, and +similar operations that require matching up rows of a DataFrame become +significantly more challenging in a parallel setting than they are in +serial. Dask-cuDF has the same challenges, however for certain groupby +operations, we can take advantage of functionality in cuDF that allows +us to compute multiple aggregations at once. There are therefore two +interfaces to grouping in Dask-cuDF, the general +:meth:`DataFrame.groupby` which returns a +:class:`.CudfDataFrameGroupBy` object, and a specialized +:func:`.groupby_agg`. Generally speaking, you should not need to call +:func:`.groupby_agg` directly, since Dask-cuDF will arrange to call it +if possible. + +.. autoclass:: dask_cudf.groupby.CudfDataFrameGroupBy + :members: + :inherited-members: + :show-inheritance: + +.. autofunction:: dask_cudf.groupby_agg + + +DataFrames and Series +===================== + +The core distributed objects provided by Dask-cuDF are the +:class:`.DataFrame` and :class:`.Series`. These inherit respectively +from :class:`dask.dataframe.DataFrame` and +:class:`dask.dataframe.Series`, and so the API is essentially +identical. The full API is provided below. + +.. autoclass:: dask_cudf.DataFrame + :members: + :inherited-members: + :show-inheritance: + +.. autoclass:: dask_cudf.Series + :members: + :inherited-members: + :show-inheritance: diff --git a/docs/dask_cudf/source/conf.py b/docs/dask_cudf/source/conf.py new file mode 100644 index 00000000000..1341e7fd9e7 --- /dev/null +++ b/docs/dask_cudf/source/conf.py @@ -0,0 +1,82 @@ +# Copyright (c) 2018-2023, NVIDIA CORPORATION. + +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "dask-cudf" +copyright = "2018-2023, NVIDIA Corporation" +author = "NVIDIA Corporation" +version = "23.04" +release = "23.04.00" + +language = "en" + + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "sphinx.ext.intersphinx", + "sphinx.ext.autodoc", + "sphinx_copybutton", + "numpydoc", + "IPython.sphinxext.ipython_console_highlighting", + "IPython.sphinxext.ipython_directive", + "myst_nb", +] + +templates_path = ["_templates"] +exclude_patterns = [] + +copybutton_prompt_text = ">>> " + +# Enable automatic generation of systematic, namespaced labels for sections +myst_heading_anchors = 2 + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "pydata_sphinx_theme" +html_logo = "_static/RAPIDS-logo-purple.png" +htmlhelp_basename = "dask-cudfdoc" +html_use_modindex = True + +html_static_path = ["_static"] + +pygments_style = "sphinx" + +html_theme_options = { + "external_links": [], + "github_url": "https://github.com/rapidsai/cudf", + "twitter_url": "https://twitter.com/rapidsai", + "show_toc_level": 1, + "navbar_align": "right", +} +include_pandas_compat = True + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "cupy": ("https://docs.cupy.dev/en/stable/", None), + "numpy": ("https://numpy.org/doc/stable", None), + "pyarrow": ("https://arrow.apache.org/docs/", None), + "cudf": ("https://docs.rapids.ai/api/cudf/stable/", None), + "dask": 
("https://docs.dask.org/en/stable/", None), + "pandas": ("https://pandas.pydata.org/docs/", None), +} + +numpydoc_show_inherited_class_members = True +numpydoc_class_members_toctree = False +numpydoc_attributes_as_param_list = False + + +def setup(app): + app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") + app.add_js_file( + "https://docs.rapids.ai/assets/js/custom.js", loading_method="defer" + ) diff --git a/docs/dask_cudf/source/index.rst b/docs/dask_cudf/source/index.rst new file mode 100644 index 00000000000..0442ab0929a --- /dev/null +++ b/docs/dask_cudf/source/index.rst @@ -0,0 +1,112 @@ +.. dask-cudf documentation coordinating file, created by + sphinx-quickstart on Mon Feb 6 18:48:11 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to dask-cudf's documentation! +===================================== + +Dask-cuDF is an extension library for the `Dask `__ +parallel computing framework that provides a `cuDF +`__-backed distributed +dataframe with the same API as `Dask dataframes +`__. + +If you are familiar with Dask and `pandas `__ or +`cuDF `__, then Dask-cuDF +should feel familiar to you. If not, we recommend starting with `10 +minutes to Dask +`__ followed +by `10 minutes to cuDF and Dask-cuDF +`__. + +When running on multi-GPU systems, `Dask-CUDA +`__ is recommended to +simplify the setup of the cluster, taking advantage of all features of +the GPU and networking hardware. + +Using Dask-cuDF +--------------- + +When installed, Dask-cuDF registers itself as a dataframe backend for +Dask. This means that in many cases, using cuDF-backed dataframes requires +only small changes to an existing workflow. The minimal change is to +select cuDF as the dataframe backend in :doc:`Dask's +configuration `. To do so, we must set the option +``dataframe.backend`` to ``cudf``. 
From Python, this can be achieved +like so:: + + import dask + + dask.config.set({"dataframe.backend": "cudf"}) + +Alternatively, you can set ``DASK_DATAFRAME__BACKEND=cudf`` in the +environment before running your code. + +Dataframe creation from on-disk formats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your workflow creates Dask dataframes from on-disk formats +(for example using :func:`dask.dataframe.read_parquet`), then setting +the backend may well be enough to migrate your workflow. + +For example, consider reading a dataframe from parquet:: + + import dask.dataframe as dd + + # By default, we obtain a pandas-backed dataframe + df = dd.read_parquet("data.parquet", ...) + + +To obtain a cuDF-backed dataframe, we must set the +``dataframe.backend`` configuration option:: + + import dask + import dask.dataframe as dd + + dask.config.set({"dataframe.backend": "cudf"}) + # This gives us a cuDF-backed dataframe + df = dd.read_parquet("data.parquet", ...) + +This code will use cuDF's GPU-accelerated :func:`parquet reader +` to read partitions of the data. + +Dataframe creation from in-memory formats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you already have a dataframe in memory and want to convert it to a +cuDF-backed one, there are two options depending on whether the +dataframe is already a Dask one or not. If you have a Dask dataframe, +then you can call :func:`dask.dataframe.to_backend` passing ``"cudf"`` +as the backend; if you have a pandas dataframe then you can either +call :func:`dask.dataframe.from_pandas` followed by +:func:`~dask.dataframe.to_backend` or first convert the dataframe with +:func:`cudf.from_pandas` and then parallelise this with +:func:`dask_cudf.from_cudf`. + +API Reference +------------- + +Generally speaking, Dask-cuDF tries to offer exactly the same API as +Dask itself. 
There are, however, some minor differences mostly because +cuDF does not :doc:`perfectly mirror ` +the pandas API, or because cuDF provides additional configuration +flags (these mostly occur in data reading and writing interfaces). + +As a result, straightforward workflows can be migrated without too +much trouble, but more complex ones that utilise more features may +need a bit of tweaking. The API documentation describes details of the +differences and all functionality that Dask-cuDF supports. + +.. toctree:: + :maxdepth: 2 + + api + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu index 8a2c0b2b411..86c2add851a 100644 --- a/java/src/main/native/src/ColumnViewJni.cu +++ b/java/src/main/native/src/ColumnViewJni.cu @@ -56,7 +56,7 @@ new_column_with_boolean_column_as_validity(cudf::column_view const &exemplar, auto [null_mask, null_count] = cudf::detail::valid_if( validity_begin, validity_end, [] __device__(auto optional_bool) { return optional_bool.value_or(false); }, - cudf::get_default_stream()); + cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto const exemplar_without_null_mask = cudf::column_view{ exemplar.type(), exemplar.size(), @@ -153,8 +153,9 @@ void post_process_list_overlap(cudf::column_view const &lhs, cudf::column_view c }); // Create a new nullmask from the validity data. 
- auto [new_null_mask, new_null_count] = cudf::detail::valid_if( - validity.begin(), validity.end(), thrust::identity{}, cudf::get_default_stream()); + auto [new_null_mask, new_null_count] = + cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity{}, + cudf::get_default_stream(), rmm::mr::get_current_device_resource()); if (new_null_count > 0) { // If the `overlap_result` column is nullable, perform `bitmask_and` of its nullmask and the diff --git a/java/src/main/native/src/maps_column_view.cu b/java/src/main/native/src/maps_column_view.cu index 23254c0d501..1af7689f972 100644 --- a/java/src/main/native/src/maps_column_view.cu +++ b/java/src/main/native/src/maps_column_view.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -55,7 +55,8 @@ std::unique_ptr get_values_for_impl(maps_column_view const &maps_view, CUDF_EXPECTS(lookup_keys.type().id() == keys_.child().type().id(), "Lookup keys must have the same type as the keys of the map column."); auto key_indices = - lists::detail::index_of(keys_, lookup_keys, lists::duplicate_find_option::FIND_LAST, stream); + lists::detail::index_of(keys_, lookup_keys, lists::duplicate_find_option::FIND_LAST, stream, + rmm::mr::get_current_device_resource()); auto constexpr absent_offset = size_type{-1}; auto constexpr nullity_offset = std::numeric_limits::min(); thrust::replace(rmm::exec_policy(stream), key_indices->mutable_view().template begin(), @@ -86,7 +87,8 @@ std::unique_ptr contains_impl(maps_column_view const &maps_view, KeyT co auto const keys = maps_view.keys(); CUDF_EXPECTS(lookup_keys.type().id() == keys.child().type().id(), "Lookup keys must have the same type as the keys of the map column."); - auto const contains = lists::detail::contains(keys, lookup_keys, stream); + auto const contains = + 
lists::detail::contains(keys, lookup_keys, stream, rmm::mr::get_current_device_resource()); // Replace nulls with BOOL8{false}; auto const scalar_false = numeric_scalar{false, true, stream}; return detail::replace_nulls(contains->view(), scalar_false, stream, mr); diff --git a/java/src/main/native/src/row_conversion.cu b/java/src/main/native/src/row_conversion.cu index 5cf7658106f..84f84f8b46f 100644 --- a/java/src/main/native/src/row_conversion.cu +++ b/java/src/main/native/src/row_conversion.cu @@ -226,7 +226,8 @@ build_string_row_offsets(table_view const &tbl, size_type fixed_width_and_validi std::copy_if(offsets_iter, offsets_iter + tbl.num_columns(), std::back_inserter(offsets_iterators), [](auto const &offset_ptr) { return offset_ptr != nullptr; }); - return make_device_uvector_async(offsets_iterators, stream); + return make_device_uvector_async(offsets_iterators, stream, + rmm::mr::get_current_device_resource()); }(); auto const num_columns = static_cast(d_offsets_iterators.size()); @@ -1256,7 +1257,7 @@ static std::unique_ptr fixed_width_convert_to_rows( // Allocate and set the offsets row for the byte array std::unique_ptr offsets = - cudf::detail::sequence(num_rows + 1, zero, scalar_size_per_row, stream); + cudf::detail::sequence(num_rows + 1, zero, scalar_size_per_row, stream, mr); std::unique_ptr data = make_numeric_column(data_type(type_id::INT8), static_cast(total_allocation), @@ -1539,7 +1540,9 @@ batch_data build_batches(size_type num_rows, RowSize row_sizes, bool all_fixed_w last_row_end = row_end; } - return {std::move(batch_row_offsets), make_device_uvector_async(batch_row_boundaries, stream), + return {std::move(batch_row_offsets), + make_device_uvector_async(batch_row_boundaries, stream, + rmm::mr::get_current_device_resource()), std::move(batch_row_boundaries), std::move(row_batches)}; } @@ -1750,8 +1753,10 @@ std::vector> convert_to_rows( return table_view(cols); }; - auto dev_col_sizes = make_device_uvector_async(column_info.column_sizes, 
stream); - auto dev_col_starts = make_device_uvector_async(column_info.column_starts, stream); + auto dev_col_sizes = make_device_uvector_async(column_info.column_sizes, stream, + rmm::mr::get_current_device_resource()); + auto dev_col_starts = make_device_uvector_async(column_info.column_starts, stream, + rmm::mr::get_current_device_resource()); // Get the pointers to the input columnar data ready auto const data_begin = thrust::make_transform_iterator(tbl.begin(), [](auto const &c) { @@ -1764,8 +1769,10 @@ std::vector> convert_to_rows( thrust::make_transform_iterator(tbl.begin(), [](auto const &c) { return c.null_mask(); }); std::vector input_nm(nm_begin, nm_begin + tbl.num_columns()); - auto dev_input_data = make_device_uvector_async(input_data, stream); - auto dev_input_nm = make_device_uvector_async(input_nm, stream); + auto dev_input_data = + make_device_uvector_async(input_data, stream, rmm::mr::get_current_device_resource()); + auto dev_input_nm = + make_device_uvector_async(input_nm, stream, rmm::mr::get_current_device_resource()); // the first batch always exists unless we were sent an empty table auto const first_batch_size = batch_info.row_batches[0].row_count; @@ -1811,7 +1818,8 @@ std::vector> convert_to_rows( auto validity_tile_infos = detail::build_validity_tile_infos( tbl.num_columns(), num_rows, shmem_limit_per_tile, batch_info.row_batches); - auto dev_validity_tile_infos = make_device_uvector_async(validity_tile_infos, stream); + auto dev_validity_tile_infos = make_device_uvector_async(validity_tile_infos, stream, + rmm::mr::get_current_device_resource()); auto const validity_offset = column_info.column_starts.back(); @@ -1847,9 +1855,10 @@ std::vector> convert_to_rows( std::vector variable_width_input_data( variable_data_begin, variable_data_begin + variable_width_table.num_columns()); - auto dev_variable_input_data = make_device_uvector_async(variable_width_input_data, stream); - auto dev_variable_col_output_offsets = - 
make_device_uvector_async(column_info.variable_width_column_starts, stream); + auto dev_variable_input_data = make_device_uvector_async( + variable_width_input_data, stream, rmm::mr::get_current_device_resource()); + auto dev_variable_col_output_offsets = make_device_uvector_async( + column_info.variable_width_column_starts, stream, rmm::mr::get_current_device_resource()); for (uint i = 0; i < batch_info.row_batches.size(); i++) { auto const batch_row_offset = batch_info.batch_row_boundaries[i]; @@ -2076,8 +2085,10 @@ std::unique_ptr
convert_from_rows(lists_column_view const &input, // Ideally we would check that the offsets are all the same, etc. but for now this is probably // fine CUDF_EXPECTS(size_per_row * num_rows <= child.size(), "The layout of the data appears to be off"); - auto dev_col_starts = make_device_uvector_async(column_info.column_starts, stream); - auto dev_col_sizes = make_device_uvector_async(column_info.column_sizes, stream); + auto dev_col_starts = make_device_uvector_async(column_info.column_starts, stream, + rmm::mr::get_current_device_resource()); + auto dev_col_sizes = make_device_uvector_async(column_info.column_sizes, stream, + rmm::mr::get_current_device_resource()); // Allocate the columns we are going to write into std::vector> output_columns; @@ -2118,16 +2129,20 @@ std::unique_ptr
convert_from_rows(lists_column_view const &input, } } - auto dev_string_row_offsets = make_device_uvector_async(string_row_offsets, stream); - auto dev_string_lengths = make_device_uvector_async(string_lengths, stream); + auto dev_string_row_offsets = + make_device_uvector_async(string_row_offsets, stream, rmm::mr::get_current_device_resource()); + auto dev_string_lengths = + make_device_uvector_async(string_lengths, stream, rmm::mr::get_current_device_resource()); // build the row_batches from the passed in list column std::vector row_batches; row_batches.push_back( {detail::row_batch{child.size(), num_rows, device_uvector(0, stream)}}); - auto dev_output_data = make_device_uvector_async(output_data, stream); - auto dev_output_nm = make_device_uvector_async(output_nm, stream); + auto dev_output_data = + make_device_uvector_async(output_data, stream, rmm::mr::get_current_device_resource()); + auto dev_output_nm = + make_device_uvector_async(output_nm, stream, rmm::mr::get_current_device_resource()); // only ever get a single batch when going from rows, so boundaries are 0, num_rows constexpr auto num_batches = 2; @@ -2164,7 +2179,8 @@ std::unique_ptr
convert_from_rows(lists_column_view const &input, auto validity_tile_infos = detail::build_validity_tile_infos(schema.size(), num_rows, shmem_limit_per_tile, row_batches); - auto dev_validity_tile_infos = make_device_uvector_async(validity_tile_infos, stream); + auto dev_validity_tile_infos = make_device_uvector_async(validity_tile_infos, stream, + rmm::mr::get_current_device_resource()); dim3 const validity_blocks(validity_tile_infos.size()); @@ -2221,8 +2237,10 @@ std::unique_ptr
convert_from_rows(lists_column_view const &input, string_col_offsets.push_back(std::move(output_string_offsets)); string_data_cols.push_back(std::move(string_data)); } - auto dev_string_col_offsets = make_device_uvector_async(string_col_offset_ptrs, stream); - auto dev_string_data_cols = make_device_uvector_async(string_data_col_ptrs, stream); + auto dev_string_col_offsets = make_device_uvector_async(string_col_offset_ptrs, stream, + rmm::mr::get_current_device_resource()); + auto dev_string_data_cols = make_device_uvector_async(string_data_col_ptrs, stream, + rmm::mr::get_current_device_resource()); dim3 const string_blocks( std::min(std::max(MIN_STRING_BLOCKS, num_rows / NUM_STRING_ROWS_PER_BLOCK_FROM_ROWS), @@ -2274,8 +2292,10 @@ std::unique_ptr
convert_from_rows_fixed_width_optimized( // fine CUDF_EXPECTS(size_per_row * num_rows == child.size(), "The layout of the data appears to be off"); - auto dev_column_start = make_device_uvector_async(column_start, stream); - auto dev_column_size = make_device_uvector_async(column_size, stream); + auto dev_column_start = + make_device_uvector_async(column_start, stream, rmm::mr::get_current_device_resource()); + auto dev_column_size = + make_device_uvector_async(column_size, stream, rmm::mr::get_current_device_resource()); // Allocate the columns we are going to write into std::vector> output_columns; diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 6590cf2940d..ccf730c91fb 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. from collections import abc @@ -279,12 +279,8 @@ def cut( if labels is not None: if labels is not ordered and len(set(labels)) != len(labels): # when we have duplicate labels and ordered is False, we - # should allow duplicate categories. The categories are - # returned in order - new_data = [interval_labels[i][0] for i in index_labels.values] - return cudf.CategoricalIndex( - new_data, categories=sorted(set(labels)), ordered=False - ) + # should allow duplicate categories. 
+ return interval_labels[index_labels] col = build_categorical_column( categories=interval_labels, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index e50c324a8f4..672e663d316 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -259,9 +259,12 @@ def _getitem_tuple_arg(self, arg): else: if isinstance(arg, tuple): - return columns_df.index._get_row_major(columns_df, arg[0]) + row_arg = arg[0] + elif is_scalar(arg): + row_arg = (arg,) else: - return columns_df.index._get_row_major(columns_df, arg) + row_arg = arg + return columns_df.index._get_row_major(columns_df, row_arg) else: if isinstance(arg[0], slice): out = _get_label_range_or_mask( diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 413e005b798..d1408fec160 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1403,7 +1403,7 @@ def __repr__(self): @_cudf_nvtx_annotate def __getitem__(self, index): res = self._get_elements_from_column(index) - if not isinstance(index, int): + if isinstance(res, ColumnBase): res = as_index(res) res.name = self.name return res diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 79927c60a85..8ec08b7c92a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -250,7 +250,11 @@ def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]: if isinstance(self._frame.index, cudf.MultiIndex) and not isinstance( arg, cudf.MultiIndex ): - result = self._frame.index._get_row_major(self._frame, arg) + if is_scalar(arg): + row_arg = (arg,) + else: + row_arg = arg + result = self._frame.index._get_row_major(self._frame, row_arg) if ( isinstance(arg, tuple) and len(arg) == self._frame._index.nlevels diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index d043b917251..0b0c5fba7fa 100644 --- a/python/cudf/cudf/tests/test_index.py +++ 
b/python/cudf/cudf/tests/test_index.py @@ -2886,3 +2886,22 @@ def test_index_to_pandas_nullable(data, expected_dtype): expected = pd.Index(data, dtype=expected_dtype) assert_eq(pi, expected) + + +class TestIndexScalarGetItem: + @pytest.fixture( + params=[range(1, 10, 2), [1, 2, 3], ["a", "b", "c"], [1.5, 2.5, 3.5]] + ) + def index_values(self, request): + return request.param + + @pytest.fixture(params=[int, np.int8, np.int32, np.int64]) + def i(self, request): + return request.param(1) + + def test_scalar_getitem(self, index_values, i): + index = cudf.Index(index_values) + + assert not isinstance(index[i], cudf.Index) + assert index[i] == index_values[i] + assert_eq(index, index.to_pandas()) diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 5012ae0979f..95936c48b7c 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -1446,6 +1446,8 @@ def test_loc_zero_dim_array(): reason="https://github.com/pandas-dev/pandas/issues/46704" ), ), + 1, + 2, ], ) def test_loc_series_multiindex(arg): diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index 0f04e8c0f2d..a0e027d4c86 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -319,6 +319,9 @@ def test_multiindex_getitem(pdf, gdf, pdfIndex): (("a", "store"), slice(None)), # return 2 rows, n-1 remaining keys = dataframe with n-k index columns ("a",), + "a", + "b", + "c", (("a",), slice(None)), # return 1 row, 0 remaining keys = dataframe with entire index ("a", "store", "storm", "smoke"), diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index 60bbe5d9571..d2858876fcd 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -1,6 +1,7 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. 
import math +import textwrap import warnings import numpy as np @@ -68,6 +69,18 @@ def to_dask_dataframe(self, **kwargs): class DataFrame(_Frame, dd.core.DataFrame): + """ + A distributed Dask DataFrame where the backing dataframe is a + :class:`cuDF DataFrame `. + + Typically you would not construct this object directly, but rather + use one of Dask-cuDF's IO routines. + + Most operations on :doc:`Dask DataFrames ` are + supported, with many of the same caveats. + + """ + _partition_type = cudf.DataFrame @_dask_cudf_nvtx_annotate @@ -671,12 +684,35 @@ def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None): from_cudf.__doc__ = ( - "Wraps main-line Dask from_pandas...\n" + dd.from_pandas.__doc__ + textwrap.dedent( + """ + Create a :class:`.DataFrame` from a :class:`cudf.DataFrame`. + + This function is a thin wrapper around + :func:`dask.dataframe.from_pandas`, accepting the same + arguments (described below) excepting that it operates on cuDF + rather than pandas objects.\n + """ + ) + + textwrap.dedent(dd.from_pandas.__doc__) ) @_dask_cudf_nvtx_annotate def from_dask_dataframe(df): + """ + Convert a Dask :class:`dask.dataframe.DataFrame` to a Dask-cuDF + one. + + Parameters + ---------- + df : dask.dataframe.DataFrame + The Dask dataframe to convert + + Returns + ------- + dask_cudf.DataFrame : A new Dask collection backed by cuDF objects + """ return df.map_partitions(cudf.from_pandas) diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py index f91738bdab0..f4bbcaf4dd1 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/groupby.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from functools import wraps from typing import Set @@ -433,22 +433,55 @@ def groupby_agg( ): """Optimized groupby aggregation for Dask-CuDF. 
- This aggregation algorithm only supports the following options: - - - "count" - - "mean" - - "std" - - "var" - - "sum" - - "min" - - "max" - - "collect" - - "first" - - "last" - - This "optimized" approach is more performant than the algorithm - in `dask.dataframe`, because it allows the cudf backend to - perform multiple aggregations at once. + Parameters + ---------- + ddf : DataFrame + DataFrame object to perform grouping on. + gb_cols : str or list[str] + Column names to group by. + aggs_in : str, list, or dict + Aggregations to perform. + split_every : int (optional) + How to group intermediate aggregates. + dropna : bool + Drop grouping key values corresponding to NA values. + as_index : bool + Currently ignored. + sort : bool + Sort the group keys, better performance is obtained when + not sorting. + shuffle : str (optional) + Control how shuffling of the DataFrame is performed. + sep : str + Internal usage. + + + Notes + ----- + This "optimized" approach is more performant than the algorithm in + implemented in :meth:`DataFrame.apply` because it allows the cuDF + backend to perform multiple aggregations at once. + + This aggregation algorithm only supports the following options + + * "collect" + * "count" + * "first" + * "last" + * "max" + * "mean" + * "min" + * "std" + * "sum" + * "var" + + + See Also + -------- + DataFrame.groupby : generic groupby of a DataFrame + dask.dataframe.apply_concat_apply : for more description of the + split_every argument. + """ # Assert that aggregations are supported aggs = _redirect_aggs(aggs_in) diff --git a/python/dask_cudf/dask_cudf/io/csv.py b/python/dask_cudf/dask_cudf/io/csv.py index b4d080fd182..fd27083bbf4 100644 --- a/python/dask_cudf/dask_cudf/io/csv.py +++ b/python/dask_cudf/dask_cudf/io/csv.py @@ -16,9 +16,10 @@ def read_csv(path, blocksize="default", **kwargs): """ - Read CSV files into a dask_cudf.DataFrame + Read CSV files into a :class:`.DataFrame`. 
- This API parallelizes the ``cudf.read_csv`` function in the following ways: + This API parallelizes the :func:`cudf:cudf.read_csv` function in + the following ways: It supports loading many files at once using globstrings: @@ -34,23 +35,26 @@ def read_csv(path, blocksize="default", **kwargs): >>> df = dask_cudf.read_csv("s3://bucket/myfiles.*.csv") >>> df = dask_cudf.read_csv("https://www.mycloud.com/sample.csv") - Internally ``dask_cudf.read_csv`` uses ``cudf.read_csv`` and supports - many of the same keyword arguments with the same performance guarantees. - See the docstring for ``cudf.read_csv()`` for more information on available + Internally ``read_csv`` uses :func:`cudf:cudf.read_csv` and + supports many of the same keyword arguments with the same + performance guarantees. See the docstring for + :func:`cudf:cudf.read_csv` for more information on available keyword arguments. Parameters ---------- path : str, path object, or file-like object - Either a path to a file (a str, pathlib.Path, or - py._path.local.LocalPath), URL (including http, ftp, and S3 locations), - or any object with a read() method (such as builtin open() file - handler function or StringIO). + Either a path to a file (a str, :py:class:`pathlib.Path`, or + py._path.local.LocalPath), URL (including http, ftp, and S3 + locations), or any object with a read() method (such as + builtin :py:func:`open` file handler function or + :py:class:`~io.StringIO`). blocksize : int or str, default "256 MiB" - The target task partition size. If `None`, a single block + The target task partition size. If ``None``, a single block is used for each file. **kwargs : dict - Passthrough key-word arguments that are sent to ``cudf.read_csv``. + Passthrough key-word arguments that are sent to + :func:`cudf:cudf.read_csv`. 
Examples -------- @@ -61,6 +65,7 @@ def read_csv(path, blocksize="default", **kwargs): 0 1 hi 1 2 hello 2 3 ai + """ # Handle `chunksize` deprecation diff --git a/python/dask_cudf/dask_cudf/io/json.py b/python/dask_cudf/dask_cudf/io/json.py index bb3d0f3c601..2a6ad603414 100644 --- a/python/dask_cudf/dask_cudf/io/json.py +++ b/python/dask_cudf/dask_cudf/io/json.py @@ -10,30 +10,33 @@ def read_json(url_path, engine="auto", **kwargs): - """Create a dask_cudf DataFrame collection from JSON data + """Read JSON data into a :class:`.DataFrame`. - This function wraps ``dask.dataframe.read_json``, and passes + This function wraps :func:`dask.dataframe.read_json`, and passes ``engine=partial(cudf.read_json, engine="auto")`` by default. Parameters ---------- - url_path: str, list of str + url_path : str, list of str Location to read from. If a string, can include a glob character to find a set of file names. Supports protocol specifications such as ``"s3://"``. engine : str or Callable, default "auto" - If str, this value will be used as the ``engine`` argument when - ``cudf.read_json`` is used to create each partition. If Callable, - this value will be used as the underlying function used to create - each partition from JSON data. The default value is "auto", so - that ``engine=partial(cudf.read_json, engine="auto")`` will be - passed to ``dask.dataframe.read_json`` by default. + + If str, this value will be used as the ``engine`` argument + when :func:`cudf.read_json` is used to create each partition. + If a :obj:`~typing.Callable`, this value will be used as the + underlying function used to create each partition from JSON + data. The default value is "auto", so that + ``engine=partial(cudf.read_json, engine="auto")`` will be + passed to :func:`dask.dataframe.read_json` by default. + **kwargs : - Key-word arguments to pass through to ``dask.dataframe.read_json``. + Key-word arguments to pass through to :func:`dask.dataframe.read_json`. 
Returns ------- - dask_cudf.DataFrame + :class:`.DataFrame` Examples -------- @@ -53,7 +56,8 @@ def read_json(url_path, engine="auto", **kwargs): See Also -------- - dask.dataframe.io.json.read_json + dask.dataframe.read_json + """ # TODO: Add optimized code path to leverage the diff --git a/python/dask_cudf/dask_cudf/io/orc.py b/python/dask_cudf/dask_cudf/io/orc.py index e731057ed90..49fea0d7602 100644 --- a/python/dask_cudf/dask_cudf/io/orc.py +++ b/python/dask_cudf/dask_cudf/io/orc.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from io import BufferedWriter, IOBase @@ -25,37 +25,45 @@ def _read_orc_stripe(fs, path, stripe, columns, kwargs=None): def read_orc(path, columns=None, filters=None, storage_options=None, **kwargs): - """Read cudf dataframe from ORC file(s). + """Read ORC files into a :class:`.DataFrame`. Note that this function is mostly borrowed from upstream Dask. Parameters ---------- - path: str or list(str) + path : str or list[str] Location of file(s), which can be a full URL with protocol specifier, and may include glob character if a single string. - columns: None or list(str) + columns : None or list[str] Columns to load. If None, loads all. filters : None or list of tuple or list of lists of tuples - If not None, specifies a filter predicate used to filter out row groups - using statistics stored for each row group as Parquet metadata. Row - groups that do not match the given filter predicate are not read. The - predicate is expressed in disjunctive normal form (DNF) like - `[[('x', '=', 0), ...], ...]`. DNF allows arbitrary boolean logical - combinations of single column predicates. The innermost tuples each - describe a single column predicate. The list of inner predicates is - interpreted as a conjunction (AND), forming a more selective and - multiple column predicate. Finally, the outermost list combines - these filters as a disjunction (OR). 
Predicates may also be passed - as a list of tuples. This form is interpreted as a single conjunction. - To express OR in predicates, one must use the (preferred) notation of - list of lists of tuples. - storage_options: None or dict + If not None, specifies a filter predicate used to filter out + row groups using statistics stored for each row group as + Parquet metadata. Row groups that do not match the given + filter predicate are not read. The predicate is expressed in + `disjunctive normal form (DNF) + `__ + like ``[[('x', '=', 0), ...], ...]``. DNF allows arbitrary + boolean logical combinations of single column predicates. The + innermost tuples each describe a single column predicate. The + list of inner predicates is interpreted as a conjunction + (AND), forming a more selective and multiple column predicate. + Finally, the outermost list combines these filters as a + disjunction (OR). Predicates may also be passed as a list of + tuples. This form is interpreted as a single conjunction. To + express OR in predicates, one must use the (preferred) + notation of list of lists of tuples. + storage_options : None or dict Further parameters to pass to the bytes backend. + See Also + -------- + dask.dataframe.read_orc + Returns ------- - cudf.DataFrame + dask_cudf.DataFrame + """ storage_options = storage_options or {} @@ -133,22 +141,25 @@ def to_orc( compute=True, **kwargs, ): - """Write a dask_cudf dataframe to ORC file(s) (one file per partition). + """ + Write a :class:`.DataFrame` to ORC file(s) (one file per partition). Parameters ---------- - df : dask_cudf.DataFrame - path: string or pathlib.Path + df : DataFrame + path : str or pathlib.Path Destination directory for data. Prepend with protocol like ``s3://`` or ``hdfs://`` for remote data. write_index : boolean, optional Whether or not to write the index. Defaults to True. - storage_options: None or dict + storage_options : None or dict Further parameters to pass to the bytes backend. 
compression : string or dict, optional compute : bool, optional - If True (default) then the result is computed immediately. If False - then a ``dask.delayed`` object is returned for future computation. + If True (default) then the result is computed immediately. If + False then a :class:`~dask.delayed.Delayed` object is returned + for future computation. + """ from dask import compute as dask_compute, delayed diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 452f2f8914a..b03ac256b05 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -438,13 +438,14 @@ def set_object_dtypes_from_pa_schema(df, schema): def read_parquet(path, columns=None, **kwargs): - """Read parquet files into a Dask DataFrame + """ + Read parquet files into a :class:`.DataFrame`. - Calls ``dask.dataframe.read_parquet`` with ``engine=CudfEngine`` - to coordinate the execution of ``cudf.read_parquet``, and to - ultimately create a ``dask_cudf.DataFrame`` collection. + Calls :func:`dask.dataframe.read_parquet` with ``engine=CudfEngine`` + to coordinate the execution of :func:`cudf.read_parquet`, and to + ultimately create a :class:`.DataFrame` collection. - See the ``dask.dataframe.read_parquet`` documentation for + See the :func:`dask.dataframe.read_parquet` documentation for all available options. Examples @@ -469,6 +470,7 @@ def read_parquet(path, columns=None, **kwargs): See Also -------- cudf.read_parquet + dask.dataframe.read_parquet """ if isinstance(columns, str): columns = [columns]