From f3af69925e5321563dea82d8d2ae463faa45ba44 Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Fri, 23 Sep 2022 11:38:52 -0400 Subject: [PATCH 001/202] DOC --- CHANGELOG.md | 4 ++++ ci/checks/style.sh | 2 +- ci/gpu/build.sh | 2 +- ci/gpu/java.sh | 2 +- conda/environments/cudf_dev_cuda11.5.yml | 4 ++-- cpp/CMakeLists.txt | 2 +- cpp/doxygen/Doxyfile | 4 ++-- cpp/examples/basic/CMakeLists.txt | 2 +- cpp/libcudf_kafka/CMakeLists.txt | 2 +- docs/cudf/source/conf.py | 4 ++-- fetch_rapids.cmake | 2 +- java/src/main/native/CMakeLists.txt | 2 +- python/cudf/CMakeLists.txt | 2 +- 13 files changed, 19 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89049dff3b6..092b62d6c63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# cuDF 22.12.00 (Date TBD) + +Please see https://github.com/rapidsai/cudf/releases/tag/v22.12.00a for the latest changes to this development branch. + # cuDF 22.10.00 (Date TBD) Please see https://github.com/rapidsai/cudf/releases/tag/v22.10.00a for the latest changes to this development branch. diff --git a/ci/checks/style.sh b/ci/checks/style.sh index de3f8c01d83..29f5474fd87 100755 --- a/ci/checks/style.sh +++ b/ci/checks/style.sh @@ -14,7 +14,7 @@ LANG=C.UTF-8 . 
/opt/conda/etc/profile.d/conda.sh conda activate rapids -FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/cmake-format-rapids-cmake.json +FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/cmake-format-rapids-cmake.json export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE}) wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL} diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 118bdb263af..f3c302173c8 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -35,7 +35,7 @@ unset GIT_DESCRIBE_TAG export INSTALL_DASK_MAIN=1 # ucx-py version -export UCX_PY_VERSION='0.28.*' +export UCX_PY_VERSION='0.29.*' ################################################################################ # TRAP - Setup trap for removing jitify cache diff --git a/ci/gpu/java.sh b/ci/gpu/java.sh index b110303662b..e1d3bab2bc5 100755 --- a/ci/gpu/java.sh +++ b/ci/gpu/java.sh @@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags` export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` # ucx-py version -export UCX_PY_VERSION='0.28.*' +export UCX_PY_VERSION='0.29.*' ################################################################################ # TRAP - Setup trap for removing jitify cache diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index 973ca731853..c3e41927a05 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -13,7 +13,7 @@ dependencies: - clang=11.1.0 - clang-tools=11.1.0 - cupy>=9.5.0,<12.0.0a0 - - rmm=22.10.* + - rmm=22.12.* - cmake>=3.20.1,!=3.23.0 - cmake_setuptools>=0.1.3 - scikit-build>=0.13.1 @@ -62,7 +62,7 @@ dependencies: - sphinx-autobuild - myst-nb - scipy - - dask-cuda=22.10.* + - dask-cuda=22.12.* - mimesis<4.1 - packaging - protobuf>=3.20.1,<3.21.0a0 diff --git a/cpp/CMakeLists.txt 
b/cpp/CMakeLists.txt index 7efa186aede..6b743662e0e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -25,7 +25,7 @@ rapids_cuda_init_architectures(CUDF) project( CUDF - VERSION 22.10.00 + VERSION 22.12.00 LANGUAGES C CXX CUDA ) if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5) diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 871632b053d..4684e180f00 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "libcudf" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 22.10.00 +PROJECT_NUMBER = 22.12.00 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -2162,7 +2162,7 @@ SKIP_FUNCTION_MACROS = YES # the path). If a tag file is not located in the directory in which doxygen is # run, you must also specify the path to the tagfile here. -TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/22.10 +TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/22.12 # When a file name is specified after GENERATE_TAGFILE, doxygen will create a # tag file that is based on the input files it reads. 
See section "Linking to diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt index f4bc205d4ba..b182cb08774 100644 --- a/cpp/examples/basic/CMakeLists.txt +++ b/cpp/examples/basic/CMakeLists.txt @@ -16,7 +16,7 @@ file( ) include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) -set(CUDF_TAG branch-22.10) +set(CUDF_TAG branch-22.12) CPMFindPackage( NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf GIT_TAG ${CUDF_TAG} diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index 76a012e7c6e..71341277109 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -22,7 +22,7 @@ include(rapids-find) project( CUDA_KAFKA - VERSION 22.10.00 + VERSION 22.12.00 LANGUAGES CXX ) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index db471316830..ec5b1bd2aac 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -85,9 +85,9 @@ # built documents. # # The short X.Y version. -version = '22.10' +version = '22.12' # The full version, including alpha/beta/rc tags. -release = '22.10.00' +release = '22.12.00' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index 9e2917ffc07..cc2e201fdc3 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. 
# ============================================================================= if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake ) endif() diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 9410f8eacf3..26923927378 100755 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -28,7 +28,7 @@ rapids_cuda_init_architectures(CUDF_JNI) project( CUDF_JNI - VERSION 22.10.00 + VERSION 22.12.00 LANGUAGES C CXX CUDA ) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 72e1779401f..6dc0f1800e0 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) -set(cudf_version 22.10.00) +set(cudf_version 22.12.00) include(../../fetch_rapids.cmake) From f72c4ce715080525fbf79d4298b18af862822bd7 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 28 Sep 2022 10:04:22 -0400 Subject: [PATCH 002/202] add change from 11771 --- python/strings_udf/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/strings_udf/CMakeLists.txt b/python/strings_udf/CMakeLists.txt index 53d31575363..41d0d0090cb 100644 --- a/python/strings_udf/CMakeLists.txt +++ b/python/strings_udf/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR) -set(strings_udf_version 22.10.00) +set(strings_udf_version 22.12.00) include(../../fetch_rapids.cmake) From ec4cdd8c010736c09135edf4e35be345c1c1ada1 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 29 Sep 2022 10:16:07 -0400 Subject: [PATCH 003/202] Fix compile warning from CUDF_FUNC_RANGE in a member function (#11798) Compile warning 
was introduced in #11652 in `bgzip_data_chunk_source.cu`. The warning can be seen here https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/prb/job/cudf-cpu-cuda-build/CUDA=11.5/12417/consoleFull (search for `177-D`) ``` /cudf/cpp/src/io/text/bgzip_data_chunk_source.cu(362): warning #177-D: variable "nvtx3_range__" was declared but never referenced ``` The `nvtx3_range__` is part of the `CUDF_FUNC_RANGE()` macro. The warning is incorrect and likely a compiler bug. The workaround in this PR is to add `[[maybe_unused]]` to the variable declaration. I was not able to create a small reproducer for compile bug filing. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Tobias Ribizel (https://github.com/upsj) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/11798 --- cpp/include/cudf/detail/nvtx/nvtx3.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/nvtx/nvtx3.hpp b/cpp/include/cudf/detail/nvtx/nvtx3.hpp index fb90ea668f5..c77714181ef 100644 --- a/cpp/include/cudf/detail/nvtx/nvtx3.hpp +++ b/cpp/include/cudf/detail/nvtx/nvtx3.hpp @@ -1907,7 +1907,7 @@ inline void mark(event_attributes const& attr) noexcept #define NVTX3_FUNC_RANGE_IN(D) \ static ::nvtx3::registered_message const nvtx3_func_name__{__func__}; \ static ::nvtx3::event_attributes const nvtx3_func_attr__{nvtx3_func_name__}; \ - ::nvtx3::domain_thread_range const nvtx3_range__{nvtx3_func_attr__}; + [[maybe_unused]] ::nvtx3::domain_thread_range const nvtx3_range__{nvtx3_func_attr__}; /** * @brief Convenience macro for generating a range in the global domain from the From 0b28d34658ba51f9517f6e7f240ea7aa3e2b0ed5 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 3 Oct 2022 16:32:00 -0700 Subject: [PATCH 004/202] Remove `cudf_io` namespace alias (#11827) Some cuIO tests and benchmarks declare `cudf_io` alias for `cudf::io`. 
This saves a single letter so it's considered to be of very low utility. Removing all occurrences of this alias. Also removed a couple of builder calls where the option was being set to default value. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/11827 --- cpp/benchmarks/io/csv/csv_writer.cpp | 15 +- cpp/benchmarks/io/cuio_common.cpp | 16 +- cpp/tests/io/csv_test.cpp | 450 +++++------ cpp/tests/io/json_test.cpp | 172 ++-- cpp/tests/io/orc_test.cpp | 536 ++++++------- cpp/tests/io/parquet_test.cpp | 1089 +++++++++++++------------- 6 files changed, 1133 insertions(+), 1145 deletions(-) diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp b/cpp/benchmarks/io/csv/csv_writer.cpp index d02305cf478..5d61d81bb71 100644 --- a/cpp/benchmarks/io/csv/csv_writer.cpp +++ b/cpp/benchmarks/io/csv/csv_writer.cpp @@ -26,8 +26,6 @@ constexpr size_t data_size = 256 << 20; constexpr cudf::size_type num_cols = 64; -namespace cudf_io = cudf::io; - class CsvWrite : public cudf::benchmark { }; @@ -44,9 +42,9 @@ void BM_csv_write_varying_inout(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - cudf_io::csv_writer_options options = - cudf_io::csv_writer_options::builder(source_sink.make_sink_info(), view).include_header(true); - cudf_io::write_csv(options); + cudf::io::csv_writer_options options = + cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view); + cudf::io::write_csv(options); } state.SetBytesProcessed(data_size * state.iterations()); @@ -74,12 +72,11 @@ void BM_csv_write_varying_options(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, 
stream = 0 - cudf_io::csv_writer_options options = - cudf_io::csv_writer_options::builder(source_sink.make_sink_info(), view) - .include_header(true) + cudf::io::csv_writer_options options = + cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view) .na_rep(na_per) .rows_per_chunk(rows_per_chunk); - cudf_io::write_csv(options); + cudf::io::write_csv(options); } state.SetBytesProcessed(data_size * state.iterations()); diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index da64c1bbf3c..1a9c7153644 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -23,8 +23,6 @@ #include -namespace cudf_io = cudf::io; - temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"}; std::string random_file_in_dir(std::string const& dir_path) @@ -43,21 +41,21 @@ cuio_source_sink_pair::cuio_source_sink_pair(io_type type) { } -cudf_io::source_info cuio_source_sink_pair::make_source_info() +cudf::io::source_info cuio_source_sink_pair::make_source_info() { switch (type) { - case io_type::FILEPATH: return cudf_io::source_info(file_name); - case io_type::HOST_BUFFER: return cudf_io::source_info(buffer.data(), buffer.size()); + case io_type::FILEPATH: return cudf::io::source_info(file_name); + case io_type::HOST_BUFFER: return cudf::io::source_info(buffer.data(), buffer.size()); default: CUDF_FAIL("invalid input type"); } } -cudf_io::sink_info cuio_source_sink_pair::make_sink_info() +cudf::io::sink_info cuio_source_sink_pair::make_sink_info() { switch (type) { - case io_type::VOID: return cudf_io::sink_info(&void_sink); - case io_type::FILEPATH: return cudf_io::sink_info(file_name); - case io_type::HOST_BUFFER: return cudf_io::sink_info(&buffer); + case io_type::VOID: return cudf::io::sink_info(&void_sink); + case io_type::FILEPATH: return cudf::io::sink_info(file_name); + case io_type::HOST_BUFFER: return cudf::io::sink_info(&buffer); default: CUDF_FAIL("invalid output type"); } } diff --git 
a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 4f0bdbd9b31..f532836ef95 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -49,8 +49,6 @@ #include #include -namespace cudf_io = cudf::io; - using cudf::data_type; using cudf::type_id; using cudf::type_to_id; @@ -113,12 +111,12 @@ struct CsvFixedPointReaderTest : public CsvReaderTest { return acc.empty() ? rhs : (acc + "\n" + rhs); }); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .dtypes({data_type{type_to_id(), scale}}) .header(-1); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto result_view = result.tbl->view(); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*input_column, result_view.column(0)); @@ -283,7 +281,7 @@ void write_csv_helper(std::string const& filename, std::vector const& names = {}) { // csv_writer_options only keeps a pointer to metadata (non-owning) - cudf_io::table_metadata metadata{}; + cudf::io::table_metadata metadata{}; if (not names.empty()) { metadata.column_names = names; @@ -297,14 +295,14 @@ void write_csv_helper(std::string const& filename, }); } - cudf_io::csv_writer_options writer_options = - cudf_io::csv_writer_options::builder(cudf_io::sink_info(filename), table) + cudf::io::csv_writer_options writer_options = + cudf::io::csv_writer_options::builder(cudf::io::sink_info(filename), table) .include_header(include_header) .rows_per_chunk( 1) // Note: this gets adjusted to multiple of 8 (per legacy code logic and requirements) .metadata(&metadata); - cudf_io::write_csv(writer_options); + cudf::io::write_csv(writer_options); } template @@ -352,9 +350,9 @@ TYPED_TEST(CsvReaderNumericTypeTest, SingleColumn) std::copy(sequence, sequence + num_rows, output_iterator); } - 
cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); expect_column_data_equal(std::vector(sequence, sequence + num_rows), view.column(0)); @@ -407,11 +405,11 @@ TYPED_TEST(CsvFixedPointWriterTest, SingleColumnNegativeScale) auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnNegativeScale.csv"; - cudf_io::csv_writer_options writer_options = - cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table) + cudf::io::csv_writer_options writer_options = + cudf::io::csv_writer_options::builder(cudf::io::sink_info(filepath), input_table) .include_header(false); - cudf_io::write_csv(writer_options); + cudf::io::write_csv(writer_options); std::vector result_strings; result_strings.reserve(reference_strings.size()); @@ -454,11 +452,11 @@ TYPED_TEST(CsvFixedPointWriterTest, SingleColumnPositiveScale) auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnPositiveScale.csv"; - cudf_io::csv_writer_options writer_options = - cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table) + cudf::io::csv_writer_options writer_options = + cudf::io::csv_writer_options::builder(cudf::io::sink_info(filepath), input_table) .include_header(false); - cudf_io::write_csv(writer_options); + cudf::io::write_csv(writer_options); std::vector result_strings; result_strings.reserve(reference_strings.size()); @@ -500,8 +498,8 @@ TEST_F(CsvReaderTest, MultiColumn) outfile << line.str(); } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .header(-1) 
.dtypes({dtype(), dtype(), @@ -513,7 +511,7 @@ TEST_F(CsvReaderTest, MultiColumn) dtype(), dtype(), dtype()}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); expect_column_data_equal(int8_values, view.column(0)); @@ -548,14 +546,14 @@ TEST_F(CsvReaderTest, RepeatColumn) } // repeats column in indexes and names, misses 1 column. - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype(), dtype(), dtype(), dtype()}) .names({"A", "B", "C", "D"}) .use_cols_indexes({1, 0, 0}) .use_cols_names({"D", "B", "B"}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(3, view.num_columns()); @@ -573,14 +571,14 @@ TEST_F(CsvReaderTest, Booleans) "true\nYes,5,foo,false\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A", "B", "C", "D"}) .dtypes({dtype(), dtype(), dtype(), dtype()}) .true_values({"yes", "Yes", "YES", "foo", "FOO"}) .false_values({"no", "No", "NO", "Bar", "bar"}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); // Booleans are the same (integer) data type, but valued at 0 or 1 const auto view = result.tbl->view(); @@ -605,13 +603,13 @@ TEST_F(CsvReaderTest, Dates) outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) 
.dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -641,13 +639,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampSeconds) outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_SECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -677,13 +675,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampMilliSeconds) outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -713,13 +711,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampMicroSeconds) outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_MICROSECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = 
cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -749,13 +747,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampNanoSeconds) outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_NANOSECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -789,12 +787,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampSeconds) } } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_SECONDS}}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -817,12 +815,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampMilliSeconds) } } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -845,12 +843,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampMicroSeconds) } } - cudf_io::csv_reader_options in_opts = - 
cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_MICROSECONDS}}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -873,12 +871,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampNanoSeconds) } } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_NANOSECONDS}}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -897,13 +895,13 @@ TEST_F(CsvReaderTest, FloatingPoint) "98007199999998;"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .lineterminator(';') .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -930,12 +928,12 @@ TEST_F(CsvReaderTest, Strings) outfile << "30,stu \"\"vwx\"\" yz" << '\n'; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}) - .quoting(cudf_io::quote_style::NONE); - auto result = cudf_io::read_csv(in_opts); + 
.quoting(cudf::io::quote_style::NONE); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(2, view.num_columns()); @@ -960,12 +958,12 @@ TEST_F(CsvReaderTest, StringsQuotes) outfile << "30,stu `vwx` yz" << '\n'; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}) .quotechar('`'); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(2, view.num_columns()); @@ -989,13 +987,13 @@ TEST_F(CsvReaderTest, StringsQuotesIgnored) outfile << "30,stu \"vwx\" yz" << '\n'; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}) - .quoting(cudf_io::quote_style::NONE) + .quoting(cudf::io::quote_style::NONE) .doublequote(false); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(2, view.num_columns()); @@ -1015,14 +1013,14 @@ TEST_F(CsvReaderTest, SkiprowsNrows) outfile << "1\n2\n3\n4\n5\n6\n7\n8\n9\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(1) .skiprows(2) .nrows(2); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1039,14 +1037,14 @@ TEST_F(CsvReaderTest, ByteRange) outfile << 
"1000\n2000\n3000\n4000\n5000\n6000\n7000\n8000\n9000\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1) .byte_range_offset(11) .byte_range_size(15); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1058,13 +1056,13 @@ TEST_F(CsvReaderTest, ByteRange) TEST_F(CsvReaderTest, ByteRangeStrings) { std::string input = "\"a\"\n\"b\"\n\"c\""; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{input.c_str(), input.size()}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{input.c_str(), input.size()}) .names({"A"}) .dtypes({dtype()}) .header(-1) .byte_range_offset(4); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1081,13 +1079,13 @@ TEST_F(CsvReaderTest, BlanksAndComments) outfile << "1\n#blank\n3\n4\n5\n#blank\n\n\n8\n9\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1) .comment('#'); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1104,9 +1102,9 @@ TEST_F(CsvReaderTest, EmptyFile) outfile << ""; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options 
in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(0, view.num_columns()); @@ -1120,9 +1118,9 @@ TEST_F(CsvReaderTest, NoDataFile) outfile << "\n\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(0, view.num_columns()); @@ -1136,9 +1134,9 @@ TEST_F(CsvReaderTest, HeaderOnlyFile) outfile << "\"a\",\"b\",\"c\"\n\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(0, view.num_rows()); @@ -1156,11 +1154,11 @@ TEST_F(CsvReaderTest, ArrowFileSource) std::shared_ptr infile; ASSERT_TRUE(arrow::io::ReadableFile::Open(filepath).Value(&infile).ok()); - auto arrow_source = cudf_io::arrow_io_source{infile}; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{&arrow_source}) + auto arrow_source = cudf::io::arrow_io_source{infile}; + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{&arrow_source}) .dtypes({dtype()}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1177,12 +1175,12 @@ TEST_F(CsvReaderTest, InvalidFloatingPoint) outfile << "1.2e1+\n3.4e2-\n5.6e3e\n7.8e3A\n9.0Be1\n1C.2"; } - cudf_io::csv_reader_options 
in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1199,10 +1197,10 @@ TEST_F(CsvReaderTest, InvalidFloatingPoint) TEST_F(CsvReaderTest, StringInference) { std::string buffer = "\"-1\"\n"; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .header(-1); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), type_id::STRING); @@ -1211,11 +1209,11 @@ TEST_F(CsvReaderTest, StringInference) TEST_F(CsvReaderTest, TypeInferenceThousands) { std::string buffer = "1`400,123,1`234.56\n123`456,123456,12.34"; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .header(-1) .thousands('`'); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto result_view = result.tbl->view(); EXPECT_EQ(result_view.num_columns(), 3); @@ -1238,12 +1236,12 @@ TEST_F(CsvReaderTest, TypeInferenceWithDecimal) // col#1 => STRING (contains digits and period character, which is NOT the decimal point here) // col#2 => FLOAT64 (column contains digits and decimal point (i.e., ';')) std::string buffer = 
"1`400,1.23,1`234;56\n123`456,123.456,12;34"; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .header(-1) .thousands('`') .decimal(';'); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto result_view = result.tbl->view(); EXPECT_EQ(result_view.num_columns(), 3); @@ -1263,17 +1261,17 @@ TEST_F(CsvReaderTest, SkipRowsXorSkipFooter) { std::string buffer = "1,2,3"; - cudf_io::csv_reader_options skiprows_options = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options skiprows_options = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .header(-1) .skiprows(1); - EXPECT_NO_THROW(cudf_io::read_csv(skiprows_options)); + EXPECT_NO_THROW(cudf::io::read_csv(skiprows_options)); - cudf_io::csv_reader_options skipfooter_options = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options skipfooter_options = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .header(-1) .skipfooter(1); - EXPECT_NO_THROW(cudf_io::read_csv(skipfooter_options)); + EXPECT_NO_THROW(cudf::io::read_csv(skipfooter_options)); } TEST_F(CsvReaderTest, nullHandling) @@ -1286,13 +1284,13 @@ TEST_F(CsvReaderTest, nullHandling) // Test disabling na_filter { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .na_filter(false) .dtypes({dtype()}) .header(-1) .skip_blank_lines(false); - const auto result = cudf_io::read_csv(in_opts); + const auto result 
= cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); auto expect = cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"}); @@ -1301,12 +1299,12 @@ TEST_F(CsvReaderTest, nullHandling) // Test enabling na_filter { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype()}) .header(-1) .skip_blank_lines(false); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); auto expect = cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"}, @@ -1317,13 +1315,13 @@ TEST_F(CsvReaderTest, nullHandling) // Setting na_values with default values { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .na_values({"Null"}) .dtypes({dtype()}) .header(-1) .skip_blank_lines(false); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); auto expect = cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"}, @@ -1334,14 +1332,14 @@ TEST_F(CsvReaderTest, nullHandling) // Setting na_values without default values { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .keep_default_na(false) .na_values({"Null"}) .dtypes({dtype()}) .header(-1) .skip_blank_lines(false); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); auto 
expect = cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"}, @@ -1356,14 +1354,14 @@ TEST_F(CsvReaderTest, FailCases) std::string buffer = "1,2,3"; { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_offset(4) .skiprows(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_offset(4) .skipfooter(1), cudf::logic_error); @@ -1371,77 +1369,77 @@ TEST_F(CsvReaderTest, FailCases) { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_offset(4) .nrows(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_size(4) .skiprows(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_size(4) .skipfooter(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_size(4) .nrows(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) 
.skiprows(1) .byte_range_offset(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .skipfooter(1) .byte_range_offset(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .nrows(1) .byte_range_offset(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .skiprows(1) .byte_range_size(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .skipfooter(1) .byte_range_size(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .nrows(1) .byte_range_size(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .nrows(1) .skipfooter(1), cudf::logic_error); @@ -1449,14 +1447,14 @@ TEST_F(CsvReaderTest, FailCases) } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .skipfooter(1) .nrows(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), 
buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .na_filter(false) .na_values({"Null"}), cudf::logic_error); @@ -1472,13 +1470,13 @@ TEST_F(CsvReaderTest, HexTest) } // specify hex columns by name { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1) .parse_hex({"A"}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); expect_column_data_equal( std::vector{0, -4096, 16702650, 11259375, 11259375, 2501034507}, @@ -1487,13 +1485,13 @@ TEST_F(CsvReaderTest, HexTest) // specify hex columns by index { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1) .parse_hex(std::vector{0}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); expect_column_data_equal( std::vector{0, -4096, 16702650, 11259375, 11259375, 2501034507}, @@ -1513,9 +1511,9 @@ TYPED_TEST(CsvReaderNumericTypeTest, SingleColumnWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result_table); @@ -1581,8 +1579,8 @@ TEST_F(CsvReaderTest, MultiColumnWithWriter) write_csv_helper(filepath, input_table, false); - 
cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .header(-1) .dtypes({dtype(), dtype(), @@ -1594,7 +1592,7 @@ TEST_F(CsvReaderTest, MultiColumnWithWriter) dtype(), dtype(), dtype()}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1629,13 +1627,13 @@ TEST_F(CsvReaderTest, DatesWithWriter) // TODO need to add a dayfirst flag? write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1654,9 +1652,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .names({"A"}) + .header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1675,9 +1675,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + 
cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .names({"A"}) + .header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1696,9 +1698,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .names({"A"}) + .header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1718,9 +1722,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .names({"A"}) + .header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1739,9 +1745,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .names({"A"}) + .header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1760,13 +1768,13 @@ TEST_F(CsvReaderTest, FloatingPointWithWriter) // TODO add lineterminator=";" write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - 
cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1); // in_opts.lineterminator = ';'; - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result_table); @@ -1786,12 +1794,12 @@ TEST_F(CsvReaderTest, StringsWithWriter) // TODO add quoting style flag? write_csv_helper(filepath, input_table, true, names); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}) - .quoting(cudf_io::quote_style::NONE); - auto result = cudf_io::read_csv(in_opts); + .quoting(cudf::io::quote_style::NONE); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input_table.column(0), result_table.column(0)); @@ -1811,12 +1819,12 @@ TEST_F(CsvReaderTest, StringsWithWriterSimple) // TODO add quoting style flag? 
write_csv_helper(filepath, input_table, true, names); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}) - .quoting(cudf_io::quote_style::NONE); - auto result = cudf_io::read_csv(in_opts); + .quoting(cudf::io::quote_style::NONE); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input_table.column(0), result_table.column(0)); @@ -1835,11 +1843,11 @@ TEST_F(CsvReaderTest, StringsEmbeddedDelimiter) write_csv_helper(filepath, input_table, true, names); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result.tbl->view()); } @@ -1858,15 +1866,15 @@ TEST_F(CsvReaderTest, HeaderEmbeddedDelimiter) write_csv_helper(filepath, input_table, true, names); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes({dtype(), dtype(), dtype(), dtype(), dtype()}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result.tbl->view()); } @@ -1877,9 +1885,9 @@ TEST_F(CsvReaderTest, EmptyFileWithWriter) cudf::table_view empty_table; write_csv_helper(filepath, empty_table, false); - cudf_io::csv_reader_options in_opts = - 
cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_csv(in_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty_table, result.tbl->view()); } @@ -1918,11 +1926,11 @@ TEST_F(CsvReaderTest, UserImplementedSource) << "\n"; } TestSource source{csv_data.str()}; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{&source}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{&source}) .dtypes({dtype(), dtype(), dtype()}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); auto const view = result.tbl->view(); expect_column_data_equal(int8_values, view.column(0)); @@ -1962,15 +1970,15 @@ TEST_F(CsvReaderTest, DurationsWithWriter) write_csv_helper(filepath, input_table, true, names); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes({data_type{type_id::DURATION_DAYS}, data_type{type_id::DURATION_SECONDS}, data_type{type_id::DURATION_MILLISECONDS}, data_type{type_id::DURATION_MICROSECONDS}, data_type{type_id::DURATION_NANOSECONDS}}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result_table); @@ -2038,9 +2046,9 @@ TEST_F(CsvReaderTest, ParseInRangeIntegers) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options 
in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); @@ -2117,9 +2125,9 @@ TEST_F(CsvReaderTest, ParseOutOfRangeIntegers) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); @@ -2148,9 +2156,9 @@ TEST_F(CsvReaderTest, ReadMaxNumericValue) std::copy(sequence, sequence + num_rows, output_iterator); } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); expect_column_data_equal(std::vector(sequence, sequence + num_rows), view.column(0)); @@ -2164,8 +2172,8 @@ TEST_F(CsvReaderTest, DefaultWriteChunkSize) auto input_column = column_wrapper(sequence, sequence + num_rows); auto input_table = cudf::table_view{std::vector{input_column}}; - cudf_io::csv_writer_options opts = - cudf_io::csv_writer_options::builder(cudf_io::sink_info{"unused.path"}, input_table); + cudf::io::csv_writer_options opts = + cudf::io::csv_writer_options::builder(cudf::io::sink_info{"unused.path"}, input_table); ASSERT_EQ(num_rows, opts.get_rows_per_chunk()); } } @@ -2174,12 +2182,12 @@ TEST_F(CsvReaderTest, DtypesMap) { std::string csv_in{"12,9\n34,8\n56,7"}; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{csv_in.c_str(), csv_in.size()}) + 
cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()}) .names({"A", "B"}) .dtypes({{"B", dtype()}, {"A", dtype()}}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); ASSERT_EQ(result_table.num_columns(), 2); @@ -2191,12 +2199,12 @@ TEST_F(CsvReaderTest, DtypesMap) TEST_F(CsvReaderTest, DtypesMapPartial) { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{nullptr, 0}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{nullptr, 0}) .names({"A", "B"}) .dtypes({{"A", dtype()}}); { - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); ASSERT_EQ(type_id::INT16, view.column(0).type().id()); @@ -2206,7 +2214,7 @@ TEST_F(CsvReaderTest, DtypesMapPartial) in_opts.set_dtypes({{"B", dtype()}}); { - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); ASSERT_EQ(type_id::STRING, view.column(0).type().id()); @@ -2216,12 +2224,12 @@ TEST_F(CsvReaderTest, DtypesMapPartial) TEST_F(CsvReaderTest, DtypesArrayInvalid) { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{nullptr, 0}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{nullptr, 0}) .names({"A", "B", "C"}) .dtypes(std::vector{dtype(), dtype()}); - EXPECT_THROW(cudf_io::read_csv(in_opts), cudf::logic_error); + EXPECT_THROW(cudf::io::read_csv(in_opts), cudf::logic_error); } TEST_F(CsvReaderTest, CsvDefaultOptionsWriteReadMatch) @@ -2234,16 +2242,16 @@ TEST_F(CsvReaderTest, CsvDefaultOptionsWriteReadMatch) cudf::table_view input_table(std::vector{int_column, str_column}); // write that dataframe to a csv using 
default options to some temporary file - cudf_io::csv_writer_options writer_options = - cudf_io::csv_writer_options::builder(cudf_io::sink_info{filepath}, input_table); - cudf_io::write_csv(writer_options); + cudf::io::csv_writer_options writer_options = + cudf::io::csv_writer_options::builder(cudf::io::sink_info{filepath}, input_table); + cudf::io::write_csv(writer_options); // read the temp csv file using default options - cudf_io::csv_reader_options read_options = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options read_options = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .dtypes(std::vector{dtype(), dtype()}); - cudf_io::table_with_metadata new_table_and_metadata = cudf_io::read_csv(read_options); + cudf::io::table_with_metadata new_table_and_metadata = cudf::io::read_csv(read_options); // verify that the tables are identical, or as identical as expected. const auto new_table_view = new_table_and_metadata.tbl->view(); diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 5a0db6e3c64..d7ab881861a 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -62,8 +62,6 @@ using column_wrapper = cudf::test::strings_column_wrapper, cudf::test::fixed_width_column_wrapper>::type; -namespace cudf_io = cudf::io; - cudf::test::TempDirTestEnvironment* const temp_env = static_cast( ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); @@ -239,12 +237,12 @@ TEST_P(JsonReaderParamTest, BasicJsonLines) {{{"0", "1"}, {"1", "1.1"}}, {{"0", "2"}, {"1", "2.2"}}, {{"0", "3"}, {"1", "3.3"}}}, "\n"); std::string data = (test_opt == json_test_t::json_lines_row_orient) ? 
row_orient : record_orient; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .dtypes(std::vector{dtype(), dtype()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 2); EXPECT_EQ(result.tbl->num_rows(), 3); @@ -286,13 +284,13 @@ TEST_P(JsonReaderParamTest, FloatingPoint) outfile << data; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::FLOAT32); @@ -318,13 +316,13 @@ TEST_P(JsonReaderParamTest, JsonLinesStrings) "\n"); std::string data = (test_opt == json_test_t::json_lines_row_orient) ? 
row_orient : record_orient; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .dtypes({{"2", dtype()}, {"0", dtype()}, {"1", dtype()}}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -386,8 +384,8 @@ TEST_P(JsonReaderParamTest, MultiColumn) outfile << line.str(); } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype(), dtype(), dtype(), @@ -396,7 +394,7 @@ TEST_P(JsonReaderParamTest, MultiColumn) dtype()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); @@ -443,12 +441,12 @@ TEST_P(JsonReaderParamTest, Booleans) outfile << data; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); // Booleans are the same (integer) data type, but valued at 0 or 1 const auto view = result.tbl->view(); @@ -488,13 +486,13 @@ 
TEST_P(JsonReaderParamTest, Dates) outfile << data; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .lines(true) .dayfirst(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); EXPECT_EQ(result.tbl->num_columns(), 1); @@ -544,12 +542,12 @@ TEST_P(JsonReaderParamTest, Durations) outfile << data; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({data_type{type_id::DURATION_NANOSECONDS}}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); EXPECT_EQ(result.tbl->num_columns(), 1); @@ -583,12 +581,12 @@ TEST_P(JsonReaderParamTest, JsonLinesDtypeInference) "\n"); std::string data = (test_opt == json_test_t::json_lines_row_orient) ? 
row_orient : record_orient; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -623,12 +621,12 @@ TEST_P(JsonReaderParamTest, JsonLinesFileInput) outfile << data; outfile.close(); - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{fname}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{fname}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 2); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -652,13 +650,13 @@ TEST_F(JsonReaderTest, JsonLinesByteRange) outfile << "[1000]\n[2000]\n[3000]\n[4000]\n[5000]\n[6000]\n[7000]\n[8000]\n[9000]\n"; outfile.close(); - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{fname}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{fname}) .lines(true) .byte_range_offset(11) .byte_range_size(20); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->num_rows(), 3); @@ -681,12 +679,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjects) outfile << " {\"co\\\"l1\" : 1, \"col2\" : 2.0} \n"; outfile.close(); - 
cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{fname}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{fname}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 2); EXPECT_EQ(result.tbl->num_rows(), 1); @@ -707,12 +705,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsStrings) auto const test_opt = GetParam(); bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); auto test_json_objects = [test_experimental](std::string const& data) { - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -751,12 +749,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsMissingData) std::string const data = "{ \"col2\":1.1, \"col3\":\"aaa\"}\n" "{\"col1\":200, \"col3\":\"bbb\"}\n"; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); 
EXPECT_EQ(result.tbl->num_rows(), 2); @@ -790,12 +788,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsOutOfOrder) "{\"col1\":100, \"col2\":1.1, \"col3\":\"aaa\"}\n" "{\"col3\":\"bbb\", \"col1\":200, \"col2\":2.2}\n"; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -825,9 +823,9 @@ TEST_F(JsonReaderTest, EmptyFile) outfile << ""; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}).lines(true); - auto result = cudf_io::read_json(in_options); + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true); + auto result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); EXPECT_EQ(0, view.num_columns()); @@ -842,9 +840,9 @@ TEST_F(JsonReaderTest, NoDataFile) outfile << "{}\n"; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}).lines(true); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); EXPECT_EQ(0, view.num_columns()); @@ -862,13 +860,13 @@ TEST_F(JsonReaderTest, ArrowFileSource) std::shared_ptr infile; ASSERT_TRUE(arrow::io::ReadableFile::Open(fname).Value(&infile).ok()); - auto arrow_source = 
cudf_io::arrow_io_source{infile}; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{&arrow_source}) + auto arrow_source = cudf::io::arrow_io_source{infile}; + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{&arrow_source}) .dtypes({dtype()}) .lines(true); ; - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT8); @@ -899,12 +897,12 @@ TEST_P(JsonReaderParamTest, InvalidFloatingPoint) outfile << data; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::FLOAT32); @@ -925,11 +923,11 @@ TEST_P(JsonReaderParamTest, StringInference) std::string record_orient = to_records_orient({{{"0", R"("-1")"}}}, "\n"); std::string data = (test_opt == json_test_t::json_lines_row_orient) ? 
row_orient : record_orient; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.c_str(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.c_str(), data.size()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRING); @@ -1009,12 +1007,12 @@ TEST_P(JsonReaderParamTest, ParseInRangeIntegers) std::ofstream outfile(filepath, std::ofstream::out); outfile << line.str(); } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); @@ -1114,12 +1112,12 @@ TEST_P(JsonReaderParamTest, ParseOutOfRangeIntegers) std::ofstream outfile(filepath, std::ofstream::out); outfile << line.str(); } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); @@ -1155,12 +1153,12 @@ TEST_P(JsonReaderParamTest, JsonLinesMultipleFileInputs) outfile2 << data[1]; outfile2.close(); - cudf_io::json_reader_options 
in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{{file1, file2}}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{{file1, file2}}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 2); EXPECT_EQ(result.tbl->num_rows(), 4); @@ -1183,23 +1181,23 @@ TEST_F(JsonReaderTest, BadDtypeParams) { std::string buffer = "[1,2,3,4]"; - cudf_io::json_reader_options options_vec = - cudf_io::json_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::json_reader_options options_vec = + cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .lines(true) .dtypes({dtype()}); // should throw because there are four columns and only one dtype - EXPECT_THROW(cudf_io::read_json(options_vec), cudf::logic_error); + EXPECT_THROW(cudf::io::read_json(options_vec), cudf::logic_error); - cudf_io::json_reader_options options_map = - cudf_io::json_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::json_reader_options options_map = + cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .lines(true) .dtypes(std::map{{"0", dtype()}, {"1", dtype()}, {"2", dtype()}, {"wrong_name", dtype()}}); // should throw because one of the columns is not in the dtype map - EXPECT_THROW(cudf_io::read_json(options_map), cudf::logic_error); + EXPECT_THROW(cudf::io::read_json(options_map), cudf::logic_error); } TEST_F(JsonReaderTest, JsonExperimentalBasic) @@ -1209,9 +1207,9 @@ TEST_F(JsonReaderTest, JsonExperimentalBasic) outfile << R"([{"a":"11", "b":"1.1"},{"a":"22", "b":"2.2"}])"; outfile.close(); - cudf_io::json_reader_options options = - 
cudf_io::json_reader_options::builder(cudf_io::source_info{fname}).experimental(true); - auto result = cudf_io::read_json(options); + cudf::io::json_reader_options options = + cudf::io::json_reader_options::builder(cudf::io::source_info{fname}).experimental(true); + auto result = cudf::io::read_json(options); EXPECT_EQ(result.tbl->num_columns(), 2); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -1366,15 +1364,15 @@ TEST_P(JsonReaderParamTest, JsonDtypeSchema) std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; - std::map dtype_schema{ + std::map dtype_schema{ {"2", {dtype()}}, {"0", {dtype()}}, {"1", {dtype()}}}; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .dtypes(dtype_schema) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -1399,7 +1397,7 @@ TEST_F(JsonReaderTest, JsonNestedDtypeSchema) { std::string json_string = R"( [{"a":[123, {"0": 123}], "b":1.0}, {"b":1.1}, {"b":2.1}])"; - std::map dtype_schema{ + std::map dtype_schema{ {"a", { data_type{cudf::type_id::LIST}, @@ -1408,14 +1406,14 @@ TEST_F(JsonReaderTest, JsonNestedDtypeSchema) {"b", {dtype()}}, }; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder( - cudf_io::source_info{json_string.data(), json_string.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{json_string.data(), json_string.size()}) .dtypes(dtype_schema) .lines(false) .experimental(true); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + 
cudf::io::table_with_metadata result = cudf::io::read_json(in_options); // Make sure we have columns "a" and "b" ASSERT_EQ(result.tbl->num_columns(), 2); diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index a658ed0a55d..2f761eeac66 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -42,8 +42,6 @@ #define ZSTD_SUPPORTED 0 #endif -namespace cudf_io = cudf::io; - template using column_wrapper = typename std::conditional, @@ -182,9 +180,9 @@ struct SkipRowTest { sequence, sequence + file_num_rows); table_view input_table({input_col}); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, input_table); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, input_table); + cudf::io::write_orc(out_opts); auto begin_sequence = sequence, end_sequence = sequence; if (skip_rows < file_num_rows) { @@ -203,12 +201,12 @@ struct SkipRowTest { auto filepath = temp_env->get_temp_filepath("SkipRowTest" + std::to_string(test_calls++) + ".orc"); auto expected_result = get_expected_result(filepath, skip_rows, file_num_rows, read_num_rows); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .skip_rows(skip_rows) .num_rows(read_num_rows); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_result->view(), result.tbl->view()); } @@ -218,11 +216,11 @@ struct SkipRowTest { temp_env->get_temp_filepath("SkipRowTest" + std::to_string(test_calls++) + ".orc"); auto expected_result = get_expected_result(filepath, skip_rows, file_num_rows, file_num_rows - skip_rows); - cudf_io::orc_reader_options in_opts = - 
cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .skip_rows(skip_rows); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_result->view(), result.tbl->view()); } }; @@ -239,13 +237,13 @@ TYPED_TEST(OrcWriterNumericTypeTest, SingleColumn) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcSingleColumn.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -261,13 +259,13 @@ TYPED_TEST(OrcWriterNumericTypeTest, SingleColumnWithNulls) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcSingleColumnWithNulls.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + 
cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -283,15 +281,15 @@ TYPED_TEST(OrcWriterTimestampTypeTest, Timestamps) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcTimestamps.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .timestamp_type(this->type()); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -309,15 +307,15 @@ TYPED_TEST(OrcWriterTimestampTypeTest, TimestampsWithNulls) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcTimestampsWithNulls.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .timestamp_type(this->type()); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, 
result.tbl->view()); } @@ -333,15 +331,15 @@ TYPED_TEST(OrcWriterTimestampTypeTest, TimestampOverflow) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcTimestampOverflow.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .timestamp_type(this->type()); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -381,7 +379,7 @@ TEST_F(OrcWriterTest, MultiColumn) table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8, col9}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("bools"); expected_metadata.column_metadata[1].set_name("int8s"); expected_metadata.column_metadata[2].set_name("int16s"); @@ -394,14 +392,14 @@ TEST_F(OrcWriterTest, MultiColumn) expected_metadata.column_metadata[9].set_name("structs"); auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - 
cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -449,7 +447,7 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls) struct_col col8{{ages_col}, {0, 1, 1, 0, 1, 1, 0, 1, 1, 0}}; table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("bools"); expected_metadata.column_metadata[1].set_name("int8s"); expected_metadata.column_metadata[2].set_name("int16s"); @@ -461,14 +459,14 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls) expected_metadata.column_metadata[8].set_name("structs"); auto filepath = temp_env->get_temp_filepath("OrcMultiColumnWithNulls.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -484,15 +482,15 @@ TEST_F(OrcWriterTest, 
ReadZeroRows) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcSingleColumn.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .num_rows(0); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); EXPECT_EQ(0, result.tbl->num_rows()); EXPECT_EQ(1, result.tbl->num_columns()); @@ -513,20 +511,20 @@ TEST_F(OrcWriterTest, Strings) table_view expected({col0, col1, col2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); expected_metadata.column_metadata[1].set_name("col_string"); expected_metadata.column_metadata[2].set_name("col_another"); auto filepath = temp_env->get_temp_filepath("OrcStrings.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); 
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -560,7 +558,7 @@ TEST_F(OrcWriterTest, SlicedTable) table_view expected({col0, col1, col2, col3, col4, col5}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); expected_metadata.column_metadata[1].set_name("col_string"); expected_metadata.column_metadata[2].set_name("col_another"); @@ -571,14 +569,14 @@ TEST_F(OrcWriterTest, SlicedTable) auto expected_slice = cudf::slice(expected, {2, static_cast(num_rows)}); auto filepath = temp_env->get_temp_filepath("SlicedTable.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected_slice) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -592,19 +590,20 @@ TEST_F(OrcWriterTest, HostBuffer) table_view expected{{col}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); std::vector out_buffer; - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), expected) + cudf::io::orc_writer_options 
out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info(out_buffer.data(), out_buffer.size())) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder( + cudf::io::source_info(out_buffer.data(), out_buffer.size())) .use_index(false); - const auto result = cudf_io::read_orc(in_opts); + const auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -625,14 +624,14 @@ TEST_F(OrcWriterTest, negTimestampsNano) table_view expected({timestamps_ns}); auto filepath = temp_env->get_temp_filepath("OrcNegTimestamp.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL( expected.column(0), result.tbl->view().column(0), cudf::test::debug_output_level::ALL_ERRORS); @@ -647,13 +646,13 @@ TEST_F(OrcWriterTest, Slice) cudf::table_view tbl{result}; auto filepath = temp_env->get_temp_filepath("Slice.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_orc(out_opts); + 
cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto read_table = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto read_table = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl); } @@ -664,13 +663,13 @@ TEST_F(OrcChunkedWriterTest, SingleTable) auto table1 = create_random_fixed_table(5, 5, true); auto filepath = temp_env->get_temp_filepath("ChunkedSingle.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(*table1); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); } @@ -684,13 +683,13 @@ TEST_F(OrcChunkedWriterTest, SimpleTable) auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedSimple.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(*table1).write(*table2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + 
cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -704,13 +703,13 @@ TEST_F(OrcChunkedWriterTest, LargeTables) auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedLarge.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(*table1).write(*table2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -730,17 +729,17 @@ TEST_F(OrcChunkedWriterTest, ManyTables) auto expected = cudf::concatenate(table_views); auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer writer(opts); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); std::for_each(table_views.begin(), table_views.end(), 
[&writer](table_view const& tbl) { writer.write(tbl); }); writer.close(); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -760,20 +759,20 @@ TEST_F(OrcChunkedWriterTest, Metadata) table_view expected({col0, col1, col2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); expected_metadata.column_metadata[1].set_name("col_string"); expected_metadata.column_metadata[2].set_name("col_another"); auto filepath = temp_env->get_temp_filepath("ChunkedMetadata.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}) + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}) .metadata(&expected_metadata); - cudf_io::orc_chunked_writer(opts).write(expected).write(expected); + cudf::io::orc_chunked_writer(opts).write(expected).write(expected); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } @@ -793,13 +792,13 @@ TEST_F(OrcChunkedWriterTest, Strings) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedStrings.orc"); - cudf_io::chunked_orc_writer_options opts = - 
cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(tbl1).write(tbl2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -811,9 +810,9 @@ TEST_F(OrcChunkedWriterTest, MismatchedTypes) auto table2 = create_random_fixed_table(4, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer writer(opts); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); writer.write(*table1); EXPECT_THROW(writer.write(*table2), cudf::logic_error); } @@ -824,9 +823,9 @@ TEST_F(OrcChunkedWriterTest, ChunkedWritingAfterClosing) auto table1 = create_random_fixed_table(4, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedWritingAfterClosing.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer writer(opts); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); writer.write(*table1); writer.close(); EXPECT_THROW(writer.write(*table1), cudf::logic_error); @@ -839,9 +838,9 @@ 
TEST_F(OrcChunkedWriterTest, MismatchedStructure) auto table2 = create_random_fixed_table(3, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer writer(opts); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); writer.write(*table1); EXPECT_THROW(writer.write(*table2), cudf::logic_error); } @@ -855,13 +854,13 @@ TEST_F(OrcChunkedWriterTest, ReadStripes) auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedStripes.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(*table1).write(*table2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).stripes({{1, 0, 1}}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).stripes({{1, 0, 1}}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -872,15 +871,15 @@ TEST_F(OrcChunkedWriterTest, ReadStripesError) auto table1 = create_random_fixed_table(5, 5, true); auto filepath = temp_env->get_temp_filepath("ChunkedStripesError.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(*table1); + 
cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).stripes({{0, 1}}); - EXPECT_THROW(cudf_io::read_orc(read_opts), cudf::logic_error); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).stripes({{0, 1}}); + EXPECT_THROW(cudf::io::read_orc(read_opts), cudf::logic_error); read_opts.set_stripes({{-1}}); - EXPECT_THROW(cudf_io::read_orc(read_opts), cudf::logic_error); + EXPECT_THROW(cudf::io::read_orc(read_opts), cudf::logic_error); } TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize) @@ -915,13 +914,13 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(tbl1).write(tbl2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -958,13 +957,13 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.orc"); - 
cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(tbl1).write(tbl2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -1002,30 +1001,30 @@ TEST_F(OrcStatisticsTest, Basic) auto filepath = temp_env->get_temp_filepath("OrcStatsMerge.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - auto const stats = cudf_io::read_parsed_orc_statistics(cudf_io::source_info{filepath}); + auto const stats = cudf::io::read_parsed_orc_statistics(cudf::io::source_info{filepath}); auto const expected_column_names = std::vector{"", "_col0", "_col1", "_col2", "_col3", "_col4"}; EXPECT_EQ(stats.column_names, expected_column_names); - auto validate_statistics = [&](std::vector const& stats) { + auto validate_statistics = [&](std::vector const& stats) { auto& s0 = stats[0]; EXPECT_EQ(*s0.number_of_values, 9ul); auto& s1 = stats[1]; EXPECT_EQ(*s1.number_of_values, 4ul); - auto& ts1 = std::get(s1.type_specific_stats); + auto& ts1 = std::get(s1.type_specific_stats); EXPECT_EQ(*ts1.minimum, 1); EXPECT_EQ(*ts1.maximum, 7); EXPECT_EQ(*ts1.sum, 16); auto& s2 = stats[2]; EXPECT_EQ(*s2.number_of_values, 4ul); - 
auto& ts2 = std::get(s2.type_specific_stats); + auto& ts2 = std::get(s2.type_specific_stats); EXPECT_EQ(*ts2.minimum, 1.); EXPECT_EQ(*ts2.maximum, 7.); // No sum ATM, filed #7087 @@ -1033,18 +1032,18 @@ TEST_F(OrcStatisticsTest, Basic) auto& s3 = stats[3]; EXPECT_EQ(*s3.number_of_values, 9ul); - auto& ts3 = std::get(s3.type_specific_stats); + auto& ts3 = std::get(s3.type_specific_stats); EXPECT_EQ(*ts3.minimum, "Friday"); EXPECT_EQ(*ts3.maximum, "Wednesday"); EXPECT_EQ(*ts3.sum, 58ul); auto& s4 = stats[4]; EXPECT_EQ(*s4.number_of_values, 9ul); - EXPECT_EQ(std::get(s4.type_specific_stats).count[0], 8ul); + EXPECT_EQ(std::get(s4.type_specific_stats).count[0], 8ul); auto& s5 = stats[5]; EXPECT_EQ(*s5.number_of_values, 4ul); - auto& ts5 = std::get(s5.type_specific_stats); + auto& ts5 = std::get(s5.type_specific_stats); EXPECT_EQ(*ts5.minimum_utc, 1000); EXPECT_EQ(*ts5.maximum_utc, 7000); ASSERT_FALSE(ts5.minimum); @@ -1070,18 +1069,18 @@ TEST_F(OrcWriterTest, SlicedValidMask) auto sliced_col = cudf::slice(static_cast(col), indices); cudf::table_view tbl{sliced_col}; - cudf_io::table_input_metadata expected_metadata(tbl); + cudf::io::table_input_metadata expected_metadata(tbl); expected_metadata.column_metadata[0].set_name("col_string"); auto filepath = temp_env->get_temp_filepath("OrcStrings.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); 
CUDF_TEST_EXPECT_TABLES_EQUAL(tbl, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1093,13 +1092,13 @@ TEST_F(OrcReaderTest, SingleInputs) auto table1 = create_random_fixed_table(5, 5, true); auto filepath1 = temp_env->get_temp_filepath("SimpleTable1.orc"); - cudf_io::orc_writer_options write_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath1}, table1->view()); - cudf_io::write_orc(write_opts); + cudf::io::orc_writer_options write_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath1}, table1->view()); + cudf::io::write_orc(write_opts); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{{filepath1}}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1}}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); } @@ -1125,11 +1124,11 @@ TEST_F(OrcReaderTest, zstdCompressionRegression) auto source = cudf::io::source_info(reinterpret_cast(input_buffer), sizeof(input_buffer)); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(source).use_index(false); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(source).use_index(false); cudf::io::table_with_metadata result; - CUDF_EXPECT_NO_THROW(result = cudf_io::read_orc(in_opts)); + CUDF_EXPECT_NO_THROW(result = cudf::io::read_orc(in_opts)); EXPECT_EQ(1920800, result.tbl->num_rows()); } @@ -1143,21 +1142,21 @@ TEST_F(OrcReaderTest, MultipleInputs) auto const filepath1 = temp_env->get_temp_filepath("SimpleTable1.orc"); { - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath1}, table1->view()); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + 
cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath1}, table1->view()); + cudf::io::write_orc(out_opts); } auto const filepath2 = temp_env->get_temp_filepath("SimpleTable2.orc"); { - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath2}, table2->view()); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath2}, table2->view()); + cudf::io::write_orc(out_opts); } - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{{filepath1, filepath2}}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1, filepath2}}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -1180,14 +1179,14 @@ TEST_P(OrcWriterTestDecimal, Decimal64) cudf::table_view tbl({static_cast(col)}); auto filepath = temp_env->get_temp_filepath("Decimal64.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(tbl.column(0), result.tbl->view().column(0)); } @@ -1211,14 +1210,14 @@ TEST_F(OrcWriterTest, Decimal32) cudf::table_view expected({col}); auto filepath = temp_env->get_temp_filepath("Decimal32.orc"); - cudf_io::orc_writer_options out_opts = - 
cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(col, result.tbl->view().column(0)); } @@ -1248,15 +1247,15 @@ TEST_F(OrcStatisticsTest, Overflow) auto filepath = temp_env->get_temp_filepath("OrcStatsOverflow.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_orc(out_opts); - auto const stats = cudf_io::read_parsed_orc_statistics(cudf_io::source_info{filepath}); + auto const stats = cudf::io::read_parsed_orc_statistics(cudf::io::source_info{filepath}); auto check_sum_exist = [&](int idx, bool expected) { auto const& s = stats.file_stats[idx]; - auto const& ts = std::get(s.type_specific_stats); + auto const& ts = std::get(s.type_specific_stats); EXPECT_EQ(ts.sum.has_value(), expected); }; check_sum_exist(1, false); @@ -1311,8 +1310,8 @@ TEST_F(OrcStatisticsTest, HasNull) 0x4F, 0x52, 0x43, 0x17, }; - auto const stats = cudf_io::read_parsed_orc_statistics( - cudf_io::source_info{reinterpret_cast(nulls_orc.data()), nulls_orc.size()}); + auto const stats = cudf::io::read_parsed_orc_statistics( + cudf::io::source_info{reinterpret_cast(nulls_orc.data()), nulls_orc.size()}); EXPECT_EQ(stats.file_stats[1].has_null, true); EXPECT_EQ(stats.file_stats[2].has_null, false); @@ -1343,35 
+1342,35 @@ TEST_P(OrcWriterTestStripes, StripeSize) auto validate = [&](std::vector const& orc_buffer) { auto const expected_stripe_num = std::max(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes); - auto const stats = cudf_io::read_parsed_orc_statistics( - cudf_io::source_info(orc_buffer.data(), orc_buffer.size())); + auto const stats = cudf::io::read_parsed_orc_statistics( + cudf::io::source_info(orc_buffer.data(), orc_buffer.size())); EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder( - cudf_io::source_info(orc_buffer.data(), orc_buffer.size())) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder( + cudf::io::source_info(orc_buffer.data(), orc_buffer.size())) .use_index(false); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); }; { std::vector out_buffer_chunked; - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info(&out_buffer_chunked)) + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info(&out_buffer_chunked)) .stripe_size_rows(size_rows) .stripe_size_bytes(size_bytes); - cudf_io::orc_chunked_writer(opts).write(expected->view()); + cudf::io::orc_chunked_writer(opts).write(expected->view()); validate(out_buffer_chunked); } { std::vector out_buffer; - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), expected->view()) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), expected->view()) .stripe_size_rows(size_rows) .stripe_size_bytes(size_bytes); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); validate(out_buffer); } } @@ -1392,15 +1391,15 @@ TEST_F(OrcWriterTest, 
StripeSizeInvalid) std::vector out_buffer; EXPECT_THROW( - cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) .stripe_size_rows(511), cudf::logic_error); EXPECT_THROW( - cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) .stripe_size_bytes(63 << 10), cudf::logic_error); EXPECT_THROW( - cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) .row_index_stride(511), cudf::logic_error); } @@ -1438,18 +1437,18 @@ TEST_F(OrcWriterTest, TestMap) table_view expected({*list_col}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_list_column_as_map(); auto filepath = temp_env->get_temp_filepath("MapColumn.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1466,22 +1465,22 @@ TEST_F(OrcReaderTest, 
NestedColumnSelection) struct_col s_col{child_col1, child_col2}; table_view expected({s_col}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("struct_s"); expected_metadata.column_metadata[0].child(0).set_name("field_a"); expected_metadata.column_metadata[0].child(1).set_name("field_b"); auto filepath = temp_env->get_temp_filepath("OrcNestedSelection.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .columns({"struct_s.field_b"}); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); // Verify that only one child column is included in the output table ASSERT_EQ(1, result.tbl->view().column(0).num_children()); @@ -1503,20 +1502,20 @@ TEST_F(OrcReaderTest, DecimalOptions) dec128_col col{col_data, col_data + num_rows, mask}; table_view expected({col}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("dec"); auto filepath = temp_env->get_temp_filepath("OrcDecimalOptions.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - 
cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options valid_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options valid_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .decimal128_columns({"dec", "fake_name"}); // Should not throw, even with "fake name" - EXPECT_NO_THROW(cudf_io::read_orc(valid_opts)); + EXPECT_NO_THROW(cudf::io::read_orc(valid_opts)); } TEST_F(OrcWriterTest, DecimalOptionsNested) @@ -1547,24 +1546,24 @@ TEST_F(OrcWriterTest, DecimalOptionsNested) table_view expected({*map_list_col}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("maps"); expected_metadata.column_metadata[0].set_list_column_as_map(); expected_metadata.column_metadata[0].child(1).child(0).child(0).set_name("dec64"); expected_metadata.column_metadata[0].child(1).child(0).child(1).set_name("dec128"); auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) // One less level of nesting because children of map columns are the child struct's children .decimal128_columns({"maps.0.dec64"}); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); // Both columns should be read as decimal128 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result.tbl->view().column(0).child(1).child(0).child(0), @@ -1577,14 +1576,15 @@ TEST_F(OrcReaderTest, EmptyColumnsParam) auto const expected = create_random_fixed_table(2, 4, false); std::vector out_buffer; - cudf_io::orc_writer_options args = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{&out_buffer}, *expected); - cudf_io::write_orc(args); + cudf::io::orc_writer_options args = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected); + cudf::io::write_orc(args); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{out_buffer.data(), out_buffer.size()}) + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder( + cudf::io::source_info{out_buffer.data(), out_buffer.size()}) .columns({}); - auto const result = cudf_io::read_orc(read_opts); + auto const result = cudf::io::read_orc(read_opts); EXPECT_EQ(result.tbl->num_columns(), 0); EXPECT_EQ(result.tbl->num_rows(), 0); diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index cf22ab8a525..8a98efabcb5 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -45,8 +45,6 @@ #include #include -namespace cudf_io = cudf::io; - template using column_wrapper = typename std::conditional, @@ -430,13 +428,13 @@ TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumn) auto expected = table_view{{col}}; auto filepath = temp_env->get_temp_filepath("SingleColumn.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = 
cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -453,13 +451,13 @@ TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumnWithNulls) auto expected = table_view{{col}}; auto filepath = temp_env->get_temp_filepath("SingleColumnWithNulls.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -477,14 +475,14 @@ TYPED_TEST(ParquetWriterChronoTypeTest, Chronos) auto expected = table_view{{col}}; auto filepath = temp_env->get_temp_filepath("Chronos.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .timestamp_type(this->type()); - auto result = 
cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -503,14 +501,14 @@ TYPED_TEST(ParquetWriterChronoTypeTest, ChronosWithNulls) auto expected = table_view{{col}}; auto filepath = temp_env->get_temp_filepath("ChronosWithNulls.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .timestamp_type(this->type()); - auto result = cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -527,14 +525,14 @@ TYPED_TEST(ParquetWriterTimestampTypeTest, TimestampOverflow) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("ParquetTimestampOverflow.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .timestamp_type(this->type()); - auto result = cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); 
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -576,7 +574,7 @@ TEST_F(ParquetWriterTest, MultiColumn) auto expected = table_view{{col1, col2, col3, col4, col5, col6, col7, col8}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); // expected_metadata.column_metadata[0].set_name( "bools"); expected_metadata.column_metadata[0].set_name("int8s"); expected_metadata.column_metadata[1].set_name("int16s"); @@ -588,14 +586,14 @@ TEST_F(ParquetWriterTest, MultiColumn) expected_metadata.column_metadata[7].set_name("decimal128s").set_decimal_precision(40); auto filepath = temp_env->get_temp_filepath("MultiColumn.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -647,7 +645,7 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls) auto expected = table_view{{/*col0, */ col1, col2, col3, col4, col5, col6, col7}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); // expected_metadata.column_names.emplace_back("bools"); expected_metadata.column_metadata[0].set_name("int8s"); expected_metadata.column_metadata[1].set_name("int16s"); @@ 
-658,15 +656,15 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls) expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); auto filepath = temp_env->get_temp_filepath("MultiColumnWithNulls.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); // TODO: Need to be able to return metadata in tree form from reader so they can be compared. 
@@ -691,20 +689,20 @@ TEST_F(ParquetWriterTest, Strings) auto expected = table_view{{col0, col1, col2}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); expected_metadata.column_metadata[1].set_name("col_string"); expected_metadata.column_metadata[2].set_name("col_another"); auto filepath = temp_env->get_temp_filepath("Strings.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -740,7 +738,7 @@ TEST_F(ParquetWriterTest, StringsAsBinary) auto write_tbl = table_view{{col0, col1, col2, col3, col4}}; - cudf_io::table_input_metadata expected_metadata(write_tbl); + cudf::io::table_input_metadata expected_metadata(write_tbl); expected_metadata.column_metadata[0].set_name("col_single").set_output_as_binary(true); expected_metadata.column_metadata[1].set_name("col_string").set_output_as_binary(true); expected_metadata.column_metadata[2].set_name("col_another").set_output_as_binary(true); @@ -748,20 +746,20 @@ TEST_F(ParquetWriterTest, StringsAsBinary) expected_metadata.column_metadata[4].set_name("col_binary"); auto filepath = 
temp_env->get_temp_filepath("BinaryStrings.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, write_tbl) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .set_column_schema( - {cudf_io::reader_column_schema().set_convert_binary_to_strings(false), - cudf_io::reader_column_schema().set_convert_binary_to_strings(false), - cudf_io::reader_column_schema().set_convert_binary_to_strings(false), - cudf_io::reader_column_schema().add_child(cudf_io::reader_column_schema()), - cudf_io::reader_column_schema().add_child(cudf_io::reader_column_schema())}); - auto result = cudf_io::read_parquet(in_opts); + {cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema()), + cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema())}); + auto result = cudf::io::read_parquet(in_opts); auto expected = table_view{{col3, col4, col3, col3, col4}}; CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); @@ -855,7 +853,7 @@ TEST_F(ParquetWriterTest, SlicedTable) // auto expected_slice = expected; auto expected_slice = cudf::slice(expected, {2, static_cast(num_rows) - 1}); - cudf_io::table_input_metadata expected_metadata(expected_slice); + cudf::io::table_input_metadata expected_metadata(expected_slice); 
expected_metadata.column_metadata[0].set_name("col_other"); expected_metadata.column_metadata[1].set_name("col_string"); expected_metadata.column_metadata[2].set_name("col_another"); @@ -869,14 +867,14 @@ TEST_F(ParquetWriterTest, SlicedTable) expected_metadata.column_metadata[6].child(1).child(1).set_name("flats"); auto filepath = temp_env->get_temp_filepath("SlicedTable.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected_slice) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -956,7 +954,7 @@ TEST_F(ParquetWriterTest, ListColumn) table_view expected({col0, col1, col2, col3, /* col4, */ col5, col6, col7}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_list_int_0"); expected_metadata.column_metadata[1].set_name("col_list_list_int_1"); expected_metadata.column_metadata[2].set_name("col_list_list_int_nullable_2"); @@ -967,14 +965,14 @@ TEST_F(ParquetWriterTest, ListColumn) expected_metadata.column_metadata[6].set_name("col_list_list_list_7"); auto filepath = temp_env->get_temp_filepath("ListColumn.parquet"); - auto out_opts = cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, 
expected) + auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata) - .compression(cudf_io::compression_type::NONE); + .compression(cudf::io::compression_type::NONE); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - auto in_opts = cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + auto in_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -999,7 +997,7 @@ TEST_F(ParquetWriterTest, MultiIndex) auto expected = table_view{{col0, col1, col2, col3, col4}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("int8s"); expected_metadata.column_metadata[1].set_name("int16s"); expected_metadata.column_metadata[2].set_name("int32s"); @@ -1007,18 +1005,18 @@ TEST_F(ParquetWriterTest, MultiIndex) expected_metadata.column_metadata[4].set_name("doubles"); auto filepath = temp_env->get_temp_filepath("MultiIndex.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata) .key_value_metadata( {{{"pandas", "\"index_columns\": [\"int8s\", \"int16s\"], \"column1\": [\"int32s\"]"}}}); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + 
cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .use_pandas_metadata(true) .columns({"int32s", "floats", "doubles"}); - auto result = cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1034,17 +1032,17 @@ TEST_F(ParquetWriterTest, HostBuffer) const auto expected = table_view{{col}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); std::vector out_buffer; - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), expected) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = cudf_io::parquet_reader_options::builder( - cudf_io::source_info(out_buffer.data(), out_buffer.size())); - const auto result = cudf_io::read_parquet(in_opts); + cudf::io::write_parquet(out_opts); + cudf::io::parquet_reader_options in_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info(out_buffer.data(), out_buffer.size())); + const auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1056,13 +1054,13 @@ TEST_F(ParquetWriterTest, NonNullable) auto expected = create_random_fixed_table(9, 9, false); auto filepath = temp_env->get_temp_filepath("NonNullable.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + 
cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -1095,13 +1093,13 @@ TEST_F(ParquetWriterTest, Struct) auto expected = table_view({*struct_2}); auto filepath = temp_env->get_temp_filepath("Struct.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)); - cudf_io::read_parquet(read_args); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + cudf::io::read_parquet(read_args); } TEST_F(ParquetWriterTest, StructOfList) @@ -1156,7 +1154,7 @@ TEST_F(ParquetWriterTest, StructOfList) auto expected = table_view({*struct_2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("being"); expected_metadata.column_metadata[0].child(0).set_name("human?"); expected_metadata.column_metadata[0].child(1).set_name("particulars"); @@ -1166,14 +1164,14 @@ TEST_F(ParquetWriterTest, StructOfList) expected_metadata.column_metadata[0].child(1).child(3).set_name("flats"); auto filepath = temp_env->get_temp_filepath("StructOfList.parquet"); - 
cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_parquet(args); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)); - const auto result = cudf_io::read_parquet(read_args); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + const auto result = cudf::io::read_parquet(read_args); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1213,7 +1211,7 @@ TEST_F(ParquetWriterTest, ListOfStruct) auto expected = table_view({*list_col}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("family"); expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); @@ -1221,14 +1219,14 @@ TEST_F(ParquetWriterTest, ListOfStruct) expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); auto filepath = temp_env->get_temp_filepath("ListOfStruct.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_parquet(args); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)); - const auto result = 
cudf_io::read_parquet(read_args); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + const auto result = cudf::io::read_parquet(read_args); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1284,36 +1282,34 @@ TEST_F(ParquetWriterTest, CustomDataSink) auto filepath = temp_env->get_temp_filepath("CustomDataSink.parquet"); custom_test_data_sink custom_sink(filepath); - namespace cudf_io = cudf::io; - srand(31337); auto expected = create_random_fixed_table(5, 10, false); // write out using the custom sink { - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); } // write out using a memmapped sink std::vector buf_sink; { - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&buf_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&buf_sink}, *expected); + cudf::io::write_parquet(args); } // read them back in and make sure everything matches - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); - cudf_io::parquet_reader_options buf_args = cudf_io::parquet_reader_options::builder( - 
cudf_io::source_info{buf_sink.data(), buf_sink.size()}); - auto buf_tbl = cudf_io::read_parquet(buf_args); + cudf::io::parquet_reader_options buf_args = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{buf_sink.data(), buf_sink.size()}); + auto buf_tbl = cudf::io::read_parquet(buf_args); CUDF_TEST_EXPECT_TABLES_EQUAL(buf_tbl.tbl->view(), expected->view()); } @@ -1322,20 +1318,18 @@ TEST_F(ParquetWriterTest, DeviceWriteLargeishFile) auto filepath = temp_env->get_temp_filepath("DeviceWriteLargeishFile.parquet"); custom_test_data_sink custom_sink(filepath); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_random_fixed_table(4, 4 * 1024 * 1024, false); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -1354,19 +1348,19 @@ TEST_F(ParquetWriterTest, PartitionedWrite) auto expected2 = cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder( - cudf_io::sink_info(std::vector{filepath1, filepath2}), *source) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + 
cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) .partitions({partition1, partition2}) - .compression(cudf_io::compression_type::NONE); - cudf_io::write_parquet(args); + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); - auto result1 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath1))); + auto result1 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - auto result2 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath2))); + auto result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); } @@ -1385,19 +1379,19 @@ TEST_F(ParquetWriterTest, PartitionedWriteEmptyPartitions) auto expected2 = cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder( - cudf_io::sink_info(std::vector{filepath1, filepath2}), *source) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) .partitions({partition1, partition2}) - .compression(cudf_io::compression_type::NONE); - cudf_io::write_parquet(args); + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); - auto result1 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath1))); + auto result1 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - auto result2 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath2))); + auto 
result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); } @@ -1416,19 +1410,19 @@ TEST_F(ParquetWriterTest, PartitionedWriteEmptyColumns) auto expected2 = cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder( - cudf_io::sink_info(std::vector{filepath1, filepath2}), *source) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) .partitions({partition1, partition2}) - .compression(cudf_io::compression_type::NONE); - cudf_io::write_parquet(args); + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); - auto result1 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath1))); + auto result1 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - auto result2 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath2))); + auto result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); } @@ -1439,9 +1433,9 @@ std::string create_parquet_file(int num_cols) auto const table = create_random_fixed_table(num_cols, 10, true); auto const filepath = temp_env->get_temp_filepath(typeid(T).name() + std::to_string(num_cols) + ".parquet"); - cudf_io::parquet_writer_options const out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, table->view()); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options const out_opts = + 
cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table->view()); + cudf::io::write_parquet(out_opts); return filepath; } @@ -1451,16 +1445,16 @@ TEST_F(ParquetWriterTest, MultipleMismatchedSources) { auto const float5file = create_parquet_file(5); std::vector files{int5file, float5file}; - cudf_io::parquet_reader_options const read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{files}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); } { auto const int10file = create_parquet_file(10); std::vector files{int5file, int10file}; - cudf_io::parquet_reader_options const read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{files}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); } } @@ -1473,13 +1467,13 @@ TEST_F(ParquetWriterTest, Slice) cudf::table_view tbl{result}; auto filepath = temp_env->get_temp_filepath("Slice.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto read_table = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto read_table = 
cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl); } @@ -1490,13 +1484,13 @@ TEST_F(ParquetChunkedWriterTest, SingleTable) auto table1 = create_random_fixed_table(5, 5, true); auto filepath = temp_env->get_temp_filepath("ChunkedSingle.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(*table1); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); } @@ -1510,13 +1504,13 @@ TEST_F(ParquetChunkedWriterTest, SimpleTable) auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedSimple.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto 
result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -1530,14 +1524,14 @@ TEST_F(ParquetChunkedWriterTest, LargeTables) auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedLarge.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - auto md = cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2).close(); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + auto md = cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2).close(); CUDF_EXPECTS(!md, "The return value should be null."); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -1557,18 +1551,18 @@ TEST_F(ParquetChunkedWriterTest, ManyTables) auto expected = cudf::concatenate(table_views); auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); std::for_each(table_views.begin(), table_views.end(), [&writer](table_view const& tbl) { writer.write(tbl); }); auto md = writer.close({"dummy/path"}); CUDF_EXPECTS(md, "The returned metadata should not be null."); - 
cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -1592,13 +1586,13 @@ TEST_F(ParquetChunkedWriterTest, Strings) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedStrings.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -1651,13 +1645,13 @@ TEST_F(ParquetChunkedWriterTest, ListColumn) auto expected = cudf::concatenate(std::vector({tbl0, tbl1})); auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(tbl0).write(tbl1); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl0).write(tbl1); 
- cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -1704,7 +1698,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStruct) auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - cudf_io::table_input_metadata expected_metadata(table_1); + cudf::io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("family"); expected_metadata.column_metadata[0].child(1).set_nullability(false); expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); @@ -1713,14 +1707,14 @@ TEST_F(ParquetChunkedWriterTest, ListOfStruct) expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); auto filepath = temp_env->get_temp_filepath("ChunkedListOfStruct.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); args.set_metadata(&expected_metadata); - cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); 
cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1795,7 +1789,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList) auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - cudf_io::table_input_metadata expected_metadata(table_1); + cudf::io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("family"); expected_metadata.column_metadata[0].child(1).set_nullability(false); expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); @@ -1806,14 +1800,14 @@ TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList) expected_metadata.column_metadata[0].child(1).child(1).child(3).set_name("flats"); auto filepath = temp_env->get_temp_filepath("ListOfStructOfStructOfListOfList.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); args.set_metadata(&expected_metadata); - cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1831,9 +1825,9 @@ TEST_F(ParquetChunkedWriterTest, MismatchedTypes) auto table2 = create_random_fixed_table(4, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.parquet"); - cudf_io::chunked_parquet_writer_options args = - 
cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); writer.write(*table1); EXPECT_THROW(writer.write(*table2), cudf::logic_error); writer.close(); @@ -1845,9 +1839,9 @@ TEST_F(ParquetChunkedWriterTest, ChunkedWriteAfterClosing) auto table = create_random_fixed_table(4, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedWriteAfterClosing.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); writer.write(*table).close(); EXPECT_THROW(writer.write(*table), cudf::logic_error); } @@ -1858,14 +1852,14 @@ TEST_F(ParquetChunkedWriterTest, ReadingUnclosedFile) auto table = create_random_fixed_table(4, 4, true); auto filepath = temp_env->get_temp_filepath("ReadingUnclosedFile.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); writer.write(*table); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + 
EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); } TEST_F(ParquetChunkedWriterTest, MismatchedStructure) @@ -1875,9 +1869,9 @@ TEST_F(ParquetChunkedWriterTest, MismatchedStructure) auto table2 = create_random_fixed_table(3, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); writer.write(*table1); EXPECT_THROW(writer.write(*table2), cudf::logic_error); writer.close(); @@ -1915,9 +1909,9 @@ TEST_F(ParquetChunkedWriterTest, MismatchedStructureList) auto tbl1 = table_view({col01, col11}); auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); writer.write(tbl0); EXPECT_THROW(writer.write(tbl1), cudf::logic_error); } @@ -1931,13 +1925,13 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullability) auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedNullable.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + 
cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -1969,7 +1963,7 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct) auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - cudf_io::table_input_metadata expected_metadata(table_1); + cudf::io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("being"); expected_metadata.column_metadata[0].child(0).set_name("human?"); expected_metadata.column_metadata[0].child(1).set_name("particulars"); @@ -1977,14 +1971,14 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct) expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); args.set_metadata(&expected_metadata); - cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); 
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -2000,7 +1994,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullability) auto filepath = temp_env->get_temp_filepath("ChunkedNoNullable.parquet"); - cudf_io::table_input_metadata metadata(*table1); + cudf::io::table_input_metadata metadata(*table1); // In the absence of prescribed per-column nullability in metadata, the writer assumes the worst // and considers all columns nullable. However cudf::concatenate will not force nulls in case no @@ -2010,14 +2004,14 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullability) col_meta.set_nullability(false); } - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}) + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) .metadata(&metadata); - cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -2057,7 +2051,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList) auto full_table = cudf::concatenate(std::vector({table1, table2})); - cudf_io::table_input_metadata metadata(table1); + cudf::io::table_input_metadata metadata(table1); metadata.column_metadata[0].set_nullability(true); // List is nullable at first (root) level metadata.column_metadata[0].child(1).set_nullability( false); // non-nullable at second (leaf) level @@ -2065,14 +2059,14 @@ 
TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList) auto filepath = temp_env->get_temp_filepath("ChunkedListNullable.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}) + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) .metadata(&metadata); - cudf_io::parquet_chunked_writer(args).write(table1).write(table2); + cudf::io::parquet_chunked_writer(args).write(table1).write(table2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -2102,7 +2096,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct) auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - cudf_io::table_input_metadata expected_metadata(table_1); + cudf::io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("being").set_nullability(false); expected_metadata.column_metadata[0].child(0).set_name("human?").set_nullability(false); expected_metadata.column_metadata[0].child(1).set_name("particulars"); @@ -2110,14 +2104,14 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct) expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); 
args.set_metadata(&expected_metadata); - cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -2132,16 +2126,16 @@ TEST_F(ParquetChunkedWriterTest, ReadRowGroups) auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedRowGroups.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); { - cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); } - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .row_groups({{1, 0, 1}}); - auto result = cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -2152,17 +2146,17 @@ TEST_F(ParquetChunkedWriterTest, ReadRowGroupsError) auto table1 = create_random_fixed_table(5, 5, true); auto filepath = temp_env->get_temp_filepath("ChunkedRowGroupsError.parquet"); - cudf_io::chunked_parquet_writer_options args = - 
cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(*table1); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).row_groups({{0, 1}}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).row_groups({{0, 1}}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); read_opts.set_row_groups({{-1}}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); read_opts.set_row_groups({{0}, {0}}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); } TEST_F(ParquetWriterTest, DecimalWrite) @@ -2182,26 +2176,26 @@ TEST_F(ParquetWriterTest, DecimalWrite) auto table = table_view({col0, col1}); auto filepath = temp_env->get_temp_filepath("DecimalWrite.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, table); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table); - cudf_io::table_input_metadata expected_metadata(table); + cudf::io::table_input_metadata expected_metadata(table); // verify failure if too small a precision is given expected_metadata.column_metadata[0].set_decimal_precision(7); expected_metadata.column_metadata[1].set_decimal_precision(1); args.set_metadata(&expected_metadata); - EXPECT_THROW(cudf_io::write_parquet(args), cudf::logic_error); + 
EXPECT_THROW(cudf::io::write_parquet(args), cudf::logic_error); // verify success if equal precision is given expected_metadata.column_metadata[0].set_decimal_precision(7); expected_metadata.column_metadata[1].set_decimal_precision(9); args.set_metadata(&expected_metadata); - cudf_io::write_parquet(args); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, table); } @@ -2243,13 +2237,13 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -2291,13 +2285,13 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.parquet"); - 
cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -2350,20 +2344,18 @@ TEST_F(ParquetWriterStressTest, LargeTableWeakCompression) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2373,20 
+2365,18 @@ TEST_F(ParquetWriterStressTest, LargeTableGoodCompression) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2396,20 +2386,18 @@ TEST_F(ParquetWriterStressTest, LargeTableWithValids) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - 
cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2419,20 +2407,18 @@ TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWeakCompression) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2442,20 +2428,18 @@ TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableGoodCompression) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); // write out using the 
custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2465,20 +2449,18 @@ TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWithValids) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); 
CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2491,14 +2473,14 @@ TEST_F(ParquetReaderTest, UserBounds) auto expected = create_random_fixed_table(4, 4, false); auto filepath = temp_env->get_temp_filepath("TooManyRows.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); // attempt to read more rows than there actually are - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).num_rows(16); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(16); + auto result = cudf::io::read_parquet(read_opts); // we should only get back 4 rows EXPECT_EQ(result.tbl->view().column(0).size(), 4); @@ -2511,14 +2493,14 @@ TEST_F(ParquetReaderTest, UserBounds) auto expected = create_random_fixed_table(4, 4, false); auto filepath = temp_env->get_temp_filepath("PastBounds.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); // attempt to read more rows than there actually are - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).skip_rows(4); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).skip_rows(4); + auto result = 
cudf::io::read_parquet(read_opts); // we should get empty columns back EXPECT_EQ(result.tbl->view().num_columns(), 4); @@ -2533,14 +2515,14 @@ TEST_F(ParquetReaderTest, UserBounds) auto expected = create_random_fixed_table(4, 4, false); auto filepath = temp_env->get_temp_filepath("ZeroRows.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); // attempt to read more rows than there actually are - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).num_rows(0); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(0); + auto result = cudf::io::read_parquet(read_opts); EXPECT_EQ(result.tbl->view().num_columns(), 4); EXPECT_EQ(result.tbl->view().column(0).size(), 0); @@ -2553,16 +2535,16 @@ TEST_F(ParquetReaderTest, UserBounds) auto expected = create_random_fixed_table(4, 4, false); auto filepath = temp_env->get_temp_filepath("ZeroRowsPastBounds.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); // attempt to read more rows than there actually are - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .skip_rows(4) .num_rows(0); - auto result 
= cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); // we should get empty columns back EXPECT_EQ(result.tbl->view().num_columns(), 4); @@ -2578,9 +2560,9 @@ TEST_F(ParquetReaderTest, UserBoundsWithNulls) // clang-format on cudf::table_view tbl({col}); auto filepath = temp_env->get_temp_filepath("UserBoundsWithNulls.parquet"); - cudf_io::parquet_writer_options out_args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_parquet(out_args); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); // skip_rows / num_rows // clang-format off @@ -2592,11 +2574,11 @@ TEST_F(ParquetReaderTest, UserBoundsWithNulls) {62, 2}, {63, 1}}; // clang-format on for (auto p : params) { - cudf_io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath}); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); if (p.first >= 0) { read_args.set_skip_rows(p.first); } if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf_io::read_parquet(read_args); + auto result = cudf::io::read_parquet(read_args); p.first = p.first < 0 ? 0 : p.first; p.second = p.second < 0 ? 
static_cast(col).size() - p.first : p.second; @@ -2622,9 +2604,9 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) // this file will have row groups of 1,000,000 each cudf::table_view tbl({col}); auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsLarge.parquet"); - cudf_io::parquet_writer_options out_args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_parquet(out_args); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); // skip_rows / num_rows // clang-format off @@ -2636,11 +2618,11 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) {24001231, 17}, {29000001, 989999}, {29999999, 1} }; // clang-format on for (auto p : params) { - cudf_io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath}); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); if (p.first >= 0) { read_args.set_skip_rows(p.first); } if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf_io::read_parquet(read_args); + auto result = cudf::io::read_parquet(read_args); p.first = p.first < 0 ? 0 : p.first; p.second = p.second < 0 ? 
static_cast(col).size() - p.first : p.second; @@ -2660,9 +2642,9 @@ TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge) // this file will have row groups of 1,000,000 each cudf::table_view tbl({col}); auto filepath = temp_env->get_temp_filepath("ListUserBoundsWithNullsLarge.parquet"); - cudf_io::parquet_writer_options out_args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_parquet(out_args); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); // skip_rows / num_rows // clang-format off @@ -2674,11 +2656,11 @@ TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge) {4001231, 17}, {1900000, 989999}, {4999999, 1} }; // clang-format on for (auto p : params) { - cudf_io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath}); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); if (p.first >= 0) { read_args.set_skip_rows(p.first); } if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf_io::read_parquet(read_args); + auto result = cudf::io::read_parquet(read_args); p.first = p.first < 0 ? 0 : p.first; p.second = p.second < 0 ? 
static_cast(col).size() - p.first : p.second; @@ -2697,17 +2679,18 @@ TEST_F(ParquetReaderTest, ReorderedColumns) cudf::table_view tbl{{a, b}}; auto filepath = temp_env->get_temp_filepath("ReorderedColumns.parquet"); - cudf_io::table_input_metadata md(tbl); + cudf::io::table_input_metadata md(tbl); md.column_metadata[0].set_name("a"); md.column_metadata[1].set_name("b"); - cudf_io::parquet_writer_options opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md); - cudf_io::write_parquet(opts); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); + cudf::io::write_parquet(opts); // read them out of order - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).columns({"b", "a"}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"b", "a"}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); @@ -2719,17 +2702,18 @@ TEST_F(ParquetReaderTest, ReorderedColumns) cudf::table_view tbl{{a, b}}; auto filepath = temp_env->get_temp_filepath("ReorderedColumns2.parquet"); - cudf_io::table_input_metadata md(tbl); + cudf::io::table_input_metadata md(tbl); md.column_metadata[0].set_name("a"); md.column_metadata[1].set_name("b"); - cudf_io::parquet_writer_options opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md); - cudf_io::write_parquet(opts); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); + cudf::io::write_parquet(opts); // read them out of order - cudf_io::parquet_reader_options read_opts = - 
cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).columns({"b", "a"}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"b", "a"}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); @@ -2744,21 +2728,21 @@ TEST_F(ParquetReaderTest, ReorderedColumns) cudf::table_view tbl{{a, b, c, d}}; auto filepath = temp_env->get_temp_filepath("ReorderedColumns3.parquet"); - cudf_io::table_input_metadata md(tbl); + cudf::io::table_input_metadata md(tbl); md.column_metadata[0].set_name("a"); md.column_metadata[1].set_name("b"); md.column_metadata[2].set_name("c"); md.column_metadata[3].set_name("d"); - cudf_io::parquet_writer_options opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md); - cudf_io::write_parquet(opts); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); + cudf::io::write_parquet(opts); { // read them out of order - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .columns({"d", "a", "b", "c"}); - auto result = cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); @@ -2768,10 +2752,10 @@ TEST_F(ParquetReaderTest, ReorderedColumns) { // read them out of order - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + 
cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .columns({"c", "d", "a", "b"}); - auto result = cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), c); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), d); @@ -2781,10 +2765,10 @@ TEST_F(ParquetReaderTest, ReorderedColumns) { // read them out of order - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .columns({"d", "c", "b", "a"}); - auto result = cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), c); @@ -2818,7 +2802,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) auto input = table_view({*struct_2}); - cudf_io::table_input_metadata input_metadata(input); + cudf::io::table_input_metadata input_metadata(input); input_metadata.column_metadata[0].set_name("being"); input_metadata.column_metadata[0].child(0).set_name("human?"); input_metadata.column_metadata[0].child(1).set_name("particulars"); @@ -2826,16 +2810,16 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) input_metadata.column_metadata[0].child(1).child(1).set_name("age"); auto filepath = temp_env->get_temp_filepath("SelectNestedColumn.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, input) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input) .metadata(&input_metadata); - cudf_io::write_parquet(args); + cudf::io::write_parquet(args); { // Test selecting a single leaf from the table - 
cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)) + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) .columns({"being.particulars.age"}); - const auto result = cudf_io::read_parquet(read_args); + const auto result = cudf::io::read_parquet(read_args); auto expect_ages_col = cudf::test::fixed_width_column_wrapper{ {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; @@ -2844,7 +2828,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) cudf::test::structs_column_wrapper{{expect_s_1}, {0, 1, 1, 1, 1, 1}}.release(); auto expected = table_view({*expect_s_2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("being"); expected_metadata.column_metadata[0].child(0).set_name("particulars"); expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); @@ -2854,10 +2838,10 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) } { // Test selecting a non-leaf and expecting all hierarchy from that node onwards - cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)) + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) .columns({"being.particulars"}); - const auto result = cudf_io::read_parquet(read_args); + const auto result = cudf::io::read_parquet(read_args); auto expected_weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; @@ -2872,7 +2856,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) cudf::test::structs_column_wrapper{{expected_s_1}, {0, 1, 1, 1, 1, 1}}.release(); auto expected = table_view({*expect_s_2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); 
expected_metadata.column_metadata[0].set_name("being"); expected_metadata.column_metadata[0].child(0).set_name("particulars"); expected_metadata.column_metadata[0].child(0).child(0).set_name("weight"); @@ -2883,10 +2867,10 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) } { // Test selecting struct children out of order - cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)) + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) .columns({"being.particulars.age", "being.particulars.weight", "being.human?"}); - const auto result = cudf_io::read_parquet(read_args); + const auto result = cudf::io::read_parquet(read_args); auto expected_weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; @@ -2906,7 +2890,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) auto expected = table_view({*expect_s_2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("being"); expected_metadata.column_metadata[0].child(0).set_name("particulars"); expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); @@ -3086,9 +3070,9 @@ TEST_F(ParquetReaderTest, DecimalRead) 0x00, 0x00, 0x00, 0xd3, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; unsigned int decimals_parquet_len = 2366; - cudf_io::parquet_reader_options read_opts = cudf_io::parquet_reader_options::builder( - cudf_io::source_info{reinterpret_cast(decimals_parquet), decimals_parquet_len}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{reinterpret_cast(decimals_parquet), decimals_parquet_len}); + auto result = cudf::io::read_parquet(read_opts); auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 
50; }); @@ -3134,9 +3118,9 @@ TEST_F(ParquetReaderTest, DecimalRead) std::begin(col1_data), std::end(col1_data), validity, numeric::scale_type{-5}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); - cudf_io::parquet_reader_options read_strict_opts = read_opts; + cudf::io::parquet_reader_options read_strict_opts = read_opts; read_strict_opts.set_columns({"dec7p4", "dec14p5"}); - EXPECT_NO_THROW(cudf_io::read_parquet(read_strict_opts)); + EXPECT_NO_THROW(cudf::io::read_parquet(read_strict_opts)); } { // dec7p3: Decimal(precision=7, scale=3) backed by FIXED_LENGTH_BYTE_ARRAY(length = 4) @@ -3229,10 +3213,10 @@ TEST_F(ParquetReaderTest, DecimalRead) unsigned int parquet_len = 1226; - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{ + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{ reinterpret_cast(fixed_len_bytes_decimal_parquet), parquet_len}); - auto result = cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); EXPECT_EQ(result.tbl->view().num_columns(), 3); auto validity_c0 = cudf::test::iterators::nulls_at({19}); @@ -3324,18 +3308,18 @@ TEST_F(ParquetReaderTest, EmptyOutput) table_view expected({c0, c1, c2, *c3, c4}); // set precision on the decimal column - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[2].set_decimal_precision(1); auto filepath = temp_env->get_temp_filepath("EmptyOutput.parquet"); - cudf_io::parquet_writer_options out_args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); out_args.set_metadata(&expected_metadata); - cudf_io::write_parquet(out_args); + cudf::io::write_parquet(out_args); - 
cudf_io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_args); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_args); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -3345,33 +3329,33 @@ TEST_F(ParquetWriterTest, RowGroupSizeInvalid) const auto unused_table = std::make_unique(); std::vector out_buffer; - EXPECT_THROW( - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) - .row_group_size_rows(4999), - cudf::logic_error); - EXPECT_THROW( - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) - .max_page_size_rows(4999), - cudf::logic_error); - EXPECT_THROW( - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) - .row_group_size_bytes(3 << 10), - cudf::logic_error); - EXPECT_THROW( - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) - .max_page_size_bytes(3 << 10), - cudf::logic_error); - - EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer)) + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) .row_group_size_rows(4999), cudf::logic_error); - EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer)) + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) .max_page_size_rows(4999), cudf::logic_error); - EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer)) + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) .row_group_size_bytes(3 << 10), 
cudf::logic_error); - EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer)) + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) + .max_page_size_bytes(3 << 10), + cudf::logic_error); + + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .row_group_size_rows(4999), + cudf::logic_error); + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .max_page_size_rows(4999), + cudf::logic_error); + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .row_group_size_bytes(3 << 10), + cudf::logic_error); + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) .max_page_size_bytes(3 << 10), cudf::logic_error); } @@ -3381,13 +3365,13 @@ TEST_F(ParquetWriterTest, RowGroupPageSizeMatch) const auto unused_table = std::make_unique
(); std::vector out_buffer; - auto options = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) - .row_group_size_bytes(128 * 1024) - .max_page_size_bytes(512 * 1024) - .row_group_size_rows(10000) - .max_page_size_rows(20000) - .build(); + auto options = cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) + .row_group_size_bytes(128 * 1024) + .max_page_size_bytes(512 * 1024) + .row_group_size_rows(10000) + .max_page_size_rows(20000) + .build(); EXPECT_EQ(options.get_row_group_size_bytes(), options.get_max_page_size_bytes()); EXPECT_EQ(options.get_row_group_size_rows(), options.get_max_page_size_rows()); } @@ -3396,7 +3380,7 @@ TEST_F(ParquetChunkedWriterTest, RowGroupPageSizeMatch) { std::vector out_buffer; - auto options = cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer)) + auto options = cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) .row_group_size_bytes(128 * 1024) .max_page_size_bytes(512 * 1024) .row_group_size_rows(10000) @@ -3420,7 +3404,7 @@ TEST_F(ParquetWriterTest, EmptyList) cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), cudf::table_view({*L0}))); - auto result = cudf_io::read_parquet( + auto result = cudf::io::read_parquet( cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); using lcw = cudf::test::lists_column_wrapper; @@ -3447,7 +3431,7 @@ TEST_F(ParquetWriterTest, DeepEmptyList) cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), cudf::table_view({*L0}))); - auto result = cudf_io::read_parquet( + auto result = cudf::io::read_parquet( cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); @@ -3472,7 +3456,7 @@ TEST_F(ParquetWriterTest, EmptyListWithStruct) auto filepath = 
temp_env->get_temp_filepath("EmptyListWithStruct.parquet"); cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), cudf::table_view({*L0}))); - auto result = cudf_io::read_parquet( + auto result = cudf::io::read_parquet( cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); @@ -4362,15 +4346,15 @@ TEST_F(ParquetReaderTest, EmptyColumnsParam) auto const expected = create_random_fixed_table(2, 4, false); std::vector out_buffer; - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&out_buffer}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder( - cudf_io::source_info{out_buffer.data(), out_buffer.size()}) + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder( + cudf::io::source_info{out_buffer.data(), out_buffer.size()}) .columns({}); - auto const result = cudf_io::read_parquet(read_opts); + auto const result = cudf::io::read_parquet(read_opts); EXPECT_EQ(result.tbl->num_columns(), 0); EXPECT_EQ(result.tbl->num_rows(), 0); @@ -4401,7 +4385,7 @@ TEST_F(ParquetReaderTest, BinaryAsStrings) {'F', 'u', 'n', 'd', 'a', 'y'}}; auto output = table_view{{int_col, string_col, float_col, string_col, list_int_col}}; - cudf_io::table_input_metadata output_metadata(output); + cudf::io::table_input_metadata output_metadata(output); output_metadata.column_metadata[0].set_name("col_other"); output_metadata.column_metadata[1].set_name("col_string"); output_metadata.column_metadata[2].set_name("col_float"); @@ -4409,37 +4393,38 @@ TEST_F(ParquetReaderTest, BinaryAsStrings) 
output_metadata.column_metadata[4].set_name("col_binary").set_output_as_binary(true); auto filepath = temp_env->get_temp_filepath("BinaryReadStrings.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, output) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, output) .metadata(&output_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); auto expected_string = table_view{{int_col, string_col, float_col, string_col, string_col}}; auto expected_mixed = table_view{{int_col, string_col, float_col, list_int_col, list_int_col}}; - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .set_column_schema({{}, {}, {}, {}, {}}); - auto result = cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); - cudf_io::parquet_reader_options default_in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - result = cudf_io::read_parquet(default_in_opts); + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + result = cudf::io::read_parquet(default_in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); - std::vector md{ + std::vector md{ {}, {}, {}, - cudf_io::reader_column_schema().set_convert_binary_to_strings(false), - cudf_io::reader_column_schema().set_convert_binary_to_strings(false)}; + cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().set_convert_binary_to_strings(false)}; - cudf_io::parquet_reader_options mixed_in_opts = - 
cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).set_column_schema(md); - result = cudf_io::read_parquet(mixed_in_opts); + cudf::io::parquet_reader_options mixed_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + result = cudf::io::read_parquet(mixed_in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_mixed, result.tbl->view()); } @@ -4478,32 +4463,33 @@ TEST_F(ParquetReaderTest, NestedByteArray) {{'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}}; auto const expected = table_view{{int_col, float_col, list_list_int_col}}; - cudf_io::table_input_metadata output_metadata(expected); + cudf::io::table_input_metadata output_metadata(expected); output_metadata.column_metadata[0].set_name("col_other"); output_metadata.column_metadata[1].set_name("col_float"); output_metadata.column_metadata[2].set_name("col_binary").child(1).set_output_as_binary(true); auto filepath = temp_env->get_temp_filepath("NestedByteArray.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&output_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - auto source = cudf_io::datasource::create(filepath); - cudf_io::parquet::FileMetaData fmd; + auto source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; read_footer(source, &fmd); EXPECT_EQ(fmd.schema[5].type, cudf::io::parquet::Type::BYTE_ARRAY); - std::vector md{ + std::vector md{ {}, {}, - cudf_io::reader_column_schema().add_child( - cudf_io::reader_column_schema().set_convert_binary_to_strings(false))}; + cudf::io::reader_column_schema().add_child( + cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; - cudf_io::parquet_reader_options in_opts 
= - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).set_column_schema(md); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -4524,23 +4510,23 @@ TEST_F(ParquetWriterTest, ByteArrayStats) {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}}; auto expected = table_view{{list_int_col0, list_int_col1}}; - cudf_io::table_input_metadata output_metadata(expected); + cudf::io::table_input_metadata output_metadata(expected); output_metadata.column_metadata[0].set_name("col_binary0").set_output_as_binary(true); output_metadata.column_metadata[1].set_name("col_binary1").set_output_as_binary(true); auto filepath = temp_env->get_temp_filepath("ByteArrayStats.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&output_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .set_column_schema({{}, {}}); - auto result = cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); - auto source = cudf_io::datasource::create(filepath); - cudf_io::parquet::FileMetaData fmd; + auto source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; read_footer(source, &fmd); @@ -4570,7 +4556,7 @@ TEST_F(ParquetReaderTest, StructByteArray) auto const expected = 
table_view{{struct_col}}; EXPECT_EQ(1, expected.num_columns()); - cudf_io::table_input_metadata output_metadata(expected); + cudf::io::table_input_metadata output_metadata(expected); output_metadata.column_metadata[0] .set_name("struct_binary") .child(0) @@ -4578,17 +4564,18 @@ TEST_F(ParquetReaderTest, StructByteArray) .set_output_as_binary(true); auto filepath = temp_env->get_temp_filepath("StructByteArray.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&output_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - std::vector md{cudf_io::reader_column_schema().add_child( - cudf_io::reader_column_schema().set_convert_binary_to_strings(false))}; + std::vector md{cudf::io::reader_column_schema().add_child( + cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).set_column_schema(md); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } From ba0febe308a8c097474b3316387dc8051fa1bc64 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 3 Oct 2022 17:29:42 -0700 Subject: [PATCH 005/202] Test/remove thrust vector usage (#11813) This PR removes usage of `thrust::device_vector` from almost all of our tests. Since the construction of a device vector is not stream-ordered, we should be using `rmm::device_uvector` instead wherever possible. 
There is one remaining use of `thrust::device_vector`, but that is in a test explicitly verifying that `device_vector` can convert implicitly to a `device_span` so it's worth keeping that there. I am working on automated tooling to detect any usage of stream 0 in tests as part of a push to prioritize stream-safety in libcudf, and this PR is a prerequisite to adding such tooling to our CI pipeline since at that point any test using stream 0 would fail. Since there is at least one test where I anticipate stream 0 will always be used (the one described above), I should be able to add specific tests to an allowlist as needed. It's an open question whether the added complexity required by the changes in this PR is a worthwhile tradeoff to be able to programmatically detect stream 0 usage. If reviewers feel that the additional complexity is too high, we can revert some (or all) of these changes and I can just plan for allowing stream 0 usage in all of the necessary tests. This PR demonstrates how we would go about removing it if we choose to do so, though. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Tobias Ribizel (https://github.com/upsj) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/11813 --- cpp/include/cudf_test/tdigest_utilities.cuh | 2 +- cpp/tests/join/conditional_join_tests.cu | 40 ++++++++++++++++----- cpp/tests/quantiles/tdigest_utilities.cu | 33 +++++++++++++---- cpp/tests/utilities_tests/span_tests.cu | 12 +++---- 4 files changed, 64 insertions(+), 23 deletions(-) diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh index 6f206a789fd..1a75016d78c 100644 --- a/cpp/include/cudf_test/tdigest_utilities.cuh +++ b/cpp/include/cudf_test/tdigest_utilities.cuh @@ -118,7 +118,7 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv, // verify min/max thrust::host_vector> h_spans; h_spans.push_back({input_values.begin(), static_cast(input_values.size())}); - thrust::device_vector> spans(h_spans); + auto spans = cudf::detail::make_device_uvector_async(h_spans, cudf::default_stream_value); auto expected_min = cudf::make_fixed_width_column( data_type{type_id::FLOAT64}, spans.size(), mask_state::UNALLOCATED); diff --git a/cpp/tests/join/conditional_join_tests.cu b/cpp/tests/join/conditional_join_tests.cu index bc2a96b5adf..f8dfc972191 100644 --- a/cpp/tests/join/conditional_join_tests.cu +++ b/cpp/tests/join/conditional_join_tests.cu @@ -26,10 +26,8 @@ #include -#include #include -#include -#include +#include #include #include @@ -127,6 +125,30 @@ gen_random_nullable_repeated_columns(unsigned int N = 10000, unsigned int num_re std::pair(std::move(right), std::move(right_nulls))); } +// `rmm::device_uvector` requires that T be trivially copyable. `thrust::pair` does +// not satisfy this requirement because it defines nontrivial copy/move +// constructors. Therefore, we need a simple, trivially copyable pair-like +// object. 
`index_pair` is a minimal implementation suitable for use in the +// tests in this file. +struct index_pair { + cudf::size_type first{}; + cudf::size_type second{}; + __device__ index_pair(){}; + __device__ index_pair(cudf::size_type const& first, cudf::size_type const& second) + : first(first), second(second){}; +}; + +__device__ inline bool operator<(const index_pair& lhs, const index_pair& rhs) +{ + if (lhs.first > rhs.first) return false; + return (lhs.first < rhs.first) || (lhs.second < rhs.second); +} + +__device__ inline bool operator==(const index_pair& lhs, const index_pair& rhs) +{ + return lhs.first == rhs.first && lhs.second == rhs.second; +} + } // namespace /** @@ -253,10 +275,10 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest { */ void _compare_to_hash_join(PairJoinReturn const& result, PairJoinReturn const& reference) { - thrust::device_vector> result_pairs( - result.first->size()); - thrust::device_vector> reference_pairs( - reference.first->size()); + auto result_pairs = + rmm::device_uvector(result.first->size(), cudf::default_stream_value); + auto reference_pairs = + rmm::device_uvector(reference.first->size(), cudf::default_stream_value); thrust::transform(rmm::exec_policy(cudf::default_stream_value), result.first->begin(), @@ -264,7 +286,7 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest { result.second->begin(), result_pairs.begin(), [] __device__(cudf::size_type first, cudf::size_type second) { - return thrust::make_pair(first, second); + return index_pair{first, second}; }); thrust::transform(rmm::exec_policy(cudf::default_stream_value), reference.first->begin(), @@ -272,7 +294,7 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest { reference.second->begin(), reference_pairs.begin(), [] __device__(cudf::size_type first, cudf::size_type second) { - return thrust::make_pair(first, second); + return index_pair{first, second}; }); thrust::sort( diff --git 
a/cpp/tests/quantiles/tdigest_utilities.cu b/cpp/tests/quantiles/tdigest_utilities.cu index 63ccd85bd6d..3cf2f2eb4ef 100644 --- a/cpp/tests/quantiles/tdigest_utilities.cu +++ b/cpp/tests/quantiles/tdigest_utilities.cu @@ -51,13 +51,34 @@ void tdigest_sample_compare(cudf::tdigest::tdigest_column_view const& tdv, auto sampled_result_weight = cudf::make_fixed_width_column( data_type{type_id::FLOAT64}, h_expected.size(), mask_state::UNALLOCATED); - rmm::device_vector expected(h_expected.begin(), h_expected.end()); + auto h_expected_src = std::vector(h_expected.size()); + auto h_expected_mean = std::vector(h_expected.size()); + auto h_expected_weight = std::vector(h_expected.size()); + + { + auto iter = thrust::make_counting_iterator(0); + std::for_each_n(iter, h_expected.size(), [&](size_type const index) { + h_expected_src[index] = thrust::get<0>(h_expected[index]); + h_expected_mean[index] = thrust::get<1>(h_expected[index]); + h_expected_weight[index] = thrust::get<2>(h_expected[index]); + }); + } + + auto d_expected_src = + cudf::detail::make_device_uvector_async(h_expected_src, cudf::default_stream_value); + auto d_expected_mean = + cudf::detail::make_device_uvector_async(h_expected_mean, cudf::default_stream_value); + auto d_expected_weight = + cudf::detail::make_device_uvector_async(h_expected_weight, cudf::default_stream_value); + auto iter = thrust::make_counting_iterator(0); thrust::for_each( rmm::exec_policy(cudf::default_stream_value), iter, - iter + expected.size(), - [expected = expected.data().get(), + iter + h_expected.size(), + [expected_src_in = d_expected_src.data(), + expected_mean_in = d_expected_mean.data(), + expected_weight_in = d_expected_weight.data(), expected_mean = expected_mean->mutable_view().begin(), expected_weight = expected_weight->mutable_view().begin(), result_mean = result_mean.begin(), @@ -65,9 +86,9 @@ void tdigest_sample_compare(cudf::tdigest::tdigest_column_view const& tdv, sampled_result_mean = 
sampled_result_mean->mutable_view().begin(), sampled_result_weight = sampled_result_weight->mutable_view().begin()] __device__(size_type index) { - expected_mean[index] = thrust::get<1>(expected[index]); - expected_weight[index] = thrust::get<2>(expected[index]); - auto const src_index = thrust::get<0>(expected[index]); + expected_mean[index] = expected_mean_in[index]; + expected_weight[index] = expected_weight_in[index]; + auto const src_index = expected_src_in[index]; sampled_result_mean[index] = result_mean[src_index]; sampled_result_weight[index] = result_weight[src_index]; }); diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu index fc4104c765b..cccef4b6284 100644 --- a/cpp/tests/utilities_tests/span_tests.cu +++ b/cpp/tests/utilities_tests/span_tests.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -234,17 +235,14 @@ __global__ void simple_device_kernel(device_span result) { result[0] = tru TEST(SpanTest, CanUseDeviceSpan) { - rmm::device_vector d_message = std::vector({false}); + auto d_message = + cudf::detail::make_zeroed_device_uvector_async(1, cudf::default_stream_value); - auto d_span = device_span(d_message.data().get(), d_message.size()); + auto d_span = device_span(d_message.data(), d_message.size()); simple_device_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(d_span); - cudaDeviceSynchronize(); - - thrust::host_vector h_message = d_message; - - ASSERT_TRUE(h_message[0]); + ASSERT_TRUE(d_message.element(0, cudf::default_stream_value)); } class MdSpanTest : public cudf::test::BaseFixture { From 5e42c2d80ce7850e4f03b7f703b206a12927d797 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Tue, 4 Oct 2022 06:06:16 -0700 Subject: [PATCH 006/202] Use conda-forge's `pyorc` (#11855) This PR switches the `pyorc` install from a `pip` wheel to a `conda` package. 
xref: https://github.com/rapidsai/cudf/pull/7085#discussion_r553446553 Authors: - https://github.com/jakirkham Approvers: - Jordan Jacobelli (https://github.com/Ethyling) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/11855 --- conda/environments/cudf_dev_cuda11.5.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index d161804ce7e..d7a762e00f9 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -70,6 +70,7 @@ dependencies: - cachetools - transformers<=4.10.3 - pydata-sphinx-theme + - pyorc - librdkafka=1.7.0 - python-confluent-kafka=1.7.0 - moto>=3.1.6 @@ -79,9 +80,6 @@ dependencies: - s3fs>=2022.3.0 - werkzeug<2.2.0 # Temporary transient dependency pinning to avoid URL-LIB3 + moto timeouts - pytorch<1.12.0 - - pip: - - git+https://github.com/python-streamz/streamz.git@master - - pyorc - cubinlinker # [linux64] - gcc_linux-64=9.* # [linux64] - sysroot_linux-64==2.17 # [linux64] @@ -90,3 +88,5 @@ dependencies: # - gcc_linux-aarch64=9.* # [aarch64] # - sysroot_linux-aarch64==2.17 # [aarch64] # - nvcc_linux-aarch64=11.5 # [aarch64] + - pip: + - git+https://github.com/python-streamz/streamz.git@master From 7d173c9d144a64c5e1a0467d2a5eb4181854f25e Mon Sep 17 00:00:00 2001 From: Peixin Date: Tue, 4 Oct 2022 21:24:15 +0800 Subject: [PATCH 007/202] Update cudf JNI version to 22.12.0-SNAPSHOT (#11764) Update JNI version to 22.12.0-SNAPSHOT Authors: - Peixin (https://github.com/pxLi) Approvers: - Nghia Truong (https://github.com/ttnghia) - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/11764 --- java/ci/README.md | 4 ++-- java/pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/java/ci/README.md b/java/ci/README.md index d74c7b41157..538e18c37c5 100644 --- a/java/ci/README.md +++ 
b/java/ci/README.md @@ -34,7 +34,7 @@ nvidia-docker run -it cudf-build:11.5.0-devel-centos7 bash You can download the cuDF repo in the docker container or you can mount it into the container. Here I choose to download again in the container. ```bash -git clone --recursive https://github.com/rapidsai/cudf.git -b branch-22.10 +git clone --recursive https://github.com/rapidsai/cudf.git -b branch-22.12 ``` ### Build cuDF jar with devtoolset @@ -47,5 +47,5 @@ scl enable devtoolset-9 "java/ci/build-in-docker.sh" ### The output -You can find the cuDF jar in java/target/ like cudf-22.10.0-SNAPSHOT-cuda11.jar. +You can find the cuDF jar in java/target/ like cudf-22.12.0-SNAPSHOT-cuda11.jar. diff --git a/java/pom.xml b/java/pom.xml index f2bb3def459..10d5c7ec360 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -21,7 +21,7 @@ ai.rapids cudf - 22.10.0-SNAPSHOT + 22.12.0-SNAPSHOT cudfjni From 0fb4d7621de51e58fb63c30c73d35cd01a116ef4 Mon Sep 17 00:00:00 2001 From: Gregory Kimball Date: Tue, 4 Oct 2022 10:02:36 -0700 Subject: [PATCH 008/202] Remove unused includes for table/row_operators (#11857) After reviewing usages of the "legacy" row operators, several of the includes are no longer needed. 
Authors: - Gregory Kimball (https://github.com/GregoryKimball) Approvers: - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/11857 --- cpp/src/groupby/sort/group_single_pass_reduction_util.cuh | 1 - cpp/src/partitioning/round_robin.cu | 1 - cpp/src/search/search_ordered.cu | 1 - 3 files changed, 3 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 93d5e6c032c..58ee06fcfef 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu index d455df3e890..85bd31a20ea 100644 --- a/cpp/src/partitioning/round_robin.cu +++ b/cpp/src/partitioning/round_robin.cu @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/src/search/search_ordered.cu b/cpp/src/search/search_ordered.cu index 8d3b0f97726..754a17dc6d8 100644 --- a/cpp/src/search/search_ordered.cu +++ b/cpp/src/search/search_ordered.cu @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include From 001aede876f58a2c14b30176dcdd981d1c121769 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Wed, 5 Oct 2022 14:32:14 -0500 Subject: [PATCH 009/202] JNI Avoid NPE for reading host binary data (#11865) This avoids a potential null pointer exception when trying to read byte data from an empty column Authors: - Robert (Bobby) Evans (https://github.com/revans2) Approvers: - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/11865 --- java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java b/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java index 8b1a9a63131..95d209c0984 100644 --- a/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java +++ b/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java @@ -399,7 +399,9 @@ public byte[] getBytesFromList(long rowIndex) { int size = end - start; byte[] result = new byte[size]; - listData.offHeap.data.getBytes(result, 0, start, size); + if (size > 0) { + listData.offHeap.data.getBytes(result, 0, start, size); + } return result; } From 6d1854381b895d7dfd11540a763a4068cb556c66 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 5 Oct 2022 15:06:37 -0500 Subject: [PATCH 010/202] Unpin `dask` and `distributed` for development (#11859) This PR relaxes the pinnings of `dask` and `distributed` for `22.12` development. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Joseph (https://github.com/jolorunyomi) - https://github.com/jakirkham URL: https://github.com/rapidsai/cudf/pull/11859 --- ci/benchmark/build.sh | 2 +- ci/cpu/build.sh | 2 +- ci/gpu/build.sh | 2 +- conda/environments/cudf_dev_cuda11.5.yml | 4 ++-- conda/recipes/custreamz/meta.yaml | 4 ++-- conda/recipes/dask-cudf/meta.yaml | 8 ++++---- python/dask_cudf/setup.py | 4 ++-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index ffa48797fe3..a8bc33e00bc 100755 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/" export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" # Dask & Distributed option to install main(nightly) or `conda-forge` packages. 
-export INSTALL_DASK_MAIN=0 +export INSTALL_DASK_MAIN=1 # Dask version to install when `INSTALL_DASK_MAIN=0` export DASK_STABLE_VERSION="2022.9.2" diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 560de6db187..400a1ba4764 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -28,7 +28,7 @@ export CONDA_BLD_DIR="$WORKSPACE/.conda-bld" # Whether to keep `dask/label/dev` channel in the env. If INSTALL_DASK_MAIN=0, # `dask/label/dev` channel is removed. -export INSTALL_DASK_MAIN=0 +export INSTALL_DASK_MAIN=1 # Switch to project root; also root of repo checkout cd "$WORKSPACE" diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 7eebd2bb91d..afcc80a6803 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -32,7 +32,7 @@ export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` unset GIT_DESCRIBE_TAG # Dask & Distributed option to install main(nightly) or `conda-forge` packages. -export INSTALL_DASK_MAIN=0 +export INSTALL_DASK_MAIN=1 # Dask version to install when `INSTALL_DASK_MAIN=0` export DASK_STABLE_VERSION="2022.9.2" diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index d7a762e00f9..142d3c7d9cb 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -49,8 +49,8 @@ dependencies: - pydocstyle=6.1.1 - typing_extensions - pre-commit - - dask==2022.9.2 - - distributed==2022.9.2 + - dask>=2022.9.2 + - distributed>=2022.9.2 - streamz - arrow-cpp=9 - dlpack>=0.5,<0.6.0a0 diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 596e5fde940..989f8855533 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -29,8 +29,8 @@ requirements: - python - streamz - cudf ={{ version }} - - dask==2022.9.2 - - distributed==2022.9.2 + - dask>=2022.9.2 + - distributed>=2022.9.2 - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf_kafka ={{ version }} diff --git a/conda/recipes/dask-cudf/meta.yaml 
b/conda/recipes/dask-cudf/meta.yaml index 2d95151018b..1c718c0e995 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -24,14 +24,14 @@ requirements: host: - python - cudf ={{ version }} - - dask==2022.9.2 - - distributed==2022.9.2 + - dask>=2022.9.2 + - distributed>=2022.9.2 - cudatoolkit ={{ cuda_version }} run: - python - cudf ={{ version }} - - dask==2022.9.2 - - distributed==2022.9.2 + - dask>=2022.9.2 + - distributed>=2022.9.2 - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} test: # [linux64] diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 54b8f69c6d9..4fa2af89b9d 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -9,8 +9,8 @@ install_requires = [ "cudf", - "dask==2022.9.2", - "distributed==2022.9.2", + "dask>=2022.9.2", + "distributed>=2022.9.2", "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.6.0dev0", From 45254745b4d91588fc6575d649d010110a5e7ad3 Mon Sep 17 00:00:00 2001 From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> Date: Wed, 5 Oct 2022 16:30:05 -0500 Subject: [PATCH 011/202] Parquet reader: bug fix for a num_rows/skip_rows corner case, w/optimization for nested preprocessing (#11752) Fixes an issue where using user bounds with parquet files containing both nested and non-nested types could result in incorrect row counts for the non-nested columns. Originally reported by @etseidl The nature of the fix also implements a longstanding desired optimization: when running the preprocess step for nested types, ignore pages for non-nested hierarchies. This can result in significant speedups for files containing only a few nested columns. The tests added for this PR seem to tease a bug in the parquet writer into happening (https://github.com/rapidsai/cudf/issues/11748) so I will leave this as a draft until that issue is resolved. 
Authors: - https://github.com/nvdbaranec Approvers: - Yunsong Wang (https://github.com/PointKernel) - Nghia Truong (https://github.com/ttnghia) - Mike Wilson (https://github.com/hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/11752 --- cpp/src/io/parquet/page_data.cu | 78 ++++++++++++------------- cpp/src/io/parquet/reader_impl.cu | 59 ++++++++++--------- cpp/src/io/parquet/reader_impl.hpp | 6 +- cpp/tests/io/parquet_test.cpp | 92 +++++++++++++++++++++++++++++- 4 files changed, 162 insertions(+), 73 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 531733a7df7..a5f6d737637 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1175,7 +1175,8 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu int t) { // max nesting depth of the column - int const max_depth = s->col.max_nesting_depth; + int const max_depth = s->col.max_nesting_depth; + bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; // how many (input) values we've processed in the page so far int input_value_count = s->input_value_count; // how many rows we've processed in the page so far @@ -1235,7 +1236,7 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu uint32_t const warp_valid_mask = // for flat schemas, a simple ballot_sync gives us the correct count and bit positions // because every value in the input matches to a value in the output - max_depth == 1 + !has_repetition ? ballot(is_valid) : // for nested schemas, it's more complicated. This warp will visit 32 incoming values, @@ -1284,11 +1285,12 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu // the correct position to start reading. since we are about to write the validity vector here // we need to adjust our computed mask to take into account the write row bounds. int const in_write_row_bounds = - max_depth == 1 + !has_repetition ? 
thread_row_index >= s->first_row && thread_row_index < (s->first_row + s->num_rows) : in_row_bounds; int const first_thread_in_write_range = - max_depth == 1 ? __ffs(ballot(in_write_row_bounds)) - 1 : 0; + !has_repetition ? __ffs(ballot(in_write_row_bounds)) - 1 : 0; + // # of bits to of the validity mask to write out int const warp_valid_mask_bit_count = first_thread_in_write_range < 0 ? 0 : warp_value_count - first_thread_in_write_range; @@ -1384,7 +1386,6 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, { // max nesting depth of the column int max_depth = s->col.max_nesting_depth; - // bool has_repetition = s->col.max_level[level_type::REPETITION] > 0 ? true : false; // how many input level values we've processed in the page so far int input_value_count = s->input_value_count; // how many leaf values we've processed in the page so far @@ -1479,6 +1480,11 @@ __global__ void __launch_bounds__(block_size) int t = threadIdx.x; PageInfo* pp = &pages[page_idx]; + // we only need to preprocess hierarchies with repetition in them (ie, hierarchies + // containing lists anywhere within). + bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; + if (!has_repetition) { return; } + if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX)) { return; } @@ -1504,8 +1510,6 @@ __global__ void __launch_bounds__(block_size) } __syncthreads(); - bool has_repetition = s->col.max_level[level_type::REPETITION] > 0; - // optimization : it might be useful to have a version of gpuDecodeStream that could go wider than // 1 warp. Currently it only uses 1 warp so that it can overlap work with the value decoding step // when in the actual value decoding kernel. However, during this preprocess step we have no such @@ -1516,16 +1520,13 @@ __global__ void __launch_bounds__(block_size) while (!s->error && s->input_value_count < s->num_input_values) { // decode repetition and definition levels. 
these will attempt to decode at // least up to the target, but may decode a few more. - if (has_repetition) { - gpuDecodeStream(s->rep, s, target_input_count, t, level_type::REPETITION); - } + gpuDecodeStream(s->rep, s, target_input_count, t, level_type::REPETITION); gpuDecodeStream(s->def, s, target_input_count, t, level_type::DEFINITION); __syncwarp(); // we may have decoded different amounts from each stream, so only process what we've been - int actual_input_count = has_repetition ? min(s->lvl_count[level_type::REPETITION], - s->lvl_count[level_type::DEFINITION]) - : s->lvl_count[level_type::DEFINITION]; + int actual_input_count = + min(s->lvl_count[level_type::REPETITION], s->lvl_count[level_type::DEFINITION]); // process what we got back gpuUpdatePageSizes(s, actual_input_count, t, trim_pass); @@ -1573,6 +1574,8 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( ((s->col.data_type & 7) == BOOLEAN || (s->col.data_type & 7) == BYTE_ARRAY) ? 64 : 32; } + bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; + // skipped_leaf_values will always be 0 for flat hierarchies. uint32_t skipped_leaf_values = s->page.skipped_leaf_values; while (!s->error && (s->input_value_count < s->num_input_values || s->src_pos < s->nz_count)) { @@ -1625,7 +1628,7 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( // - so we will end up ignoring the first two input rows, and input rows 2..n will // get written to the output starting at position 0. // - if (s->col.max_nesting_depth == 1) { dst_pos -= s->first_row; } + if (!has_repetition) { dst_pos -= s->first_row; } // target_pos will always be properly bounded by num_rows, but dst_pos may be negative (values // before first_row) in the flat hierarchy case. @@ -1765,6 +1768,8 @@ void PreprocessColumnData(hostdevice_vector& pages, // computes: // PageInfo::chunk_row for all pages + // Note: this is doing some redundant work for pages in flat hierarchies. 
chunk_row has already + // been computed during header decoding. the overall amount of work here is very small though. auto key_input = thrust::make_transform_iterator( pages.device_ptr(), [] __device__(PageInfo const& page) { return page.chunk_idx; }); auto page_input = thrust::make_transform_iterator( @@ -1840,26 +1845,14 @@ void PreprocessColumnData(hostdevice_vector& pages, return page.nesting[l_idx].size; }); - // compute column size. + // if this buffer is part of a list hierarchy, we need to determine it's + // final size and allocate it here. + // // for struct columns, higher levels of the output columns are shared between input // columns. so don't compute any given level more than once. - if (out_buf.size == 0) { + if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && out_buf.size == 0) { int size = thrust::reduce(rmm::exec_policy(stream), size_input, size_input + pages.size()); - // Handle a specific corner case. It is possible to construct a parquet file such that - // a column within a row group contains more rows than the row group itself. This may be - // invalid, but we have seen instances of this in the wild, including how they were created - // using the apache parquet tools. Normally, the trim pass would handle this case quietly, - // but if we are not running the trim pass (which is most of the time) we need to cap the - // number of rows we will allocate/read from the file with the amount specified in the - // associated row group. This only applies to columns that are not children of lists as - // those may have an arbitrary number of rows in them. 
- if (!uses_custom_row_bounds && - !(out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && - size > static_cast(num_rows)) { - size = static_cast(num_rows); - } - // if this is a list column add 1 for non-leaf levels for the terminating offset if (out_buf.type.id() == type_id::LIST && l_idx < max_depth) { size++; } @@ -1867,16 +1860,21 @@ void PreprocessColumnData(hostdevice_vector& pages, out_buf.create(size, stream, mr); } - // compute per-page start offset - thrust::exclusive_scan_by_key(rmm::exec_policy(stream), - page_keys.begin(), - page_keys.end(), - size_input, - start_offset_output_iterator{pages.device_ptr(), - page_index.begin(), - 0, - static_cast(src_col_schema), - static_cast(l_idx)}); + // for nested hierarchies, compute per-page start offset. + // it would be better/safer to be checking (schema.max_repetition_level > 0) here, but there's + // no easy way to get at that info here. we'd have to move this function into reader_impl.cu + if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) || + out_buf.type.id() == type_id::LIST) { + thrust::exclusive_scan_by_key(rmm::exec_policy(stream), + page_keys.begin(), + page_keys.end(), + size_input, + start_offset_output_iterator{pages.device_ptr(), + page_index.begin(), + 0, + static_cast(src_col_schema), + static_cast(l_idx)}); + } } } diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 59bef6f5600..07869189089 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1353,26 +1353,39 @@ void reader::impl::preprocess_columns(hostdevice_vector& c hostdevice_vector& pages, size_t min_row, size_t total_rows, - bool uses_custom_row_bounds, - bool has_lists) + bool uses_custom_row_bounds) { - // TODO : we should be selectively preprocessing only columns that have - // lists in them instead of doing them all if even one contains lists. 
- - // if there are no lists, simply allocate every allocate every output - // column to be of size num_rows - if (!has_lists) { - std::function&)> create_columns = - [&](std::vector& cols) { - for (size_t idx = 0; idx < cols.size(); idx++) { - auto& col = cols[idx]; - col.create(total_rows, _stream, _mr); - create_columns(col.children); - } - }; - create_columns(_output_columns); - } else { - // preprocess per-nesting level sizes by page + // iterate over all input columns and allocate any associated output + // buffers if they are not part of a list hierarchy. mark down + // if we have any list columns that need further processing. + bool has_lists = false; + for (size_t idx = 0; idx < _input_columns.size(); idx++) { + auto const& input_col = _input_columns[idx]; + size_t const max_depth = input_col.nesting_depth(); + + auto* cols = &_output_columns; + for (size_t l_idx = 0; l_idx < max_depth; l_idx++) { + auto& out_buf = (*cols)[input_col.nesting[l_idx]]; + cols = &out_buf.children; + + // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData + // to know how big this buffer actually is. + if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { + has_lists = true; + } + // if we haven't already processed this column because it is part of a struct hierarchy + else if (out_buf.size == 0) { + // add 1 for the offset if this is a list column + out_buf.create( + out_buf.type.id() == type_id::LIST && l_idx < max_depth ? total_rows + 1 : total_rows, + _stream, + _mr); + } + } + } + + // if we have columns containing lists, further preprocessing is necessary. 
+ if (has_lists) { gpu::PreprocessColumnData(pages, chunks, _input_columns, @@ -1636,9 +1649,6 @@ table_with_metadata reader::impl::read(size_type skip_rows, // Keep track of column chunk file offsets std::vector column_chunk_offsets(num_chunks); - // if there are lists present, we need to preprocess - bool has_lists = false; - // Initialize column chunk information size_t total_decompressed_size = 0; auto remaining_rows = num_rows; @@ -1657,9 +1667,6 @@ table_with_metadata reader::impl::read(size_type skip_rows, auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); auto& schema = _metadata->get_schema(col.schema_idx); - // this column contains repetition levels and will require a preprocess - if (schema.max_repetition_level > 0) { has_lists = true; } - auto [type_width, clock_rate, converted_type] = conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), _timestamp_type.id(), @@ -1755,7 +1762,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, // // - for nested schemas, output buffer offset values per-page, per nesting-level for the // purposes of decoding. - preprocess_columns(chunks, pages, skip_rows, num_rows, uses_custom_row_bounds, has_lists); + preprocess_columns(chunks, pages, skip_rows, num_rows, uses_custom_row_bounds); // decoding of column data itself decode_page_data(chunks, pages, page_nesting_info, skip_rows, num_rows); diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index e1f275bb8e8..6c3e05b4264 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -148,7 +148,7 @@ class reader::impl { hostdevice_vector& page_nesting_info); /** - * @brief Preprocess column information for nested schemas. + * @brief Preprocess column information and allocate output buffers. 
* * There are several pieces of information we can't compute directly from row counts in * the parquet headers when dealing with nested schemas. @@ -163,15 +163,13 @@ class reader::impl { * @param total_rows Maximum number of rows to read * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific * bounds - * @param has_lists Whether or not this data contains lists and requires * a preprocess. */ void preprocess_columns(hostdevice_vector& chunks, hostdevice_vector& pages, size_t min_row, size_t total_rows, - bool uses_custom_row_bounds, - bool has_lists); + bool uses_custom_row_bounds); /** * @brief Converts the page data and outputs to columns. diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 8a98efabcb5..134eff54144 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -112,7 +112,7 @@ std::unique_ptr create_compressible_fixed_table(cudf::size_type num // this function replicates the "list_gen" function in // python/cudf/cudf/tests/test_parquet.py template -std::unique_ptr make_parquet_list_col( +std::unique_ptr make_parquet_list_list_col( int skip_rows, int num_rows, int lists_per_row, int list_size, bool include_validity) { auto valids = @@ -2212,8 +2212,8 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize) bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true}; + true, true, true, true, true, true, true, true, true}; T c1a[num_els]; std::fill(c1a, c1a + num_els, static_cast(5)); T c1b[num_els]; @@ -2589,6 +2589,92 @@ TEST_F(ParquetReaderTest, UserBoundsWithNulls) } } +TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) +{ + constexpr int num_rows = 32 * 1024; + + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int 
index) { return bn(gen); }); + auto values = thrust::make_counting_iterator(0); + + // int64 + cudf::test::fixed_width_column_wrapper c0(values, values + num_rows, valids); + + // list + constexpr int floats_per_row = 4; + auto c1_offset_iter = cudf::detail::make_counting_transform_iterator( + 0, [floats_per_row](cudf::size_type idx) { return idx * floats_per_row; }); + cudf::test::fixed_width_column_wrapper c1_offsets( + c1_offset_iter, c1_offset_iter + num_rows + 1); + cudf::test::fixed_width_column_wrapper c1_floats( + values, values + (num_rows * floats_per_row), valids); + auto _c1 = cudf::make_lists_column(num_rows, + c1_offsets.release(), + c1_floats.release(), + cudf::UNKNOWN_NULL_COUNT, + cudf::test::detail::make_null_mask(valids, valids + num_rows)); + auto c1 = cudf::purge_nonempty_nulls(static_cast(*_c1)); + + // list> + auto c2 = make_parquet_list_list_col(0, num_rows, 5, 8, true); + + // struct, int, float> + std::vector strings{ + "abc", "x", "bananas", "gpu", "minty", "backspace", "", "cayenne", "turbine", "soft"}; + std::uniform_int_distribution uni(0, strings.size() - 1); + auto string_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](cudf::size_type idx) { return strings[uni(gen)]; }); + constexpr int string_per_row = 3; + constexpr int num_string_rows = num_rows * string_per_row; + cudf::test::strings_column_wrapper string_col{string_iter, string_iter + num_string_rows}; + auto offset_iter = cudf::detail::make_counting_transform_iterator( + 0, [string_per_row](cudf::size_type idx) { return idx * string_per_row; }); + cudf::test::fixed_width_column_wrapper offsets(offset_iter, + offset_iter + num_rows + 1); + auto _c3_list = + cudf::make_lists_column(num_rows, + offsets.release(), + string_col.release(), + cudf::UNKNOWN_NULL_COUNT, + cudf::test::detail::make_null_mask(valids, valids + num_rows)); + auto c3_list = cudf::purge_nonempty_nulls(static_cast(*_c3_list)); + cudf::test::fixed_width_column_wrapper c3_ints(values, values + 
num_rows, valids); + cudf::test::fixed_width_column_wrapper c3_floats(values, values + num_rows, valids); + std::vector> c3_children; + c3_children.push_back(std::move(c3_list)); + c3_children.push_back(c3_ints.release()); + c3_children.push_back(c3_floats.release()); + cudf::test::structs_column_wrapper _c3(std::move(c3_children)); + auto c3 = cudf::purge_nonempty_nulls(static_cast(_c3)); + + // write it out + cudf::table_view tbl({c0, *c1, *c2, *c3}); + auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsMixedTypes.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); + + // read it back + std::vector> params{ + {-1, -1}, {0, num_rows}, {1, num_rows - 1}, {num_rows - 1, 1}, {517, 22000}}; + for (auto p : params) { + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + if (p.first >= 0) { read_args.set_skip_rows(p.first); } + if (p.second >= 0) { read_args.set_num_rows(p.second); } + auto result = cudf::io::read_parquet(read_args); + + p.first = p.first < 0 ? 0 : p.first; + p.second = p.second < 0 ? 
num_rows - p.first : p.second; + std::vector slice_indices{p.first, p.first + p.second}; + auto expected = cudf::slice(tbl, slice_indices); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected[0]); + } +} + TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) { constexpr int num_rows = 30 * 1000000; @@ -2636,7 +2722,7 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge) { constexpr int num_rows = 5 * 1000000; - auto colp = make_parquet_list_col(0, num_rows, 5, 8, true); + auto colp = make_parquet_list_list_col(0, num_rows, 5, 8, true); cudf::column_view col = *colp; // this file will have row groups of 1,000,000 each From 029b1dbbaf4aa7f2eb19f29a68589e574c0c7230 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 6 Oct 2022 10:41:33 -0700 Subject: [PATCH 012/202] Fix RangeIndex unary operators. (#11868) These operators rely on a method that was renamed in #11272 and are also out of sync with the rest of the `RangeIndex` design now that the `__getattr__` overload has been removed (#10538). Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/11868 --- python/cudf/cudf/core/index.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index b6ae7beebc5..3734893627f 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -867,15 +867,14 @@ def min(self): def max(self): return self._minmax("max") + def __neg__(self): + return -self._as_int_index() -# Patch in all binops and unary ops, which bypass __getattr__ on the instance -# and prevent the above overload from working. 
-for unaop in ("__neg__", "__pos__", "__abs__"): - setattr( - RangeIndex, - unaop, - lambda self, op=unaop: getattr(self._as_int64(), op)(), - ) + def __pos__(self): + return +self._as_int_index() + + def __abs__(self): + return abs(self._as_int_index()) class GenericIndex(SingleColumnFrame, BaseIndex): From e323f0a75757374bce17d4c4832f422b4e8c19a3 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 6 Oct 2022 15:55:37 -0400 Subject: [PATCH 013/202] Fix make_column_from_scalar for all-null strings column (#11807) Fixes the `cudf::make_column_from_scalar` for an invalid `cudf::string_scalar` to return a column with children. Some libcudf APIs will not work with a strings column with no children. This condition would be rare enough that additional logic for checking no children in these places would be a performance and maintenance issue. This also greatly simplifies the `make_column_from_scalar` specialization logic for strings. Closes #11756 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/11807 --- cpp/src/column/column_factories.cu | 12 +++--------- cpp/src/strings/filling/fill.cu | 20 ++++++++++++-------- cpp/tests/column/factories_test.cpp | 2 ++ cpp/tests/filling/fill_tests.cpp | 4 ++-- cpp/tests/strings/fill_tests.cpp | 10 +++++----- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/cpp/src/column/column_factories.cu b/cpp/src/column/column_factories.cu index 90252fd6cf1..c401b765f0b 100644 --- a/cpp/src/column/column_factories.cu +++ b/cpp/src/column/column_factories.cu @@ -54,21 +54,15 @@ std::unique_ptr column_from_scalar_dispatch::operator()( - value.type(), size, rmm::device_buffer{}, std::move(null_mask), size); - - // Create a strings column_view with all nulls and no children. 
// Since we are setting every row to the scalar, the fill() never needs to access // any of the children in the strings column which would otherwise cause an exception. - column_view sc{ - data_type{type_id::STRING}, size, nullptr, static_cast(null_mask.data()), size}; + column_view sc{value.type(), size, nullptr}; auto& sv = static_cast const&>(value); + // fill the column with the scalar auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr); - output->set_null_mask(rmm::device_buffer{}, 0); // should be no nulls + return output; } diff --git a/cpp/src/strings/filling/fill.cu b/cpp/src/strings/filling/fill.cu index a858a3d6238..f813ec24ee9 100644 --- a/cpp/src/strings/filling/fill.cu +++ b/cpp/src/strings/filling/fill.cu @@ -58,14 +58,18 @@ std::unique_ptr fill( auto d_strings = *strings_column; // create resulting null mask - auto valid_mask = cudf::detail::valid_if( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - [d_strings, begin, end, d_value] __device__(size_type idx) { - return ((begin <= idx) && (idx < end)) ? d_value.is_valid() : !d_strings.is_null(idx); - }, - stream, - mr); + auto valid_mask = [begin, end, d_value, value, d_strings, stream, mr] { + if (begin == 0 and end == d_strings.size() and value.is_valid(stream)) + return std::pair(rmm::device_buffer{}, 0); + return cudf::detail::valid_if( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(d_strings.size()), + [d_strings, begin, end, d_value] __device__(size_type idx) { + return ((begin <= idx) && (idx < end)) ? 
d_value.is_valid() : !d_strings.is_null(idx); + }, + stream, + mr); + }(); auto null_count = valid_mask.second; rmm::device_buffer& null_mask = valid_mask.first; diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index e8098202fc3..bd37da91f69 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -423,6 +423,7 @@ TEST_F(ColumnFactoryTest, FromStringScalar) EXPECT_EQ(0, column->null_count()); EXPECT_FALSE(column->nullable()); EXPECT_FALSE(column->has_nulls()); + EXPECT_TRUE(column->num_children() > 0); } TEST_F(ColumnFactoryTest, FromNullStringScalar) @@ -434,6 +435,7 @@ TEST_F(ColumnFactoryTest, FromNullStringScalar) EXPECT_EQ(2, column->null_count()); EXPECT_TRUE(column->nullable()); EXPECT_TRUE(column->has_nulls()); + EXPECT_TRUE(column->num_children() > 0); } TEST_F(ColumnFactoryTest, FromStringScalarWithZeroSize) diff --git a/cpp/tests/filling/fill_tests.cpp b/cpp/tests/filling/fill_tests.cpp index f305d4a06c7..ec400fa61c8 100644 --- a/cpp/tests/filling/fill_tests.cpp +++ b/cpp/tests/filling/fill_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -206,7 +206,7 @@ class FillStringTestFixture : public cudf::test::BaseFixture { })); auto p_ret = cudf::fill(destination, begin, end, *p_val); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*p_ret, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*p_ret, expected); } }; diff --git a/cpp/tests/strings/fill_tests.cpp b/cpp/tests/strings/fill_tests.cpp index 721fb6d8d33..44bbb3c9c29 100644 --- a/cpp/tests/strings/fill_tests.cpp +++ b/cpp/tests/strings/fill_tests.cpp @@ -47,7 +47,7 @@ TEST_F(StringsFillTest, Fill) h_expected.begin(), h_expected.end(), thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } { auto results = cudf::strings::detail::fill(view, 2, 4, cudf::string_scalar("", false)); @@ -57,23 +57,23 @@ TEST_F(StringsFillTest, Fill) h_expected.begin(), h_expected.end(), thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } { auto results = cudf::strings::detail::fill(view, 5, 5, cudf::string_scalar("zz")); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, view.parent()); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, view.parent()); } { auto results = cudf::strings::detail::fill(view, 0, 7, cudf::string_scalar("")); cudf::test::strings_column_wrapper expected({"", "", "", "", "", "", ""}, {1, 1, 1, 1, 1, 1, 1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } { auto results = cudf::strings::detail::fill(view, 0, 7, cudf::string_scalar("", false)); cudf::test::strings_column_wrapper expected({"", "", "", "", "", "", ""}, {0, 0, 0, 0, 0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } } From 1ef722d690bddfc1df48577dada44afe5f5d5aa0 
Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Fri, 7 Oct 2022 02:23:36 +0530 Subject: [PATCH 014/202] Fix decimal benchmark input data generation (#11863) closes https://github.com/rapidsai/cudf/issues/11850 Fixes decimal benchmark input data generation. Generated data alternated between two values because `device_uvector` has both value and scale. scale is fixed for a column and hence when this data is copied to `cudf::column`, this column values alternated between values and scale. Fix is to use `device_storage_type_t` instead of `T`. Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/11863 --- cpp/benchmarks/common/generate_input.cu | 52 +++++++++++++------------ 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index 890a78bb9bf..2bcdaa6760c 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -247,40 +247,33 @@ struct random_value_fn()>> { */ template struct random_value_fn()>> { - using rep = typename T::rep; - rep const lower_bound; - rep const upper_bound; - distribution_fn dist; + using DeviceType = cudf::device_storage_type_t; + DeviceType const lower_bound; + DeviceType const upper_bound; + distribution_fn dist; std::optional scale; - random_value_fn(distribution_params const& desc) + random_value_fn(distribution_params const& desc) : lower_bound{desc.lower_bound}, upper_bound{desc.upper_bound}, - dist{make_distribution(desc.id, desc.lower_bound, desc.upper_bound)} + dist{make_distribution(desc.id, desc.lower_bound, desc.upper_bound)} { } - rmm::device_uvector operator()(thrust::minstd_rand& engine, unsigned size) + [[nodiscard]] numeric::scale_type 
get_scale(thrust::minstd_rand& engine) { if (not scale.has_value()) { - int const max_scale = std::numeric_limits::digits10; + constexpr int max_scale = std::numeric_limits::digits10; std::uniform_int_distribution scale_dist{-max_scale, max_scale}; std::mt19937 engine_scale(engine()); scale = numeric::scale_type{scale_dist(engine_scale)}; } - auto const ints = dist(engine, size); - rmm::device_uvector result(size, cudf::default_stream_value); - // Clamp the generated random value to the specified range - thrust::transform(thrust::device, - ints.begin(), - ints.end(), - result.begin(), - [scale = *(this->scale), - upper_bound = this->upper_bound, - lower_bound = this->lower_bound] __device__(auto int_value) { - return T{std::clamp(int_value, lower_bound, upper_bound), scale}; - }); - return result; + return scale.value_or(numeric::scale_type{0}); + } + + rmm::device_uvector operator()(thrust::minstd_rand& engine, unsigned size) + { + return dist(engine, size); } }; @@ -398,9 +391,17 @@ std::unique_ptr create_random_column(data_profile const& profile, distribution_params{1. 
- profile.get_null_probability().value_or(0)}); auto value_dist = random_value_fn{profile.get_distribution_params()}; + using DeviceType = cudf::device_storage_type_t; + cudf::data_type const dtype = [&]() { + if constexpr (cudf::is_fixed_point()) + return cudf::data_type{cudf::type_to_id(), value_dist.get_scale(engine)}; + else + return cudf::data_type{cudf::type_to_id()}; + }(); + // Distribution for picking elements from the array of samples auto const avg_run_len = profile.get_avg_run_length(); - rmm::device_uvector data(0, cudf::default_stream_value); + rmm::device_uvector data(0, cudf::default_stream_value); rmm::device_uvector null_mask(0, cudf::default_stream_value); if (profile.get_cardinality() == 0 and avg_run_len == 1) { @@ -412,11 +413,12 @@ std::unique_ptr create_random_column(data_profile const& profile, : profile_cardinality; }(); rmm::device_uvector samples_null_mask = valid_dist(engine, cardinality); - rmm::device_uvector samples = value_dist(engine, cardinality); + rmm::device_uvector samples = value_dist(engine, cardinality); + // generate n samples and gather. auto const sample_indices = sample_indices_with_run_length(avg_run_len, cardinality, num_rows, engine); - data = rmm::device_uvector(num_rows, cudf::default_stream_value); + data = rmm::device_uvector(num_rows, cudf::default_stream_value); null_mask = rmm::device_uvector(num_rows, cudf::default_stream_value); thrust::gather( thrust::device, sample_indices.begin(), sample_indices.end(), samples.begin(), data.begin()); @@ -431,7 +433,7 @@ std::unique_ptr create_random_column(data_profile const& profile, cudf::detail::valid_if(null_mask.begin(), null_mask.end(), thrust::identity{}); return std::make_unique( - cudf::data_type{cudf::type_to_id()}, + dtype, num_rows, data.release(), profile.get_null_probability().has_value() ? 
std::move(result_bitmask) : rmm::device_buffer{}); From e20eb94aedf5c8cc5c3f5ce3405e0dab1ace6f63 Mon Sep 17 00:00:00 2001 From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com> Date: Thu, 6 Oct 2022 15:15:17 -0700 Subject: [PATCH 015/202] part1: Simplify BaseIndex to an abstract class (#10389) This PR is in response to @vyasr comment, as partial fix for PR https://github.com/rapidsai/cudf/issues/9593 : `BaseIndex `should be reduced as closely as possible to an abstract class. While there are a subset of APIs that truly make sense for all types of index objects, in almost all cases the optimal implementation for `RangeIndex `(and `MultiIndex`, for that matter) is very different from the implementation for `GenericIndex`. In addition, this change reduces cognitive load for developers by simplifying the inheritance hierarchy Authors: - Sheilah Kirui (https://github.com/skirui-source) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/10389 --- .../cudf/benchmarks/API/bench_rangeindex.py | 5 + python/cudf/cudf/core/_base_index.py | 240 ++++++++---------- python/cudf/cudf/core/column/categorical.py | 2 +- python/cudf/cudf/core/index.py | 110 +++++++- python/cudf/cudf/tests/test_index.py | 136 ++++++++-- 5 files changed, 332 insertions(+), 161 deletions(-) diff --git a/python/cudf/benchmarks/API/bench_rangeindex.py b/python/cudf/benchmarks/API/bench_rangeindex.py index 7b2baef9081..42de5a86b65 100644 --- a/python/cudf/benchmarks/API/bench_rangeindex.py +++ b/python/cudf/benchmarks/API/bench_rangeindex.py @@ -40,3 +40,8 @@ def bench_min(benchmark, rangeindex): def bench_where(benchmark, rangeindex): cond = rangeindex % 2 == 0 benchmark(rangeindex.where, cond, 0) + + +def bench_isin(benchmark, rangeindex): + values = [10, 100] + benchmark(rangeindex.isin, values) diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 
6898ae4941c..b73536558f1 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -27,10 +27,7 @@ from cudf.core.column import ColumnBase, column from cudf.core.column_accessor import ColumnAccessor from cudf.utils import ioutils -from cudf.utils.dtypes import ( - is_mixed_with_object_dtype, - numeric_normalize_types, -) +from cudf.utils.dtypes import is_mixed_with_object_dtype _index_astype_docstring = """\ Create an Index with values cast to dtypes. @@ -90,7 +87,7 @@ def size(self): @property def values(self): - return self._values.values + raise NotImplementedError def get_loc(self, key, method=None, tolerance=None): raise NotImplementedError @@ -188,12 +185,7 @@ def _clean_nulls_from_index(self): methods using this method to replace or handle representation of the actual types correctly. """ - if self._values.has_nulls(): - return cudf.Index( - self._values.astype("str").fillna(cudf._NA_REP), name=self.name - ) - else: - return self + raise NotImplementedError @property def is_monotonic(self): @@ -549,13 +541,11 @@ def to_frame(self, index=True, name=None): Set the index of the returned DataFrame as the original Index name : str, default None Name to be used for the column - Returns ------- DataFrame cudf DataFrame """ - if name is not None: col_name = name elif self.name is None: @@ -570,7 +560,40 @@ def any(self): """ Return whether any elements is True in Index. """ - return self._values.any() + raise NotImplementedError + + def isna(self): + """ + Detect missing values. + + Return a boolean same-sized object indicating if the values are NA. + NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`cudf.NaN`, get + mapped to ``True`` values. + Everything else get mapped to ``False`` values. + + Returns + ------- + numpy.ndarray[bool] + A boolean array to indicate which entries are NA. + + """ + raise NotImplementedError + + def notna(self): + """ + Detect existing (non-missing) values. 
+ + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to ``True``. + NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False`` + values. + + Returns + ------- + numpy.ndarray[bool] + A boolean array to indicate which entries are not NA. + """ + raise NotImplementedError def to_pandas(self): """ @@ -589,7 +612,75 @@ def to_pandas(self): >>> type(idx) """ - return pd.Index(self._values.to_pandas(), name=self.name) + raise NotImplementedError + + def isin(self, values): + """Return a boolean array where the index values are in values. + + Compute boolean array of whether each index value is found in + the passed set of values. The length of the returned boolean + array matches the length of the index. + + Parameters + ---------- + values : set, list-like, Index + Sought values. + + Returns + ------- + is_contained : cupy array + CuPy array of boolean values. + + Examples + -------- + >>> idx = cudf.Index([1,2,3]) + >>> idx + Int64Index([1, 2, 3], dtype='int64') + + Check whether each index value in a list of values. + + >>> idx.isin([1, 4]) + array([ True, False, False]) + """ + # To match pandas behavior, even though only list-like objects are + # supposed to be passed, only scalars throw errors. Other types (like + # dicts) just transparently return False (see the implementation of + # ColumnBase.isin). + raise NotImplementedError + + def unique(self): + """ + Return unique values in the index. + + Returns + ------- + Index without duplicates + """ + raise NotImplementedError + + def to_series(self, index=None, name=None): + """ + Create a Series with both index and values equal to the index keys. + Useful with map for returning an indexer based on an index. + + Parameters + ---------- + index : Index, optional + Index of resulting Series. If None, defaults to original index. + name : str, optional + Name of resulting Series. If None, defaults to name of original + index. 
+ + Returns + ------- + Series + The dtype will be based on the type of the Index values. + """ + return cudf.Series._from_data( + self._data, + index=self.copy(deep=False) if index is None else index, + name=self.name if name is None else name, + ) @ioutils.doc_to_dlpack() def to_dlpack(self): @@ -599,7 +690,7 @@ def to_dlpack(self): def append(self, other): """ - Append a collection of Index options together. + Append a collection of Index objects together. Parameters ---------- @@ -626,45 +717,7 @@ def append(self, other): >>> idx.append([other, other]) Int64Index([1, 2, 10, 100, 200, 400, 50, 200, 400, 50], dtype='int64') """ - - if is_list_like(other): - to_concat = [self] - to_concat.extend(other) - else: - this = self - if len(other) == 0: - # short-circuit and return a copy - to_concat = [self] - - other = cudf.Index(other) - - if len(self) == 0: - to_concat = [other] - - if len(self) and len(other): - if is_mixed_with_object_dtype(this, other): - got_dtype = ( - other.dtype - if this.dtype == cudf.dtype("object") - else this.dtype - ) - raise TypeError( - f"cudf does not support appending an Index of " - f"dtype `{cudf.dtype('object')}` with an Index " - f"of dtype `{got_dtype}`, please type-cast " - f"either one of them to same dtypes." - ) - - if isinstance(self._values, cudf.core.column.NumericalColumn): - if self.dtype != other.dtype: - this, other = numeric_normalize_types(self, other) - to_concat = [this, other] - - for obj in to_concat: - if not isinstance(obj, BaseIndex): - raise TypeError("all inputs must be Index") - - return self._concat(to_concat) + raise NotImplementedError def difference(self, other, sort=None): """ @@ -1119,18 +1172,6 @@ def sort_values( else: return index_sorted - def unique(self): - """ - Return unique values in the index. 
- - Returns - ------- - Index without duplicates - """ - return cudf.core.index._index_from_data( - {self.name: self._values.unique()}, name=self.name - ) - def join( self, other, how="left", level=None, return_indexers=False, sort=False ): @@ -1263,30 +1304,6 @@ def rename(self, name, inplace=False): out.name = name return out - def to_series(self, index=None, name=None): - """ - Create a Series with both index and values equal to the index keys. - Useful with map for returning an indexer based on an index. - - Parameters - ---------- - index : Index, optional - Index of resulting Series. If None, defaults to original index. - name : str, optional - Dame of resulting Series. If None, defaults to name of original - index. - - Returns - ------- - Series - The dtype will be based on the type of the Index values. - """ - return cudf.Series( - self._values, - index=self.copy(deep=False) if index is None else index, - name=self.name if name is None else name, - ) - def get_slice_bound(self, label, side, kind=None): """ Calculate slice bound that corresponds to given label. @@ -1339,47 +1356,6 @@ def __array_function__(self, func, types, args, kwargs): else: return NotImplemented - def isin(self, values): - """Return a boolean array where the index values are in values. - - Compute boolean array of whether each index value is found in - the passed set of values. The length of the returned boolean - array matches the length of the index. - - Parameters - ---------- - values : set, list-like, Index - Sought values. - - Returns - ------- - is_contained : cupy array - CuPy array of boolean values. - - Examples - -------- - >>> idx = cudf.Index([1,2,3]) - >>> idx - Int64Index([1, 2, 3], dtype='int64') - - Check whether each index value in a list of values. - - >>> idx.isin([1, 4]) - array([ True, False, False]) - """ - - # To match pandas behavior, even though only list-like objects are - # supposed to be passed, only scalars throw errors. 
Other types (like - # dicts) just transparently return False (see the implementation of - # ColumnBase.isin). - if is_scalar(values): - raise TypeError( - "only list-like objects are allowed to be passed " - f"to isin(), you passed a {type(values).__name__}" - ) - - return self._values.isin(values).values - @classmethod def from_pandas(cls, index, nan_as_null=None): """ diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 601ad707ba6..af5d140a20a 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -104,7 +104,7 @@ def __init__(self, parent: SeriesOrSingleColumnIndex): super().__init__(parent=parent) @property - def categories(self) -> "cudf.core.index.BaseIndex": + def categories(self) -> "cudf.core.index.GenericIndex": """ The categories of this categorical. """ diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 3734893627f..3d77ed15027 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -33,6 +33,8 @@ is_categorical_dtype, is_dtype_equal, is_interval_dtype, + is_list_like, + is_scalar, is_string_dtype, ) from cudf.core._base_index import BaseIndex, _index_astype_docstring @@ -55,7 +57,12 @@ from cudf.core.mixins import BinaryOperand from cudf.core.single_column_frame import SingleColumnFrame from cudf.utils.docutils import copy_docstring, doc_apply -from cudf.utils.dtypes import _maybe_convert_to_default_type, find_common_type +from cudf.utils.dtypes import ( + _maybe_convert_to_default_type, + find_common_type, + is_mixed_with_object_dtype, + numeric_normalize_types, +) from cudf.utils.utils import _cudf_nvtx_annotate, search_range T = TypeVar("T", bound="Frame") @@ -243,6 +250,9 @@ def _values(self): else: return column.column_empty(0, masked=False, dtype=self.dtype) + def _clean_nulls_from_index(self): + return self + def is_numeric(self): return True @@ -867,6 +877,25 @@ def 
min(self): def max(self): return self._minmax("max") + @property + def values(self): + return cupy.arange(self.start, self.stop, self.step) + + def any(self): + return any(self._range) + + def append(self, other): + return self._as_int_index().append(other) + + def isin(self, values): + if is_scalar(values): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a {type(values).__name__}" + ) + + return self._values.isin(values).values + def __neg__(self): return -self._as_int_index() @@ -1409,6 +1438,81 @@ def where(self, cond, other=None, inplace=False): inplace=inplace, ) + @property + def values(self): + return self._column.values + + def __contains__(self, item): + return item in self._values + + def _clean_nulls_from_index(self): + if self._values.has_nulls(): + return cudf.Index( + self._values.astype("str").fillna(cudf._NA_REP), name=self.name + ) + + return self + + def any(self): + return self._values.any() + + def to_pandas(self): + return pd.Index(self._values.to_pandas(), name=self.name) + + def append(self, other): + if is_list_like(other): + to_concat = [self] + to_concat.extend(other) + else: + this = self + if len(other) == 0: + # short-circuit and return a copy + to_concat = [self] + + other = cudf.Index(other) + + if len(self) == 0: + to_concat = [other] + + if len(self) and len(other): + if is_mixed_with_object_dtype(this, other): + got_dtype = ( + other.dtype + if this.dtype == cudf.dtype("object") + else this.dtype + ) + raise TypeError( + f"cudf does not support appending an Index of " + f"dtype `{cudf.dtype('object')}` with an Index " + f"of dtype `{got_dtype}`, please type-cast " + f"either one of them to same dtypes." 
+ ) + + if isinstance(self._values, cudf.core.column.NumericalColumn): + if self.dtype != other.dtype: + this, other = numeric_normalize_types(self, other) + to_concat = [this, other] + + for obj in to_concat: + if not isinstance(obj, BaseIndex): + raise TypeError("all inputs must be Index") + + return self._concat(to_concat) + + def unique(self): + return cudf.core.index._index_from_data( + {self.name: self._values.unique()}, name=self.name + ) + + def isin(self, values): + if is_scalar(values): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a {type(values).__name__}" + ) + + return self._values.isin(values).values + class NumericIndex(GenericIndex): """Immutable, ordered and sliceable sequence of labels. @@ -2796,10 +2900,6 @@ def str(self): return StringMethods(parent=self) def _clean_nulls_from_index(self): - """ - Convert all na values(if any) in Index object - to `` as a preprocessing step to `__repr__` methods. - """ if self._values.has_nulls(): return self.fillna(cudf._NA_REP) else: diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index e8c568979a3..358d5e2170e 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -2537,32 +2537,20 @@ def rangeindex(request): return RangeIndex(request.param) -def test_rangeindex_nunique(rangeindex): - gidx = rangeindex - pidx = gidx.to_pandas() - - actual = gidx.nunique() - expected = pidx.nunique() - - assert_eq(expected, actual) - - -def test_rangeindex_min(rangeindex): - gidx = rangeindex - pidx = gidx.to_pandas() - - actual = gidx.min() - expected = pidx.min() - - assert_eq(expected, actual) - - -def test_rangeindex_max(rangeindex): +@pytest.mark.parametrize( + "func", + ["nunique", "min", "max", "any", "values"], +) +def test_rangeindex_methods(rangeindex, func): gidx = rangeindex pidx = gidx.to_pandas() - actual = gidx.max() - expected = pidx.max() + if func == "values": + expected = 
pidx.values + actual = gidx.values + else: + expected = getattr(pidx, func)() + actual = getattr(gidx, func)() assert_eq(expected, actual) @@ -2693,3 +2681,105 @@ def test_rangeindex_where_user_option(default_integer_bitwidth): dtype=f"int{default_integer_bitwidth}", ) assert_eq(expected, actual) + + +index_data = [ + range(np.random.randint(0, 100)), + range(0, 10, -2), + range(0, -10, 2), + range(0, -10, -2), + range(0, 1), + [1, 2, 3, 1, None, None], + [None, None, 3.2, 1, None, None], + [None, "a", "3.2", "z", None, None], + pd.Series(["a", "b", None], dtype="category"), + np.array([1, 2, 3, None], dtype="datetime64[s]"), +] + + +@pytest.fixture(params=index_data) +def index(request): + """Create a cudf Index of different dtypes""" + return cudf.Index(request.param) + + +@pytest.mark.parametrize( + "func", + [ + "to_series", + "isna", + "notna", + "append", + ], +) +def test_index_methods(index, func): + gidx = index + pidx = gidx.to_pandas() + + if func == "append": + expected = pidx.append(other=pidx) + actual = gidx.append(other=gidx) + else: + expected = getattr(pidx, func)() + actual = getattr(gidx, func)() + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "idx, values", + [ + (range(100, 1000, 10), [200, 600, 800]), + ([None, "a", "3.2", "z", None, None], ["a", "z"]), + (pd.Series(["a", "b", None], dtype="category"), [10, None]), + ], +) +def test_index_isin_values(idx, values): + gidx = cudf.Index(idx) + pidx = gidx.to_pandas() + + actual = gidx.isin(values) + expected = pidx.isin(values) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "idx, scalar", + [ + (range(0, -10, -2), -4), + ([None, "a", "3.2", "z", None, None], "x"), + (pd.Series(["a", "b", None], dtype="category"), 10), + ], +) +def test_index_isin_scalar_values(idx, scalar): + gidx = cudf.Index(idx) + + with pytest.raises( + TypeError, + match=re.escape( + f"only list-like objects are allowed to be passed " + f"to isin(), you passed a {type(scalar).__name__}" + 
), + ): + gidx.isin(scalar) + + +def test_index_any(): + gidx = cudf.Index([1, 2, 3]) + pidx = gidx.to_pandas() + + assert_eq(pidx.any(), gidx.any()) + + +def test_index_values(): + gidx = cudf.Index([1, 2, 3]) + pidx = gidx.to_pandas() + + assert_eq(pidx.values, gidx.values) + + +def test_index_null_values(): + gidx = cudf.Index([1.0, None, 3, 0, None]) + with pytest.raises(ValueError): + gidx.values From 4c4acd546ccab233bfcf495ab08ad7be8303a30d Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 8 Oct 2022 01:03:03 +0200 Subject: [PATCH 016/202] Add BGZIP reader to python `read_text` (#11802) Adds the missing integration, plus some tests. I decided to extend the `read_text` interface rather than add a new one. For details on the bgzip format, see #11652 Authors: - Tobias Ribizel (https://github.com/upsj) Approvers: - Ashwin Srinath (https://github.com/shwina) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/11802 --- python/cudf/cudf/_lib/cpp/io/text.pxd | 7 +++ python/cudf/cudf/_lib/text.pyx | 35 ++++++++++-- python/cudf/cudf/io/text.py | 13 ++++- python/cudf/cudf/tests/data/text/chess.pgn.gz | Bin 0 -> 881 bytes python/cudf/cudf/tests/test_text.py | 51 ++++++++++++++++++ python/cudf/cudf/utils/ioutils.py | 10 ++++ 6 files changed, 110 insertions(+), 6 deletions(-) create mode 100644 python/cudf/cudf/tests/data/text/chess.pgn.gz diff --git a/python/cudf/cudf/_lib/cpp/io/text.pxd b/python/cudf/cudf/_lib/cpp/io/text.pxd index 5b110d6234c..7bbe870dad3 100644 --- a/python/cudf/cudf/_lib/cpp/io/text.pxd +++ b/python/cudf/cudf/_lib/cpp/io/text.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. 
+from libc.stdint cimport uint64_t from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -25,6 +26,12 @@ cdef extern from "cudf/io/text/data_chunk_source_factories.hpp" \ unique_ptr[data_chunk_source] make_source(string data) except + unique_ptr[data_chunk_source] \ make_source_from_file(string filename) except + + unique_ptr[data_chunk_source] \ + make_source_from_bgzip_file(string filename) except + + unique_ptr[data_chunk_source] \ + make_source_from_bgzip_file(string filename, + uint64_t virtual_begin, + uint64_t virtual_end) except + cdef extern from "cudf/io/text/multibyte_split.hpp" \ diff --git a/python/cudf/cudf/_lib/text.pyx b/python/cudf/cudf/_lib/text.pyx index 868574be187..31a5617af58 100644 --- a/python/cudf/cudf/_lib/text.pyx +++ b/python/cudf/cudf/_lib/text.pyx @@ -5,6 +5,7 @@ from io import TextIOBase import cudf from cython.operator cimport dereference +from libc.stdint cimport uint64_t from libcpp.memory cimport make_unique, unique_ptr from libcpp.string cimport string from libcpp.utility cimport move @@ -15,6 +16,7 @@ from cudf._lib.cpp.io.text cimport ( byte_range_info, data_chunk_source, make_source, + make_source_from_bgzip_file, make_source_from_file, multibyte_split, ) @@ -22,7 +24,9 @@ from cudf._lib.cpp.io.text cimport ( def read_text(object filepaths_or_buffers, object delimiter=None, - object byte_range=None): + object byte_range=None, + object compression=None, + object compression_offsets=None): """ Cython function to call into libcudf API, see `multibyte_split`. 
@@ -38,11 +42,34 @@ def read_text(object filepaths_or_buffers, cdef size_t c_byte_range_offset cdef size_t c_byte_range_size cdef byte_range_info c_byte_range + cdef uint64_t c_compression_begin_offset + cdef uint64_t c_compression_end_offset - if isinstance(filepaths_or_buffers, TextIOBase): - datasource = move(make_source(filepaths_or_buffers.read().encode())) + if compression is None: + if isinstance(filepaths_or_buffers, TextIOBase): + datasource = move(make_source( + filepaths_or_buffers.read().encode())) + else: + datasource = move(make_source_from_file( + filepaths_or_buffers.encode())) + elif compression == "bgzip": + if isinstance(filepaths_or_buffers, TextIOBase): + raise ValueError("bgzip compression requires a file path") + if compression_offsets is not None: + if len(compression_offsets) != 2: + raise ValueError( + "compression offsets need to consist of two elements") + c_compression_begin_offset = compression_offsets[0] + c_compression_end_offset = compression_offsets[1] + datasource = move(make_source_from_bgzip_file( + filepaths_or_buffers.encode(), + c_compression_begin_offset, + c_compression_end_offset)) + else: + datasource = move(make_source_from_bgzip_file( + filepaths_or_buffers.encode())) else: - datasource = move(make_source_from_file(filepaths_or_buffers.encode())) + raise ValueError("Only bgzip compression is supported at the moment") if (byte_range is None): with nogil: diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py index 12aa0f6ef8b..23983f01966 100644 --- a/python/cudf/cudf/io/text.py +++ b/python/cudf/cudf/io/text.py @@ -14,11 +14,16 @@ def read_text( filepath_or_buffer, delimiter=None, byte_range=None, + compression=None, + compression_offsets=None, **kwargs, ): """{docstring}""" - filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer( + if delimiter is None: + raise ValueError("delimiter needs to be provided") + + filepath_or_buffer, _ = ioutils.get_reader_filepath_or_buffer( 
path_or_data=filepath_or_buffer, compression=None, iotypes=(BytesIO, StringIO), @@ -27,6 +32,10 @@ def read_text( return cudf.Series._from_data( libtext.read_text( - filepath_or_buffer, delimiter=delimiter, byte_range=byte_range + filepath_or_buffer, + delimiter=delimiter, + byte_range=byte_range, + compression=compression, + compression_offsets=compression_offsets, ) ) diff --git a/python/cudf/cudf/tests/data/text/chess.pgn.gz b/python/cudf/cudf/tests/data/text/chess.pgn.gz new file mode 100644 index 0000000000000000000000000000000000000000..f03d0d0f73da338711a703032503539090b4a9fc GIT binary patch literal 881 zcmZva%Wl&^6ox0ELddUxM1N&b7D=7X_>x3oWlBLcxWwVIh**s6i5)dcs$41;#DX=i zz#AaJjwM^z@Br+24HW)~SV1l2XykMGzB%#!`&)a0&)$ujBD&uc1JSrGer?AOF3o(6 z)=77S)T}qlIiBcsUbS91mmj~eM2P)=I-iP$5cVs|GRthLar*or zo5#o6X60a@Tq)fnpYOV#zJF2I41CvYQ}G zWdU@EH_s|IFD}e{IxoRvu4gk-U|kucn=3qK>b3jU@!6q$4hRvrI6FK;WO@i)?oEQD zdUm3RB1GRs?(HfDE-Lb}KID;$g56osbGQ)XK4>9nl-Jh721%)eol(|vY5Q0Hi47WyqqL6s_8SdL6U1~nhYtcNG4Vxv2PUF3E@f2poov`A<05V zSvr@LYSV|Z_6?k9X!Um5HsSc170X!=3|B3QGJPO1Ug=i8u9_gv>p=7V!w+`EgxdM;im=?)| zV+by}>YDrZh>dySm^<~!O$oL2e*Gx@oHzgfq>n}8K>Xa&i?f}CSf5sR Z{(ex-*L!<+53c!)?}&yF+buJ@=MSea)6oC` literal 0 HcmV?d00001 diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py index a4edaeff545..7f41d606473 100644 --- a/python/cudf/cudf/tests/test_text.py +++ b/python/cudf/cudf/tests/test_text.py @@ -845,3 +845,54 @@ def test_read_text_in_memory(datadir): actual = cudf.read_text(StringIO("x::y::z"), delimiter="::") assert_eq(expected, actual) + + +def test_read_text_bgzip(datadir): + chess_file_compressed = str(datadir) + "/chess.pgn.gz" + chess_file = str(datadir) + "/chess.pgn" + delimiter = "1." 
+ + with open(chess_file) as f: + content = f.read().split(delimiter) + + # Since Python split removes the delimiter and read_text does + # not we need to add it back to the 'content' + expected = cudf.Series( + [ + c + delimiter if i < (len(content) - 1) else c + for i, c in enumerate(content) + ] + ) + + actual = cudf.read_text( + chess_file_compressed, compression="bgzip", delimiter=delimiter + ) + + assert_eq(expected, actual) + + +def test_read_text_bgzip_offsets(datadir): + chess_file_compressed = str(datadir) + "/chess.pgn.gz" + chess_file = str(datadir) + "/chess.pgn" + delimiter = "1." + + with open(chess_file) as f: + content = f.read()[29:695].split(delimiter) + + # Since Python split removes the delimiter and read_text does + # not we need to add it back to the 'content' + expected = cudf.Series( + [ + c + delimiter if i < (len(content) - 1) else c + for i, c in enumerate(content) + ] + ) + + actual = cudf.read_text( + chess_file_compressed, + compression="bgzip", + compression_offsets=[58 * 2**16 + 2, 781 * 2**16 + 7], + delimiter=delimiter, + ) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index fe65b8f22fc..8bb246c9c84 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -1172,6 +1172,16 @@ The output contains all rows that start inside the byte range (i.e. at or after the offset, and before the end at `offset + size`), which may include rows that continue past the end. +compression : string, default None + Which compression type is the input compressed with. + Currently supports only `bgzip`, and requires the path to a file as input. +compression_offsets: list or tuple, default None + The virtual begin and end offset associated with the provided compression. + For `bgzip`, they are composed of a local uncompressed offset inside a + BGZIP block (lower 16 bits) and the start offset of this BGZIP block in the + compressed file (upper 48 bits). 
+ The start offset points to the first byte to be read, the end offset points + one past the last byte to be read. Returns ------- From 4eb9c6c945674a56eaed740f5411860d4441c9f3 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 10 Oct 2022 09:56:11 +0200 Subject: [PATCH 017/202] Add BGZIP multibyte_split benchmark (#11723) This refactors #11652 to extract the BGZIP IO and adds another `source_type` to the `multibyte_split` benchmark, creating a compressed file using `zlib`. A quick benchmark shows performance results around 2.5x slower than reading from a device buffer at around 1:5 compression ratio ### [0] Tesla T4 | source_type | delim_size | delim_percent | size_approx | byte_range_percent | Time | Peak Memory Usage | Encoded file size | |-------------|------------|---------------|-------------------|--------------------|------------|-------------------|-------------------| | bgzip | 1 | 1 | 2^30 = 1073741824 | 100 | 507.479 ms | 4.022 GiB | 1006.638 MiB | | file | 1 | 1 | 2^30 = 1073741824 | 100 | 339.860 ms | 3.947 GiB | 1006.638 MiB | | device | 1 | 1 | 2^30 = 1073741824 | 100 | 201.556 ms | 3.947 GiB | 1006.638 MiB | Authors: - Tobias Ribizel (https://github.com/upsj) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Vukasin Milovanovic (https://github.com/vuule) - Bradley Dice (https://github.com/bdice) - Jordan Jacobelli (https://github.com/Ethyling) URL: https://github.com/rapidsai/cudf/pull/11723 --- conda/recipes/libcudf/meta.yaml | 1 + cpp/CMakeLists.txt | 1 + cpp/benchmarks/CMakeLists.txt | 3 +- cpp/benchmarks/io/text/multibyte_split.cpp | 70 ++++-- .../cudf/io/text/detail/bgzip_utils.hpp | 112 +++++++++ cpp/src/io/text/bgzip_data_chunk_source.cu | 72 +----- cpp/src/io/text/bgzip_utils.cpp | 179 ++++++++++++++ cpp/tests/CMakeLists.txt | 1 + cpp/tests/io/text/data_chunk_source_test.cpp | 219 +++++++++--------- 9 files changed, 473 insertions(+), 185 deletions(-) create mode 100644 cpp/include/cudf/io/text/detail/bgzip_utils.hpp 
create mode 100644 cpp/src/io/text/bgzip_utils.cpp diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index a417b407044..ccb0d685062 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -152,6 +152,7 @@ outputs: - test -f $PREFIX/include/cudf/io/text/byte_range_info.hpp - test -f $PREFIX/include/cudf/io/text/data_chunk_source.hpp - test -f $PREFIX/include/cudf/io/text/data_chunk_source_factories.hpp + - test -f $PREFIX/include/cudf/io/text/detail/bgzip_utils.hpp - test -f $PREFIX/include/cudf/io/text/detail/multistate.hpp - test -f $PREFIX/include/cudf/io/text/detail/tile_state.hpp - test -f $PREFIX/include/cudf/io/text/detail/trie.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 60e914f07d3..8bde0bcfb9b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -356,6 +356,7 @@ add_library( src/io/text/byte_range_info.cpp src/io/text/data_chunk_source_factories.cpp src/io/text/bgzip_data_chunk_source.cu + src/io/text/bgzip_utils.cpp src/io/text/multibyte_split.cu src/io/utilities/column_buffer.cpp src/io/utilities/config_utils.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index d1ff177a25e..f35d0b0b49e 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -301,7 +301,8 @@ ConfigureNVBench(NESTED_JSON_NVBENCH io/json/nested_json.cpp) # ################################################################################################## # * io benchmark --------------------------------------------------------------------- -ConfigureNVBench(MULTIBYTE_SPLIT_BENCHMARK io/text/multibyte_split.cpp) +ConfigureNVBench(MULTIBYTE_SPLIT_NVBENCH io/text/multibyte_split.cpp) +target_link_libraries(MULTIBYTE_SPLIT_NVBENCH PRIVATE ZLIB::ZLIB) add_custom_target( run_benchmarks diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index 4865d11ae8b..b7e85d8aa7e 100644 --- 
a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -40,10 +41,25 @@ #include #include #include +#include temp_directory const temp_dir("cudf_nvbench"); -enum class data_chunk_source_type { device, file, host, host_pinned }; +enum class data_chunk_source_type { device, file, host, host_pinned, file_bgzip }; + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + data_chunk_source_type, + [](auto value) { + switch (value) { + case data_chunk_source_type::device: return "device"; + case data_chunk_source_type::file: return "file"; + case data_chunk_source_type::host: return "host"; + case data_chunk_source_type::host_pinned: return "host_pinned"; + case data_chunk_source_type::file_bgzip: return "file_bgzip"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) static cudf::string_scalar create_random_input(int32_t num_chars, double delim_factor, @@ -78,14 +94,32 @@ static cudf::string_scalar create_random_input(int32_t num_chars, return cudf::string_scalar(std::move(*chars_buffer)); } -static void bench_multibyte_split(nvbench::state& state) +static void write_bgzip_file(cudf::host_span host_data, std::ostream& output_stream) +{ + // a bit of variability with a decent amount of padding so we don't overflow 16 bit block sizes + std::uniform_int_distribution chunk_size_dist{64000, 65000}; + std::default_random_engine rng{}; + std::size_t pos = 0; + while (pos < host_data.size()) { + auto const remainder = host_data.size() - pos; + auto const chunk_size = std::min(remainder, chunk_size_dist(rng)); + cudf::io::text::detail::bgzip::write_compressed_block(output_stream, + {host_data.data() + pos, chunk_size}); + pos += chunk_size; + } + // empty block denotes EOF + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {}); +} + +template +static void bench_multibyte_split(nvbench::state& state, + nvbench::type_list>) { 
cudf::rmm_pool_raii pool_raii; - auto const source_type = static_cast(state.get_int64("source_type")); - auto const delim_size = state.get_int64("delim_size"); - auto const delim_percent = state.get_int64("delim_percent"); - auto const file_size_approx = state.get_int64("size_approx"); + auto const delim_size = state.get_int64("delim_size"); + auto const delim_percent = state.get_int64("delim_percent"); + auto const file_size_approx = state.get_int64("size_approx"); auto const byte_range_percent = state.get_int64("byte_range_percent"); auto const byte_range_factor = static_cast(byte_range_percent) / 100; @@ -104,7 +138,8 @@ static void bench_multibyte_split(nvbench::state& state) auto host_pinned_input = thrust::host_vector>{}; - if (source_type == data_chunk_source_type::host || source_type == data_chunk_source_type::file) { + if (source_type == data_chunk_source_type::host || source_type == data_chunk_source_type::file || + source_type == data_chunk_source_type::file_bgzip) { host_input = cudf::detail::make_std_vector_sync( {device_input.data(), static_cast(device_input.size())}, cudf::default_stream_value); @@ -131,6 +166,14 @@ static void bench_multibyte_split(nvbench::state& state) return cudf::io::text::make_source(host_pinned_input); case data_chunk_source_type::device: // return cudf::io::text::make_source(device_input); + case data_chunk_source_type::file_bgzip: { + auto const temp_file_name = random_file_in_dir(temp_dir.path()); + { + std::ofstream output_stream(temp_file_name, std::ofstream::out); + write_bgzip_file(host_input, output_stream); + } + return cudf::io::text::make_source_from_bgzip_file(temp_file_name); + } default: CUDF_FAIL(); } }(); @@ -152,13 +195,14 @@ static void bench_multibyte_split(nvbench::state& state) state.add_buffer_size(range_size, "efs", "Encoded file size"); } -NVBENCH_BENCH(bench_multibyte_split) +using source_type_list = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(bench_multibyte_split, 
NVBENCH_TYPE_AXES(source_type_list)) .set_name("multibyte_split") - .add_int64_axis("source_type", - {static_cast(data_chunk_source_type::device), - static_cast(data_chunk_source_type::file), - static_cast(data_chunk_source_type::host), - static_cast(data_chunk_source_type::host_pinned)}) .add_int64_axis("delim_size", {1, 4, 7}) .add_int64_axis("delim_percent", {1, 25}) .add_int64_power_of_two_axis("size_approx", {15, 30}) diff --git a/cpp/include/cudf/io/text/detail/bgzip_utils.hpp b/cpp/include/cudf/io/text/detail/bgzip_utils.hpp new file mode 100644 index 00000000000..627df5f358a --- /dev/null +++ b/cpp/include/cudf/io/text/detail/bgzip_utils.hpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +#include +#include +#include +#include + +namespace cudf::io::text::detail::bgzip { + +struct header { + int block_size; + int extra_length; + [[nodiscard]] int data_size() const { return block_size - extra_length - 20; } +}; + +struct footer { + uint32_t crc; + uint32_t decompressed_size; +}; + +/** + * @brief Reads the full BGZIP header from the given input stream. Afterwards, the stream position + * is at the first data byte. 
+ * + * @param input_stream The input stream + * @return The header storing the compressed size and extra subfield length + */ +header read_header(std::istream& input_stream); + +/** + * @brief Reads the full BGZIP footer from the given input stream. Afterwards, the stream position + * is after the last footer byte. + * + * @param input_stream The input stream + * @return The footer storing uncompressed size and CRC32 + */ +footer read_footer(std::istream& input_stream); + +/** + * @brief Writes a header for data of the given compressed size to the given stream. + * + * @param output_stream The output stream + * @param compressed_size The size of the compressed data + * @param pre_size_subfields Any GZIP extra subfields (need to be valid) to be placed before the + * BGZIP block size subfield + * @param post_size_subfields Any subfields to be placed after the BGZIP block size subfield + */ +void write_header(std::ostream& output_stream, + uint16_t compressed_size, + host_span pre_size_subfields, + host_span post_size_subfields); + +/** + * @brief Writes a footer for the given uncompressed data to the given stream. + * + * @param output_stream The output stream + * @param data The data for which uncompressed size and CRC32 will be computed and written + */ +void write_footer(std::ostream& output_stream, host_span data); + +/** + * @brief Writes the given data to the given stream as an uncompressed deflate block with BZGIP + * header and footer. 
+ * + * @param output_stream The output stream + * @param data The uncompressed data + * @param pre_size_subfields Any GZIP extra subfields (need to be valid) to be placed before the + * BGZIP block size subfield + * @param post_size_subfields Any subfields to be placed after the BGZIP block size subfield + */ +void write_uncompressed_block(std::ostream& output_stream, + host_span data, + host_span pre_size_subfields = {}, + host_span post_size_subfields = {}); + +/** + * @brief Writes the given data to the given stream as a compressed deflate block with BZGIP + * header and footer. + * + * @param output_stream The output stream + * @param data The uncompressed data + * @param pre_size_subfields Any GZIP extra subfields (need to be valid) to be placed before the + * BGZIP block size subfield + * @param post_size_subfields Any subfields to be placed after the BGZIP block size subfield + */ +void write_compressed_block(std::ostream& output_stream, + host_span data, + host_span pre_size_subfields = {}, + host_span post_size_subfields = {}); + +} // namespace cudf::io::text::detail::bgzip diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 7715c2ca7e1..9c4ff218783 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -36,7 +37,6 @@ #include namespace cudf::io::text { - namespace { /** @@ -64,68 +64,6 @@ struct bgzip_nvcomp_transform_functor { class bgzip_data_chunk_reader : public data_chunk_reader { private: - template - static IntType read_int(char* data) - { - IntType result{}; - // we assume little-endian - std::memcpy(&result, &data[0], sizeof(result)); - return result; - } - - struct bgzip_header { - int block_size; - int extra_length; - [[nodiscard]] int data_size() const { return block_size - extra_length - 20; } - }; - - bgzip_header read_header() - { - std::array buffer{}; - 
_data_stream->read(buffer.data(), sizeof(buffer)); - std::array const expected_header{{31, 139, 8, 4}}; - CUDF_EXPECTS( - std::equal( - expected_header.begin(), expected_header.end(), reinterpret_cast(buffer.data())), - "malformed BGZIP header"); - // we ignore the remaining bytes of the fixed header, since they don't matter to us - auto const extra_length = read_int(&buffer[10]); - uint16_t extra_offset{}; - // read all the extra subfields - while (extra_offset < extra_length) { - auto const remaining_size = extra_length - extra_offset; - CUDF_EXPECTS(remaining_size >= 4, "invalid extra field length"); - // a subfield consists of 2 identifier bytes and a uint16 length - // 66/67 identifies a BGZIP block size field, we skip all other fields - _data_stream->read(buffer.data(), 4); - extra_offset += 4; - auto const subfield_size = read_int(&buffer[2]); - if (buffer[0] == 66 && buffer[1] == 67) { - // the block size subfield contains a single uint16 value, which is block_size - 1 - CUDF_EXPECTS(subfield_size == sizeof(uint16_t), "malformed BGZIP extra subfield"); - _data_stream->read(buffer.data(), sizeof(uint16_t)); - _data_stream->seekg(remaining_size - 6, std::ios_base::cur); - auto const block_size_minus_one = read_int(&buffer[0]); - return {block_size_minus_one + 1, extra_length}; - } else { - _data_stream->seekg(subfield_size, std::ios_base::cur); - extra_offset += subfield_size; - } - } - CUDF_FAIL("missing BGZIP size extra subfield"); - } - - struct bgzip_footer { - uint32_t decompressed_size; - }; - - bgzip_footer read_footer() - { - std::array buffer{}; - _data_stream->read(buffer.data(), sizeof(buffer)); - return {read_int(&buffer[4])}; - } - template using pinned_host_vector = thrust::host_vector>; @@ -258,13 +196,13 @@ class bgzip_data_chunk_reader : public data_chunk_reader { return available_decompressed_size - read_pos; } - void read_block(bgzip_header header, std::istream& stream) + void read_block(detail::bgzip::header header, std::istream& stream) { 
h_compressed_blocks.resize(h_compressed_blocks.size() + header.data_size()); stream.read(h_compressed_blocks.data() + compressed_size(), header.data_size()); } - void add_block_offsets(bgzip_header header, bgzip_footer footer) + void add_block_offsets(detail::bgzip::header header, detail::bgzip::footer footer) { max_decompressed_size = std::max(footer.decompressed_size, max_decompressed_size); @@ -294,9 +232,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader { // peek is necessary if we are already at the end, but didn't try to read another byte _data_stream->peek(); if (_data_stream->eof() || _compressed_pos > _compressed_end) { break; } - auto header = read_header(); + auto header = detail::bgzip::read_header(*_data_stream); _curr_blocks.read_block(header, *_data_stream); - auto footer = read_footer(); + auto footer = detail::bgzip::read_footer(*_data_stream); _curr_blocks.add_block_offsets(header, footer); // for the last GZIP block, we restrict ourselves to the bytes up to _local_end // but only for the reader, not for decompression! diff --git a/cpp/src/io/text/bgzip_utils.cpp b/cpp/src/io/text/bgzip_utils.cpp new file mode 100644 index 00000000000..dd08387a6b5 --- /dev/null +++ b/cpp/src/io/text/bgzip_utils.cpp @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace cudf::io::text::detail::bgzip { +namespace { + +template +IntType read_int(char* data) +{ + IntType result{}; + // we assume little-endian + std::memcpy(&result, &data[0], sizeof(result)); + return result; +} + +template +void write_int(std::ostream& output_stream, T val) +{ + std::array bytes; + // we assume little-endian + std::memcpy(&bytes[0], &val, sizeof(T)); + output_stream.write(bytes.data(), bytes.size()); +} + +} // namespace + +std::array constexpr extra_blocklen_field_header{{66, 67, 2, 0}}; + +header read_header(std::istream& input_stream) +{ + std::array buffer{}; + input_stream.read(buffer.data(), sizeof(buffer)); + std::array constexpr expected_header{{31, 139, 8, 4}}; + CUDF_EXPECTS( + std::equal( + expected_header.begin(), expected_header.end(), reinterpret_cast(buffer.data())), + "malformed BGZIP header"); + // we ignore the remaining bytes of the fixed header, since they don't matter to us + auto const extra_length = read_int(&buffer[10]); + uint16_t extra_offset{}; + // read all the extra subfields + while (extra_offset < extra_length) { + auto const remaining_size = extra_length - extra_offset; + CUDF_EXPECTS(remaining_size >= 4, "invalid extra field length"); + // a subfield consists of 2 identifier bytes and a uint16 length + // 66/67 identifies a BGZIP block size field, we skip all other fields + input_stream.read(buffer.data(), 4); + extra_offset += 4; + auto const subfield_size = read_int(&buffer[2]); + if (buffer[0] == extra_blocklen_field_header[0] && + buffer[1] == extra_blocklen_field_header[1]) { + // the block size subfield contains a single uint16 value, which is block_size - 1 + CUDF_EXPECTS( + buffer[2] == extra_blocklen_field_header[2] && buffer[3] == extra_blocklen_field_header[3], + "malformed BGZIP extra subfield"); + input_stream.read(buffer.data(), sizeof(uint16_t)); + input_stream.seekg(remaining_size - 6, 
std::ios_base::cur); + auto const block_size_minus_one = read_int(&buffer[0]); + return {block_size_minus_one + 1, extra_length}; + } else { + input_stream.seekg(subfield_size, std::ios_base::cur); + extra_offset += subfield_size; + } + } + CUDF_FAIL("missing BGZIP size extra subfield"); +} + +footer read_footer(std::istream& input_stream) +{ + std::array buffer{}; + input_stream.read(buffer.data(), sizeof(buffer)); + return {read_int(&buffer[0]), read_int(&buffer[4])}; +} + +void write_footer(std::ostream& output_stream, host_span data) +{ + // compute crc32 with zlib, this allows checking the generated files with external tools + write_int(output_stream, crc32(0, (unsigned char*)data.data(), data.size())); + write_int(output_stream, data.size()); +} + +void write_header(std::ostream& output_stream, + uint16_t compressed_size, + host_span pre_size_subfield, + host_span post_size_subfield) +{ + std::array constexpr header_data{{ + 31, // magic number + 139, // magic number + 8, // compression type: deflate + 4, // flags: extra header + 0, // mtime + 0, // mtime + 0, // mtime + 0, // mtime: irrelevant + 4, // xfl: irrelevant + 3 // OS: irrelevant + }}; + output_stream.write(reinterpret_cast(header_data.data()), header_data.size()); + auto const extra_size = pre_size_subfield.size() + extra_blocklen_field_header.size() + + sizeof(uint16_t) + post_size_subfield.size(); + auto const block_size = + header_data.size() + sizeof(uint16_t) + extra_size + compressed_size + 2 * sizeof(uint32_t); + write_int(output_stream, extra_size); + output_stream.write(pre_size_subfield.data(), pre_size_subfield.size()); + output_stream.write(extra_blocklen_field_header.data(), extra_blocklen_field_header.size()); + CUDF_EXPECTS(block_size - 1 <= std::numeric_limits::max(), "block size overflow"); + write_int(output_stream, block_size - 1); + output_stream.write(post_size_subfield.data(), post_size_subfield.size()); +} + +void write_uncompressed_block(std::ostream& output_stream, + 
host_span data, + host_span pre_size_subfields, + host_span post_size_subfields) +{ + CUDF_EXPECTS(data.size() <= std::numeric_limits::max(), "data size overflow"); + write_header(output_stream, data.size() + 5, pre_size_subfields, post_size_subfields); + write_int(output_stream, 1); + write_int(output_stream, data.size()); + write_int(output_stream, ~static_cast(data.size())); + output_stream.write(data.data(), data.size()); + write_footer(output_stream, data); +} + +void write_compressed_block(std::ostream& output_stream, + host_span data, + host_span pre_size_subfields, + host_span post_size_subfields) +{ + CUDF_EXPECTS(data.size() <= std::numeric_limits::max(), "data size overflow"); + z_stream deflate_stream{}; + // let's make sure we have enough space to store the data + std::vector compressed_out(data.size() * 2 + 256); + deflate_stream.next_in = reinterpret_cast(const_cast(data.data())); + deflate_stream.avail_in = data.size(); + deflate_stream.next_out = reinterpret_cast(compressed_out.data()); + deflate_stream.avail_out = compressed_out.size(); + CUDF_EXPECTS( + deflateInit2(&deflate_stream, // stream + Z_DEFAULT_COMPRESSION, // compression level + Z_DEFLATED, // method + -15, // log2 of window size (negative value means no ZLIB header/footer) + 9, // mem level: best performance/most memory usage for compression + Z_DEFAULT_STRATEGY // strategy + ) == Z_OK, + "deflateInit failed"); + CUDF_EXPECTS(deflate(&deflate_stream, Z_FINISH) == Z_STREAM_END, "deflate failed"); + CUDF_EXPECTS(deflateEnd(&deflate_stream) == Z_OK, "deflateEnd failed"); + write_header(output_stream, deflate_stream.total_out, pre_size_subfields, post_size_subfields); + output_stream.write(compressed_out.data(), deflate_stream.total_out); + write_footer(output_stream, data); +} + +} // namespace cudf::io::text::detail::bgzip diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index e630e842f4e..8675dc891c1 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt 
@@ -230,6 +230,7 @@ ConfigureTest(NESTED_JSON_TEST io/nested_json_test.cpp io/json_tree.cpp) ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) ConfigureTest(DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp) +target_link_libraries(DATA_CHUNK_SOURCE_TEST PRIVATE ZLIB::ZLIB) ConfigureTest(LOGICAL_STACK_TEST io/fst/logical_stack_test.cu) ConfigureTest(FST_TEST io/fst/fst_test.cu) ConfigureTest(TYPE_INFERENCE_TEST io/type_inference_test.cu) diff --git a/cpp/tests/io/text/data_chunk_source_test.cpp b/cpp/tests/io/text/data_chunk_source_test.cpp index 115a66cdd95..7cb75aea8e2 100644 --- a/cpp/tests/io/text/data_chunk_source_test.cpp +++ b/cpp/tests/io/text/data_chunk_source_test.cpp @@ -18,6 +18,7 @@ #include #include +#include #include @@ -125,102 +126,67 @@ TEST_F(DataChunkSourceTest, Host) test_source(content, *source); } -template -void write_int(std::ostream& stream, T val) -{ - std::array bytes; - // we assume little-endian - std::memcpy(&bytes[0], &val, sizeof(T)); - stream.write(bytes.data(), bytes.size()); -} +enum class compression { ENABLED, DISABLED }; -void write_bgzip_block(std::ostream& stream, - const std::string& data, - bool add_extra_garbage_before, - bool add_extra_garbage_after) -{ - std::array const header{{ - 31, // magic number - 139, // magic number - 8, // compression type: deflate - 4, // flags: extra header - 0, // mtime - 0, // mtime - 0, // mtime - 0, // mtime: irrelevant - 4, // xfl: irrelevant - 3 // OS: irrelevant - }}; - std::array const extra_blocklen_field{{66, 67, 2, 0}}; - std::array const extra_garbage_field1{{13, // magic number - 37, // magic number - 7, // field length - 0, // field length - 1, - 2, - 3, - 4, - 5, - 6, - 7}}; - std::array const extra_garbage_field2{{12, // magic number - 34, // magic number - 2, // field length - 0, // field length - 1, 2, - 56, // magic number - 78, // magic number - 1, // field length - 0, // field length 
- 3, // - 90, // magic number - 12, // magic number - 8, // field length - 0, // field length - 1, 2, 3, 4, 5, 6, 7, 8}}; - stream.write(reinterpret_cast(header.data()), header.size()); - uint16_t extra_size = extra_blocklen_field.size() + 2; - if (add_extra_garbage_before) { extra_size += extra_garbage_field1.size(); } - if (add_extra_garbage_after) { extra_size += extra_garbage_field2.size(); } - write_int(stream, extra_size); - if (add_extra_garbage_before) { - stream.write(extra_garbage_field1.data(), extra_garbage_field1.size()); - } - stream.write(extra_blocklen_field.data(), extra_blocklen_field.size()); - auto const compressed_size = data.size() + 5; - uint16_t const block_size_minus_one = compressed_size + 19 + extra_size; - write_int(stream, block_size_minus_one); - if (add_extra_garbage_after) { - stream.write(extra_garbage_field2.data(), extra_garbage_field2.size()); - } - write_int(stream, 1); - write_int(stream, data.size()); - write_int(stream, ~static_cast(data.size())); - stream.write(data.data(), data.size()); - // this does not produce a valid file, since we write 0 as the CRC - // the parser ignores the checksum, so it doesn't matter to the test - // to check output with gzip, plug in the CRC of `data` here. 
- write_int(stream, 0); - write_int(stream, data.size()); -} +enum class eof { ADD_EOF_BLOCK, NO_EOF_BLOCK }; -void write_bgzip(std::ostream& stream, - const std::string& data, +void write_bgzip(std::ostream& output_stream, + cudf::host_span data, std::default_random_engine& rng, - bool write_eof = true) + compression compress, + eof add_eof) { + std::vector const extra_garbage_fields1{{13, // magic number + 37, // magic number + 7, // field length + 0, // field length + 1, + 2, + 3, + 4, + 5, + 6, + 7}}; + std::vector const extra_garbage_fields2{{12, // magic number + 34, // magic number + 2, // field length + 0, // field length + 1, 2, + 56, // magic number + 78, // magic number + 1, // field length + 0, // field length + 3, // + 90, // magic number + 12, // magic number + 8, // field length + 0, // field length + 1, 2, 3, 4, 5, 6, 7, 8}}; // make sure the block size with header stays below 65536 std::uniform_int_distribution block_size_dist{1, 65000}; auto begin = data.begin(); auto const end = data.end(); int i = 0; while (begin < end) { + using cudf::host_span; auto len = std::min(end - begin, block_size_dist(rng)); - write_bgzip_block(stream, std::string{begin, begin + len}, i & 1, i & 2); + host_span const garbage_before = + i & 1 ? extra_garbage_fields1 : host_span{}; + host_span const garbage_after = + i & 2 ? 
extra_garbage_fields2 : host_span{}; + if (compress == compression::ENABLED) { + cudf::io::text::detail::bgzip::write_compressed_block( + output_stream, {begin, len}, garbage_before, garbage_after); + } else { + cudf::io::text::detail::bgzip::write_uncompressed_block( + output_stream, {begin, len}, garbage_before, garbage_after); + } begin += len; i++; } - if (write_eof) { write_bgzip_block(stream, {}, false, false); } + if (add_eof == eof::ADD_EOF_BLOCK) { + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {}); + } } TEST_F(DataChunkSourceTest, BgzipSource) @@ -231,9 +197,9 @@ TEST_F(DataChunkSourceTest, BgzipSource) input = input + input; } { - std::ofstream stream{filename}; + std::ofstream output_stream{filename}; std::default_random_engine rng{}; - write_bgzip(stream, input, rng); + write_bgzip(output_stream, input, rng, compression::DISABLED, eof::ADD_EOF_BLOCK); } auto const source = cudf::io::text::make_source_from_bgzip_file(filename); @@ -243,7 +209,7 @@ TEST_F(DataChunkSourceTest, BgzipSource) TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsets) { - auto const filename = temp_env->get_temp_filepath("bgzip_source"); + auto const filename = temp_env->get_temp_filepath("bgzip_source_offsets"); std::string input{"bananarama"}; for (int i = 0; i < 24; i++) { input = input + input; @@ -260,16 +226,18 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsets) std::size_t const begin_local_offset{data_garbage.size()}; std::size_t const end_local_offset{endinput.size()}; { - std::ofstream stream{filename}; - stream.write(padding_garbage.data(), padding_garbage.size()); + std::ofstream output_stream{filename}; + output_stream.write(padding_garbage.data(), padding_garbage.size()); std::default_random_engine rng{}; - begin_compressed_offset = stream.tellp(); - write_bgzip_block(stream, data_garbage + begininput, false, false); - write_bgzip(stream, input, rng, false); - end_compressed_offset = stream.tellp(); - write_bgzip_block(stream, 
endinput + data_garbage + data_garbage, false, false); - write_bgzip_block(stream, {}, false, false); - stream.write(padding_garbage.data(), padding_garbage.size()); + begin_compressed_offset = output_stream.tellp(); + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, + data_garbage + begininput); + write_bgzip(output_stream, input, rng, compression::DISABLED, eof::NO_EOF_BLOCK); + end_compressed_offset = output_stream.tellp(); + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, + endinput + data_garbage + data_garbage); + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {}); + output_stream.write(padding_garbage.data(), padding_garbage.size()); } input = begininput + input + endinput; @@ -283,7 +251,7 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsets) TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleGZipBlock) { - auto const filename = temp_env->get_temp_filepath("bgzip_source"); + auto const filename = temp_env->get_temp_filepath("bgzip_source_offsets_single_block"); std::string const input{"collection unit brings"}; std::string const head_garbage{"garbage"}; std::string const tail_garbage{"GARBAGE"}; @@ -292,9 +260,10 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleGZipBlock) std::size_t const begin_local_offset{head_garbage.size()}; std::size_t const end_local_offset{head_garbage.size() + input.size()}; { - std::ofstream stream{filename}; - write_bgzip_block(stream, head_garbage + input + tail_garbage, false, false); - write_bgzip_block(stream, {}, false, false); + std::ofstream output_stream{filename}; + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, + head_garbage + input + tail_garbage); + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {}); } auto const source = @@ -307,7 +276,7 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleGZipBlock) TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleChunk) { 
- auto const filename = temp_env->get_temp_filepath("bgzip_source"); + auto const filename = temp_env->get_temp_filepath("bgzip_source_offsets_single_chunk"); std::string const input{"collection unit brings"}; std::string const head_garbage{"garbage"}; std::string const tail_garbage{"GARBAGE"}; @@ -316,11 +285,13 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleChunk) std::size_t const begin_local_offset{head_garbage.size()}; std::size_t const end_local_offset{input.size() - 10}; { - std::ofstream stream{filename}; - write_bgzip_block(stream, head_garbage + input.substr(0, 10), false, false); - end_compressed_offset = stream.tellp(); - write_bgzip_block(stream, input.substr(10) + tail_garbage, false, false); - write_bgzip_block(stream, {}, false, false); + std::ofstream output_stream{filename}; + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, + head_garbage + input.substr(0, 10)); + end_compressed_offset = output_stream.tellp(); + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, + input.substr(10) + tail_garbage); + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {}); } auto const source = @@ -331,4 +302,44 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleChunk) test_source(input, *source); } +TEST_F(DataChunkSourceTest, BgzipCompressedSourceVirtualOffsets) +{ + auto const filename = temp_env->get_temp_filepath("bgzip_source_compressed_offsets"); + std::string input{"bananarama"}; + for (int i = 0; i < 24; i++) { + input = input + input; + } + std::string padding_garbage{"garbage"}; + for (int i = 0; i < 10; i++) { + padding_garbage = padding_garbage + padding_garbage; + } + std::string const data_garbage{"GARBAGE"}; + std::string const begininput{"begin of bananarama"}; + std::string const endinput{"end of bananarama"}; + std::size_t begin_compressed_offset{}; + std::size_t end_compressed_offset{}; + std::size_t const begin_local_offset{data_garbage.size()}; + 
std::size_t const end_local_offset{endinput.size()}; + { + std::ofstream output_stream{filename}; + output_stream.write(padding_garbage.data(), padding_garbage.size()); + std::default_random_engine rng{}; + begin_compressed_offset = output_stream.tellp(); + cudf::io::text::detail::bgzip::write_compressed_block(output_stream, data_garbage + begininput); + write_bgzip(output_stream, input, rng, compression::ENABLED, eof::NO_EOF_BLOCK); + end_compressed_offset = output_stream.tellp(); + cudf::io::text::detail::bgzip::write_compressed_block(output_stream, + endinput + data_garbage + data_garbage); + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {}); + output_stream.write(padding_garbage.data(), padding_garbage.size()); + } + input = begininput + input + endinput; + + auto source = + cudf::io::text::make_source_from_bgzip_file(filename, + begin_compressed_offset << 16 | begin_local_offset, + end_compressed_offset << 16 | end_local_offset); + test_source(input, *source); +} + CUDF_TEST_PROGRAM_MAIN() From 586907bf5dc6c897229ed365bdc49a5908cebfe3 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 10 Oct 2022 15:59:28 -0500 Subject: [PATCH 018/202] Fix pre-commit copyright check (#11860) This PR improves the copyright check script to handle cases where the ancestor `branch-*` does not have an upstream set. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) Approvers: - Bradley Dice (https://github.com/bdice) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cudf/pull/11860 --- ci/checks/copyright.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py index 61e30d7922e..83f43183f71 100644 --- a/ci/checks/copyright.py +++ b/ci/checks/copyright.py @@ -68,20 +68,40 @@ def modifiedFiles(): we can read only the staged changes. 
""" repo = git.Repo() - # TARGET_BRANCH is defined in CI + # Use the environment variable TARGET_BRANCH (defined in CI) if possible target_branch = os.environ.get("TARGET_BRANCH") if target_branch is None: # Fall back to the closest branch if not on CI target_branch = repo.git.describe( all=True, tags=True, match="branch-*", abbrev=0 ).lstrip("heads/") - try: - # Use the tracking branch of the local reference if it exists + + upstream_target_branch = None + if target_branch in repo.heads: + # Use the tracking branch of the local reference if it exists. This + # returns None if no tracking branch is set. upstream_target_branch = repo.heads[target_branch].tracking_branch() - except IndexError: - # Fall back to the remote reference (this happens on CI because the - # only local branch reference is current-pr-branch) - upstream_target_branch = repo.remote().refs[target_branch] + if upstream_target_branch is None: + # Fall back to the remote with the newest target_branch. This code + # path is used on CI because the only local branch reference is + # current-pr-branch, and thus target_branch is not in repo.heads. + # This also happens if no tracking branch is defined for the local + # target_branch. We use the remote with the latest commit if + # multiple remotes are defined. + candidate_branches = [ + remote.refs[target_branch] for remote in repo.remotes + if target_branch in remote.refs + ] + if len(candidate_branches) > 0: + upstream_target_branch = sorted( + candidate_branches, + key=lambda branch: branch.commit.committed_datetime, + )[-1] + else: + # If no remotes are defined, try to use the local version of the + # target_branch. If this fails, the repo configuration must be very + # strange and we can fix this script on a case-by-case basis. 
+ upstream_target_branch = repo.heads[target_branch] merge_base = repo.merge_base("HEAD", upstream_target_branch.commit)[0] diff = merge_base.diff() changed_files = {f for f in diff if f.b_path is not None} From 5b51591e27d6480292f9e2602d8175b3276868b1 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 10 Oct 2022 15:34:09 -0700 Subject: [PATCH 019/202] Remove "experimental" warning for struct columns in ORC reader and writer (#11880) Closes https://github.com/rapidsai/cudf/issues/11484 Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) - https://github.com/nvdbaranec URL: https://github.com/rapidsai/cudf/pull/11880 --- cpp/include/cudf/io/orc.hpp | 6 ------ python/cudf/cudf/io/orc.py | 5 ----- python/cudf/cudf/tests/test_orc.py | 3 --- python/cudf/cudf/utils/ioutils.py | 7 ------- 4 files changed, 21 deletions(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 7f3cb95e4b2..b1e2197a868 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -378,9 +378,6 @@ class orc_reader_options_builder { * auto result = cudf::io::read_orc(options); * @endcode * - * Note: Support for reading files with struct columns is currently experimental, the output may not - * be as reliable as reading for other datatypes. - * * @param options Settings for controlling reading behavior * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata. @@ -783,9 +780,6 @@ class orc_writer_options_builder { * cudf::io::write_orc(options); * @endcode * - * Note: Support for writing tables with struct columns is currently experimental, the output may - * not be as reliable as writing for other datatypes. 
- * * @param options Settings for controlling reading behavior * @param mr Device memory resource to use for device memory allocation */ diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py index 718b9c4144f..b9ce07466e5 100644 --- a/python/cudf/cudf/io/orc.py +++ b/python/cudf/cudf/io/orc.py @@ -418,11 +418,6 @@ def to_orc( """{docstring}""" for col in df._data.columns: - if isinstance(col, cudf.core.column.StructColumn): - warnings.warn( - "Support for writing tables with struct columns is " - "currently experimental." - ) if isinstance(col, cudf.core.column.CategoricalColumn): raise NotImplementedError( "Writing to ORC format is not yet supported with " diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 422c2588eb0..5aa049db31a 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -1555,7 +1555,6 @@ def test_names_in_struct_dtype_nesting(datadir): assert edf.dtypes.equals(got.dtypes) -@pytest.mark.filterwarnings("ignore:.*struct.*experimental") def test_writer_lists_structs(list_struct_buff): df_in = cudf.read_orc(list_struct_buff) @@ -1567,7 +1566,6 @@ def test_writer_lists_structs(list_struct_buff): assert pyarrow_tbl.equals(df_in.to_arrow()) -@pytest.mark.filterwarnings("ignore:.*struct.*experimental") @pytest.mark.parametrize( "data", [ @@ -1668,7 +1666,6 @@ def test_empty_statistics(): assert stats[0]["i"].get("sum") == 1 -@pytest.mark.filterwarnings("ignore:.*struct.*experimental") @pytest.mark.parametrize( "equivalent_columns", [ diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 8bb246c9c84..366b2e0ebae 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -301,8 +301,6 @@ Notes ----- -Support for reading files with struct columns is currently experimental, -the output may not be as reliable as reading for other datatypes. 
{remote_data_sources} Examples @@ -447,11 +445,6 @@ Note that this option only affects columns of ListDtype. Names of other column types will be ignored. -Notes ------ -Support for writing tables with struct columns is currently experimental, -the output may not be as reliable as writing for other datatypes. - See Also -------- cudf.read_orc From 26f3e76ceddda6d3517f46825e3a8bc2fce0381f Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Tue, 11 Oct 2022 09:06:14 +0800 Subject: [PATCH 020/202] ArrowIPCTableWriter writes en empty batch in the case of an empty table. (#11883) closes https://github.com/rapidsai/cudf/issues/11882 Updated the `ArrowIPCTableWriter` to write en empty batch explicitly in the case of an empty table, because the Arrow IPC writer will write no batches out for this case, leading to an error as below when calling the `Pyarrow.Table.from_batches` without specifying a schema. ``` E File "pyarrow/table.pxi", line 1609, in pyarrow.lib.Table.from_batches E ValueError: Must pass schema, or at least one RecordBatch ``` Signed-off-by: Liangcai Li Authors: - Liangcai Li (https://github.com/firestarman) Approvers: - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/11883 --- java/src/main/native/src/TableJni.cpp | 10 ++++++- .../test/java/ai/rapids/cudf/TableTest.java | 29 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index ad280cad5fd..c23c5a3ccb2 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -258,7 +258,15 @@ class native_arrow_ipc_writer_handle final { writer = *tmp_writer; initialized = true; } - writer->WriteTable(*arrow_tab, max_chunk); + if (arrow_tab->num_rows() == 0) { + // Arrow C++ IPC writer will not write an empty batch in the case of an + // empty table, so need to write an empty batch explicitly. 
+ // For more please see https://issues.apache.org/jira/browse/ARROW-17912. + auto empty_batch = arrow::RecordBatch::MakeEmpty(arrow_tab->schema()); + writer->WriteRecordBatch(*(*empty_batch)); + } else { + writer->WriteTable(*arrow_tab, max_chunk); + } } void close() { diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 194c1094caf..4649a0e3507 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -7937,6 +7937,35 @@ void testArrowIPCWriteToBufferChunked() { } } + @Test + void testArrowIPCWriteEmptyToBufferChunked() { + try (Table emptyTable = new Table.TestBuilder().timestampDayColumn().build(); + MyBufferConsumer consumer = new MyBufferConsumer()) { + ArrowIPCWriterOptions options = ArrowIPCWriterOptions.builder() + .withColumnNames("day") + .build(); + try (TableWriter writer = Table.writeArrowIPCChunked(options, consumer)) { + writer.write(emptyTable); + } + try (StreamedTableReader reader = Table.readArrowIPCChunked(new MyBufferProvider(consumer))) { + boolean done = false; + int count = 0; + while (!done) { + try (Table t = reader.getNextIfAvailable()) { + if (t == null) { + done = true; + } else { + assertTablesAreEqual(emptyTable, t); + count++; + } + } + } + // Expect one empty batch for the empty table. + assertEquals(1, count); + } + } + } + @Test void testORCWriteToBufferChunked() { String[] selectedColumns = WriteUtils.getAllColumns(false); From 566b3d105bf58bcd6050a539fabb022782e050ab Mon Sep 17 00:00:00 2001 From: Gregory Kimball Date: Tue, 11 Oct 2022 08:17:08 -0700 Subject: [PATCH 021/202] Conform "bench_isin" to match generator column names (#11549) The version of `bench_isin` merged in #11125 used key and column names of the format `f"key{i}"` rather than the format `f"{string.ascii_lowercase[i]}"` as is used in the dataframe generator. 
As a result the `isin` benchmark using a dictionary argument short-circuits with no matching keys, and the `isin` benchmark using a dataframe argument finds no matches. This PR also adjusts the `isin` arguments from `range(1000)` to `range(50)` to better match the input dataframe cardinality of 100. With `range(1000)`, every element matches but with `range(50)` only 50% of the elements match. Authors: - Gregory Kimball (https://github.com/GregoryKimball) Approvers: - Bradley Dice (https://github.com/bdice) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/11549 --- python/cudf/benchmarks/API/bench_dataframe.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/python/cudf/benchmarks/API/bench_dataframe.py b/python/cudf/benchmarks/API/bench_dataframe.py index 9bad637f6ae..42bfa854396 100644 --- a/python/cudf/benchmarks/API/bench_dataframe.py +++ b/python/cudf/benchmarks/API/bench_dataframe.py @@ -41,14 +41,16 @@ def bench_merge(benchmark, dataframe, num_key_cols): @pytest.mark.parametrize( "values", [ - range(1000), - {f"key{i}": range(1000) for i in range(10)}, - cudf.DataFrame({f"key{i}": range(1000) for i in range(10)}), - cudf.Series(range(1000)), + lambda: range(50), + lambda: {f"{string.ascii_lowercase[i]}": range(50) for i in range(10)}, + lambda: cudf.DataFrame( + {f"{string.ascii_lowercase[i]}": range(50) for i in range(10)} + ), + lambda: cudf.Series(range(50)), ], ) def bench_isin(benchmark, dataframe, values): - benchmark(dataframe.isin, values) + benchmark(dataframe.isin, values()) @pytest.fixture( From 9ba6142f6eb6a7d8f9903a5dfaf9af22cdd76b8a Mon Sep 17 00:00:00 2001 From: Gregory Kimball Date: Tue, 11 Oct 2022 08:18:10 -0700 Subject: [PATCH 022/202] Use public APIs in STREAM_COMPACTION_NVBENCH (#11892) Use `state.set_cuda_stream` to set the stream for the nvbench benchmark. Then run `state.exec` on the public API instead of the detail API, e.g. 
`cudf::distinct` instead of `cudf::detail::distinct`. Authors: - Gregory Kimball (https://github.com/GregoryKimball) Approvers: - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/11892 --- cpp/benchmarks/stream_compaction/distinct.cpp | 28 +++++++++---------- cpp/benchmarks/stream_compaction/unique.cpp | 7 ++--- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp index ad837bc4caa..23960b24b89 100644 --- a/cpp/benchmarks/stream_compaction/distinct.cpp +++ b/cpp/benchmarks/stream_compaction/distinct.cpp @@ -18,8 +18,8 @@ #include #include -#include #include +#include #include #include @@ -41,14 +41,13 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list) auto input_column = source_column->view(); auto input_table = cudf::table_view({input_column, input_column, input_column, input_column}); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - rmm::cuda_stream_view stream_view{launch.get_stream()}; - auto result = cudf::detail::distinct(input_table, - {0}, - cudf::duplicate_keep_option::KEEP_ANY, - cudf::null_equality::EQUAL, - cudf::nan_equality::ALL_EQUAL, - stream_view); + auto result = cudf::distinct(input_table, + {0}, + cudf::duplicate_keep_option::KEEP_ANY, + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL); }); } @@ -84,14 +83,13 @@ void nvbench_distinct_list(nvbench::state& state, nvbench::type_list) auto const table = create_random_table( {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 0); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - rmm::cuda_stream_view stream_view{launch.get_stream()}; - auto result = 
cudf::detail::distinct(*table, - {0}, - cudf::duplicate_keep_option::KEEP_ANY, - cudf::null_equality::EQUAL, - cudf::nan_equality::ALL_EQUAL, - stream_view); + auto result = cudf::distinct(*table, + {0}, + cudf::duplicate_keep_option::KEEP_ANY, + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL); }); } diff --git a/cpp/benchmarks/stream_compaction/unique.cpp b/cpp/benchmarks/stream_compaction/unique.cpp index 6b586581408..bcf9628b19f 100644 --- a/cpp/benchmarks/stream_compaction/unique.cpp +++ b/cpp/benchmarks/stream_compaction/unique.cpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include @@ -62,10 +62,9 @@ void nvbench_unique(nvbench::state& state, nvbench::type_listview(); auto input_table = cudf::table_view({input_column, input_column, input_column, input_column}); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - rmm::cuda_stream_view stream_view{launch.get_stream()}; - auto result = - cudf::detail::unique(input_table, {0}, Keep, cudf::null_equality::EQUAL, stream_view); + auto result = cudf::unique(input_table, {0}, Keep, cudf::null_equality::EQUAL); }); } From a921f5daf6a3753e04bd01c408a5a1de5b208589 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 11 Oct 2022 12:54:44 -0500 Subject: [PATCH 023/202] Error on `ListColumn` or any new unsupported column in `cudf.Index` (#11902) This PR raises a `NotImplementedError` for `ListColumn` or any new column that isn't supported by `cudf.Index` yet. 
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Ashwin Srinath (https://github.com/shwina) URL: https://github.com/rapidsai/cudf/pull/11902 --- python/cudf/cudf/core/index.py | 5 +++++ python/cudf/cudf/tests/test_index.py | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 3d77ed15027..5b101f74664 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -118,6 +118,11 @@ def _index_from_data(data: MutableMapping, name: Any = None): index_class_type = CategoricalIndex elif isinstance(values, (IntervalColumn, StructColumn)): index_class_type = IntervalIndex + else: + raise NotImplementedError( + "Unsupported column type passed to " + f"create an Index: {type(values)}" + ) else: index_class_type = cudf.MultiIndex return index_class_type._from_data(data, name) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 358d5e2170e..894c87add4b 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -2783,3 +2783,15 @@ def test_index_null_values(): gidx = cudf.Index([1.0, None, 3, 0, None]) with pytest.raises(ValueError): gidx.values + + +def test_index_error_list_index(): + s = cudf.Series([[1, 2], [2], [4]]) + with pytest.raises( + NotImplementedError, + match=re.escape( + "Unsupported column type passed to create an " + "Index: " + ), + ): + cudf.Index(s) From 7032cc3c073f5d8842765adc4dc32883c943ef2b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 11 Oct 2022 11:12:05 -0700 Subject: [PATCH 024/202] Add coverage for string UDF tests. (#11891) Many PRs are currently showing Codecov patch status check failures that appear to be the result of not uploading coverage reports for the string UDF tests. This PR should enable the missing coverage and ensure that we are actually measuring coverage of these code paths. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - https://github.com/brandon-b-miller - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cudf/pull/11891 --- ci/gpu/build.sh | 2 +- codecov.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index afcc80a6803..41dac0e5e0f 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -294,7 +294,7 @@ elif [ ${STRINGS_UDF_PYTEST_RETCODE} -ne 0 ]; then else cd "$WORKSPACE/python/cudf/cudf" gpuci_logger "Python py.test retest cuDF UDFs" - py.test tests/test_udf_masked_ops.py -n 8 --cache-clear + py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-strings-udf-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf-strings-udf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-strings-udf-coverage.xml" --cov-report term --dist=loadscope tests fi # Run benchmarks with both cudf and pandas to ensure compatibility is maintained. diff --git a/codecov.yml b/codecov.yml index f9d0f906807..d45c7e2990f 100644 --- a/codecov.yml +++ b/codecov.yml @@ -2,7 +2,7 @@ coverage: status: project: off - patch: on + patch: default: target: auto threshold: 0% From 387192c2b3659c872b27497aef033a5abcdb5444 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Tue, 11 Oct 2022 16:04:42 -0400 Subject: [PATCH 025/202] Add ngroup (#11871) Adds the `GroupBy.ngroup()` method. 
Closes #11848 Authors: - Ashwin Srinath (https://github.com/shwina) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/11871 --- docs/cudf/source/api_docs/groupby.rst | 1 + python/cudf/cudf/_lib/cpp/null_mask.pxd | 22 +++-- python/cudf/cudf/_lib/groupby.pyx | 25 ++++- python/cudf/cudf/_lib/null_mask.pyx | 29 ++++++ python/cudf/cudf/core/groupby/groupby.py | 114 +++++++++++++++++++++-- python/cudf/cudf/tests/test_groupby.py | 33 +++++++ 6 files changed, 211 insertions(+), 13 deletions(-) diff --git a/docs/cudf/source/api_docs/groupby.rst b/docs/cudf/source/api_docs/groupby.rst index 141e5adba93..f36951749fb 100644 --- a/docs/cudf/source/api_docs/groupby.rst +++ b/docs/cudf/source/api_docs/groupby.rst @@ -53,6 +53,7 @@ Computations / descriptive stats GroupBy.mean GroupBy.median GroupBy.min + GroupBy.ngroup GroupBy.nth GroupBy.pad GroupBy.prod diff --git a/python/cudf/cudf/_lib/cpp/null_mask.pxd b/python/cudf/cudf/_lib/cpp/null_mask.pxd index c225a16297b..3050a9f3459 100644 --- a/python/cudf/cudf/_lib/cpp/null_mask.pxd +++ b/python/cudf/cudf/_lib/cpp/null_mask.pxd @@ -1,11 +1,13 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
from libc.stdint cimport int32_t +from libcpp.pair cimport pair from rmm._lib.device_buffer cimport device_buffer -cimport cudf._lib.cpp.types as libcudf_types from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.cpp.table.table_view cimport table_view +from cudf._lib.cpp.types cimport mask_state, size_type ctypedef int32_t underlying_type_t_mask_state @@ -16,15 +18,23 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: ) except + cdef size_t bitmask_allocation_size_bytes ( - libcudf_types.size_type number_of_bits, + size_type number_of_bits, size_t padding_boundary ) except + cdef size_t bitmask_allocation_size_bytes ( - libcudf_types.size_type number_of_bits + size_type number_of_bits ) except + cdef device_buffer create_null_mask ( - libcudf_types.size_type size, - libcudf_types.mask_state state + size_type size, + mask_state state ) except + + + cdef pair[device_buffer, size_type] bitmask_and( + table_view view + ) + + cdef pair[device_buffer, size_type] bitmask_or( + table_view view + ) diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index be5bb2741b4..08a1d74f80f 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -121,13 +121,36 @@ cdef class GroupBy: self.dropna = dropna def groups(self, list values): + """ + Perform a sort groupby, using ``self.keys`` as the key columns + and ``values`` as the value columns. + + Parameters + ---------- + values: list of Columns + The value columns + + Returns + ------- + grouped_keys: list of Columns + The grouped key columns + grouped_values: list of Columns + The grouped value columns + offsets: list of integers + Integer offsets such that offsets[i+1] - offsets[i] + represents the size of group `i`. 
+ """ cdef table_view values_view = table_view_from_columns(values) with nogil: c_groups = move(self.c_obj.get()[0].get_groups(values_view)) grouped_key_cols = columns_from_unique_ptr(move(c_groups.keys)) - grouped_value_cols = columns_from_unique_ptr(move(c_groups.values)) + + if values: + grouped_value_cols = columns_from_unique_ptr(move(c_groups.values)) + else: + grouped_value_cols = [] return grouped_key_cols, grouped_value_cols, c_groups.offsets def aggregate_internal(self, values, aggregations): diff --git a/python/cudf/cudf/_lib/null_mask.pyx b/python/cudf/cudf/_lib/null_mask.pyx index b0ee28baf29..976fe0e78fc 100644 --- a/python/cudf/cudf/_lib/null_mask.pyx +++ b/python/cudf/cudf/_lib/null_mask.pyx @@ -3,6 +3,7 @@ from enum import Enum from libcpp.memory cimport make_unique, unique_ptr +from libcpp.pair cimport pair from libcpp.utility cimport move from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer @@ -11,11 +12,15 @@ from cudf._lib.column cimport Column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.null_mask cimport ( bitmask_allocation_size_bytes as cpp_bitmask_allocation_size_bytes, + bitmask_and as cpp_bitmask_and, + bitmask_or as cpp_bitmask_or, copy_bitmask as cpp_copy_bitmask, create_null_mask as cpp_create_null_mask, underlying_type_t_mask_state, ) +from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport mask_state, size_type +from cudf._lib.utils cimport table_view_from_columns from cudf.core.buffer import as_device_buffer_like @@ -95,3 +100,27 @@ def create_null_mask(size_type size, state=MaskState.UNINITIALIZED): rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db)) buf = as_device_buffer_like(rmm_db) return buf + + +def bitmask_and(columns: list): + cdef table_view c_view = table_view_from_columns(columns) + cdef pair[device_buffer, size_type] c_result + cdef unique_ptr[device_buffer] up_db + with nogil: + c_result = move(cpp_bitmask_and(c_view)) + up_db = 
make_unique[device_buffer](move(c_result.first)) + dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db)) + buf = as_device_buffer_like(dbuf) + return buf, c_result.second + + +def bitmask_or(columns: list): + cdef table_view c_view = table_view_from_columns(columns) + cdef pair[device_buffer, size_type] c_result + cdef unique_ptr[device_buffer] up_db + with nogil: + c_result = move(cpp_bitmask_or(c_view)) + up_db = make_unique[device_buffer](move(c_result.first)) + dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db)) + buf = as_device_buffer_like(dbuf) + return buf, c_result.second diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index c96407a7ff9..0ab64bd985a 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -8,11 +8,13 @@ from functools import cached_property from typing import Any, Iterable, List, Tuple, Union +import cupy as cp import numpy as np import pandas as pd import cudf from cudf._lib import groupby as libgroupby +from cudf._lib.null_mask import bitmask_or from cudf._lib.reshape import interleave_columns from cudf._typing import AggType, DataFrameOrSeries, MultiColumnAggType from cudf.api.types import is_list_like @@ -544,6 +546,88 @@ def nth(self, n): return result[sizes > n] + def ngroup(self, ascending=True): + """ + Number each group from 0 to the number of groups - 1. + + This is the enumerative complement of cumcount. Note that the + numbers given to the groups match the order in which the groups + would be seen when iterating over the groupby object, not the + order they are first observed. + + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from number of group - 1 to 0. + + Returns + ------- + Series + Unique numbers for each group. + + See Also + -------- + .cumcount : Number the rows in each group. 
+ + Examples + -------- + >>> df = cudf.DataFrame({"A": list("aaabba")}) + >>> df + A + 0 a + 1 a + 2 a + 3 b + 4 b + 5 a + >>> df.groupby('A').ngroup() + 0 0 + 1 0 + 2 0 + 3 1 + 4 1 + 5 0 + dtype: int64 + >>> df.groupby('A').ngroup(ascending=False) + 0 1 + 1 1 + 2 1 + 3 0 + 4 0 + 5 1 + dtype: int64 + >>> df.groupby(["A", [1,1,2,3,2,1]]).ngroup() + 0 0 + 1 0 + 2 1 + 3 3 + 4 2 + 5 0 + dtype: int64 + """ + num_groups = len(index := self.grouping.keys.unique()) + _, has_null_group = bitmask_or([*index._columns]) + + if ascending: + if has_null_group: + group_ids = cudf.Series._from_data( + {None: cp.arange(-1, num_groups - 1)} + ) + else: + group_ids = cudf.Series._from_data( + {None: cp.arange(num_groups)} + ) + else: + group_ids = cudf.Series._from_data( + {None: cp.arange(num_groups - 1, -1, -1)} + ) + + if has_null_group: + group_ids.iloc[0] = cudf.NA + + group_ids._index = index + return self._broadcast(group_ids) + def serialize(self): header = {} frames = [] @@ -925,6 +1009,29 @@ def rolling_avg(val, avg): kwargs.update({"chunks": offsets}) return grouped_values.apply_chunks(function, **kwargs) + def _broadcast(self, values): + """ + Broadcast the results of an aggregation to the group + + Parameters + ---------- + values: Series + A Series representing the results of an aggregation. The + index of the Series must be the (unique) values + representing the group keys. + + Returns + ------- + A Series of the same size and with the same index as + ``self.obj``. + """ + if not values.index.equals(self.grouping.keys): + values = values._align_to_index( + self.grouping.keys, how="right", allow_non_unique=True + ) + values.index = self.obj.index + return values + def transform(self, function): """Apply an aggregation, then broadcast the result to the group size. @@ -966,12 +1073,7 @@ def transform(self, function): "Currently, `transform()` supports only aggregations." 
) from e - if not result.index.equals(self.grouping.keys): - result = result._align_to_index( - self.grouping.keys, how="right", allow_non_unique=True - ) - result.index = self.obj.index - return result + return self._broadcast(result) def rolling(self, *args, **kwargs): """ diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index c4c8e81dda2..b00e31115c9 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -2718,3 +2718,36 @@ def test_groupby_group_keys(group_keys, by): actual = g_group[["B", "C"]].apply(lambda x: x / x.sum()) expected = p_group[["B", "C"]].apply(lambda x: x / x.sum()) assert_eq(actual, expected) + + +@pytest.fixture +def df_ngroup(): + df = cudf.DataFrame( + { + "a": [2, 2, 1, 1, 2, 3], + "b": [1, 2, 1, 2, 1, 2], + "c": ["a", "a", "b", "c", "d", "c"], + }, + index=[1, 3, 5, 7, 4, 2], + ) + df.index.name = "foo" + return df + + +@pytest.mark.parametrize( + "by", + [ + lambda: "a", + lambda: "b", + lambda: ["a", "b"], + lambda: "c", + lambda: pd.Series([1, 2, 1, 2, 1, 2]), + lambda: pd.Series(["x", "y", "y", "x", "z", "x"]), + ], +) +@pytest.mark.parametrize("ascending", [True, False]) +def test_groupby_ngroup(by, ascending, df_ngroup): + by = by() + expected = df_ngroup.to_pandas().groupby(by).ngroup(ascending=ascending) + actual = df_ngroup.groupby(by).ngroup(ascending=ascending) + assert_eq(expected, actual, check_dtype=False) From ccbd852421fed8f25029a1d47b0bbf833e840ddf Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 12 Oct 2022 09:25:06 -0400 Subject: [PATCH 026/202] Change expect_strings_empty into expect_column_empty libcudf test utility (#11873) Moves the `cudf::test::expect_strings_empty` utility from `cpp/tests/strings` to more generic function `cudf::test::expect_column_empty` Reference #11734 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice 
(https://github.com/bdice) - Vukasin Milovanovic (https://github.com/vuule) - Tobias Ribizel (https://github.com/upsj) URL: https://github.com/rapidsai/cudf/pull/11873 --- cpp/CMakeLists.txt | 1 - cpp/include/cudf_test/column_utilities.hpp | 7 ++++ cpp/tests/copying/detail_gather_tests.cu | 15 ++++----- cpp/tests/copying/gather_list_tests.cpp | 17 +++++----- cpp/tests/copying/gather_str_tests.cpp | 16 +++++----- cpp/tests/copying/gather_tests.cpp | 15 ++++----- cpp/tests/copying/scatter_list_tests.cpp | 14 ++++---- cpp/tests/lists/extract_tests.cpp | 9 +++--- .../reshape/interleave_columns_tests.cpp | 7 ++-- cpp/tests/strings/array_tests.cpp | 12 +++---- cpp/tests/strings/booleans_tests.cpp | 10 +++--- cpp/tests/strings/case_tests.cpp | 21 ++++++------ .../strings/combine/concatenate_tests.cpp | 15 ++++----- .../strings/combine/join_strings_tests.cpp | 13 ++++---- cpp/tests/strings/concatenate_tests.cpp | 13 ++++---- cpp/tests/strings/datetime_tests.cpp | 11 +++---- cpp/tests/strings/durations_tests.cpp | 13 ++++---- cpp/tests/strings/extract_tests.cpp | 2 -- cpp/tests/strings/factories_test.cu | 12 +++---- cpp/tests/strings/fill_tests.cpp | 11 +++---- cpp/tests/strings/findall_tests.cpp | 6 ++-- cpp/tests/strings/fixed_point_tests.cpp | 13 ++++---- cpp/tests/strings/floats_tests.cpp | 9 +++--- cpp/tests/strings/format_lists_tests.cpp | 6 ++-- cpp/tests/strings/integers_tests.cpp | 11 +++---- cpp/tests/strings/ipv4_tests.cpp | 8 ++--- cpp/tests/strings/pad_tests.cpp | 11 +++---- cpp/tests/strings/replace_regex_tests.cpp | 6 ++-- cpp/tests/strings/replace_tests.cpp | 11 +++---- cpp/tests/strings/strip_tests.cpp | 11 +++---- cpp/tests/strings/substring_tests.cpp | 17 +++++----- cpp/tests/strings/translate_tests.cpp | 13 ++++---- cpp/tests/strings/urls_tests.cpp | 10 +++--- cpp/tests/strings/utilities.cpp | 32 ------------------- cpp/tests/strings/utilities.h | 30 ----------------- cpp/tests/text/ngrams_tests.cpp | 12 +++---- cpp/tests/utilities/column_utilities.cu | 
9 ++++++ 37 files changed, 187 insertions(+), 262 deletions(-) delete mode 100644 cpp/tests/strings/utilities.cpp delete mode 100644 cpp/tests/strings/utilities.h diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8bde0bcfb9b..bfabbbc625d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -693,7 +693,6 @@ add_library( tests/utilities/base_fixture.cpp tests/utilities/column_utilities.cu tests/utilities/table_utilities.cu - tests/strings/utilities.cpp ) set_target_properties( diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index d41ea530402..b7d890fb315 100644 --- a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -107,6 +107,13 @@ bool expect_columns_equivalent(cudf::column_view const& lhs, debug_output_level verbosity = debug_output_level::FIRST_ERROR, size_type fp_ulps = cudf::test::default_ulp); +/** + * @brief Verifies the given column is empty + * + * @param col The column to check + */ +void expect_column_empty(cudf::column_view const& col); + /** * @brief Verifies the bitwise equality of two device memory buffers. * diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu index e3cd975ab41..9cd74abce1c 100644 --- a/cpp/tests/copying/detail_gather_tests.cu +++ b/cpp/tests/copying/detail_gather_tests.cu @@ -13,7 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include + +#include +#include +#include +#include +#include +#include #include #include @@ -24,13 +30,6 @@ #include #include -#include -#include -#include -#include -#include -#include - #include #include diff --git a/cpp/tests/copying/gather_list_tests.cpp b/cpp/tests/copying/gather_list_tests.cpp index b26ee90c3b9..1caecb558e2 100644 --- a/cpp/tests/copying/gather_list_tests.cpp +++ b/cpp/tests/copying/gather_list_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include + +#include +#include +#include +#include +#include +#include #include #include @@ -23,13 +29,6 @@ #include #include -#include -#include -#include -#include -#include -#include - template class GatherTestListTyped : public cudf::test::BaseFixture { }; diff --git a/cpp/tests/copying/gather_str_tests.cpp b/cpp/tests/copying/gather_str_tests.cpp index a9a9a4f9342..4e4e9619fbf 100644 --- a/cpp/tests/copying/gather_str_tests.cpp +++ b/cpp/tests/copying/gather_str_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include -#include -#include #include #include #include #include #include -#include + +#include +#include +#include +#include +#include class GatherTestStr : public cudf::test::BaseFixture { }; @@ -135,7 +135,7 @@ TEST_F(GatherTestStr, GatherEmptyMapStringsColumn) gather_map, cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED); - cudf::test::expect_strings_empty(results->get_column(0).view()); + cudf::test::expect_column_empty(results->get_column(0).view()); } TEST_F(GatherTestStr, GatherZeroSizeStringsColumn) diff --git a/cpp/tests/copying/gather_tests.cpp b/cpp/tests/copying/gather_tests.cpp index 141503ed978..9c8d6102000 100644 --- a/cpp/tests/copying/gather_tests.cpp +++ b/cpp/tests/copying/gather_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,13 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include - -#include -#include -#include -#include -#include #include #include @@ -28,6 +21,12 @@ #include #include +#include +#include +#include +#include +#include + template class GatherTest : public cudf::test::BaseFixture { }; diff --git a/cpp/tests/copying/scatter_list_tests.cpp b/cpp/tests/copying/scatter_list_tests.cpp index 0c12f10137a..179ab56fc40 100644 --- a/cpp/tests/copying/scatter_list_tests.cpp +++ b/cpp/tests/copying/scatter_list_tests.cpp @@ -14,7 +14,12 @@ * limitations under the License. 
*/ -#include +#include +#include +#include +#include +#include +#include #include #include @@ -24,13 +29,6 @@ #include #include -#include -#include -#include -#include -#include -#include - template class TypedScatterListsTest : public cudf::test::BaseFixture { }; diff --git a/cpp/tests/lists/extract_tests.cpp b/cpp/tests/lists/extract_tests.cpp index 210a5814ede..34c8e044a3f 100644 --- a/cpp/tests/lists/extract_tests.cpp +++ b/cpp/tests/lists/extract_tests.cpp @@ -14,16 +14,15 @@ * limitations under the License. */ -#include -#include -#include - #include #include #include #include #include -#include + +#include +#include +#include #include diff --git a/cpp/tests/reshape/interleave_columns_tests.cpp b/cpp/tests/reshape/interleave_columns_tests.cpp index c682e4ab29f..63e465f7658 100644 --- a/cpp/tests/reshape/interleave_columns_tests.cpp +++ b/cpp/tests/reshape/interleave_columns_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +14,12 @@ * limitations under the License. */ -#include #include #include #include #include -#include +#include #include using namespace cudf::test::iterators; @@ -195,7 +194,7 @@ TEST_F(InterleaveStringsColumnsTest, ZeroSizedColumns) cudf::column_view col0(cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); auto results = cudf::interleave_columns(cudf::table_view{{col0}}); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(InterleaveStringsColumnsTest, SingleColumn) diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp index 10cc4562be7..488184f4099 100644 --- a/cpp/tests/strings/array_tests.cpp +++ b/cpp/tests/strings/array_tests.cpp @@ -14,8 +14,6 @@ * limitations under the License. 
*/ -#include - #include #include #include @@ -53,7 +51,7 @@ TEST_F(StringsColumnTest, SortZeroSizeStringsColumn) cudf::column_view zero_size_strings_column( cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); auto results = cudf::sort(cudf::table_view({zero_size_strings_column})); - cudf::test::expect_strings_empty(results->view().column(0)); + cudf::test::expect_column_empty(results->view().column(0)); } class SliceParmsTest : public StringsColumnTest, @@ -123,7 +121,7 @@ TEST_F(StringsColumnTest, SliceZeroSizeStringsColumn) cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); auto strings_view = cudf::strings_column_view(zero_size_strings_column); auto results = cudf::strings::detail::copy_slice(strings_view, 1, 2); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsColumnTest, Gather) @@ -151,7 +149,7 @@ TEST_F(StringsColumnTest, GatherZeroSizeStringsColumn) cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); cudf::column_view map_view(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0); auto results = cudf::gather(cudf::table_view{{zero_size_strings_column}}, map_view)->release(); - cudf::test::expect_strings_empty(results.front()->view()); + cudf::test::expect_column_empty(results.front()->view()); } TEST_F(StringsColumnTest, GatherTooBig) @@ -204,12 +202,12 @@ TEST_F(StringsColumnTest, ScatterZeroSizeStringsColumn) cudf::column_view scatter_map(cudf::data_type{cudf::type_id::INT8}, 0, nullptr, nullptr, 0); auto results = cudf::scatter(cudf::table_view({source}), scatter_map, cudf::table_view({target})); - cudf::test::expect_strings_empty(results->view().column(0)); + cudf::test::expect_column_empty(results->view().column(0)); cudf::string_scalar scalar(""); auto scalar_source = std::vector>({scalar}); results = cudf::scatter(scalar_source, scatter_map, cudf::table_view({target})); - cudf::test::expect_strings_empty(results->view().column(0)); 
+ cudf::test::expect_column_empty(results->view().column(0)); } TEST_F(StringsColumnTest, OffsetsBeginEnd) diff --git a/cpp/tests/strings/booleans_tests.cpp b/cpp/tests/strings/booleans_tests.cpp index cc637bf55a0..b8e47a89274 100644 --- a/cpp/tests/strings/booleans_tests.cpp +++ b/cpp/tests/strings/booleans_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,12 +14,12 @@ * limitations under the License. */ -#include -#include #include #include #include -#include + +#include +#include #include @@ -69,7 +69,7 @@ TEST_F(StringsConvertTest, ZeroSizeStringsColumnBoolean) { cudf::column_view zero_size_column(cudf::data_type{cudf::type_id::BOOL8}, 0, nullptr, nullptr, 0); auto results = cudf::strings::from_booleans(zero_size_column); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsConvertTest, ZeroSizeBooleansColumn) diff --git a/cpp/tests/strings/case_tests.cpp b/cpp/tests/strings/case_tests.cpp index c399c640bb6..26b44b577eb 100644 --- a/cpp/tests/strings/case_tests.cpp +++ b/cpp/tests/strings/case_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,16 +14,15 @@ * limitations under the License. 
*/ +#include +#include +#include + #include #include #include #include -#include -#include -#include -#include - #include #include @@ -211,19 +210,19 @@ TEST_F(StringsCaseTest, EmptyStringsColumn) auto strings_view = cudf::strings_column_view(zero_size_strings_column); auto results = cudf::strings::to_lower(strings_view); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); results = cudf::strings::to_upper(strings_view); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); results = cudf::strings::swapcase(strings_view); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); results = cudf::strings::capitalize(strings_view); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); results = cudf::strings::title(strings_view); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsCaseTest, ErrorTest) diff --git a/cpp/tests/strings/combine/concatenate_tests.cpp b/cpp/tests/strings/combine/concatenate_tests.cpp index 569767531bc..0b744cd6bb4 100644 --- a/cpp/tests/strings/combine/concatenate_tests.cpp +++ b/cpp/tests/strings/combine/concatenate_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,10 @@ * limitations under the License. 
*/ +#include +#include +#include + #include #include #include @@ -21,11 +25,6 @@ #include #include -#include -#include -#include -#include - #include constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; @@ -158,7 +157,7 @@ TEST_F(StringsCombineTest, ConcatZeroSizeStringsColumns) strings_columns.push_back(zero_size_strings_column); cudf::table_view table(strings_columns); auto results = cudf::strings::concatenate(table); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsCombineTest, SingleColumnErrorCheck) @@ -207,7 +206,7 @@ TEST_F(StringsConcatenateWithColSeparatorTest, ZeroSizedColumns) auto results = cudf::strings::concatenate(cudf::table_view{{col0}}, cudf::strings_column_view(col0)); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsConcatenateWithColSeparatorTest, SingleColumnEmptyAndNullStringsNoReplacements) diff --git a/cpp/tests/strings/combine/join_strings_tests.cpp b/cpp/tests/strings/combine/join_strings_tests.cpp index e018540e84c..e0187ce2e26 100644 --- a/cpp/tests/strings/combine/join_strings_tests.cpp +++ b/cpp/tests/strings/combine/join_strings_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,17 +14,16 @@ * limitations under the License. 
*/ +#include +#include +#include + #include #include #include #include #include -#include -#include -#include -#include - #include struct JoinStringsTest : public cudf::test::BaseFixture { @@ -66,7 +65,7 @@ TEST_F(JoinStringsTest, JoinZeroSizeStringsColumn) cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); auto strings_view = cudf::strings_column_view(zero_size_strings_column); auto results = cudf::strings::join_strings(strings_view); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(JoinStringsTest, JoinAllNullStringsColumn) diff --git a/cpp/tests/strings/concatenate_tests.cpp b/cpp/tests/strings/concatenate_tests.cpp index 0318fc3edb9..1462d4dc73a 100644 --- a/cpp/tests/strings/concatenate_tests.cpp +++ b/cpp/tests/strings/concatenate_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,13 @@ * limitations under the License. */ -#include -#include -#include - #include #include #include -#include + +#include +#include +#include #include @@ -76,7 +75,7 @@ TEST_F(StringsConcatenateTest, ZeroSizeStringsColumns) strings_columns.push_back(zero_size_strings_column); strings_columns.push_back(zero_size_strings_column); auto results = cudf::strings::detail::concatenate(strings_columns); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsConcatenateTest, ZeroSizeStringsPlusNormal) diff --git a/cpp/tests/strings/datetime_tests.cpp b/cpp/tests/strings/datetime_tests.cpp index d8203917d4c..26beaf9756a 100644 --- a/cpp/tests/strings/datetime_tests.cpp +++ b/cpp/tests/strings/datetime_tests.cpp @@ -14,6 +14,10 @@ * limitations under the License. 
*/ +#include +#include +#include + #include #include #include @@ -21,11 +25,6 @@ #include #include -#include -#include -#include -#include - #include #include @@ -573,7 +572,7 @@ TEST_F(StringsDatetimeTest, ZeroSizeStringsColumn) cudf::column_view zero_size_column( cudf::data_type{cudf::type_id::TIMESTAMP_SECONDS}, 0, nullptr, nullptr, 0); auto results = cudf::strings::from_timestamps(zero_size_column); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); cudf::column_view zero_size_strings_column( cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); diff --git a/cpp/tests/strings/durations_tests.cpp b/cpp/tests/strings/durations_tests.cpp index 523c64159f4..ac971aa300d 100644 --- a/cpp/tests/strings/durations_tests.cpp +++ b/cpp/tests/strings/durations_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,13 @@ * limitations under the License. */ -#include -#include -#include - #include #include #include -#include + +#include +#include +#include #include #include @@ -732,7 +731,7 @@ TEST_F(StringsDurationsTest, ZeroSizeStringsColumn) cudf::column_view zero_size_column( cudf::data_type{cudf::type_id::DURATION_SECONDS}, 0, nullptr, nullptr, 0); auto results = cudf::strings::from_durations(zero_size_column); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); cudf::column_view zero_size_strings_column( cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); diff --git a/cpp/tests/strings/extract_tests.cpp b/cpp/tests/strings/extract_tests.cpp index 49a0c51e14f..e396ca42d6c 100644 --- a/cpp/tests/strings/extract_tests.cpp +++ b/cpp/tests/strings/extract_tests.cpp @@ -14,8 +14,6 @@ * limitations under the License. 
*/ -#include - #include #include #include diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index 36fdd423168..a381c1cff89 100644 --- a/cpp/tests/strings/factories_test.cu +++ b/cpp/tests/strings/factories_test.cu @@ -14,6 +14,10 @@ * limitations under the License. */ +#include +#include +#include + #include #include #include @@ -24,10 +28,6 @@ #include #include #include -#include -#include -#include -#include #include #include @@ -188,12 +188,12 @@ TEST_F(StringsFactoriesTest, EmptyStringsColumn) rmm::device_uvector d_nulls{0, cudf::default_stream_value}; auto results = cudf::make_strings_column(d_chars, d_offsets, d_nulls, 0); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); rmm::device_uvector> d_strings{ 0, cudf::default_stream_value}; results = cudf::make_strings_column(d_strings); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } namespace { diff --git a/cpp/tests/strings/fill_tests.cpp b/cpp/tests/strings/fill_tests.cpp index 44bbb3c9c29..46f6b633dc5 100644 --- a/cpp/tests/strings/fill_tests.cpp +++ b/cpp/tests/strings/fill_tests.cpp @@ -14,16 +14,15 @@ * limitations under the License. 
*/ +#include +#include +#include + #include #include #include #include -#include -#include -#include -#include - #include #include @@ -83,7 +82,7 @@ TEST_F(StringsFillTest, ZeroSizeStringsColumns) cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); auto results = cudf::strings::detail::fill( cudf::strings_column_view(zero_size_strings_column), 0, 1, cudf::string_scalar("")); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsFillTest, FillRangeError) diff --git a/cpp/tests/strings/findall_tests.cpp b/cpp/tests/strings/findall_tests.cpp index b55d0977215..1dd088cb70f 100644 --- a/cpp/tests/strings/findall_tests.cpp +++ b/cpp/tests/strings/findall_tests.cpp @@ -14,13 +14,13 @@ * limitations under the License. */ -#include -#include #include #include #include #include -#include + +#include +#include #include diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index 81122b1c5d8..15c12421dd9 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -14,17 +14,16 @@ * limitations under the License. 
*/ -#include -#include -#include - #include #include #include #include -#include -#include +#include +#include +#include + +#include struct StringsConvertTest : public cudf::test::BaseFixture { }; @@ -224,7 +223,7 @@ TEST_F(StringsConvertTest, ZeroSizeStringsColumnFixedPoint) auto zero_size_column = cudf::make_empty_column(cudf::data_type{cudf::type_id::DECIMAL32}); auto results = cudf::strings::from_fixed_point(zero_size_column->view()); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsConvertTest, ZeroSizeFixedPointColumn) diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index 360ea8be178..1a3c5ada04f 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -14,13 +14,12 @@ * limitations under the License. */ -#include -#include - #include #include #include -#include + +#include +#include #include @@ -188,7 +187,7 @@ TEST_F(StringsConvertTest, ZeroSizeStringsColumnFloat) cudf::column_view zero_size_column( cudf::data_type{cudf::type_id::FLOAT32}, 0, nullptr, nullptr, 0); auto results = cudf::strings::from_floats(zero_size_column); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsConvertTest, ZeroSizeFloatsColumn) diff --git a/cpp/tests/strings/format_lists_tests.cpp b/cpp/tests/strings/format_lists_tests.cpp index 63fcdf6f00e..f1ab90ee9c5 100644 --- a/cpp/tests/strings/format_lists_tests.cpp +++ b/cpp/tests/strings/format_lists_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,6 @@ * limitations under the License. 
*/ -#include - #include #include #include @@ -36,7 +34,7 @@ TEST_F(StringsFormatListsTest, EmptyList) auto const view = cudf::lists_column_view(input); auto results = cudf::strings::format_list_column(view); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsFormatListsTest, EmptyNestedList) diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp index 5802a1ddc0a..e938eec8b3e 100644 --- a/cpp/tests/strings/integers_tests.cpp +++ b/cpp/tests/strings/integers_tests.cpp @@ -14,15 +14,14 @@ * limitations under the License. */ -#include -#include - -#include #include #include #include #include -#include + +#include +#include +#include #include #include @@ -265,7 +264,7 @@ TEST_F(StringsConvertTest, ZeroSizeStringsColumn) { cudf::column_view zero_size_column(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0); auto results = cudf::strings::from_integers(zero_size_column); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsConvertTest, ZeroSizeIntegersColumn) diff --git a/cpp/tests/strings/ipv4_tests.cpp b/cpp/tests/strings/ipv4_tests.cpp index 1bc726edea7..0a404534916 100644 --- a/cpp/tests/strings/ipv4_tests.cpp +++ b/cpp/tests/strings/ipv4_tests.cpp @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include -#include #include #include #include -#include + +#include +#include #include @@ -75,7 +75,7 @@ TEST_F(StringsConvertTest, ZeroSizeStringsColumnIPV4) { cudf::column_view zero_size_column(cudf::data_type{cudf::type_id::INT64}, 0, nullptr, nullptr, 0); auto results = cudf::strings::integers_to_ipv4(zero_size_column); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); results = cudf::strings::ipv4_to_integers(results->view()); EXPECT_EQ(0, results->size()); } diff --git a/cpp/tests/strings/pad_tests.cpp b/cpp/tests/strings/pad_tests.cpp index 1ccef58a8f6..c416c2b3ce1 100644 --- a/cpp/tests/strings/pad_tests.cpp +++ b/cpp/tests/strings/pad_tests.cpp @@ -14,6 +14,10 @@ * limitations under the License. */ +#include +#include +#include + #include #include #include @@ -21,11 +25,6 @@ #include #include -#include -#include -#include -#include - #include #include @@ -103,7 +102,7 @@ TEST_F(StringsPadTest, ZeroSizeStringsColumn) cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); auto strings_view = cudf::strings_column_view(zero_size_strings_column); auto results = cudf::strings::pad(strings_view, 5); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } class PadParameters : public StringsPadTest, public testing::WithParamInterface { diff --git a/cpp/tests/strings/replace_regex_tests.cpp b/cpp/tests/strings/replace_regex_tests.cpp index 79d968b14ad..6280463d112 100644 --- a/cpp/tests/strings/replace_regex_tests.cpp +++ b/cpp/tests/strings/replace_regex_tests.cpp @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include -#include #include #include #include -#include + +#include +#include #include diff --git a/cpp/tests/strings/replace_tests.cpp b/cpp/tests/strings/replace_tests.cpp index 75c6cfa70e4..cd39c1e088a 100644 --- a/cpp/tests/strings/replace_tests.cpp +++ b/cpp/tests/strings/replace_tests.cpp @@ -14,16 +14,15 @@ * limitations under the License. */ +#include +#include +#include + #include #include #include #include -#include "./utilities.h" -#include -#include -#include - #include #include @@ -348,5 +347,5 @@ TEST_F(StringsReplaceTest, EmptyStringsColumn) auto results = cudf::strings::replace( strings_view, cudf::string_scalar("not"), cudf::string_scalar("pertinent")); auto view = results->view(); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } diff --git a/cpp/tests/strings/strip_tests.cpp b/cpp/tests/strings/strip_tests.cpp index 6916b990762..f7044b48e40 100644 --- a/cpp/tests/strings/strip_tests.cpp +++ b/cpp/tests/strings/strip_tests.cpp @@ -14,15 +14,14 @@ * limitations under the License. */ -#include -#include -#include - -#include "./utilities.h" #include #include #include +#include +#include +#include + #include #include @@ -99,7 +98,7 @@ TEST_F(StringsStripTest, EmptyStringsColumn) auto strings_view = cudf::strings_column_view(zero_size_strings_column); auto results = cudf::strings::strip(strings_view); auto view = results->view(); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsStripTest, InvalidParameter) diff --git a/cpp/tests/strings/substring_tests.cpp b/cpp/tests/strings/substring_tests.cpp index 1a90dc5fe38..e8e2d936d12 100644 --- a/cpp/tests/strings/substring_tests.cpp +++ b/cpp/tests/strings/substring_tests.cpp @@ -14,16 +14,15 @@ * limitations under the License. 
*/ +#include +#include +#include + #include #include #include #include -#include -#include -#include -#include - #include #include #include @@ -283,18 +282,18 @@ TEST_F(StringsSubstringsTest, ZeroSizeStringsColumn) auto strings_view = cudf::strings_column_view(zero_size_strings_column); auto results = cudf::strings::slice_strings(strings_view, 1, 2); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("foo"), 1); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); cudf::column_view starts_column(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0); cudf::column_view stops_column(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0); results = cudf::strings::slice_strings(strings_view, starts_column, stops_column); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); results = cudf::strings::slice_strings(strings_view, strings_view, 1); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsSubstringsTest, AllEmpty) diff --git a/cpp/tests/strings/translate_tests.cpp b/cpp/tests/strings/translate_tests.cpp index 53c6982b880..1e278caa366 100644 --- a/cpp/tests/strings/translate_tests.cpp +++ b/cpp/tests/strings/translate_tests.cpp @@ -14,17 +14,16 @@ * limitations under the License. 
*/ +#include +#include +#include + #include #include #include #include #include -#include -#include -#include -#include - #include #include @@ -69,9 +68,9 @@ TEST_F(StringsTranslateTest, ZeroSizeStringsColumn) auto strings_view = cudf::strings_column_view(zero_size_strings_column); std::vector> translate_table; auto results = cudf::strings::translate(strings_view, translate_table); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); results = cudf::strings::filter_characters(strings_view, translate_table); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(StringsTranslateTest, FilterCharacters) diff --git a/cpp/tests/strings/urls_tests.cpp b/cpp/tests/strings/urls_tests.cpp index 95a51bbaaeb..9199d78cfb8 100644 --- a/cpp/tests/strings/urls_tests.cpp +++ b/cpp/tests/strings/urls_tests.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ -#include -#include #include #include #include -#include + +#include +#include #include @@ -230,7 +230,7 @@ TEST_F(StringsConvertTest, ZeroSizeUrlStringsColumn) cudf::column_view zero_size_column( cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); auto results = cudf::strings::url_encode(zero_size_column); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); results = cudf::strings::url_decode(zero_size_column); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } diff --git a/cpp/tests/strings/utilities.cpp b/cpp/tests/strings/utilities.cpp deleted file mode 100644 index 1d7ec7cbecd..00000000000 --- a/cpp/tests/strings/utilities.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2019, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include - -namespace cudf { -namespace test { -void expect_strings_empty(cudf::column_view strings_column) -{ - EXPECT_EQ(type_id::STRING, strings_column.type().id()); - EXPECT_EQ(0, strings_column.size()); - EXPECT_EQ(0, strings_column.null_count()); - EXPECT_EQ(0, strings_column.num_children()); -} - -} // namespace test -} // namespace cudf diff --git a/cpp/tests/strings/utilities.h b/cpp/tests/strings/utilities.h deleted file mode 100644 index d6f0e9c4f1f..00000000000 --- a/cpp/tests/strings/utilities.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -namespace cudf { -namespace test { -/** - * @brief Utility will verify the given strings column is empty. 
- * - * @param strings_column Column of strings to check - */ -void expect_strings_empty(cudf::column_view strings_column); - -} // namespace test -} // namespace cudf diff --git a/cpp/tests/text/ngrams_tests.cpp b/cpp/tests/text/ngrams_tests.cpp index 20ffd3baa41..61bd1b3dccd 100644 --- a/cpp/tests/text/ngrams_tests.cpp +++ b/cpp/tests/text/ngrams_tests.cpp @@ -14,13 +14,13 @@ * limitations under the License. */ -#include -#include -#include #include #include #include -#include + +#include +#include +#include #include @@ -105,9 +105,9 @@ TEST_F(TextGenerateNgramsTest, Empty) cudf::column_view zero_size_strings_column( cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); auto results = nvtext::generate_ngrams(cudf::strings_column_view(zero_size_strings_column)); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); results = nvtext::generate_character_ngrams(cudf::strings_column_view(zero_size_strings_column)); - cudf::test::expect_strings_empty(results->view()); + cudf::test::expect_column_empty(results->view()); } TEST_F(TextGenerateNgramsTest, Errors) diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 5106196a58f..d0fc92b0bb5 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -854,6 +854,15 @@ bool expect_columns_equivalent(cudf::column_view const& lhs, fp_ulps); } +/** + * @copydoc cudf::test::expect_column_empty + */ +void expect_column_empty(cudf::column_view const& col) +{ + EXPECT_EQ(0, col.size()); + EXPECT_EQ(0, col.null_count()); +} + /** * @copydoc cudf::test::expect_equal_buffers */ From 75a6973275f4601411c94dc42d878a5bfc04a104 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 12 Oct 2022 08:53:43 -0500 Subject: [PATCH 027/202] Relax `codecov` threshold diff (#11899) This PR relaxes `codecov` threshold which will allow CI checks to pass(though it's optional to merge). 
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cudf/pull/11899 --- codecov.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codecov.yml b/codecov.yml index d45c7e2990f..344d4f3f04e 100644 --- a/codecov.yml +++ b/codecov.yml @@ -5,7 +5,7 @@ coverage: patch: default: target: auto - threshold: 0% + threshold: 5% github_checks: annotations: true From 8b5ab2394748a84c2200da389f7f3ff3de5b5590 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 12 Oct 2022 09:58:24 -0400 Subject: [PATCH 028/202] Fix memcheck error in TypeInference.Timestamp gtest (#11905) Fixes an error in the `TypeInference.Timestamp` gtest where the `size` parameter was incorrect. This error was found by the nightly builds and could be recreated using ``` compute-sanitizer --tool memcheck gtests/TYPE_INFERENCE_TEST --gtest_filter=TypeInference.Timestamp --rmm_mode=cuda ``` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/11905 --- cpp/tests/io/type_inference_test.cu | 49 +++++++++++++---------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/cpp/tests/io/type_inference_test.cu b/cpp/tests/io/type_inference_test.cu index 04bb7507934..4d01ef95b85 100644 --- a/cpp/tests/io/type_inference_test.cu +++ b/cpp/tests/io/type_inference_test.cu @@ -50,9 +50,8 @@ TEST_F(TypeInference, Basic) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - std::size_t constexpr size = 3; - auto const string_offset = std::vector{1, 4, 7}; - auto const string_length = std::vector{2, 2, 1}; + auto const string_offset = std::vector{1, 4, 7}; + auto const string_length = std::vector{2, 2, 1}; rmm::device_vector d_string_offset{string_offset}; 
rmm::device_vector d_string_length{string_length}; @@ -63,7 +62,7 @@ TEST_F(TypeInference, Basic) infer_data_type(options.json_view(), {d_string_scalar.data(), static_cast(d_string_scalar.size())}, d_col_strings, - size, + string_offset.size(), stream); EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::INT64}); @@ -82,9 +81,8 @@ TEST_F(TypeInference, Null) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - std::size_t constexpr size = 3; - auto const string_offset = std::vector{1, 1, 4}; - auto const string_length = std::vector{0, 2, 1}; + auto const string_offset = std::vector{1, 1, 4}; + auto const string_length = std::vector{0, 2, 1}; rmm::device_vector d_string_offset{string_offset}; rmm::device_vector d_string_length{string_length}; @@ -95,7 +93,7 @@ TEST_F(TypeInference, Null) infer_data_type(options.json_view(), {d_string_scalar.data(), static_cast(d_string_scalar.size())}, d_col_strings, - size, + string_offset.size(), stream); EXPECT_EQ(res_type, @@ -115,9 +113,8 @@ TEST_F(TypeInference, AllNull) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - std::size_t constexpr size = 3; - auto const string_offset = std::vector{1, 1, 1}; - auto const string_length = std::vector{0, 0, 4}; + auto const string_offset = std::vector{1, 1, 1}; + auto const string_length = std::vector{0, 0, 4}; rmm::device_vector d_string_offset{string_offset}; rmm::device_vector d_string_length{string_length}; @@ -128,7 +125,7 @@ TEST_F(TypeInference, AllNull) infer_data_type(options.json_view(), {d_string_scalar.data(), static_cast(d_string_scalar.size())}, d_col_strings, - size, + string_offset.size(), stream); EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::INT8}); // INT8 if all nulls @@ -147,9 +144,8 @@ TEST_F(TypeInference, String) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - std::size_t constexpr size = 3; - auto const string_offset = 
std::vector{1, 8, 12}; - auto const string_length = std::vector{6, 3, 4}; + auto const string_offset = std::vector{1, 8, 12}; + auto const string_length = std::vector{6, 3, 4}; rmm::device_vector d_string_offset{string_offset}; rmm::device_vector d_string_length{string_length}; @@ -160,7 +156,7 @@ TEST_F(TypeInference, String) infer_data_type(options.json_view(), {d_string_scalar.data(), static_cast(d_string_scalar.size())}, d_col_strings, - size, + string_offset.size(), stream); EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::STRING}); @@ -179,9 +175,8 @@ TEST_F(TypeInference, Bool) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - std::size_t constexpr size = 3; - auto const string_offset = std::vector{1, 6, 12}; - auto const string_length = std::vector{4, 5, 5}; + auto const string_offset = std::vector{1, 6, 12}; + auto const string_length = std::vector{4, 5, 5}; rmm::device_vector d_string_offset{string_offset}; rmm::device_vector d_string_length{string_length}; @@ -192,7 +187,7 @@ TEST_F(TypeInference, Bool) infer_data_type(options.json_view(), {d_string_scalar.data(), static_cast(d_string_scalar.size())}, d_col_strings, - size, + string_offset.size(), stream); EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::BOOL8}); @@ -211,9 +206,8 @@ TEST_F(TypeInference, Timestamp) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - std::size_t constexpr size = 3; - auto const string_offset = std::vector{1, 10}; - auto const string_length = std::vector{8, 9}; + auto const string_offset = std::vector{1, 10}; + auto const string_length = std::vector{8, 9}; rmm::device_vector d_string_offset{string_offset}; rmm::device_vector d_string_length{string_length}; @@ -224,7 +218,7 @@ TEST_F(TypeInference, Timestamp) infer_data_type(options.json_view(), {d_string_scalar.data(), static_cast(d_string_scalar.size())}, d_col_strings, - size, + string_offset.size(), stream); // All data time 
(quoted and unquoted) is inferred as string for now @@ -244,9 +238,8 @@ TEST_F(TypeInference, InvalidInput) auto d_data = cudf::make_string_scalar(data); auto& d_string_scalar = static_cast(*d_data); - std::size_t constexpr size = 5; - auto const string_offset = std::vector{1, 3, 5, 7, 9}; - auto const string_length = std::vector{1, 1, 1, 1, 1}; + auto const string_offset = std::vector{1, 3, 5, 7, 9}; + auto const string_length = std::vector{1, 1, 1, 1, 1}; rmm::device_vector d_string_offset{string_offset}; rmm::device_vector d_string_length{string_length}; @@ -257,7 +250,7 @@ TEST_F(TypeInference, InvalidInput) infer_data_type(options.json_view(), {d_string_scalar.data(), static_cast(d_string_scalar.size())}, d_col_strings, - size, + string_offset.size(), stream); // Invalid input is inferred as string for now From 3226859c9dd860c5225ffb34cc2de4c0a5e3bf71 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 12 Oct 2022 13:11:59 -0400 Subject: [PATCH 029/202] Fix memcheck error in get_dremel_data (#11903) Fixes logic that applies offsets to nested column children to not write past the end of the offsets vector. 
This error was found by the nightly builds and could be recreated using ``` compute-sanitizer --tool memcheck gtests/PARQUET_TEST --gtest_filter=ParquetReaderTest.NestedByteArray --rmm_mode=cuda ``` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Karthikeyan (https://github.com/karthikeyann) - Vukasin Milovanovic (https://github.com/vuule) - Tobias Ribizel (https://github.com/upsj) URL: https://github.com/rapidsai/cudf/pull/11903 --- cpp/src/lists/dremel.cu | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu index cb9cd4293b5..25094536cce 100644 --- a/cpp/src/lists/dremel.cu +++ b/cpp/src/lists/dremel.cu @@ -225,6 +225,7 @@ dremel_data get_dremel_data(column_view h_col, cudf::detail::device_single_thread( [offset_at_level = d_column_offsets.data(), end_idx_at_level = d_column_ends.data(), + level_max = d_column_offsets.size(), col = *d_col] __device__() { auto curr_col = col; size_type off = curr_col.offset(); @@ -239,9 +240,11 @@ dremel_data get_dremel_data(column_view h_col, if (curr_col.type().id() == type_id::LIST) { off = curr_col.child(lists_column_view::offsets_column_index).element(off); end = curr_col.child(lists_column_view::offsets_column_index).element(end); - offset_at_level[level] = off; - end_idx_at_level[level] = end; - ++level; + if (level < level_max) { + offset_at_level[level] = off; + end_idx_at_level[level] = end; + ++level; + } curr_col = curr_col.child(lists_column_view::child_column_index); } else { curr_col = curr_col.child(0); From 0ca68c79662a476fd930a16323102e7087d8c080 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 12 Oct 2022 23:47:23 -0400 Subject: [PATCH 030/202] Add thrust output iterator fix (1805) to thrust.patch (#11900) Adds fix from https://github.com/NVIDIA/thrust/pull/1805 to libcudf's `thrust.patch` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Mark Harris 
(https://github.com/harrism) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/11900 --- cpp/cmake/thrust.patch | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/cpp/cmake/thrust.patch b/cpp/cmake/thrust.patch index ae1962e4738..0dd9854d4aa 100644 --- a/cpp/cmake/thrust.patch +++ b/cpp/cmake/thrust.patch @@ -114,3 +114,29 @@ index d0e3f94..76774b0 100644 /** * Dispatch between 32-bit and 64-bit index based versions of the same algorithm * implementation. This version allows using different token sequences for callables +diff --git a/thrust/iterator/transform_input_output_iterator.h b/thrust/iterator/transform_input_output_iterator.h +index f512a36..a5f725d 100644 +--- a/thrust/iterator/transform_input_output_iterator.h ++++ b/thrust/iterator/transform_input_output_iterator.h +@@ -102,6 +102,8 @@ template + /*! \endcond + */ + ++ transform_input_output_iterator() = default; ++ + /*! This constructor takes as argument a \c Iterator an \c InputFunction and an + * \c OutputFunction and copies them to a new \p transform_input_output_iterator + * +diff --git a/thrust/iterator/transform_output_iterator.h b/thrust/iterator/transform_output_iterator.h +index 66fb46a..4a68cb5 100644 +--- a/thrust/iterator/transform_output_iterator.h ++++ b/thrust/iterator/transform_output_iterator.h +@@ -104,6 +104,8 @@ template + /*! \endcond + */ + ++ transform_output_iterator() = default; ++ + /*! This constructor takes as argument an \c OutputIterator and an \c + * UnaryFunction and copies them to a new \p transform_output_iterator + * From 678946b52d60b96b673aef299fc2a1f36428df70 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 13 Oct 2022 08:38:03 -0400 Subject: [PATCH 031/202] Fix segmented-sort to ignore indices outside the offsets (#11888) Fixes `cudf::segmented_sorted_order` to ignore indices outside the specified offsets values. 
The segmented-sort function in general sorts subsets of the input using a column of offsets (integers) to identify the position of each segment. Here is an example: ``` input = { 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 } offsets1 = { 0, 3, 7, 10 } ``` There are 3 segments to sort: `[0,3)`, `[3,7)`, and `[7,10)` Segment 1 sorts to `{ 7, 8, 9 }` Segment 2 sorts to `{ 3, 4, 5, 6 }` Segment 3 sorts to `{ 0, 1, 2 }` The segmented-sort result is `{ 7, 8, 9, 3, 4, 5, 6, 0, 1, 2 }` If the offsets do not fully cover all the input the segmented-sort should ignore any segments outside of the offsets. ``` input = { 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 } offsets2 = { 3, 7 } ``` Here there is only 1 segments to sort: `[3,7) => { 3, 4, 5, 6 }` The segmented-sort result is `{ 9, 8, 7, 3, 4, 5, 6, 2, 1, 0 }` The values before the first offset and after the last offset should be left unchanged. The gtests have been corrected to expect this behavior. Also, the `SegmentedReductionTestUntyped.PartialSegmentReduction` gtest was improved to include offset gaps at the beginning and at the end to verify consistent behavior there as well. Found while working on #11729 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - MithunR (https://github.com/mythrocks) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/11888 --- cpp/include/cudf/sorting.hpp | 50 ++++++++++++++++++- cpp/src/sort/segmented_sort.cu | 48 +++++++++++++----- .../reductions/segmented_reduction_tests.cpp | 9 ++-- cpp/tests/sort/segmented_sort_tests.cpp | 16 ++++-- 4 files changed, 100 insertions(+), 23 deletions(-) diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index cf21da1b030..f43089210fd 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -207,9 +207,31 @@ std::unique_ptr rank( /** * @brief Returns sorted order after sorting each segment in the table. 
* - * If segment_offsets contains values larger than number of rows, behavior is undefined. + * If segment_offsets contains values larger than the number of rows, the behavior is undefined. * @throws cudf::logic_error if `segment_offsets` is not `size_type` column. * + * @code{.pseudo} + * Example: + * keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} } + * offsets = {0, 3, 7, 10} + * result = cudf::segmented_sorted_order(keys, offsets); + * result is { 2,1,0, 6,5,4,3, 9,8,7 } + * @endcode + * + * If segment_offsets is empty or contains a single index, no values are sorted + * and the result is a sequence of integers from 0 to keys.size()-1. + * + * The segment_offsets are not required to include all indices. Any indices + * outside the specified segments will not be sorted. + * + * @code{.pseudo} + * Example: (offsets do not cover all indices) + * keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} } + * offsets = {3, 7} + * result = cudf::segmented_sorted_order(keys, offsets); + * result is { 0,1,2, 6,5,4,3, 7,8,9 } + * @endcode + * * @param keys The table that determines the ordering of elements in each segment * @param segment_offsets The column of `size_type` type containing start offset index for each * contiguous segment. @@ -246,10 +268,34 @@ std::unique_ptr stable_segmented_sorted_order( /** * @brief Performs a lexicographic segmented sort of a table * - * If segment_offsets contains values larger than number of rows, behavior is undefined. + * If segment_offsets contains values larger than the number of rows, the behavior is undefined. * @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`. * @throws cudf::logic_error if `segment_offsets` is not `size_type` column. 
* + * @code{.pseudo} + * Example: + * keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} } + * values = { {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'} } + * offsets = {0, 3, 7, 10} + * result = cudf::segmented_sort_by_key(keys, values, offsets); + * result is { 'c','b','a', 'g','f','e','d', 'j','i','h' } + * @endcode + * + * If segment_offsets is empty or contains a single index, no values are sorted + * and the result is a copy of the values. + * + * The segment_offsets are not required to include all indices. Any indices + * outside the specified segments will not be sorted. + * + * @code{.pseudo} + * Example: (offsets do not cover all indices) + * keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} } + * values = { {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'} } + * offsets = {3, 7} + * result = cudf::segmented_sort_by_key(keys, values, offsets); + * result is { 'a','b','c', 'g','f','e','d', 'h','i','j' } + * @endcode + * * @param values The table to reorder * @param keys The table that determines the ordering of elements in each segment * @param segment_offsets The column of `size_type` type containing start offset index for each diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 3422330bf8b..c5f13df5305 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -24,7 +24,6 @@ #include #include -#include namespace cudf { namespace detail { @@ -35,24 +34,49 @@ namespace { */ enum class sort_method { STABLE, UNSTABLE }; -// returns segment indices for each element for all segments. -// first segment begin index = 0, last segment end index = num_rows. +/** + * @brief Builds indices to identify segments to sort + * + * The segments are added to the input table-view keys so they + * are lexicographically sorted within the segmented groups. 
+ * + * ``` + * Example 1: + * num_rows = 10 + * offsets = {0, 3, 7, 10} + * segment-indices -> { 3,3,3, 7,7,7,7, 10,10,10 } + * ``` + * + * ``` + * Example 2: (offsets do not cover all indices) + * num_rows = 10 + * offsets = {3, 7} + * segment-indices -> { 0,1,2, 7,7,7,7, 8,9,10 } + * ``` + * + * @param num_rows Total number of rows in the input keys to sort + * @param offsets The offsets identifying the segments + * @param stream CUDA stream used for device memory operations and kernel launches + */ rmm::device_uvector get_segment_indices(size_type num_rows, column_view const& offsets, rmm::cuda_stream_view stream) { rmm::device_uvector segment_ids(num_rows, stream); - auto offset_begin = offsets.begin(); // assumes already offset column contains offset. - auto offsets_minus_one = thrust::make_transform_iterator( - offset_begin, [offset_begin] __device__(auto i) { return i - 1; }); + auto offset_begin = offsets.begin(); + auto offset_end = offsets.end(); auto counting_iter = thrust::make_counting_iterator(0); - thrust::lower_bound(rmm::exec_policy(stream), - offsets_minus_one, - offsets_minus_one + offsets.size(), - counting_iter, - counting_iter + segment_ids.size(), - segment_ids.begin()); + thrust::transform(rmm::exec_policy(stream), + counting_iter, + counting_iter + segment_ids.size(), + segment_ids.begin(), + [offset_begin, offset_end] __device__(auto idx) { + if (offset_begin == offset_end || idx < *offset_begin) { return idx; } + if (idx >= *(offset_end - 1)) { return idx + 1; } + return static_cast( + *thrust::upper_bound(thrust::seq, offset_begin, offset_end, idx)); + }); return segment_ids; } diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 4fd62f9b938..a8547ea982d 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -655,9 +655,9 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction) auto const 
input = fixed_width_column_wrapper{ {1, 2, 3, 4, 5, 6, 7}, {true, true, true, true, true, true, true}}; - auto const offsets = std::vector{0, 1, 3, 4}; + auto const offsets = std::vector{1, 3, 4}; auto const d_offsets = thrust::device_vector(offsets); - auto const expect = fixed_width_column_wrapper{{1, 5, 4}, {true, true, true}}; + auto const expect = fixed_width_column_wrapper{{5, 4}, {true, true}}; auto res = segmented_reduce(input, d_offsets, @@ -669,7 +669,7 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction) // Test with initial value auto const init_scalar = cudf::make_fixed_width_scalar(3); - auto const init_expect = fixed_width_column_wrapper{{4, 8, 7}, {true, true, true}}; + auto const init_expect = fixed_width_column_wrapper{{8, 7}, {true, true}}; res = segmented_reduce(input, d_offsets, @@ -681,8 +681,7 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction) // Test with null initial value init_scalar->set_valid_async(false); - auto null_init_expect = - fixed_width_column_wrapper{{XXX, XXX, XXX}, {false, false, false}}; + auto null_init_expect = fixed_width_column_wrapper{{XXX, XXX}, {false, false}}; res = segmented_reduce(input, d_offsets, diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index fb07bfde795..53642a89b3d 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -201,9 +201,13 @@ TEST_F(SegmentedSortInt, NonZeroSegmentsStart) column_wrapper segments1{{0, 2, 5, 8, 11}}; column_wrapper segments2{{ 2, 5, 8, 11}}; column_wrapper segments3{{ 6, 8, 11}}; + column_wrapper segments4{{ 6, 8}}; + column_wrapper segments5{{0, 3, 6}}; column_wrapper expected1{{0, 1, 2, 4, 3, 7, 5, 6, 9, 10, 8}}; column_wrapper expected2{{0, 1, 2, 4, 3, 7, 5, 6, 9, 10, 8}}; - column_wrapper expected3{{2, 4, 5, 3, 0, 1, 7, 6, 9, 10, 8}}; + column_wrapper expected3{{0, 1, 2, 3, 4, 5, 7, 6, 9, 10, 8}}; + column_wrapper expected4{{0, 1, 2, 3, 4, 5, 7, 6, 8, 9, 10}}; + column_wrapper expected5{{2, 0, 1, 4, 5, 3, 6, 7, 8, 9, 10}}; // clang-format on table_view input{{col1}}; auto results = cudf::detail::segmented_sorted_order(input, segments1); @@ -212,6 +216,10 @@ TEST_F(SegmentedSortInt, NonZeroSegmentsStart) CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2); results = cudf::detail::segmented_sorted_order(input, segments3); CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected3); + results = cudf::detail::segmented_sorted_order(input, segments4); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected4); + results = cudf::detail::segmented_sorted_order(input, segments5); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected5); } TEST_F(SegmentedSortInt, Sliced) @@ -219,13 +227,13 @@ TEST_F(SegmentedSortInt, Sliced) using T = int; // clang-format off column_wrapper col1{{8, 9, 2, 3, 2, 2, 4, 1, 7, 5, 6}}; - // sliced 2, 2, 4, 1, 7, 5, 6 + // sliced 2, 2, 4, 1, 7, 5, 6 column_wrapper segments1{{0, 2, 5}}; column_wrapper segments2{{-4, 0, 2, 5}}; column_wrapper segments3{{ 7}}; column_wrapper expected1{{0, 1, 3, 2, 4, 5, 6}}; column_wrapper expected2{{0, 1, 3, 2, 4, 5, 6}}; - column_wrapper expected3{{3, 0, 1, 2, 5, 6, 4}}; + column_wrapper expected3{{0, 1, 2, 3, 4, 5, 6}}; // clang-format on auto slice = cudf::slice(col1, {4, 11})[0]; // 7 elements table_view input{{slice}}; From fb0922f9f8f5a14e8dbf0540a3b68eb059e04a35 Mon Sep 17 
00:00:00 2001 From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> Date: Thu, 13 Oct 2022 09:24:59 -0500 Subject: [PATCH 032/202] Fix an issue reading struct-of-list types in Parquet. (#11910) Fixes https://github.com/NVIDIA/spark-rapids/issues/6718 There was a bug introduced recently https://github.com/rapidsai/cudf/pull/11752 where an insufficient check for whether an input column contained repetition information could cause incorrect results for column hierarchies with structs at the root. Authors: - https://github.com/nvdbaranec Approvers: - Jim Brennan (https://github.com/jbrennan333) - Nghia Truong (https://github.com/ttnghia) - Mike Wilson (https://github.com/hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/11910 --- cpp/src/io/parquet/page_data.cu | 7 ++----- cpp/src/io/parquet/parquet_gpu.hpp | 7 +++++++ cpp/src/io/parquet/reader_impl.cu | 4 ++-- cpp/tests/io/parquet_test.cpp | 7 ++++++- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index a5f6d737637..57d55be6145 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1860,11 +1860,8 @@ void PreprocessColumnData(hostdevice_vector& pages, out_buf.create(size, stream, mr); } - // for nested hierarchies, compute per-page start offset. - // it would be better/safer to be checking (schema.max_repetition_level > 0) here, but there's - // no easy way to get at that info here. 
we'd have to move this function into reader_impl.cu - if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) || - out_buf.type.id() == type_id::LIST) { + // for nested hierarchies, compute per-page start offset + if (input_col.has_repetition) { thrust::exclusive_scan_by_key(rmm::exec_policy(stream), page_keys.begin(), page_keys.end(), diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 8f4cd5c6f3b..1a8c0f4cd9e 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -57,9 +57,16 @@ constexpr size_type MAX_DICT_SIZE = (1 << MAX_DICT_BITS) - 1; struct input_column_info { int schema_idx; std::string name; + bool has_repetition; // size == nesting depth. the associated real output // buffer index in the dest column for each level of nesting. std::vector nesting; + + input_column_info(int _schema_idx, std::string _name, bool _has_repetition) + : schema_idx(_schema_idx), name(_name), has_repetition(_has_repetition) + { + } + auto nesting_depth() const { return nesting.size(); } }; diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 07869189089..0997d2a968d 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -684,8 +684,8 @@ class aggregate_reader_metadata { // if I have no children, we're at a leaf and I'm an input column (that is, one with actual // data stored) so add me to the list. 
if (schema_elem.num_children == 0) { - input_column_info& input_col = - input_columns.emplace_back(input_column_info{schema_idx, schema_elem.name}); + input_column_info& input_col = input_columns.emplace_back( + input_column_info{schema_idx, schema_elem.name, schema_elem.max_repetition_level > 0}); // set up child output column for one-level encoding list if (schema_elem.is_one_level_list()) { diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 134eff54144..6f1c5ef7eb1 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -2633,6 +2633,11 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) 0, [string_per_row](cudf::size_type idx) { return idx * string_per_row; }); cudf::test::fixed_width_column_wrapper offsets(offset_iter, offset_iter + num_rows + 1); + + auto _c3_valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 200; }); + std::vector c3_valids(num_rows); + std::copy(_c3_valids, _c3_valids + num_rows, c3_valids.begin()); auto _c3_list = cudf::make_lists_column(num_rows, offsets.release(), @@ -2646,7 +2651,7 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) c3_children.push_back(std::move(c3_list)); c3_children.push_back(c3_ints.release()); c3_children.push_back(c3_floats.release()); - cudf::test::structs_column_wrapper _c3(std::move(c3_children)); + cudf::test::structs_column_wrapper _c3(std::move(c3_children), c3_valids); auto c3 = cudf::purge_nonempty_nulls(static_cast(_c3)); // write it out From 662f309b62b56472d63b6e981b514205b6eab999 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Thu, 13 Oct 2022 23:35:53 +0530 Subject: [PATCH 033/202] Fixes Unsupported column type error due to empty list columns in Nested JSON reader (#11897) Fixes `Unsupported column type` error during cudf column creation in Nested JSON reader due to empty list column. 
During json tree creation, Empty list column does not have `device_json_column` child because it does have any rows, or a type. This PR fixes the issue by creating an empty column as element child column. The list column still retains the null, and empty list information. Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Mike Wilson (https://github.com/hyperbolic2346) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/11897 --- cpp/src/io/json/json_column.cu | 21 +++++++++++++-------- cpp/src/io/json/nested_json.hpp | 3 +++ cpp/src/io/json/nested_json_gpu.cu | 23 ++++++++++++----------- cpp/tests/io/json_test.cpp | 11 +++++++---- cpp/tests/io/json_tree.cpp | 6 +++++- python/cudf/cudf/tests/test_json.py | 18 ++++++++++++++++++ 6 files changed, 58 insertions(+), 24 deletions(-) diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index d54bb5c8ea9..872e742a5af 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -403,7 +403,7 @@ void make_device_json_column(device_span input, std::string name = ""; auto parent_col_id = column_parent_ids[this_col_id]; if (parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST) { - name = "element"; + name = list_child_name; } else if (column_categories[parent_col_id] == NC_FN) { auto field_name_col_id = parent_col_id; parent_col_id = column_parent_ids[parent_col_id]; @@ -689,19 +689,24 @@ std::pair, std::vector> device_json_co size_type num_rows = json_col.child_offsets.size() - 1; std::vector column_names{}; column_names.emplace_back("offsets"); - column_names.emplace_back(json_col.child_columns.begin()->first); + column_names.emplace_back( + json_col.child_columns.empty() ? 
list_child_name : json_col.child_columns.begin()->first); // Note: json_col modified here, reuse the memory auto offsets_column = std::make_unique( data_type{type_id::INT32}, num_rows + 1, json_col.child_offsets.release()); // Create children column auto [child_column, names] = - device_json_column_to_cudf_column(json_col.child_columns.begin()->second, - d_input, - options, - get_child_schema(json_col.child_columns.begin()->first), - stream, - mr); + json_col.child_columns.empty() + ? std::pair, + std::vector>{std::make_unique(), {}} + : device_json_column_to_cudf_column( + json_col.child_columns.begin()->second, + d_input, + options, + get_child_schema(json_col.child_columns.begin()->first), + stream, + mr); column_names.back().children = names; auto [result_bitmask, null_count] = make_validity(json_col); return {make_lists_column(num_rows, diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 10d209b2ea6..8a0f3566d58 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -104,6 +104,9 @@ enum node_t : NodeT { */ enum class json_col_t : char { ListColumn, StructColumn, StringColumn, Unknown }; +// Default name for a list's child column +constexpr auto list_child_name{"element"}; + /** * @brief Intermediate representation of data from a nested JSON input */ diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 5d60a564b9b..29a29a1f9d5 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -1162,9 +1162,6 @@ void make_json_column(json_column& root_column, // Range of encapsulating function that parses to internal columnar data representation CUDF_FUNC_RANGE(); - // Default name for a list's child column - std::string const list_child_name = "element"; - // Parse the JSON and get the token stream const auto [d_tokens_gpu, d_token_indices_gpu] = get_token_stream(d_input, options, stream, mr); @@ -1286,7 +1283,7 @@ void 
make_json_column(json_column& root_column, * (b) a list, the selected child column corresponds to single child column of * the list column. In this case, the child column may not exist yet. */ - auto get_selected_column = [&list_child_name](std::stack& current_data_path) { + auto get_selected_column = [](std::stack& current_data_path) { json_column* selected_col = current_data_path.top().current_selected_col; // If the node does not have a selected column yet @@ -1680,7 +1677,8 @@ std::pair, std::vector> json_column_to size_type num_rows = json_col.child_offsets.size(); std::vector column_names{}; column_names.emplace_back("offsets"); - column_names.emplace_back(json_col.child_columns.begin()->first); + column_names.emplace_back( + json_col.child_columns.empty() ? list_child_name : json_col.child_columns.begin()->first); rmm::device_uvector d_offsets = cudf::detail::make_device_uvector_async(json_col.child_offsets, stream, mr); @@ -1688,12 +1686,15 @@ std::pair, std::vector> json_column_to std::make_unique(data_type{type_id::INT32}, num_rows, d_offsets.release()); // Create children column auto [child_column, names] = - json_column_to_cudf_column(json_col.child_columns.begin()->second, - d_input, - options, - get_child_schema(json_col.child_columns.begin()->first), - stream, - mr); + json_col.child_columns.empty() + ? 
std::pair, + std::vector>{std::make_unique(), {}} + : json_column_to_cudf_column(json_col.child_columns.begin()->second, + d_input, + options, + get_child_schema(json_col.child_columns.begin()->first), + stream, + mr); column_names.back().children = names; auto [result_bitmask, null_count] = make_validity(json_col); return {make_lists_column(num_rows - 1, diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index d7ab881861a..b8cd4622484 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -813,7 +813,6 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsOutOfOrder) cudf::test::strings_column_wrapper({"aaa", "bbb"})); } -/* // currently, the json reader is strict about having non-empty input. TEST_F(JsonReaderTest, EmptyFile) { @@ -824,7 +823,9 @@ TEST_F(JsonReaderTest, EmptyFile) } cudf::io::json_reader_options in_options = - cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true); + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) + .lines(true) + .experimental(true); auto result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); @@ -832,6 +833,7 @@ TEST_F(JsonReaderTest, EmptyFile) } // currently, the json reader is strict about having non-empty input. 
+// experimental reader supports empty input TEST_F(JsonReaderTest, NoDataFile) { auto filepath = temp_env->get_temp_dir() + "NoDataFile.csv"; @@ -841,13 +843,14 @@ TEST_F(JsonReaderTest, NoDataFile) } cudf::io::json_reader_options in_options = - cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true); + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) + .lines(true) + .experimental(true); cudf::io::table_with_metadata result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); EXPECT_EQ(0, view.num_columns()); } -*/ TEST_F(JsonReaderTest, ArrowFileSource) { diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp index 3d024fe8af8..6f7e28a2ca3 100644 --- a/cpp/tests/io/json_tree.cpp +++ b/cpp/tests/io/json_tree.cpp @@ -773,7 +773,11 @@ std::vector json_lines_list = { { "a": { "y" : 6, "z": [] }} { "a": { "y" : 6, "z": [2, 3, 4, 5] }} { "a": { "z": [4], "y" : 6 }} - { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )"}; + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )", + // empty list, row. 
+ R"( {"a" : [], "b" : {}} + {"a" : []} + {"b" : {}})"}; INSTANTIATE_TEST_SUITE_P(Mixed_And_Records, JsonTreeTraversalTest, ::testing::Combine(::testing::Values(false), diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index 1fdef44546a..fb2c24b3757 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -649,6 +649,24 @@ def test_json_nested_data(): assert df.to_arrow().equals(pa_table_pdf) +def test_json_empty_types(): + json_str = """ {} + {"a": [], "b": {}} + {"a": []} + {"b": {}} + {"c": {"d": []}} + {"e": [{}]} + """ + df = cudf.read_json( + StringIO(json_str), + engine="cudf_experimental", + orient="records", + lines=True, + ) + pdf = pd.read_json(StringIO(json_str), orient="records", lines=True) + assert_eq(df, pdf) + + def test_json_types_data(): # 0:<0:string,1:float> # 1:list From c824fee8181d06ba1c05a5de4d4ebc0a52027753 Mon Sep 17 00:00:00 2001 From: Gregory Kimball Date: Thu, 13 Oct 2022 16:21:09 -0700 Subject: [PATCH 034/202] Add clear indication of non-GPU accelerated parameters in read_json docstring (#11825) This PR moves the "pandas engine only" arguments to the end of the optional argument list of the docstring. 
This is the way an `admonition` will look like: Screen Shot 2022-10-11 at 12 06 50 PM Authors: - Gregory Kimball (https://github.com/GregoryKimball) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Lawrence Mitchell (https://github.com/wence-) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/11825 --- python/cudf/cudf/utils/ioutils.py | 80 ++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 366b2e0ebae..0a0647f1297 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -451,7 +451,7 @@ """ doc_to_orc = docfmt_partial(docstring=_docstring_to_orc) -_docstring_read_json = """ +_docstring_read_json = r""" Load a JSON dataset into a DataFrame Parameters @@ -466,8 +466,13 @@ engine : {{ 'auto', 'cudf', 'cudf_experimental', 'pandas' }}, default 'auto' Parser engine to use. If 'auto' is passed, the engine will be automatically selected based on the other parameters. -orient : string, - Indication of expected JSON string format (pandas engine only). +orient : string + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + Indication of expected JSON string format. Compatible JSON strings can be produced by ``to_json()`` with a corresponding orient value. The set of possible orients is: @@ -500,12 +505,23 @@ typ : type of object to recover (series or frame), default 'frame' With cudf engine, only frame output is supported. dtype : boolean or dict, default True - If True, infer dtypes, if a dict of column to dtype, then use those, - if False, then don't infer dtypes at all, applies only to the data. + If True, infer dtypes for all columns; if False, then don't infer dtypes at all, + if a dict, provide a mapping from column names to their respective dtype (any missing + columns will have their dtype inferred). 
Applies only to the data. convert_axes : boolean, default True - Try to convert the axes to the proper dtypes (pandas engine only). + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + Try to convert the axes to the proper dtypes. convert_dates : boolean, default True - List of columns to parse for dates (pandas engine only); If True, then try + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + List of columns to parse for dates; If True, then try to parse datelike columns default is True; a column label is datelike if * it ends with ``'_at'``, @@ -514,27 +530,57 @@ * it is ``'modified'``, or * it is ``'date'`` keep_default_dates : boolean, default True - If parsing dates, parse the default datelike columns (pandas engine only) + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + If parsing dates, parse the default datelike columns. numpy : boolean, default False - Direct decoding to numpy arrays (pandas engine only). Supports numeric + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + Direct decoding to numpy arrays. Supports numeric data only, but non-numeric column and index labels are supported. Note also that the JSON ordering MUST be the same for each term if numpy=True. precise_float : boolean, default False + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + Set to enable usage of higher precision (strtod) function when decoding string to double values (pandas engine only). Default (False) is to use fast but less precise builtin functionality date_unit : string, default None - The timestamp unit to detect if converting dates (pandas engine only). + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. 
+ + The timestamp unit to detect if converting dates. The default behavior is to try and detect the correct precision, but if this is not desired then pass one of 's', 'ms', 'us' or 'ns' to force parsing only seconds, milliseconds, microseconds or nanoseconds. encoding : str, default is 'utf-8' + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + The encoding to use to decode py3 bytes. With cudf engine, only utf-8 is supported. lines : boolean, default False Read the file as a json object per line. chunksize : integer, default None - Return JsonReader object for iteration (pandas engine only). + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + Return JsonReader object for iteration. See the `line-delimited json docs `_ for more information on ``chunksize``. @@ -547,12 +593,22 @@ otherwise. If using 'zip', the ZIP file must contain only one data file to be read in. Set to None for no decompression. byte_range : list or tuple, default None - Byte range within the input file to be read (cudf engine only). + + .. admonition:: GPU-accelerated + + This parameter is only supported with ``engine='cudf'``. + + Byte range within the input file to be read. The first number is the offset in bytes, the second number is the range size in bytes. Set the size to zero to read all data after the offset location. Reads the row that starts before or at the end of the range, even if it ends after the end of the range. keep_quotes : bool, default False + + .. admonition:: GPU-accelerated experimental feature + + This parameter is only supported with ``engine='cudf_experimental'``. + This parameter is only supported in ``cudf_experimental`` engine. If `True`, any string values are read literally (and wrapped in an additional set of quotes). 
From e91d7d9ef1eb3128de99f78d0127050bb12110d4 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Fri, 14 Oct 2022 14:22:41 +0530 Subject: [PATCH 035/202] Reduce memory usage in nested JSON parser - tree generation (#11864) Reduces Memory usage by 53% in nested JSON parser tree generation algorithm. 1GB JSON takes 8.469 GiB instead of 16.957 GiB. All values below are for 1 GB JSON text input. This PR employs following optimisations to reduce memory usage - Modified to generate parent node ids from nodes instead of tokens. (16.957 GB -> 10.957 GiB) - Reordered node_range, node_categories generation to the end. (10.957 GiB -> 9.774 GiB) - Scope limited token_levels (9.774 GiB -> 9.403 GiB) - Used CUB sort instead of `thrust::stable_sort_by_key` (9.403 GiB -> 8.487 GiB) - Used `cub::DoubleBuffer` which eliminates copy of order. (8.487 GiB -> 7.97 GiB) The peak memory is reduced by 53%, parsing bandwidth still remains same. (1.6 GB/s in GV100 for 1GB JSON). Since `get_stack_context` in JSON parser takes highest memory usage (8.469 GB), peak memory is not influenced by JSON tree generation step anymore. Peak memory is now 50% of that of earlier code. 
Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Tobias Ribizel (https://github.com/upsj) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/11864 --- cpp/src/io/json/json_tree.cu | 235 ++++++++++++++++++++++------------- 1 file changed, 150 insertions(+), 85 deletions(-) diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index dbf026c351e..cf041b02a20 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -29,6 +29,8 @@ #include +#include + #include #include #include @@ -39,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -125,6 +128,75 @@ struct node_ranges { } }; +/** + * @brief Returns stable sorted keys and its sorted order + * + * Uses cub stable radix sort. The order is internally generated, hence it saves a copy and memory. + * Since the key and order is returned, using double buffer helps to avoid extra copy to user + * provided output iterator. + * + * @tparam IndexType sorted order type + * @tparam KeyType key type + * @param keys keys to sort + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @return Sorted keys and indices producing that sorted order + */ +template +std::pair, rmm::device_uvector> stable_sorted_key_order( + cudf::device_span keys, rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + + // Determine temporary device storage requirements + rmm::device_uvector keys_buffer1(keys.size(), stream); + rmm::device_uvector keys_buffer2(keys.size(), stream); + rmm::device_uvector order_buffer1(keys.size(), stream); + rmm::device_uvector order_buffer2(keys.size(), stream); + cub::DoubleBuffer order_buffer(order_buffer1.data(), order_buffer2.data()); + cub::DoubleBuffer keys_buffer(keys_buffer1.data(), keys_buffer2.data()); + size_t temp_storage_bytes = 0; + cub::DeviceRadixSort::SortPairs( + nullptr, temp_storage_bytes, keys_buffer, order_buffer, keys.size()); + rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); + + thrust::copy(rmm::exec_policy(stream), keys.begin(), keys.end(), keys_buffer1.begin()); + thrust::sequence(rmm::exec_policy(stream), order_buffer1.begin(), order_buffer1.end()); + + cub::DeviceRadixSort::SortPairs( + d_temp_storage.data(), temp_storage_bytes, keys_buffer, order_buffer, keys.size()); + + return std::pair{keys_buffer.Current() == keys_buffer1.data() ? std::move(keys_buffer1) + : std::move(keys_buffer2), + order_buffer.Current() == order_buffer1.data() ? std::move(order_buffer1) + : std::move(order_buffer2)}; +} + +/** + * @brief Propagate parent node to siblings from first sibling. + * + * @param node_levels Node levels of each node + * @param parent_node_ids parent node ids initialized for first child of each push node, + * and other siblings are initialized to -1. + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ */ +void propagate_parent_to_siblings(cudf::device_span node_levels, + cudf::device_span parent_node_ids, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + auto [sorted_node_levels, sorted_order] = stable_sorted_key_order(node_levels, stream); + // instead of gather, using permutation_iterator, which is ~17% faster + + thrust::inclusive_scan_by_key( + rmm::exec_policy(stream), + sorted_node_levels.begin(), + sorted_node_levels.end(), + thrust::make_permutation_iterator(parent_node_ids.begin(), sorted_order.begin()), + thrust::make_permutation_iterator(parent_node_ids.begin(), sorted_order.begin()), + thrust::equal_to{}, + thrust::maximum{}); +} + // Generates a tree representation of the given tokens, token_indices. tree_meta_t get_tree_representation(device_span tokens, device_span token_indices, @@ -166,12 +238,86 @@ tree_meta_t get_tree_representation(device_span tokens, }; auto num_tokens = tokens.size(); - auto is_node_it = thrust::make_transform_iterator( - tokens.begin(), - [is_node] __device__(auto t) -> size_type { return static_cast(is_node(t)); }); - auto num_nodes = thrust::count_if( + auto num_nodes = thrust::count_if( rmm::exec_policy(stream), tokens.begin(), tokens.begin() + num_tokens, is_node); + // Node levels: transform_exclusive_scan, copy_if. 
+ rmm::device_uvector node_levels(num_nodes, stream, mr); + { + rmm::device_uvector token_levels(num_tokens, stream); + auto push_pop_it = thrust::make_transform_iterator( + tokens.begin(), [does_push, does_pop] __device__(PdaTokenT const token) -> size_type { + return does_push(token) - does_pop(token); + }); + thrust::exclusive_scan( + rmm::exec_policy(stream), push_pop_it, push_pop_it + num_tokens, token_levels.begin()); + + auto node_levels_end = thrust::copy_if(rmm::exec_policy(stream), + token_levels.begin(), + token_levels.begin() + num_tokens, + tokens.begin(), + node_levels.begin(), + is_node); + CUDF_EXPECTS(thrust::distance(node_levels.begin(), node_levels_end) == num_nodes, + "node level count mismatch"); + } + + // Node parent ids: + // previous push node_id transform, stable sort by level, segmented scan with Max, reorder. + rmm::device_uvector parent_node_ids(num_nodes, stream, mr); + // This block of code is generalized logical stack algorithm. TODO: make this a seperate function. + { + rmm::device_uvector node_token_ids(num_nodes, stream); + thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + num_tokens, + tokens.begin(), + node_token_ids.begin(), + is_node); + + // previous push node_id + // if previous node is a push, then i-1 + // if previous node is FE, then i-2 (returns FB's index) + // if previous node is SMB and its previous node is a push, then i-2 + // eg. `{ SMB FB FE VB VE SME` -> `{` index as FB's parent. 
+ // else -1 + auto first_childs_parent_token_id = [tokens_gpu = + tokens.begin()] __device__(auto i) -> NodeIndexT { + if (i <= 0) { return -1; } + if (tokens_gpu[i - 1] == token_t::StructBegin or tokens_gpu[i - 1] == token_t::ListBegin) { + return i - 1; + } else if (tokens_gpu[i - 1] == token_t::FieldNameEnd) { + return i - 2; + } else if (tokens_gpu[i - 1] == token_t::StructMemberBegin and + (tokens_gpu[i - 2] == token_t::StructBegin || + tokens_gpu[i - 2] == token_t::ListBegin)) { + return i - 2; + } else { + return -1; + } + }; + + thrust::transform( + rmm::exec_policy(stream), + node_token_ids.begin(), + node_token_ids.end(), + parent_node_ids.begin(), + [node_ids_gpu = node_token_ids.begin(), num_nodes, first_childs_parent_token_id] __device__( + NodeIndexT const tid) -> NodeIndexT { + auto pid = first_childs_parent_token_id(tid); + return pid < 0 + ? parent_node_sentinel + : thrust::lower_bound(thrust::seq, node_ids_gpu, node_ids_gpu + num_nodes, pid) - + node_ids_gpu; + // parent_node_sentinel is -1, useful for segmented max operation below + }); + } + // Propagate parent node to siblings from first sibling - inplace. + propagate_parent_to_siblings( + cudf::device_span{node_levels.data(), node_levels.size()}, + parent_node_ids, + stream); + // Node categories: copy_if with transform. rmm::device_uvector node_categories(num_nodes, stream, mr); auto node_categories_it = @@ -184,24 +330,6 @@ tree_meta_t get_tree_representation(device_span tokens, CUDF_EXPECTS(node_categories_end - node_categories_it == num_nodes, "node category count mismatch"); - // Node levels: transform_exclusive_scan, copy_if. 
- rmm::device_uvector token_levels(num_tokens, stream); - auto push_pop_it = thrust::make_transform_iterator( - tokens.begin(), [does_push, does_pop] __device__(PdaTokenT const token) -> size_type { - return does_push(token) - does_pop(token); - }); - thrust::exclusive_scan( - rmm::exec_policy(stream), push_pop_it, push_pop_it + num_tokens, token_levels.begin()); - - rmm::device_uvector node_levels(num_nodes, stream, mr); - auto node_levels_end = thrust::copy_if(rmm::exec_policy(stream), - token_levels.begin(), - token_levels.begin() + num_tokens, - tokens.begin(), - node_levels.begin(), - is_node); - CUDF_EXPECTS(node_levels_end - node_levels.begin() == num_nodes, "node level count mismatch"); - // Node ranges: copy_if with transform. rmm::device_uvector node_range_begin(num_nodes, stream, mr); rmm::device_uvector node_range_end(num_nodes, stream, mr); @@ -223,69 +351,6 @@ tree_meta_t get_tree_representation(device_span tokens, }); CUDF_EXPECTS(node_range_out_end - node_range_out_it == num_nodes, "node range count mismatch"); - // Node parent ids: previous push token_id transform, stable sort, segmented scan with Max, - // reorder, copy_if. This one is sort of logical stack. But more generalized. - // TODO: make it own function. - rmm::device_uvector parent_token_ids(num_tokens, stream); - rmm::device_uvector initial_order(num_tokens, stream); - // TODO re-write the algorithm to work only on nodes, not tokens. - - thrust::sequence(rmm::exec_policy(stream), initial_order.begin(), initial_order.end()); - thrust::tabulate(rmm::exec_policy(stream), - parent_token_ids.begin(), - parent_token_ids.end(), - [does_push, tokens_gpu = tokens.begin()] __device__(auto i) -> size_type { - return (i > 0) && does_push(tokens_gpu[i - 1]) ? i - 1 : -1; - // -1, not sentinel used here because of max operation below - }); - - auto out_pid = thrust::make_zip_iterator(parent_token_ids.data(), initial_order.data()); - // Uses radix sort for builtin types. 
- thrust::stable_sort_by_key(rmm::exec_policy(stream), - token_levels.data(), - token_levels.data() + token_levels.size(), - out_pid); - - // SegmentedScan Max. - thrust::inclusive_scan_by_key(rmm::exec_policy(stream), - token_levels.data(), - token_levels.data() + token_levels.size(), - parent_token_ids.data(), - parent_token_ids.data(), - thrust::equal_to{}, - thrust::maximum{}); - - // scatter to restore the original order. - { - rmm::device_uvector temp_storage(num_tokens, stream); - thrust::scatter(rmm::exec_policy(stream), - parent_token_ids.begin(), - parent_token_ids.end(), - initial_order.begin(), - temp_storage.begin()); - thrust::copy( - rmm::exec_policy(stream), temp_storage.begin(), temp_storage.end(), parent_token_ids.begin()); - } - - rmm::device_uvector node_ids_gpu(num_tokens, stream); - thrust::exclusive_scan( - rmm::exec_policy(stream), is_node_it, is_node_it + num_tokens, node_ids_gpu.begin()); - - rmm::device_uvector parent_node_ids(num_nodes, stream, mr); - auto parent_node_ids_it = thrust::make_transform_iterator( - parent_token_ids.begin(), - [node_ids_gpu = node_ids_gpu.begin()] __device__(size_type const pid) -> NodeIndexT { - return pid < 0 ? parent_node_sentinel : node_ids_gpu[pid]; - }); - auto parent_node_ids_end = thrust::copy_if(rmm::exec_policy(stream), - parent_node_ids_it, - parent_node_ids_it + parent_token_ids.size(), - tokens.begin(), - parent_node_ids.begin(), - is_node); - CUDF_EXPECTS(parent_node_ids_end - parent_node_ids.begin() == num_nodes, - "parent node id gather mismatch"); - return {std::move(node_categories), std::move(parent_node_ids), std::move(node_levels), From 8a31e26b420afa7ea7aa0255a8fca002f2f47fd5 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 14 Oct 2022 19:05:21 +0200 Subject: [PATCH 036/202] Fix local offset handling in bgzip reader (#11918) We accidentally checked the local offset against the compressed, not the uncompressed size. The new test failed prior to fixing the behavior. 
Authors: - Tobias Ribizel (https://github.com/upsj) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/11918 --- cpp/src/io/text/bgzip_data_chunk_source.cu | 4 +- cpp/tests/io/text/data_chunk_source_test.cpp | 72 ++++++++++++-------- 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 9c4ff218783..e4b6bad614d 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -271,8 +271,8 @@ class bgzip_data_chunk_reader : public data_chunk_reader { // seek to the beginning of the provided local offset auto const local_pos = virtual_begin & 0xFFFFu; if (local_pos > 0) { - CUDF_EXPECTS(_curr_blocks.h_compressed_offsets.size() > 1 && - local_pos < _curr_blocks.h_compressed_offsets[1], + CUDF_EXPECTS(_curr_blocks.h_decompressed_offsets.size() > 1 && + local_pos < _curr_blocks.h_decompressed_offsets[1], "local part of virtual offset is out of bounds"); _curr_blocks.consume_bytes(local_pos); } diff --git a/cpp/tests/io/text/data_chunk_source_test.cpp b/cpp/tests/io/text/data_chunk_source_test.cpp index 7cb75aea8e2..2111d66a066 100644 --- a/cpp/tests/io/text/data_chunk_source_test.cpp +++ b/cpp/tests/io/text/data_chunk_source_test.cpp @@ -130,6 +130,11 @@ enum class compression { ENABLED, DISABLED }; enum class eof { ADD_EOF_BLOCK, NO_EOF_BLOCK }; +uint64_t virtual_offset(std::size_t block_offset, std::size_t local_offset) +{ + return (block_offset << 16) | local_offset; +} + void write_bgzip(std::ostream& output_stream, cudf::host_span data, std::default_random_engine& rng, @@ -193,6 +198,7 @@ TEST_F(DataChunkSourceTest, BgzipSource) { auto const filename = temp_env->get_temp_filepath("bgzip_source"); std::string input{"bananarama"}; + input.reserve(input.size() << 25); for (int i = 0; i < 
24; i++) { input = input + input; } @@ -211,13 +217,11 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsets) { auto const filename = temp_env->get_temp_filepath("bgzip_source_offsets"); std::string input{"bananarama"}; + input.reserve(input.size() << 25); for (int i = 0; i < 24; i++) { input = input + input; } - std::string padding_garbage{"garbage"}; - for (int i = 0; i < 10; i++) { - padding_garbage = padding_garbage + padding_garbage; - } + std::string const padding_garbage(10000, 'g'); std::string const data_garbage{"GARBAGE"}; std::string const begininput{"begin of bananarama"}; std::string const endinput{"end of bananarama"}; @@ -241,10 +245,10 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsets) } input = begininput + input + endinput; - auto const source = - cudf::io::text::make_source_from_bgzip_file(filename, - begin_compressed_offset << 16 | begin_local_offset, - end_compressed_offset << 16 | end_local_offset); + auto const source = cudf::io::text::make_source_from_bgzip_file( + filename, + virtual_offset(begin_compressed_offset, begin_local_offset), + virtual_offset(end_compressed_offset, end_local_offset)); test_source(input, *source); } @@ -255,8 +259,6 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleGZipBlock) std::string const input{"collection unit brings"}; std::string const head_garbage{"garbage"}; std::string const tail_garbage{"GARBAGE"}; - std::size_t begin_compressed_offset{}; - std::size_t end_compressed_offset{}; std::size_t const begin_local_offset{head_garbage.size()}; std::size_t const end_local_offset{head_garbage.size() + input.size()}; { @@ -266,10 +268,8 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleGZipBlock) cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {}); } - auto const source = - cudf::io::text::make_source_from_bgzip_file(filename, - begin_compressed_offset << 16 | begin_local_offset, - end_compressed_offset << 16 | end_local_offset); + auto const source = 
cudf::io::text::make_source_from_bgzip_file( + filename, virtual_offset(0, begin_local_offset), virtual_offset(0, end_local_offset)); test_source(input, *source); } @@ -280,7 +280,6 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleChunk) std::string const input{"collection unit brings"}; std::string const head_garbage{"garbage"}; std::string const tail_garbage{"GARBAGE"}; - std::size_t begin_compressed_offset{}; std::size_t end_compressed_offset{}; std::size_t const begin_local_offset{head_garbage.size()}; std::size_t const end_local_offset{input.size() - 10}; @@ -294,10 +293,10 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleChunk) cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {}); } - auto const source = - cudf::io::text::make_source_from_bgzip_file(filename, - begin_compressed_offset << 16 | begin_local_offset, - end_compressed_offset << 16 | end_local_offset); + auto const source = cudf::io::text::make_source_from_bgzip_file( + filename, + virtual_offset(0, begin_local_offset), + virtual_offset(end_compressed_offset, end_local_offset)); test_source(input, *source); } @@ -306,13 +305,11 @@ TEST_F(DataChunkSourceTest, BgzipCompressedSourceVirtualOffsets) { auto const filename = temp_env->get_temp_filepath("bgzip_source_compressed_offsets"); std::string input{"bananarama"}; + input.reserve(input.size() << 25); for (int i = 0; i < 24; i++) { input = input + input; } - std::string padding_garbage{"garbage"}; - for (int i = 0; i < 10; i++) { - padding_garbage = padding_garbage + padding_garbage; - } + std::string const padding_garbage(10000, 'g'); std::string const data_garbage{"GARBAGE"}; std::string const begininput{"begin of bananarama"}; std::string const endinput{"end of bananarama"}; @@ -335,10 +332,31 @@ TEST_F(DataChunkSourceTest, BgzipCompressedSourceVirtualOffsets) } input = begininput + input + endinput; - auto source = - cudf::io::text::make_source_from_bgzip_file(filename, - begin_compressed_offset << 
16 | begin_local_offset, - end_compressed_offset << 16 | end_local_offset); + auto source = cudf::io::text::make_source_from_bgzip_file( + filename, + virtual_offset(begin_compressed_offset, begin_local_offset), + virtual_offset(end_compressed_offset, end_local_offset)); + test_source(input, *source); +} + +TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleCompressedGZipBlock) +{ + auto const filename = temp_env->get_temp_filepath("bgzip_source_offsets_single_compressed_block"); + std::string const input{"collection unit brings"}; + std::string const head_garbage(10000, 'g'); + std::string const tail_garbage{"GARBAGE"}; + std::size_t const begin_local_offset{head_garbage.size()}; + std::size_t const end_local_offset{head_garbage.size() + input.size()}; + { + std::ofstream output_stream{filename}; + cudf::io::text::detail::bgzip::write_compressed_block(output_stream, + head_garbage + input + tail_garbage); + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {}); + } + + auto const source = cudf::io::text::make_source_from_bgzip_file( + filename, virtual_offset(0, begin_local_offset), virtual_offset(0, end_local_offset)); + test_source(input, *source); } From 759825359b61dfbbf2d8464d8701f4eadcb253e5 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 14 Oct 2022 13:32:10 -0400 Subject: [PATCH 037/202] Add libcudf strings examples (#11849) Creates example for calling libcudf APIs for strings processing. This also includes examples of building custom kernels for modifying libcudf strings columns. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Robert Maynard (https://github.com/robertmaynard) - Mark Sadang (https://github.com/msadang) - https://github.com/nvdbaranec URL: https://github.com/rapidsai/cudf/pull/11849 --- ci/release/update-version.sh | 1 + cpp/examples/basic/CMakeLists.txt | 2 +- cpp/examples/build.sh | 9 +- cpp/examples/strings/CMakeLists.txt | 49 ++++++ cpp/examples/strings/common.hpp | 114 ++++++++++++++ cpp/examples/strings/custom_optimized.cu | 165 +++++++++++++++++++++ cpp/examples/strings/custom_prealloc.cu | 126 ++++++++++++++++ cpp/examples/strings/custom_with_malloc.cu | 158 ++++++++++++++++++++ cpp/examples/strings/libcudf_apis.cpp | 62 ++++++++ cpp/examples/strings/names.csv | 20 +++ 10 files changed, 704 insertions(+), 2 deletions(-) create mode 100644 cpp/examples/strings/CMakeLists.txt create mode 100644 cpp/examples/strings/common.hpp create mode 100644 cpp/examples/strings/custom_optimized.cu create mode 100644 cpp/examples/strings/custom_prealloc.cu create mode 100644 cpp/examples/strings/custom_with_malloc.cu create mode 100644 cpp/examples/strings/libcudf_apis.cpp create mode 100644 cpp/examples/strings/names.csv diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 8fad4e08c56..c23f558f071 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -74,6 +74,7 @@ sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md # Libcudf examples update sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/basic/CMakeLists.txt +sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/strings/CMakeLists.txt # ucx-py version update sed_runner "s/export UCX_PY_VERSION=.*/export UCX_PY_VERSION='${NEXT_UCX_PY_VERSION}'/g" ci/gpu/build.sh diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt index 
b182cb08774..7e7c6b191b5 100644 --- a/cpp/examples/basic/CMakeLists.txt +++ b/cpp/examples/basic/CMakeLists.txt @@ -1,6 +1,6 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. -cmake_minimum_required(VERSION 3.18) +cmake_minimum_required(VERSION 3.23.1) project( basic_example diff --git a/cpp/examples/build.sh b/cpp/examples/build.sh index 079f7358872..7d389cd318d 100755 --- a/cpp/examples/build.sh +++ b/cpp/examples/build.sh @@ -17,8 +17,15 @@ LIB_BUILD_DIR=${LIB_BUILD_DIR:-$(readlink -f "${EXAMPLES_DIR}/../build")} # Basic example BASIC_EXAMPLE_DIR=${EXAMPLES_DIR}/basic BASIC_EXAMPLE_BUILD_DIR=${BASIC_EXAMPLE_DIR}/build - # Configure cmake -S ${BASIC_EXAMPLE_DIR} -B ${BASIC_EXAMPLE_BUILD_DIR} -Dcudf_ROOT="${LIB_BUILD_DIR}" # Build cmake --build ${BASIC_EXAMPLE_BUILD_DIR} -j${PARALLEL_LEVEL} + +# Strings example +STRINGS_EXAMPLE_DIR=${EXAMPLES_DIR}/strings +STRINGS_EXAMPLE_BUILD_DIR=${STRINGS_EXAMPLE_DIR}/build +# Configure +cmake -S ${STRINGS_EXAMPLE_DIR} -B ${STRINGS_EXAMPLE_BUILD_DIR} -Dcudf_ROOT="${LIB_BUILD_DIR}" +# Build +cmake --build ${STRINGS_EXAMPLE_BUILD_DIR} -j${PARALLEL_LEVEL} diff --git a/cpp/examples/strings/CMakeLists.txt b/cpp/examples/strings/CMakeLists.txt new file mode 100644 index 00000000000..1a16b2bc8fd --- /dev/null +++ b/cpp/examples/strings/CMakeLists.txt @@ -0,0 +1,49 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. 
+ +cmake_minimum_required(VERSION 3.23.1) + +project( + strings_examples + VERSION 0.0.1 + LANGUAGES CXX CUDA +) + +set(CPM_DOWNLOAD_VERSION v0.35.3) +file( + DOWNLOAD + https://github.com/cpm-cmake/CPM.cmake/releases/download/${CPM_DOWNLOAD_VERSION}/get_cpm.cmake + ${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake +) +include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) + +set(CUDF_TAG branch-22.12) +CPMFindPackage( + NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf + GIT_TAG ${CUDF_TAG} + GIT_SHALLOW + TRUE + SOURCE_SUBDIR + cpp +) + +list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) + +# +add_executable(libcudf_apis libcudf_apis.cpp) +target_compile_features(libcudf_apis PRIVATE cxx_std_17) +target_link_libraries(libcudf_apis PRIVATE cudf::cudf nvToolsExt) + +add_executable(custom_with_malloc custom_with_malloc.cu) +target_compile_features(custom_with_malloc PRIVATE cxx_std_17) +target_compile_options(custom_with_malloc PRIVATE "$<$:${CUDF_CUDA_FLAGS}>") +target_link_libraries(custom_with_malloc PRIVATE cudf::cudf nvToolsExt) + +add_executable(custom_prealloc custom_prealloc.cu) +target_compile_features(custom_prealloc PRIVATE cxx_std_17) +target_compile_options(custom_prealloc PRIVATE "$<$:${CUDF_CUDA_FLAGS}>") +target_link_libraries(custom_prealloc PRIVATE cudf::cudf nvToolsExt) + +add_executable(custom_optimized custom_optimized.cu) +target_compile_features(custom_optimized PRIVATE cxx_std_17) +target_compile_options(custom_optimized PRIVATE "$<$:${CUDF_CUDA_FLAGS}>") +target_link_libraries(custom_optimized PRIVATE cudf::cudf nvToolsExt) diff --git a/cpp/examples/strings/common.hpp b/cpp/examples/strings/common.hpp new file mode 100644 index 00000000000..dbd3c4dbd1b --- /dev/null +++ b/cpp/examples/strings/common.hpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +/** + * @brief Main example function returns redacted strings column. + * + * This function returns a redacted version of the input `names` column + * using the `visibilities` column as in the following example + * ``` + * names visibility --> redacted + * John Doe public D John + * Bobby Joe private X X + * ``` + * + * @param names First and last names separated with a single space + * @param visibilities String values `public` or `private` only + * @return Redacted strings column + */ +std::unique_ptr redact_strings(cudf::column_view const& names, + cudf::column_view const& visibilities); + +/** + * @brief Create CUDA memory resource + */ +auto make_cuda_mr() { return std::make_shared(); } + +/** + * @brief Create a pool device memory resource + */ +auto make_pool_mr() +{ + return rmm::mr::make_owning_wrapper(make_cuda_mr()); +} + +/** + * @brief Create memory resource for libcudf functions + */ +std::shared_ptr create_memory_resource(std::string const& name) +{ + if (name == "pool") { return make_pool_mr(); } + return make_cuda_mr(); +} + +/** + * @brief Main for strings examples + * + * Command line parameters: + * 1. CSV file name/path + * 2. Memory resource (optional): 'pool' or 'cuda' + * + * The stdout includes the number of rows in the input and the output size in bytes.
+ */ +int main(int argc, char const** argv) +{ + if (argc < 2) { + std::cout << "required parameter: csv-file-path\n"; + return 1; + } + + auto const mr_name = std::string{argc > 2 ? std::string(argv[2]) : std::string("cuda")}; + auto resource = create_memory_resource(mr_name); + rmm::mr::set_current_device_resource(resource.get()); + + auto const csv_file = std::string{argv[1]}; + auto const csv_result = [csv_file] { + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_file}).header(-1); + return cudf::io::read_csv(in_opts).tbl; + }(); + auto const csv_table = csv_result->view(); + + std::cout << "table: " << csv_table.num_rows() << " rows " << csv_table.num_columns() + << " columns\n"; + + auto st = std::chrono::steady_clock::now(); + auto result = redact_strings(csv_table.column(0), csv_table.column(1)); + + std::chrono::duration elapsed = std::chrono::steady_clock::now() - st; + std::cout << "Wall time: " << elapsed.count() << " seconds\n"; + std::cout << "Output size " << result->view().child(1).size() << " bytes\n"; + + return 0; +} diff --git a/cpp/examples/strings/custom_optimized.cu b/cpp/examples/strings/custom_optimized.cu new file mode 100644 index 00000000000..bfe650daa93 --- /dev/null +++ b/cpp/examples/strings/custom_optimized.cu @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common.hpp" + +#include +#include + +#include +#include +#include + +#include + +#include +#include + +/** + * @brief Computes the size of each output row + * + * This thread is called once per row in d_names. + * + * @param d_names Column of names + * @param d_visibilities Column of visibilities + * @param d_sizes Output sizes for each row + */ +__global__ void sizes_kernel(cudf::column_device_view const d_names, + cudf::column_device_view const d_visibilities, + cudf::size_type* d_sizes) +{ + // The row index is resolved from the CUDA thread/block objects + auto index = threadIdx.x + blockIdx.x * blockDim.x; + // There may be more threads than actual rows + if (index >= d_names.size()) return; + + auto const visible = cudf::string_view("public", 6); + auto const redaction = cudf::string_view("X X", 3); + + auto const name = d_names.element(index); + auto const vis = d_visibilities.element(index); + + cudf::size_type result = redaction.size_bytes(); // init to redaction size + if (vis == visible) { + auto const space_idx = name.find(' '); + auto const first = name.substr(0, space_idx); + auto const last_initial = name.substr(space_idx + 1, 1); + + result = first.size_bytes() + last_initial.size_bytes() + 1; + } + + d_sizes[index] = result; +} + +/** + * @brief Builds the output for each row + * + * This thread is called once per row in d_names. 
+ * + * @param d_names Column of names + * @param d_visibilities Column of visibilities + * @param d_offsets Byte offset in `d_chars` for each row + * @param d_chars Output memory for all rows + */ +__global__ void redact_kernel(cudf::column_device_view const d_names, + cudf::column_device_view const d_visibilities, + cudf::size_type const* d_offsets, + char* d_chars) +{ + // The row index is resolved from the CUDA thread/block objects + auto index = threadIdx.x + blockIdx.x * blockDim.x; + // There may be more threads than actual rows + if (index >= d_names.size()) return; + + auto const visible = cudf::string_view("public", 6); + auto const redaction = cudf::string_view("X X", 3); + + // resolve output_ptr using the offsets vector + char* output_ptr = d_chars + d_offsets[index]; + + auto const name = d_names.element(index); + auto const vis = d_visibilities.element(index); + + if (vis == visible) { + auto const space_idx = name.find(' '); + auto const first = name.substr(0, space_idx); + auto const last_initial = name.substr(space_idx + 1, 1); + auto const output_size = first.size_bytes() + last_initial.size_bytes() + 1; + + // build output string + memcpy(output_ptr, last_initial.data(), last_initial.size_bytes()); + output_ptr += last_initial.size_bytes(); + *output_ptr++ = ' '; + memcpy(output_ptr, first.data(), first.size_bytes()); + } else { + memcpy(output_ptr, redaction.data(), redaction.size_bytes()); + } +} + +/** + * @brief Redacts each name per the corresponding visibility entry + * + * This implementation builds the strings column children (offsets and chars) + * directly into device memory for libcudf. 
+ * + * @param names Column of names + * @param visibilities Column of visibilities + * @return Redacted column of names + */ +std::unique_ptr redact_strings(cudf::column_view const& names, + cudf::column_view const& visibilities) +{ + // all device memory operations and kernel functions will run on this stream + auto stream = rmm::cuda_stream_default; + + auto const d_names = cudf::column_device_view::create(names, stream); + auto const d_visibilities = cudf::column_device_view::create(visibilities, stream); + + constexpr int block_size = 128; // this arbitrary size should be a power of 2 + int const blocks = (names.size() + block_size - 1) / block_size; + + nvtxRangePushA("redact_strings"); + + // create offsets vector + auto offsets = rmm::device_uvector(names.size() + 1, stream); + + // compute output sizes + sizes_kernel<<>>( + *d_names, *d_visibilities, offsets.data()); + + // convert sizes to offsets (in place) + thrust::exclusive_scan(rmm::exec_policy(stream), offsets.begin(), offsets.end(), offsets.begin()); + + // last element is the total output size + // (device-to-host copy of 1 integer -- includes synching the stream) + cudf::size_type output_size = offsets.back_element(stream); + + // create chars vector + auto chars = rmm::device_uvector(output_size, stream); + + // build chars output + redact_kernel<<>>( + *d_names, *d_visibilities, offsets.data(), chars.data()); + + // create column from offsets and chars vectors (no copy is performed) + auto result = cudf::make_strings_column(names.size(), std::move(offsets), std::move(chars)); + + // wait for all of the above to finish + stream.synchronize(); + + nvtxRangePop(); + return result; +} diff --git a/cpp/examples/strings/custom_prealloc.cu b/cpp/examples/strings/custom_prealloc.cu new file mode 100644 index 00000000000..c0bae03af5c --- /dev/null +++ b/cpp/examples/strings/custom_prealloc.cu @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common.hpp" + +#include +#include +#include + +#include + +#include +#include + +/** + * @brief Builds the output for each row + * + * This thread is called once per row in d_names. + * + * @param d_names Column of names + * @param d_visibilities Column of visibilities + * @param redaction Redacted string replacement + * @param working_memory Output memory for all rows + * @param d_offsets Byte offset in `d_chars` for each row + * @param d_output Output array of string_view objects + */ +__global__ void redact_kernel(cudf::column_device_view const d_names, + cudf::column_device_view const d_visibilities, + cudf::string_view redaction, + char* working_memory, + cudf::offset_type const* d_offsets, + cudf::string_view* d_output) +{ + // The row index is resolved from the CUDA thread/block objects + auto index = threadIdx.x + blockIdx.x * blockDim.x; + // There may be more threads than actual rows + if (index >= d_names.size()) return; + + auto const visible = cudf::string_view("public", 6); + + auto const name = d_names.element(index); + auto const vis = d_visibilities.element(index); + if (vis == visible) { + auto const space_idx = name.find(' '); + auto const first = name.substr(0, space_idx); + auto const last_initial = name.substr(space_idx + 1, 1); + auto const output_size = first.size_bytes() + last_initial.size_bytes() + 1; + + char* output_ptr = working_memory + d_offsets[index]; + 
d_output[index] = cudf::string_view{output_ptr, output_size}; + + // build output string + memcpy(output_ptr, last_initial.data(), last_initial.size_bytes()); + output_ptr += last_initial.size_bytes(); + *output_ptr++ = ' '; + memcpy(output_ptr, first.data(), first.size_bytes()); + } else { + d_output[index] = cudf::string_view{redaction.data(), redaction.size_bytes()}; + } +} + +/** + * @brief Redacts each name per the corresponding visibility entry + * + * This implementation builds the individual strings into a fixed memory buffer + * and then calls a factory function to gather them into a strings column. + * + * @param names Column of names + * @param visibilities Column of visibilities + * @return Redacted column of names + */ +std::unique_ptr redact_strings(cudf::column_view const& names, + cudf::column_view const& visibilities) +{ + // all device memory operations and kernel functions will run on this stream + auto stream = rmm::cuda_stream_default; + + auto const d_names = cudf::column_device_view::create(names, stream); + auto const d_visibilities = cudf::column_device_view::create(visibilities, stream); + auto const d_redaction = cudf::string_scalar(std::string("X X"), true, stream); + + constexpr int block_size = 128; // this arbitrary size should be a power of 2 + auto const blocks = (names.size() + block_size - 1) / block_size; + + nvtxRangePushA("redact_strings"); + + auto const scv = cudf::strings_column_view(names); + auto const offsets = scv.offsets_begin(); + + // create working memory to hold the output of each string + auto working_memory = rmm::device_uvector(scv.chars_size(), stream); + // create a vector for the output strings' pointers + auto str_ptrs = rmm::device_uvector(names.size(), stream); + + // build the output strings + redact_kernel<<>>(*d_names, + *d_visibilities, + d_redaction.value(), + working_memory.data(), + offsets, + str_ptrs.data()); + + // create strings column from the string_pairs; + // this copies all the individual 
strings into a single output column + auto result = cudf::make_strings_column(str_ptrs, cudf::string_view{nullptr, 0}, stream); + // temporary memory cleanup cost here for str_ptrs and working_memory + + // wait for all of the above to finish + stream.synchronize(); + + nvtxRangePop(); + return result; +} diff --git a/cpp/examples/strings/custom_with_malloc.cu b/cpp/examples/strings/custom_with_malloc.cu new file mode 100644 index 00000000000..f1d397ef007 --- /dev/null +++ b/cpp/examples/strings/custom_with_malloc.cu @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common.hpp" + +#include +#include +#include + +#include + +#include +#include + +/** + * @brief Reserve CUDA malloc heap size + * + * Call this function to change the CUDA malloc heap size limit. + * This value depends on the total size of all the malloc() + * calls needed for redact_kernel. 
+ * + * @param heap_size Number of bytes to reserve + * Default is 1GB + */ +void set_malloc_heap_size(size_t heap_size = 1073741824) // 1GB +{ + size_t max_malloc_heap_size = 0; + cudaDeviceGetLimit(&max_malloc_heap_size, cudaLimitMallocHeapSize); + if (max_malloc_heap_size < heap_size) { + max_malloc_heap_size = heap_size; + if (cudaDeviceSetLimit(cudaLimitMallocHeapSize, max_malloc_heap_size) != cudaSuccess) { + fprintf(stderr, "could not set malloc heap size to %ldMB\n", (heap_size / (1024 * 1024))); + throw std::runtime_error(""); + } + } +} + +/** + * @brief Builds the output for each row + * + * This thread is called once per row in d_names. + * + * Note: This uses malloc() in a device kernel which works great + * but is not very efficient. This can be useful for prototyping + * on functions where performance is not yet important. + * All calls to malloc() must have a corresponding free() call. + * The separate free_kernel is launched for this purpose. + * + * @param d_names Column of names + * @param d_visibilities Column of visibilities + * @param redaction Redacted string replacement + * @param d_output Output array of string_view objects + */ +__global__ void redact_kernel(cudf::column_device_view const d_names, + cudf::column_device_view const d_visibilities, + cudf::string_view redaction, + cudf::string_view* d_output) +{ + // The row index is resolved from the CUDA thread/block objects + auto index = threadIdx.x + blockIdx.x * blockDim.x; + // There may be more threads than actual rows + if (index >= d_names.size()) return; + + auto const visible = cudf::string_view("public", 6); + + auto const name = d_names.element(index); + auto const vis = d_visibilities.element(index); + if (vis == visible) { + auto const space_idx = name.find(' '); + auto const first = name.substr(0, space_idx); + auto const last_initial = name.substr(space_idx + 1, 1); + auto const output_size = first.size_bytes() + last_initial.size_bytes() + 1; + + char* output_ptr = 
static_cast(malloc(output_size)); + d_output[index] = cudf::string_view{output_ptr, output_size}; + + // build output string + memcpy(output_ptr, last_initial.data(), last_initial.size_bytes()); + output_ptr += last_initial.size_bytes(); + *output_ptr++ = ' '; + memcpy(output_ptr, first.data(), first.size_bytes()); + } else { + d_output[index] = cudf::string_view{redaction.data(), redaction.size_bytes()}; + } +} + +/** + * @brief Frees the temporary individual string objects created in the + * redact_kernel + * + * Like malloc(), free() is not very efficient but must be called for + * each malloc() to return the memory to the CUDA malloc heap. + * + * @param redaction Redacted string replacement (not to be freed) + * @param d_output Output array of string_view objects to free + */ +__global__ void free_kernel(cudf::string_view redaction, cudf::string_view* d_output, int count) +{ + auto index = threadIdx.x + blockIdx.x * blockDim.x; + if (index >= count) return; + + auto ptr = const_cast(d_output[index].data()); + if (ptr != redaction.data()) { free(ptr); } +} + +std::unique_ptr redact_strings(cudf::column_view const& names, + cudf::column_view const& visibilities) +{ + // all device memory operations and kernel functions will run on this stream + auto stream = rmm::cuda_stream_default; + + set_malloc_heap_size(); // to illustrate adjusting the malloc heap + + auto const d_names = cudf::column_device_view::create(names, stream); + auto const d_visibilities = cudf::column_device_view::create(visibilities, stream); + auto const d_redaction = cudf::string_scalar(std::string("X X"), true, stream); + + constexpr int block_size = 128; // this arbitrary size should be a power of 2 + auto const blocks = (names.size() + block_size - 1) / block_size; + + nvtxRangePushA("redact_strings"); + + // create a vector for the output strings' pointers + auto str_ptrs = new rmm::device_uvector(names.size(), stream); + + auto result = [&] { + // build the output strings + 
redact_kernel<<>>( + *d_names, *d_visibilities, d_redaction.value(), str_ptrs->data()); + // create strings column from the string_view vector + // this copies all the individual strings into a single output column + return cudf::make_strings_column(*str_ptrs, cudf::string_view{nullptr, 0}, stream); + }(); + + // free the individual temporary memory pointers + free_kernel<<>>( + d_redaction.value(), str_ptrs->data(), names.size()); + delete str_ptrs; + + // wait for all of the above to finish + stream.synchronize(); + + nvtxRangePop(); + return result; +} diff --git a/cpp/examples/strings/libcudf_apis.cpp b/cpp/examples/strings/libcudf_apis.cpp new file mode 100644 index 00000000000..009e92d8a0d --- /dev/null +++ b/cpp/examples/strings/libcudf_apis.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/** + * @brief Redacts each name per the corresponding visibility entry + * + * This implementation uses libcudf APIs to create the output result. 
+ * + * @param names Column of names + * @param visibilities Column of visibilities + * @return Redacted column of names + */ +std::unique_ptr redact_strings(cudf::column_view const& names, + cudf::column_view const& visibilities) +{ + auto const visible = cudf::string_scalar(std::string("public")); + auto const redaction = cudf::string_scalar(std::string("X X")); + + nvtxRangePushA("redact_strings"); + + auto const allowed = cudf::strings::contains(visibilities, visible); + auto const redacted = cudf::copy_if_else(names, redaction, allowed->view()); + auto const first_last = cudf::strings::split(redacted->view()); + auto const first = first_last->view().column(0); + auto const last = first_last->view().column(1); + auto const last_initial = cudf::strings::slice_strings(last, 0, 1); + + auto const last_initial_first = cudf::table_view({last_initial->view(), first}); + + auto result = cudf::strings::concatenate(last_initial_first, std::string(" ")); + + cudaStreamSynchronize(0); + + nvtxRangePop(); + return result; +} diff --git a/cpp/examples/strings/names.csv b/cpp/examples/strings/names.csv new file mode 100644 index 00000000000..77dca3e02af --- /dev/null +++ b/cpp/examples/strings/names.csv @@ -0,0 +1,20 @@ +John Doe,public +Jane Doe,private +Billy Joe,private +James James,public +Michael Frederick,public +Christopher Cheryl,public +Jessica Autumn,public +Matthew Tyrone,public +Ashley Martha,public +Jennifer Omar,public +Joshua Lydia,public +Amanda Jerome,public +Daniel Theodore,public +David Abby,public +James Neil,public +Robert Shawna,private +John Sierra,private +Joseph Nina,private +Andrew Tammy,private +Ryan Nikki,public From c265c58502e629814a036488d153516724afdebd Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 14 Oct 2022 17:22:29 -0400 Subject: [PATCH 038/202] Fix cudf::stable_sorted_order for NaN and -NaN in FLOAT64 columns (#11874) Fixes bug in `cudf::stable_sorted_order` when `-NaN` and `NaN` are 
in a FLOAT64 (double) columns. The code was fixed by refactoring common code with `cudf::sorted_order`. This uses thrust sort functions to help align the behavior and keep results consistent. New gtests were added to check for this case. Some test files were also updated per issue #11734 The new tests are at the bottom of `sort_test.cpp` and `stable_sort_tests.cpp` This was found while working on #11729 The sorted-order functions are reused for many of the libcudf sort functions so this will help with the work in #11729 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/11874 --- cpp/src/sort/sort_column.cu | 104 +------- cpp/src/sort/sort_column_impl.cuh | 152 +++++++++++ cpp/src/sort/stable_sort_column.cu | 72 +---- cpp/tests/sort/sort_test.cpp | 386 ++++++++++++++------------- cpp/tests/sort/stable_sort_tests.cpp | 162 ++++++----- 5 files changed, 459 insertions(+), 417 deletions(-) create mode 100644 cpp/src/sort/sort_column_impl.cuh diff --git a/cpp/src/sort/sort_column.cu b/cpp/src/sort/sort_column.cu index 01ca36874e4..cf8b72f85ad 100644 --- a/cpp/src/sort/sort_column.cu +++ b/cpp/src/sort/sort_column.cu @@ -14,108 +14,16 @@ * limitations under the License. */ -#include +#include + +#include +#include +#include -#include #include -#include namespace cudf { namespace detail { -namespace { - -/** - * @brief Type-dispatched functor for sorting a single column. - */ -struct column_sorted_order_fn { - /** - * @brief Compile time check for allowing radix sort for column type. - * - * Floating point is removed here for special handling of NaNs. - */ - template - static constexpr bool is_radix_sort_supported() - { - return cudf::is_fixed_width() && !cudf::is_floating_point(); - } - - /** - * @brief Sorts fixed-width columns using faster thrust sort. 
- * - * @param input Column to sort - * @param indices Output sorted indices - * @param ascending True if sort order is ascending - * @param stream CUDA stream used for device memory operations and kernel launches - */ - template ()>* = nullptr> - void radix_sort(column_view const& input, - mutable_column_view& indices, - bool ascending, - rmm::cuda_stream_view stream) - { - // A non-stable sort on a column of arithmetic type with no nulls will use a radix sort - // if specifying only the `thrust::less` or `thrust::greater` comparators. - // But this also requires making a copy of the input data. - auto temp_col = column(input, stream); - auto d_col = temp_col.mutable_view(); - if (ascending) { - thrust::sort_by_key(rmm::exec_policy(stream), - d_col.begin(), - d_col.end(), - indices.begin(), - thrust::less()); - } else { - thrust::sort_by_key(rmm::exec_policy(stream), - d_col.begin(), - d_col.end(), - indices.begin(), - thrust::greater()); - } - } - template ()>* = nullptr> - void radix_sort(column_view const&, mutable_column_view&, bool, rmm::cuda_stream_view) - { - CUDF_FAIL("Only fixed-width types are suitable for faster sorting"); - } - - /** - * @brief Sorts a single column with a relationally comparable type. - * - * This includes numeric, timestamp, duration, and string types. 
- * - * @param input Column to sort - * @param indices Output sorted indices - * @param ascending True if sort order is ascending - * @param null_precedence How null rows are to be ordered - * @param stream CUDA stream used for device memory operations and kernel launches - */ - template ()>* = nullptr> - void operator()(column_view const& input, - mutable_column_view& indices, - bool ascending, - null_order null_precedence, - rmm::cuda_stream_view stream) - { - // column with nulls or non-supported types will also use a comparator - if (input.has_nulls() || !is_radix_sort_supported()) { - auto keys = column_device_view::create(input, stream); - thrust::sort(rmm::exec_policy(stream), - indices.begin(), - indices.end(), - simple_comparator{*keys, input.has_nulls(), ascending, null_precedence}); - } else { - radix_sort(input, indices, ascending, stream); - } - } - - template ()>* = nullptr> - void operator()(column_view const&, mutable_column_view&, bool, null_order, rmm::cuda_stream_view) - { - CUDF_FAIL("Column type must be relationally comparable"); - } -}; - -} // namespace /** * @copydoc @@ -134,7 +42,7 @@ std::unique_ptr sorted_order(column_view const& input, thrust::sequence( rmm::exec_policy(stream), indices_view.begin(), indices_view.end(), 0); cudf::type_dispatcher(input.type(), - column_sorted_order_fn{}, + column_sorted_order_fn{}, input, indices_view, column_order == order::ASCENDING, diff --git a/cpp/src/sort/sort_column_impl.cuh b/cpp/src/sort/sort_column_impl.cuh new file mode 100644 index 00000000000..acafe4b5a5c --- /dev/null +++ b/cpp/src/sort/sort_column_impl.cuh @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +namespace cudf { +namespace detail { + +template +struct column_sorted_order_fn { + /** + * @brief Compile time check for allowing faster sort. + * + * Faster sort is defined for fixed-width types where only + * the primitive comparators thrust::greater or thrust::less + * are needed. + * + * Floating point is removed here for special handling of NaNs + * which require the row-comparator. + */ + template + static constexpr bool is_faster_sort_supported() + { + return cudf::is_fixed_width() && !cudf::is_floating_point(); + } + + /** + * @brief Sorts fixed-width columns using faster thrust sort. + * + * Should not be called if `input.has_nulls()==true` + * + * @param input Column to sort + * @param indices Output sorted indices + * @param ascending True if sort order is ascending + * @param stream CUDA stream used for device memory operations and kernel launches + */ + template + void faster_sort(column_view const& input, + mutable_column_view& indices, + bool ascending, + rmm::cuda_stream_view stream) + { + // A thrust sort on a column of primitive types will use a radix sort. + // For other fixed-width types, thrust will use merge-sort. + // But this also requires making a copy of the input data. 
+ auto temp_col = column(input, stream); + auto d_col = temp_col.mutable_view(); + if (ascending) { + if constexpr (stable) { + thrust::stable_sort_by_key(rmm::exec_policy(stream), + d_col.begin(), + d_col.end(), + indices.begin(), + thrust::less()); + } else { + thrust::sort_by_key(rmm::exec_policy(stream), + d_col.begin(), + d_col.end(), + indices.begin(), + thrust::less()); + } + } else { + if constexpr (stable) { + thrust::stable_sort_by_key(rmm::exec_policy(stream), + d_col.begin(), + d_col.end(), + indices.begin(), + thrust::greater()); + } else { + thrust::sort_by_key(rmm::exec_policy(stream), + d_col.begin(), + d_col.end(), + indices.begin(), + thrust::greater()); + } + } + } + + /** + * @brief Sorts a single column with a relationally comparable type. + * + * This is used when a comparator is required. + * + * @param input Column to sort + * @param indices Output sorted indices + * @param ascending True if sort order is ascending + * @param null_precedence How null rows are to be ordered + * @param stream CUDA stream used for device memory operations and kernel launches + */ + template + void sorted_order(column_view const& input, + mutable_column_view& indices, + bool ascending, + null_order null_precedence, + rmm::cuda_stream_view stream) + { + auto keys = column_device_view::create(input, stream); + auto comp = simple_comparator{*keys, input.has_nulls(), ascending, null_precedence}; + if constexpr (stable) { + thrust::stable_sort( + rmm::exec_policy(stream), indices.begin(), indices.end(), comp); + } else { + thrust::sort( + rmm::exec_policy(stream), indices.begin(), indices.end(), comp); + } + } + + template ())> + void operator()(column_view const& input, + mutable_column_view& indices, + bool ascending, + null_order null_precedence, + rmm::cuda_stream_view stream) + { + if constexpr (is_faster_sort_supported()) { + if (input.has_nulls()) { + sorted_order(input, indices, ascending, null_precedence, stream); + } else { + faster_sort(input, indices, 
ascending, stream); + } + } else { + sorted_order(input, indices, ascending, null_precedence, stream); + } + } + + template ())> + void operator()(column_view const&, mutable_column_view&, bool, null_order, rmm::cuda_stream_view) + { + CUDF_FAIL("Column type must be relationally comparable"); + } +}; + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/sort/stable_sort_column.cu b/cpp/src/sort/stable_sort_column.cu index 7f8ab778f53..d11ddef1965 100644 --- a/cpp/src/sort/stable_sort_column.cu +++ b/cpp/src/sort/stable_sort_column.cu @@ -14,76 +14,16 @@ * limitations under the License. */ -#include +#include + +#include +#include +#include #include -#include namespace cudf { namespace detail { -namespace { - -struct column_stable_sorted_order_fn { - /** - * @brief Stable sort of fixed-width columns using a thrust sort with no comparator. - * - * @param input Column to sort - * @param indices Output sorted indices - * @param stream CUDA stream used for device memory operations and kernel launches - */ - template ()>* = nullptr> - void faster_stable_sort(column_view const& input, - mutable_column_view& indices, - rmm::cuda_stream_view stream) - { - auto temp_col = column(input, stream); - auto d_col = temp_col.mutable_view(); - thrust::stable_sort_by_key( - rmm::exec_policy(stream), d_col.begin(), d_col.end(), indices.begin()); - } - template ()>* = nullptr> - void faster_stable_sort(column_view const&, mutable_column_view&, rmm::cuda_stream_view) - { - CUDF_FAIL("Only fixed-width types are suitable for faster stable sorting"); - } - - /** - * @brief Stable sorts a single column with a relationally comparable type. - * - * This includes numeric, timestamp, duration, and string types. 
- * - * @param input Column to sort - * @param indices Output sorted indices - * @param ascending True if sort order is ascending - * @param null_precedence How null rows are to be ordered - * @param stream CUDA stream used for device memory operations and kernel launches - */ - template ()>* = nullptr> - void operator()(column_view const& input, - mutable_column_view& indices, - bool ascending, - null_order null_precedence, - rmm::cuda_stream_view stream) - { - if (!ascending || input.has_nulls() || !cudf::is_fixed_width()) { - auto keys = column_device_view::create(input, stream); - thrust::stable_sort( - rmm::exec_policy(stream), - indices.begin(), - indices.end(), - simple_comparator{*keys, input.has_nulls(), ascending, null_precedence}); - } else { - faster_stable_sort(input, indices, stream); - } - } - template ()>* = nullptr> - void operator()(column_view const&, mutable_column_view&, bool, null_order, rmm::cuda_stream_view) - { - CUDF_FAIL("Column type must be relationally comparable"); - } -}; - -} // namespace /** * @copydoc @@ -102,7 +42,7 @@ std::unique_ptr sorted_order(column_view const& input, thrust::sequence( rmm::exec_policy(stream), indices_view.begin(), indices_view.end(), 0); cudf::type_dispatcher(input.type(), - column_stable_sorted_order_fn{}, + column_sorted_order_fn{}, input, indices_view, column_order == order::ASCENDING, diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index 4092597d8e3..82af21cd7af 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -32,22 +32,20 @@ #include #include -namespace cudf { -namespace test { -void run_sort_test(table_view input, - column_view expected_sorted_indices, - std::vector column_order = {}, - std::vector null_precedence = {}) +void run_sort_test(cudf::table_view input, + cudf::column_view expected_sorted_indices, + std::vector column_order = {}, + std::vector null_precedence = {}) { // Sorted table - auto got_sorted_table = sort(input, column_order, 
null_precedence); - auto expected_sorted_table = gather(input, expected_sorted_indices); + auto got_sorted_table = cudf::sort(input, column_order, null_precedence); + auto expected_sorted_table = cudf::gather(input, expected_sorted_indices); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sorted_table->view(), got_sorted_table->view()); // Sorted by key - auto got_sort_by_key_table = sort_by_key(input, input, column_order, null_precedence); - auto expected_sort_by_key_table = gather(input, expected_sorted_indices); + auto got_sort_by_key_table = cudf::sort_by_key(input, input, column_order, null_precedence); + auto expected_sort_by_key_table = cudf::gather(input, expected_sorted_indices); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort_by_key_table->view(), got_sort_by_key_table->view()); } @@ -56,7 +54,7 @@ using TestTypes = cudf::test::Concat; // include timestamps and durations template -struct Sort : public BaseFixture { +struct Sort : public cudf::test::BaseFixture { }; TYPED_TEST_SUITE(Sort, TestTypes); @@ -65,17 +63,19 @@ TYPED_TEST(Sort, WithNullMax) { using T = TypeParam; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1}); - fixed_width_column_wrapper col3{{10, 40, 70, 5, 2, 10}, {1, 1, 0, 1, 1, 1}}; - table_view input{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 5, 2, 10}, {1, 1, 0, 1, 1, 1}}; + cudf::table_view input{{col1, col2, col3}}; - fixed_width_column_wrapper expected{{1, 0, 5, 3, 4, 2}}; - std::vector column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING}; - std::vector null_precedence{null_order::AFTER, null_order::AFTER, null_order::AFTER}; + cudf::test::fixed_width_column_wrapper expected{{1, 0, 5, 3, 4, 2}}; + std::vector 
column_order{ + cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING}; + std::vector null_precedence{ + cudf::null_order::AFTER, cudf::null_order::AFTER, cudf::null_order::AFTER}; // Sorted order - auto got = sorted_order(input, column_order, null_precedence); + auto got = cudf::sorted_order(input, column_order, null_precedence); if (!std::is_same_v) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); @@ -85,7 +85,7 @@ TYPED_TEST(Sort, WithNullMax) } else { // for bools only validate that the null element landed at the back, since // the rest of the values are equivalent and yields random sorted order. - auto to_host = [](column_view const& col) { + auto to_host = [](cudf::column_view const& col) { thrust::host_vector h_data(col.size()); CUDF_CUDA_TRY(cudaMemcpy( h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); @@ -96,7 +96,7 @@ TYPED_TEST(Sort, WithNullMax) EXPECT_EQ(h_exp[h_exp.size() - 1], h_got[h_got.size() - 1]); // Run test for sort and sort_by_key - fixed_width_column_wrapper expected_for_bool{{0, 3, 5, 1, 4, 2}}; + cudf::test::fixed_width_column_wrapper expected_for_bool{{0, 3, 5, 1, 4, 2}}; run_sort_test(input, expected_for_bool, column_order, null_precedence); } } @@ -105,15 +105,16 @@ TYPED_TEST(Sort, WithNullMin) { using T = TypeParam; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}, {1, 1, 0, 1, 1}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {1, 1, 0, 1, 1}); - fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}, {1, 1, 0, 1, 1}}; - table_view input{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}, {1, 1, 0, 1, 1}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {1, 1, 0, 1, 1}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}, {1, 1, 0, 1, 1}}; + cudf::table_view input{{col1, col2, col3}}; - fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}}; - std::vector column_order{order::ASCENDING, order::ASCENDING, 
order::DESCENDING}; + cudf::test::fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}}; + std::vector column_order{ + cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING}; - auto got = sorted_order(input, column_order); + auto got = cudf::sorted_order(input, column_order); if (!std::is_same_v) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); @@ -123,7 +124,7 @@ TYPED_TEST(Sort, WithNullMin) } else { // for bools only validate that the null element landed at the front, since // the rest of the values are equivalent and yields random sorted order. - auto to_host = [](column_view const& col) { + auto to_host = [](cudf::column_view const& col) { thrust::host_vector h_data(col.size()); CUDF_CUDA_TRY(cudaMemcpy( h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); @@ -134,7 +135,7 @@ TYPED_TEST(Sort, WithNullMin) EXPECT_EQ(h_exp.front(), h_got.front()); // Run test for sort and sort_by_key - fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}}; + cudf::test::fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}}; run_sort_test(input, expected_for_bool, column_order); } } @@ -143,23 +144,25 @@ TYPED_TEST(Sort, WithMixedNullOrder) { using T = TypeParam; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}, {0, 0, 1, 1, 0}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {0, 1, 0, 0, 1}); - fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}, {1, 0, 1, 0, 1}}; - table_view input{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}, {0, 0, 1, 1, 0}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {0, 1, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}, {1, 0, 1, 0, 1}}; + cudf::table_view input{{col1, col2, col3}}; - fixed_width_column_wrapper expected{{2, 3, 0, 1, 4}}; - std::vector column_order{order::ASCENDING, order::ASCENDING, order::ASCENDING}; - std::vector null_precedence{null_order::AFTER, null_order::BEFORE, 
null_order::AFTER}; + cudf::test::fixed_width_column_wrapper expected{{2, 3, 0, 1, 4}}; + std::vector column_order{ + cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::ASCENDING}; + std::vector null_precedence{ + cudf::null_order::AFTER, cudf::null_order::BEFORE, cudf::null_order::AFTER}; - auto got = sorted_order(input, column_order, null_precedence); + auto got = cudf::sorted_order(input, column_order, null_precedence); if (!std::is_same_v) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); } else { // for bools only validate that the null element landed at the front, since // the rest of the values are equivalent and yields random sorted order. - auto to_host = [](column_view const& col) { + auto to_host = [](cudf::column_view const& col) { thrust::host_vector h_data(col.size()); CUDF_CUDA_TRY(cudaMemcpy( h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); @@ -178,15 +181,16 @@ TYPED_TEST(Sort, WithAllValid) { using T = TypeParam; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k"}); - fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; - table_view input{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; + cudf::table_view input{{col1, col2, col3}}; - fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}}; - std::vector column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING}; + cudf::test::fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}}; + std::vector column_order{ + cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING}; - auto got = sorted_order(input, column_order); + auto got = cudf::sorted_order(input, column_order); // Skip validating bools order. 
Valid true bools are all // equivalent, and yield random order after thrust::sort @@ -197,7 +201,7 @@ TYPED_TEST(Sort, WithAllValid) run_sort_test(input, expected, column_order); } else { // Run test for sort and sort_by_key - fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}}; + cudf::test::fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}}; run_sort_test(input, expected_for_bool, column_order); } } @@ -224,16 +228,18 @@ TYPED_TEST(Sort, WithStructColumn) auto struct_col_view{struct_col->view()}; EXPECT_EQ(num_rows, struct_col->size()); - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8, 9}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k", "a"}); - fixed_width_column_wrapper col3{{10, 40, 70, 5, 2, 20}}; - table_view input{{col1, col2, col3, struct_col_view}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8, 9}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k", "a"}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 5, 2, 20}}; + cudf::table_view input{{col1, col2, col3, struct_col_view}}; - fixed_width_column_wrapper expected{{2, 1, 0, 3, 4, 5}}; - std::vector column_order{ - order::ASCENDING, order::ASCENDING, order::DESCENDING, order::ASCENDING}; + cudf::test::fixed_width_column_wrapper expected{{2, 1, 0, 3, 4, 5}}; + std::vector column_order{cudf::order::ASCENDING, + cudf::order::ASCENDING, + cudf::order::DESCENDING, + cudf::order::ASCENDING}; - auto got = sorted_order(input, column_order); + auto got = cudf::sorted_order(input, column_order); // Skip validating bools order. 
Valid true bools are all // equivalent, and yield random order after thrust::sort @@ -244,7 +250,7 @@ TYPED_TEST(Sort, WithStructColumn) run_sort_test(input, expected, column_order); } else { // Run test for sort and sort_by_key - fixed_width_column_wrapper expected_for_bool{{2, 5, 3, 0, 1, 4}}; + cudf::test::fixed_width_column_wrapper expected_for_bool{{2, 5, 3, 0, 1, 4}}; run_sort_test(input, expected_for_bool, column_order); } } @@ -271,14 +277,15 @@ TYPED_TEST(Sort, WithNestedStructColumn) auto struct_col_view{struct_col2->view()}; - fixed_width_column_wrapper col1{{6, 6, 6, 6, 6, 6}}; - fixed_width_column_wrapper col2{{1, 1, 1, 2, 2, 2}}; - table_view input{{col1, col2, struct_col_view}}; + cudf::test::fixed_width_column_wrapper col1{{6, 6, 6, 6, 6, 6}}; + cudf::test::fixed_width_column_wrapper col2{{1, 1, 1, 2, 2, 2}}; + cudf::table_view input{{col1, col2, struct_col_view}}; - fixed_width_column_wrapper expected{{3, 5, 4, 2, 1, 0}}; - std::vector column_order{order::ASCENDING, order::DESCENDING, order::ASCENDING}; + cudf::test::fixed_width_column_wrapper expected{{3, 5, 4, 2, 1, 0}}; + std::vector column_order{ + cudf::order::ASCENDING, cudf::order::DESCENDING, cudf::order::ASCENDING}; - auto got = sorted_order(input, column_order); + auto got = cudf::sorted_order(input, column_order); // Skip validating bools order. 
Valid true bools are all // equivalent, and yield random order after thrust::sort @@ -289,7 +296,7 @@ TYPED_TEST(Sort, WithNestedStructColumn) run_sort_test(input, expected, column_order); } else { // Run test for sort and sort_by_key - fixed_width_column_wrapper expected_for_bool{{2, 5, 1, 3, 4, 0}}; + cudf::test::fixed_width_column_wrapper expected_for_bool{{2, 5, 1, 3, 4, 0}}; run_sort_test(input, expected_for_bool, column_order); } } @@ -346,7 +353,7 @@ TYPED_TEST(Sort, WithNullableStructColumn) auto s1 = make_struct(std::move(s1_children), s1_mask); auto expect = fwcw{4, 5, 7, 3, 2, 0, 6, 1, 8}; - run_sort_test(table_view({s1->view()}), expect); + run_sort_test(cudf::table_view({s1->view()}), expect); } { /* /+-------------+ @@ -384,7 +391,7 @@ TYPED_TEST(Sort, WithNullableStructColumn) auto s12 = make_struct(std::move(s12_children), s1_mask); auto expect = fwcw{4, 5, 7, 0, 6, 1, 2, 3, 8}; - run_sort_test(table_view({s12->view()}), expect); + run_sort_test(cudf::table_view({s12->view()}), expect); } } @@ -406,12 +413,12 @@ TYPED_TEST(Sort, WithSingleStructColumn) auto struct_col = cudf::test::structs_column_wrapper{{names_col, ages_col, is_human_col}, v}.release(); auto struct_col_view{struct_col->view()}; - table_view input{{struct_col_view}}; + cudf::table_view input{{struct_col_view}}; - fixed_width_column_wrapper expected{{2, 5, 1, 3, 4, 0}}; - std::vector column_order{order::ASCENDING}; + cudf::test::fixed_width_column_wrapper expected{{2, 5, 1, 3, 4, 0}}; + std::vector column_order{cudf::order::ASCENDING}; - auto got = sorted_order(input, column_order); + auto got = cudf::sorted_order(input, column_order); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); @@ -443,13 +450,13 @@ TYPED_TEST(Sort, WithSlicedStructColumn) auto col2 = FWCW{{ 1, 1, 0, 0, 0, 2, 1, 3}}; auto col3 = FWCW{{ 7, 8, 1, 1, 9, 5, 7, 3}}; auto col1 = cudf::test::strings_column_wrapper{names.begin(), names.end(), string_valids.begin()}; - auto struct_col = 
structs_column_wrapper{{col1, col2, col3}}.release(); + auto struct_col = cudf::test::structs_column_wrapper{{col1, col2, col3}}.release(); // clang-format on auto struct_col_view{struct_col->view()}; - table_view input{{struct_col_view}}; - auto sliced_columns = cudf::split(struct_col_view, std::vector{3}); - auto sliced_tables = cudf::split(input, std::vector{3}); - std::vector column_order{order::ASCENDING}; + cudf::table_view input{{struct_col_view}}; + auto sliced_columns = cudf::split(struct_col_view, std::vector{3}); + auto sliced_tables = cudf::split(input, std::vector{3}); + std::vector column_order{cudf::order::ASCENDING}; /* asce_null_first sliced[3:] /+-------------+ @@ -467,30 +474,30 @@ TYPED_TEST(Sort, WithSlicedStructColumn) */ // normal - fixed_width_column_wrapper expected{{7, 2, 4, 3, 6, 0, 1, 5}}; - auto got = sorted_order(input, column_order); + cudf::test::fixed_width_column_wrapper expected{{7, 2, 4, 3, 6, 0, 1, 5}}; + auto got = cudf::sorted_order(input, column_order); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); // Run test for sort and sort_by_key run_sort_test(input, expected, column_order); // table with sliced column - table_view input2{{sliced_columns[1]}}; - fixed_width_column_wrapper expected2{{4, 1, 0, 3, 2}}; - got = sorted_order(input2, column_order); + cudf::table_view input2{{sliced_columns[1]}}; + cudf::test::fixed_width_column_wrapper expected2{{4, 1, 0, 3, 2}}; + got = cudf::sorted_order(input2, column_order); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); // Run test for sort and sort_by_key run_sort_test(input2, expected2, column_order); // sliced table[1] - fixed_width_column_wrapper expected3{{4, 1, 0, 3, 2}}; - got = sorted_order(sliced_tables[1], column_order); + cudf::test::fixed_width_column_wrapper expected3{{4, 1, 0, 3, 2}}; + got = cudf::sorted_order(sliced_tables[1], column_order); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got->view()); // Run test for sort and sort_by_key 
run_sort_test(sliced_tables[1], expected3, column_order); // sliced table[0] - fixed_width_column_wrapper expected4{{2, 0, 1}}; - got = sorted_order(sliced_tables[0], column_order); + cudf::test::fixed_width_column_wrapper expected4{{2, 0, 1}}; + got = cudf::sorted_order(sliced_tables[0], column_order); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view()); // Run test for sort and sort_by_key run_sort_test(sliced_tables[0], expected4, column_order); @@ -507,25 +514,25 @@ TYPED_TEST(Sort, SlicedColumns) auto col2 = FWCW{{ 7, 8, 1, 1, 9, 5, 7, 3}}; auto col1 = cudf::test::strings_column_wrapper{names.begin(), names.end(), string_valids.begin()}; // clang-format on - table_view input{{col1, col2}}; - auto sliced_columns1 = cudf::split(col1, std::vector{3}); - auto sliced_columns2 = cudf::split(col1, std::vector{3}); - auto sliced_tables = cudf::split(input, std::vector{3}); - std::vector column_order{order::ASCENDING, order::ASCENDING}; + cudf::table_view input{{col1, col2}}; + auto sliced_columns1 = cudf::split(col1, std::vector{3}); + auto sliced_columns2 = cudf::split(col1, std::vector{3}); + auto sliced_tables = cudf::split(input, std::vector{3}); + std::vector column_order{cudf::order::ASCENDING, cudf::order::ASCENDING}; // normal - // fixed_width_column_wrapper expected{{2, 3, 7, 5, 0, 6, 1, 4}}; - fixed_width_column_wrapper expected{{7, 2, 4, 3, 6, 0, 1, 5}}; - auto got = sorted_order(input, column_order); + // cudf::test::fixed_width_column_wrapper expected{{2, 3, 7, 5, 0, 6, 1, 4}}; + cudf::test::fixed_width_column_wrapper expected{{7, 2, 4, 3, 6, 0, 1, 5}}; + auto got = cudf::sorted_order(input, column_order); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); // Run test for sort and sort_by_key run_sort_test(input, expected, column_order); // table with sliced column - table_view input2{{sliced_columns1[1], sliced_columns2[1]}}; - // fixed_width_column_wrapper expected2{{0, 4, 2, 3, 1}}; - fixed_width_column_wrapper expected2{{4, 1, 0, 3, 2}}; - got 
= sorted_order(input2, column_order); + cudf::table_view input2{{sliced_columns1[1], sliced_columns2[1]}}; + // cudf::test::fixed_width_column_wrapper expected2{{0, 4, 2, 3, 1}}; + cudf::test::fixed_width_column_wrapper expected2{{4, 1, 0, 3, 2}}; + got = cudf::sorted_order(input2, column_order); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); // Run test for sort and sort_by_key run_sort_test(input2, expected2, column_order); @@ -572,37 +579,37 @@ TYPED_TEST(Sort, WithStructColumnCombinations) */ // clang-format on auto struct_col_view{struct_col->view()}; - table_view input{{struct_col_view}}; - std::vector column_order1{order::DESCENDING}; + cudf::table_view input{{struct_col_view}}; + std::vector column_order1{cudf::order::DESCENDING}; // desc_nulls_first - fixed_width_column_wrapper expected1{{2, 4, 3, 5, 6, 7, 1, 0}}; - auto got = sorted_order(input, column_order1, {null_order::AFTER}); + cudf::test::fixed_width_column_wrapper expected1{{2, 4, 3, 5, 6, 7, 1, 0}}; + auto got = cudf::sorted_order(input, column_order1, {cudf::null_order::AFTER}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view()); // Run test for sort and sort_by_key - run_sort_test(input, expected1, column_order1, {null_order::AFTER}); + run_sort_test(input, expected1, column_order1, {cudf::null_order::AFTER}); // desc_nulls_last - fixed_width_column_wrapper expected2{{1, 0, 6, 7, 3, 5, 2, 4}}; - got = sorted_order(input, column_order1, {null_order::BEFORE}); + cudf::test::fixed_width_column_wrapper expected2{{1, 0, 6, 7, 3, 5, 2, 4}}; + got = cudf::sorted_order(input, column_order1, {cudf::null_order::BEFORE}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); // Run test for sort and sort_by_key - run_sort_test(input, expected2, column_order1, {null_order::BEFORE}); + run_sort_test(input, expected2, column_order1, {cudf::null_order::BEFORE}); // asce_nulls_first - std::vector column_order2{order::ASCENDING}; - fixed_width_column_wrapper expected3{{2, 4, 3, 5, 7, 6, 0, 1}}; - 
got = sorted_order(input, column_order2, {null_order::BEFORE}); + std::vector column_order2{cudf::order::ASCENDING}; + cudf::test::fixed_width_column_wrapper expected3{{2, 4, 3, 5, 7, 6, 0, 1}}; + got = cudf::sorted_order(input, column_order2, {cudf::null_order::BEFORE}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got->view()); // Run test for sort and sort_by_key - run_sort_test(input, expected3, column_order2, {null_order::BEFORE}); + run_sort_test(input, expected3, column_order2, {cudf::null_order::BEFORE}); // asce_nulls_last - fixed_width_column_wrapper expected4{{0, 1, 7, 6, 3, 5, 2, 4}}; - got = sorted_order(input, column_order2, {null_order::AFTER}); + cudf::test::fixed_width_column_wrapper expected4{{0, 1, 7, 6, 3, 5, 2, 4}}; + got = cudf::sorted_order(input, column_order2, {cudf::null_order::AFTER}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view()); // Run test for sort and sort_by_key - run_sort_test(input, expected4, column_order2, {null_order::AFTER}); + run_sort_test(input, expected4, column_order2, {cudf::null_order::AFTER}); } TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls) @@ -645,93 +652,94 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls) */ // clang-format on auto struct_col_view{struct_col->view()}; - table_view input{{struct_col_view}}; - std::vector column_order{order::DESCENDING}; + cudf::table_view input{{struct_col_view}}; + std::vector column_order{cudf::order::DESCENDING}; // desc_nulls_first auto const expected1 = []() { if constexpr (std::is_same_v) { - return fixed_width_column_wrapper{{3, 5, 6, 7, 1, 2, 4, 0}}; + return cudf::test::fixed_width_column_wrapper{{3, 5, 6, 7, 1, 2, 4, 0}}; } - return fixed_width_column_wrapper{{3, 5, 6, 7, 2, 4, 1, 0}}; + return cudf::test::fixed_width_column_wrapper{{3, 5, 6, 7, 2, 4, 1, 0}}; }(); - auto got = sorted_order(input, column_order, {null_order::AFTER}); + auto got = cudf::sorted_order(input, column_order, {cudf::null_order::AFTER}); 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view()); // Run test for sort and sort_by_key - run_sort_test(input, expected1, column_order, {null_order::AFTER}); + run_sort_test(input, expected1, column_order, {cudf::null_order::AFTER}); // desc_nulls_last - fixed_width_column_wrapper expected2{{2, 4, 1, 0, 6, 7, 3, 5}}; - got = sorted_order(input, column_order, {null_order::BEFORE}); + cudf::test::fixed_width_column_wrapper expected2{{2, 4, 1, 0, 6, 7, 3, 5}}; + got = cudf::sorted_order(input, column_order, {cudf::null_order::BEFORE}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); // Run test for sort and sort_by_key - run_sort_test(input, expected2, column_order, {null_order::BEFORE}); + run_sort_test(input, expected2, column_order, {cudf::null_order::BEFORE}); // asce_nulls_first - std::vector column_order2{order::ASCENDING}; - fixed_width_column_wrapper expected3{{3, 5, 7, 6, 0, 1, 2, 4}}; - got = sorted_order(input, column_order2, {null_order::BEFORE}); + std::vector column_order2{cudf::order::ASCENDING}; + cudf::test::fixed_width_column_wrapper expected3{{3, 5, 7, 6, 0, 1, 2, 4}}; + got = cudf::sorted_order(input, column_order2, {cudf::null_order::BEFORE}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got->view()); // Run test for sort and sort_by_key - run_sort_test(input, expected3, column_order2, {null_order::BEFORE}); + run_sort_test(input, expected3, column_order2, {cudf::null_order::BEFORE}); // asce_nulls_last auto const expected4 = []() { if constexpr (std::is_same_v) { - return fixed_width_column_wrapper{{0, 2, 4, 1, 7, 6, 3, 5}}; + return cudf::test::fixed_width_column_wrapper{{0, 2, 4, 1, 7, 6, 3, 5}}; } - return fixed_width_column_wrapper{{0, 1, 2, 4, 7, 6, 3, 5}}; + return cudf::test::fixed_width_column_wrapper{{0, 1, 2, 4, 7, 6, 3, 5}}; }(); - got = sorted_order(input, column_order2, {null_order::AFTER}); + got = cudf::sorted_order(input, column_order2, {cudf::null_order::AFTER}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view()); 
// Run test for sort and sort_by_key - run_sort_test(input, expected4, column_order2, {null_order::AFTER}); + run_sort_test(input, expected4, column_order2, {cudf::null_order::AFTER}); } -TYPED_TEST(Sort, MisMatchInColumnOrderSize) +TYPED_TEST(Sort, MismatchInColumnOrderSize) { using T = TypeParam; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k"}); - fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; - table_view input{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; + cudf::table_view input{{col1, col2, col3}}; - std::vector column_order{order::ASCENDING, order::DESCENDING}; + std::vector column_order{cudf::order::ASCENDING, cudf::order::DESCENDING}; - EXPECT_THROW(sorted_order(input, column_order), logic_error); - EXPECT_THROW(sort(input, column_order), logic_error); - EXPECT_THROW(sort_by_key(input, input, column_order), logic_error); + EXPECT_THROW(cudf::sorted_order(input, column_order), cudf::logic_error); + EXPECT_THROW(cudf::sort(input, column_order), cudf::logic_error); + EXPECT_THROW(cudf::sort_by_key(input, input, column_order), cudf::logic_error); } -TYPED_TEST(Sort, MisMatchInNullPrecedenceSize) +TYPED_TEST(Sort, MismatchInNullPrecedenceSize) { using T = TypeParam; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k"}); - fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; - table_view input{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; + cudf::table_view input{{col1, col2, col3}}; - std::vector column_order{order::ASCENDING, order::DESCENDING, order::DESCENDING}; - std::vector 
null_precedence{null_order::AFTER, null_order::BEFORE}; + std::vector column_order{ + cudf::order::ASCENDING, cudf::order::DESCENDING, cudf::order::DESCENDING}; + std::vector null_precedence{cudf::null_order::AFTER, cudf::null_order::BEFORE}; - EXPECT_THROW(sorted_order(input, column_order, null_precedence), logic_error); - EXPECT_THROW(sort(input, column_order, null_precedence), logic_error); - EXPECT_THROW(sort_by_key(input, input, column_order, null_precedence), logic_error); + EXPECT_THROW(cudf::sorted_order(input, column_order, null_precedence), cudf::logic_error); + EXPECT_THROW(cudf::sort(input, column_order, null_precedence), cudf::logic_error); + EXPECT_THROW(cudf::sort_by_key(input, input, column_order, null_precedence), cudf::logic_error); } TYPED_TEST(Sort, ZeroSizedColumns) { using T = TypeParam; - fixed_width_column_wrapper col1{}; - table_view input{{col1}}; + cudf::test::fixed_width_column_wrapper col1{}; + cudf::table_view input{{col1}}; - fixed_width_column_wrapper expected{}; - std::vector column_order{order::ASCENDING}; + cudf::test::fixed_width_column_wrapper expected{}; + std::vector column_order{cudf::order::ASCENDING}; - auto got = sorted_order(input, column_order); + auto got = cudf::sorted_order(input, column_order); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); @@ -837,33 +845,31 @@ TYPED_TEST(Sort, WithEmptyListColumn) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result); } -struct SortByKey : public BaseFixture { +struct SortByKey : public cudf::test::BaseFixture { }; TEST_F(SortByKey, ValueKeysSizeMismatch) { using T = int64_t; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k"}); - fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; - table_view values{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; + 
cudf::table_view values{{col1, col2, col3}}; - fixed_width_column_wrapper key_col{{5, 4, 3, 5}}; - table_view keys{{key_col}}; + cudf::test::fixed_width_column_wrapper key_col{{5, 4, 3, 5}}; + cudf::table_view keys{{key_col}}; - EXPECT_THROW(sort_by_key(values, keys), logic_error); + EXPECT_THROW(cudf::sort_by_key(values, keys), cudf::logic_error); } template -struct FixedPointTestAllReps : public cudf::test::BaseFixture { +struct SortFixedPointTest : public cudf::test::BaseFixture { }; -template -using wrapper = cudf::test::fixed_width_column_wrapper; -TYPED_TEST_SUITE(FixedPointTestAllReps, cudf::test::FixedPointTypes); +TYPED_TEST_SUITE(SortFixedPointTest, cudf::test::FixedPointTypes); -TYPED_TEST(FixedPointTestAllReps, FixedPointSortedOrderGather) +TYPED_TEST(SortFixedPointTest, SortedOrderGather) { using namespace numeric; using decimalXX = TypeParam; @@ -878,9 +884,12 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointSortedOrderGather) auto const index_vec = std::vector{2, 1, 0, 4, 3}; auto const sorted_vec = std::vector{ZERO, ONE, TWO, THREE, FOUR}; - auto const input_col = wrapper(input_vec.begin(), input_vec.end()); - auto const index_col = wrapper(index_vec.begin(), index_vec.end()); - auto const sorted_col = wrapper(sorted_vec.begin(), sorted_vec.end()); + auto const input_col = + cudf::test::fixed_width_column_wrapper(input_vec.begin(), input_vec.end()); + auto const index_col = + cudf::test::fixed_width_column_wrapper(index_vec.begin(), index_vec.end()); + auto const sorted_col = + cudf::test::fixed_width_column_wrapper(sorted_vec.begin(), sorted_vec.end()); auto const sorted_table = cudf::table_view{{sorted_col}}; auto const input_table = cudf::table_view{{input_col}}; @@ -892,25 +901,27 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointSortedOrderGather) CUDF_TEST_EXPECT_TABLES_EQUAL(sorted_table, sorted->view()); } -struct SortCornerTest : public BaseFixture { +struct SortCornerTest : public cudf::test::BaseFixture { }; TEST_F(SortCornerTest, 
WithEmptyStructColumn) { - using int_col = fixed_width_column_wrapper; + using int_col = cudf::test::fixed_width_column_wrapper; // struct{}, int, int int_col col_for_mask{{0, 0, 0, 0, 0, 0}, {1, 0, 1, 1, 1, 1}}; - auto null_mask = cudf::copy_bitmask(col_for_mask.release()->view()); - auto struct_col = cudf::make_structs_column(6, {}, UNKNOWN_NULL_COUNT, std::move(null_mask)); + auto null_mask = cudf::copy_bitmask(col_for_mask.release()->view()); + auto struct_col = + cudf::make_structs_column(6, {}, cudf::UNKNOWN_NULL_COUNT, std::move(null_mask)); int_col col1{{1, 2, 3, 1, 2, 3}}; int_col col2{{1, 1, 1, 2, 2, 2}}; - table_view input{{struct_col->view(), col1, col2}}; + cudf::table_view input{{struct_col->view(), col1, col2}}; int_col expected{{1, 0, 3, 4, 2, 5}}; - std::vector column_order{order::ASCENDING, order::ASCENDING, order::ASCENDING}; - auto got = sorted_order(input, column_order); + std::vector column_order{ + cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::ASCENDING}; + auto got = cudf::sorted_order(input, column_order); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); // struct{struct{}, int} @@ -920,17 +931,18 @@ TEST_F(SortCornerTest, WithEmptyStructColumn) child_columns.push_back(col3.release()); auto struct_col2 = cudf::make_structs_column(6, std::move(child_columns), 0, rmm::device_buffer{}); - table_view input2{{struct_col2->view()}}; + cudf::table_view input2{{struct_col2->view()}}; int_col expected2{{5, 4, 3, 2, 0, 1}}; - auto got2 = sorted_order(input2, {order::DESCENDING}); + auto got2 = cudf::sorted_order(input2, {cudf::order::DESCENDING}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got2->view()); // struct{struct{}, struct{int}} int_col col_for_mask2{{0, 0, 0, 0, 0, 0}, {1, 0, 1, 1, 0, 1}}; auto null_mask2 = cudf::copy_bitmask(col_for_mask2.release()->view()); std::vector> child_columns2; - auto child_col_1 = cudf::make_structs_column(6, {}, UNKNOWN_NULL_COUNT, std::move(null_mask2)); + auto child_col_1 = + 
cudf::make_structs_column(6, {}, cudf::UNKNOWN_NULL_COUNT, std::move(null_mask2)); child_columns2.push_back(std::move(child_col_1)); int_col col4{{5, 4, 3, 2, 1, 0}}; std::vector> grand_child; @@ -939,14 +951,26 @@ TEST_F(SortCornerTest, WithEmptyStructColumn) child_columns2.push_back(std::move(child_col_2)); auto struct_col3 = cudf::make_structs_column(6, std::move(child_columns2), 0, rmm::device_buffer{}); - table_view input3{{struct_col3->view()}}; + cudf::table_view input3{{struct_col3->view()}}; int_col expected3{{4, 1, 5, 3, 2, 0}}; - auto got3 = sorted_order(input3, {order::ASCENDING}); + auto got3 = cudf::sorted_order(input3, {cudf::order::ASCENDING}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got3->view()); }; -} // namespace test -} // namespace cudf +using SortDouble = Sort; +TEST_F(SortDouble, InfinityAndNan) +{ + auto constexpr NaN = std::numeric_limits::quiet_NaN(); + auto constexpr Inf = std::numeric_limits::infinity(); + + auto input = cudf::test::fixed_width_column_wrapper( + {-0.0, -NaN, -NaN, NaN, Inf, -Inf, 7.0, 5.0, 6.0, NaN, Inf, -Inf, -NaN, -NaN, -0.0}); + auto expected = // -inf,-inf,-0,-0,5,6,7,inf,inf,-nan,-nan,nan,nan,-nan,-nan + cudf::test::fixed_width_column_wrapper( + {5, 11, 0, 14, 7, 8, 6, 4, 10, 1, 2, 3, 9, 12, 13}); + auto results = cudf::sorted_order(cudf::table_view({input})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); +} CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sort/stable_sort_tests.cpp b/cpp/tests/sort/stable_sort_tests.cpp index ee43c9e7b4b..57ad6361ad6 100644 --- a/cpp/tests/sort/stable_sort_tests.cpp +++ b/cpp/tests/sort/stable_sort_tests.cpp @@ -31,15 +31,13 @@ #include #include -namespace cudf { -namespace test { -void run_stable_sort_test(table_view input, - column_view expected_sorted_indices, - std::vector column_order = {}, - std::vector null_precedence = {}) +void run_stable_sort_test(cudf::table_view input, + cudf::column_view expected_sorted_indices, + std::vector column_order = {}, + 
std::vector null_precedence = {}) { - auto got_sort_by_key_table = sort_by_key(input, input, column_order, null_precedence); - auto expected_sort_by_key_table = gather(input, expected_sorted_indices); + auto got_sort_by_key_table = cudf::sort_by_key(input, input, column_order, null_precedence); + auto expected_sort_by_key_table = cudf::gather(input, expected_sorted_indices); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort_by_key_table->view(), got_sort_by_key_table->view()); } @@ -48,7 +46,7 @@ using TestTypes = cudf::test::Concat; // include timestamps and durations template -struct StableSort : public BaseFixture { +struct StableSort : public cudf::test::BaseFixture { }; TYPED_TEST_SUITE(StableSort, TestTypes); @@ -58,14 +56,16 @@ TYPED_TEST(StableSort, MixedNullOrder) using T = TypeParam; using R = int32_t; - fixed_width_column_wrapper col1({0, 1, 1, 0, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 1, 1}); - strings_column_wrapper col2({"2", "a", "b", "x", "k", "a", "x", "a"}, {1, 1, 1, 1, 0, 1, 1, 1}); + cudf::test::fixed_width_column_wrapper col1({0, 1, 1, 0, 0, 1, 0, 1}, + {0, 1, 1, 1, 1, 1, 1, 1}); + cudf::test::strings_column_wrapper col2({"2", "a", "b", "x", "k", "a", "x", "a"}, + {1, 1, 1, 1, 0, 1, 1, 1}); - fixed_width_column_wrapper expected{{4, 3, 6, 1, 5, 7, 2, 0}}; + cudf::test::fixed_width_column_wrapper expected{{4, 3, 6, 1, 5, 7, 2, 0}}; - auto got = stable_sorted_order(table_view({col1, col2}), - {order::ASCENDING, order::ASCENDING}, - {null_order::AFTER, null_order::BEFORE}); + auto got = cudf::stable_sorted_order(cudf::table_view({col1, col2}), + {cudf::order::ASCENDING, cudf::order::ASCENDING}, + {cudf::null_order::AFTER, cudf::null_order::BEFORE}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); } @@ -74,16 +74,18 @@ TYPED_TEST(StableSort, WithNullMax) { using T = TypeParam; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1}); - 
fixed_width_column_wrapper col3{{10, 40, 70, 10, 2, 10}, {1, 1, 0, 1, 1, 1}}; - table_view input{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 10, 2, 10}, {1, 1, 0, 1, 1, 1}}; + cudf::table_view input{{col1, col2, col3}}; - fixed_width_column_wrapper expected{{1, 0, 3, 5, 4, 2}}; - std::vector column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING}; - std::vector null_precedence{null_order::AFTER, null_order::AFTER, null_order::AFTER}; + cudf::test::fixed_width_column_wrapper expected{{1, 0, 3, 5, 4, 2}}; + std::vector column_order{ + cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING}; + std::vector null_precedence{ + cudf::null_order::AFTER, cudf::null_order::AFTER, cudf::null_order::AFTER}; - auto got = stable_sorted_order(input, column_order, null_precedence); + auto got = cudf::stable_sorted_order(input, column_order, null_precedence); if (not std::is_same_v) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); @@ -92,7 +94,7 @@ TYPED_TEST(StableSort, WithNullMax) } else { // for bools only validate that the null element landed at the back, since // the rest of the values are equivalent and yields random sorted order. 
- auto to_host = [](column_view const& col) { + auto to_host = [](cudf::column_view const& col) { thrust::host_vector h_data(col.size()); CUDF_CUDA_TRY(cudaMemcpy( h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); @@ -102,7 +104,7 @@ TYPED_TEST(StableSort, WithNullMax) thrust::host_vector h_got = to_host(got->view()); EXPECT_EQ(h_exp[h_exp.size() - 1], h_got[h_got.size() - 1]); - fixed_width_column_wrapper expected_for_bool{{0, 3, 5, 1, 4, 2}}; + cudf::test::fixed_width_column_wrapper expected_for_bool{{0, 3, 5, 1, 4, 2}}; run_stable_sort_test(input, expected_for_bool, column_order, null_precedence); } } @@ -111,15 +113,16 @@ TYPED_TEST(StableSort, WithNullMin) { using T = TypeParam; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}, {1, 1, 0, 1, 1}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {1, 1, 0, 1, 1}); - fixed_width_column_wrapper col3{{10, 40, 70, 10, 2}, {1, 1, 0, 1, 1}}; - table_view input{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}, {1, 1, 0, 1, 1}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {1, 1, 0, 1, 1}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 10, 2}, {1, 1, 0, 1, 1}}; + cudf::table_view input{{col1, col2, col3}}; - fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}}; - std::vector column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING}; + cudf::test::fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}}; + std::vector column_order{ + cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING}; - auto got = stable_sorted_order(input, column_order); + auto got = cudf::stable_sorted_order(input, column_order); if (!std::is_same_v) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); @@ -128,7 +131,7 @@ TYPED_TEST(StableSort, WithNullMin) } else { // for bools only validate that the null element landed at the front, since // the rest of the values are equivalent and yields random sorted order. 
- auto to_host = [](column_view const& col) { + auto to_host = [](cudf::column_view const& col) { thrust::host_vector h_data(col.size()); CUDF_CUDA_TRY(cudaMemcpy( h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); @@ -138,7 +141,7 @@ TYPED_TEST(StableSort, WithNullMin) thrust::host_vector h_got = to_host(got->view()); EXPECT_EQ(h_exp.front(), h_got.front()); - fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}}; + cudf::test::fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}}; run_stable_sort_test(input, expected_for_bool, column_order); } } @@ -147,15 +150,16 @@ TYPED_TEST(StableSort, WithAllValid) { using T = TypeParam; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k"}); - fixed_width_column_wrapper col3{{10, 40, 70, 10, 2}}; - table_view input{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 10, 2}}; + cudf::table_view input{{col1, col2, col3}}; - fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}}; - std::vector column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING}; + cudf::test::fixed_width_column_wrapper expected{{2, 1, 0, 3, 4}}; + std::vector column_order{ + cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING}; - auto got = stable_sorted_order(input, column_order); + auto got = cudf::stable_sorted_order(input, column_order); // Skip validating bools order. 
Valid true bools are all // equivalent, and yield random order after thrust::sort @@ -164,7 +168,7 @@ TYPED_TEST(StableSort, WithAllValid) run_stable_sort_test(input, expected, column_order); } else { - fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}}; + cudf::test::fixed_width_column_wrapper expected_for_bool{{2, 0, 3, 1, 4}}; run_stable_sort_test(input, expected_for_bool, column_order); } } @@ -173,66 +177,68 @@ TYPED_TEST(StableSort, MisMatchInColumnOrderSize) { using T = TypeParam; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k"}); - fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; - table_view input{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; + cudf::table_view input{{col1, col2, col3}}; - std::vector column_order{order::ASCENDING, order::DESCENDING}; + std::vector column_order{cudf::order::ASCENDING, cudf::order::DESCENDING}; - EXPECT_THROW(stable_sorted_order(input, column_order), logic_error); - EXPECT_THROW(stable_sort_by_key(input, input, column_order), logic_error); + EXPECT_THROW(cudf::stable_sorted_order(input, column_order), cudf::logic_error); + EXPECT_THROW(cudf::stable_sort_by_key(input, input, column_order), cudf::logic_error); } TYPED_TEST(StableSort, MisMatchInNullPrecedenceSize) { using T = TypeParam; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k"}); - fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; - table_view input{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; + cudf::table_view input{{col1, col2, col3}}; - std::vector column_order{order::ASCENDING, 
order::DESCENDING, order::DESCENDING}; - std::vector null_precedence{null_order::AFTER, null_order::BEFORE}; + std::vector column_order{ + cudf::order::ASCENDING, cudf::order::DESCENDING, cudf::order::DESCENDING}; + std::vector null_precedence{cudf::null_order::AFTER, cudf::null_order::BEFORE}; - EXPECT_THROW(stable_sorted_order(input, column_order, null_precedence), logic_error); - EXPECT_THROW(stable_sort_by_key(input, input, column_order, null_precedence), logic_error); + EXPECT_THROW(cudf::stable_sorted_order(input, column_order, null_precedence), cudf::logic_error); + EXPECT_THROW(cudf::stable_sort_by_key(input, input, column_order, null_precedence), + cudf::logic_error); } TYPED_TEST(StableSort, ZeroSizedColumns) { using T = TypeParam; - fixed_width_column_wrapper col1{}; - table_view input{{col1}}; + cudf::test::fixed_width_column_wrapper col1{}; + cudf::table_view input{{col1}}; - fixed_width_column_wrapper expected{}; - std::vector column_order{order::ASCENDING}; + cudf::test::fixed_width_column_wrapper expected{}; + std::vector column_order{cudf::order::ASCENDING}; - auto got = stable_sorted_order(input, column_order); + auto got = cudf::stable_sorted_order(input, column_order); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); run_stable_sort_test(input, expected, column_order); } -struct StableSortByKey : public BaseFixture { +struct StableSortByKey : public cudf::test::BaseFixture { }; TEST_F(StableSortByKey, ValueKeysSizeMismatch) { using T = int64_t; - fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; - strings_column_wrapper col2({"d", "e", "a", "d", "k"}); - fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; - table_view values{{col1, col2, col3}}; + cudf::test::fixed_width_column_wrapper col1{{5, 4, 3, 5, 8}}; + cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}); + cudf::test::fixed_width_column_wrapper col3{{10, 40, 70, 5, 2}}; + cudf::table_view values{{col1, col2, col3}}; - fixed_width_column_wrapper key_col{{5, 4, 3, 
5}}; - table_view keys{{key_col}}; + cudf::test::fixed_width_column_wrapper key_col{{5, 4, 3, 5}}; + cudf::table_view keys{{key_col}}; - EXPECT_THROW(stable_sort_by_key(values, keys), logic_error); + EXPECT_THROW(cudf::stable_sort_by_key(values, keys), cudf::logic_error); } template @@ -272,5 +278,17 @@ TYPED_TEST(StableSortFixedPoint, FixedPointSortedOrderGather) CUDF_TEST_EXPECT_TABLES_EQUAL(sorted_table, sorted->view()); } -} // namespace test -} // namespace cudf +using StableSortDouble = StableSort; +TEST_F(StableSortDouble, InfinityAndNaN) +{ + auto constexpr NaN = std::numeric_limits::quiet_NaN(); + auto constexpr Inf = std::numeric_limits::infinity(); + + auto input = cudf::test::fixed_width_column_wrapper( + {-0.0, -NaN, -NaN, NaN, Inf, -Inf, 7.0, 5.0, 6.0, NaN, Inf, -Inf, -NaN, -NaN, -0.0}); + auto expected = // -inf,-inf,-0,-0,5,6,7,inf,inf,-nan,-nan,nan,nan,-nan,-nan + cudf::test::fixed_width_column_wrapper( + {5, 11, 0, 14, 7, 8, 6, 4, 10, 1, 2, 3, 9, 12, 13}); + auto results = stable_sorted_order(cudf::table_view({input})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); +} From 9f8b93680ea81209ce34db6957cc0ef2791fa806 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 15 Oct 2022 09:06:29 +0200 Subject: [PATCH 039/202] Handle `multibyte_split` byte_range out-of-bounds offsets on host (#11885) In order to uniformize the interface for a future combined handling of byte ranges between read_csv and read_text, this PR replaces the `cutoff_offset` by a plain integer again, and handles finding the first out-of-bounds on the host side instead. 
Authors: - Tobias Ribizel (https://github.com/upsj) Approvers: - Mike Wilson (https://github.com/hyperbolic2346) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/11885 --- cpp/src/io/text/multibyte_split.cu | 195 ++++++++++++++--------------- 1 file changed, 97 insertions(+), 98 deletions(-) diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index 133c5fe9826..136eb8d24c6 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -31,7 +31,6 @@ #include #include -#include #include #include #include @@ -39,6 +38,8 @@ #include #include +#include +#include #include #include @@ -46,6 +47,8 @@ #pragma GCC diagnostic pop +#include +#include #include #include #include @@ -160,6 +163,10 @@ struct PatternScan { } }; +// type aliases to distinguish between row offsets and character offsets +using output_offset = int64_t; +using byte_offset = int64_t; + // multibyte_split works by splitting up inputs in to 32 inputs (bytes) per thread, and transforming // them in to data structures called "multistates". these multistates are created by searching a // trie, but instead of a tradition trie where the search begins at a single node at the beginning, @@ -170,35 +177,11 @@ struct PatternScan { // it begins in. From there, each thread can then take deterministic action. In this case, the // deterministic action is counting and outputting delimiter offsets when a delimiter is found. -// This struct provides output offsets that are only incremented until a cutoff point. -struct cutoff_offset { - // magnitude stores the offset, sign bit stores whether we are past the cutoff - int64_t value = 0; - - constexpr cutoff_offset() = default; - - constexpr cutoff_offset(int64_t offset, bool is_past_cutoff) - : value{is_past_cutoff ? -offset : offset} - { - } - - [[nodiscard]] constexpr int64_t offset() const { return value < 0 ? 
-value : value; } - - [[nodiscard]] constexpr bool is_past_end() { return value < 0; } - - friend constexpr cutoff_offset operator+(cutoff_offset lhs, cutoff_offset rhs) - { - auto const past_end = lhs.is_past_end() or rhs.is_past_end(); - auto const offset = lhs.offset() + (lhs.is_past_end() ? 0 : rhs.offset()); - return cutoff_offset{offset, past_end}; - } -}; - __global__ void multibyte_split_init_kernel( cudf::size_type base_tile_idx, cudf::size_type num_tiles, cudf::io::text::detail::scan_tile_state_view tile_multistates, - cudf::io::text::detail::scan_tile_state_view tile_output_offsets, + cudf::io::text::detail::scan_tile_state_view tile_output_offsets, cudf::io::text::detail::scan_tile_status status = cudf::io::text::detail::scan_tile_status::invalid) { @@ -212,9 +195,9 @@ __global__ void multibyte_split_init_kernel( __global__ void multibyte_split_seed_kernel( cudf::io::text::detail::scan_tile_state_view tile_multistates, - cudf::io::text::detail::scan_tile_state_view tile_output_offsets, + cudf::io::text::detail::scan_tile_state_view tile_output_offsets, multistate tile_multistate_seed, - cutoff_offset tile_output_offset) + output_offset tile_output_offset) { auto const thread_idx = blockIdx.x * blockDim.x + threadIdx.x; if (thread_idx == 0) { @@ -225,19 +208,18 @@ __global__ void multibyte_split_seed_kernel( __global__ __launch_bounds__(THREADS_PER_TILE) void multibyte_split_kernel( cudf::size_type base_tile_idx, - int64_t base_input_offset, - int64_t base_offset_offset, + byte_offset base_input_offset, + output_offset base_output_offset, cudf::io::text::detail::scan_tile_state_view tile_multistates, - cudf::io::text::detail::scan_tile_state_view tile_output_offsets, + cudf::io::text::detail::scan_tile_state_view tile_output_offsets, cudf::device_span delim, cudf::device_span chunk_input_chars, - int64_t byte_range_end, - cudf::split_device_span output_offsets) + cudf::split_device_span row_offsets) { using InputLoad = cub::BlockLoad; - using OffsetScan 
= cub::BlockScan; - using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback; + using OffsetScan = cub::BlockScan; + using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback; __shared__ union { typename InputLoad::TempStorage input_load; @@ -269,17 +251,15 @@ __global__ __launch_bounds__(THREADS_PER_TILE) void multibyte_split_kernel( // STEP 3: Flag matches - cutoff_offset thread_offset; + output_offset thread_offset{}; uint32_t thread_match_mask[(ITEMS_PER_THREAD + 31) / 32]{}; for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) { - thread_multistate = transition(thread_chars[i], thread_multistate, delim); - auto const thread_state = thread_multistate.max_tail(); - auto const is_match = i < thread_input_size and thread_state == delim.size(); - auto const match_end = base_input_offset + thread_input_offset + i + 1; - auto const is_past_range = match_end >= byte_range_end; + thread_multistate = transition(thread_chars[i], thread_multistate, delim); + auto const thread_state = thread_multistate.max_tail(); + auto const is_match = i < thread_input_size and thread_state == delim.size(); thread_match_mask[i / 32] |= uint32_t{is_match} << (i % 32); - thread_offset = thread_offset + cutoff_offset{is_match, is_past_range}; + thread_offset += output_offset{is_match}; } // STEP 4: Scan flags to determine absolute thread output offset @@ -293,29 +273,27 @@ __global__ __launch_bounds__(THREADS_PER_TILE) void multibyte_split_kernel( for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) { auto const is_match = (thread_match_mask[i / 32] >> (i % 32)) & 1u; - if (is_match && !thread_offset.is_past_end()) { - auto const match_end = base_input_offset + thread_input_offset + i + 1; - auto const is_past_range = match_end >= byte_range_end; - output_offsets[thread_offset.offset() - base_offset_offset] = match_end; - thread_offset = thread_offset + cutoff_offset{true, is_past_range}; + if (is_match) { + auto const match_end = base_input_offset + 
thread_input_offset + i + 1; + row_offsets[thread_offset - base_output_offset] = match_end; + thread_offset++; } } } __global__ __launch_bounds__(THREADS_PER_TILE) void byte_split_kernel( cudf::size_type base_tile_idx, - int64_t base_input_offset, - int64_t base_offset_offset, - cudf::io::text::detail::scan_tile_state_view tile_output_offsets, + byte_offset base_input_offset, + output_offset base_output_offset, + cudf::io::text::detail::scan_tile_state_view tile_output_offsets, char delim, cudf::device_span chunk_input_chars, - int64_t byte_range_end, - cudf::split_device_span output_offsets) + cudf::split_device_span row_offsets) { using InputLoad = cub::BlockLoad; - using OffsetScan = cub::BlockScan; - using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback; + using OffsetScan = cub::BlockScan; + using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback; __shared__ union { typename InputLoad::TempStorage input_load; @@ -338,15 +316,13 @@ __global__ __launch_bounds__(THREADS_PER_TILE) void byte_split_kernel( // STEP 2: Flag matches - cutoff_offset thread_offset; + output_offset thread_offset{}; uint32_t thread_match_mask[(ITEMS_PER_THREAD + 31) / 32]{}; for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) { - auto const is_match = i < thread_input_size and thread_chars[i] == delim; - auto const match_end = base_input_offset + thread_input_offset + i + 1; - auto const is_past_range = match_end >= byte_range_end; + auto const is_match = i < thread_input_size and thread_chars[i] == delim; thread_match_mask[i / 32] |= uint32_t{is_match} << (i % 32); - thread_offset = thread_offset + cutoff_offset{is_match, is_past_range}; + thread_offset += output_offset{is_match}; } // STEP 3: Scan flags to determine absolute thread output offset @@ -360,11 +336,10 @@ __global__ __launch_bounds__(THREADS_PER_TILE) void byte_split_kernel( for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) { auto const is_match = (thread_match_mask[i / 32] >> (i % 32)) & 1u; 
- if (is_match && !thread_offset.is_past_end()) { - auto const match_end = base_input_offset + thread_input_offset + i + 1; - auto const is_past_range = match_end >= byte_range_end; - output_offsets[thread_offset.offset() - base_offset_offset] = match_end; - thread_offset = thread_offset + cutoff_offset{true, is_past_range}; + if (is_match) { + auto const match_end = base_input_offset + thread_input_offset + i + 1; + row_offsets[thread_offset - base_output_offset] = match_end; + thread_offset++; } } } @@ -611,7 +586,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source // best when at least 32 more than max possible concurrent tiles, due to rolling `invalid`s auto num_tile_states = std::max(32, TILES_PER_CHUNK * concurrency + 32); auto tile_multistates = scan_tile_state(num_tile_states, stream); - auto tile_offsets = scan_tile_state(num_tile_states, stream); + auto tile_offsets = scan_tile_state(num_tile_states, stream); multibyte_split_init_kernel<< multibyte_split(cudf::io::text::data_chunk_source tile_multistates, tile_offsets, multistate_seed, - {}); + 0); auto reader = source.create_reader(); - auto chunk_offset = std::max(0, byte_range.offset() - delimiter.size()); + auto chunk_offset = std::max(0, byte_range.offset() - delimiter.size()); auto const byte_range_end = byte_range.offset() + byte_range.size(); reader->skip_bytes(chunk_offset); // amortize output chunk allocations over 8 worst-case outputs. 
This limits the overallocation constexpr auto max_growth = 8; - output_builder offset_storage(ITEMS_PER_CHUNK, max_growth, stream); + output_builder row_offset_storage(ITEMS_PER_CHUNK, max_growth, stream); output_builder char_storage(ITEMS_PER_CHUNK, max_growth, stream); fork_stream(streams, stream); @@ -653,22 +628,23 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source auto& scan_stream = streams[1]; auto chunk = reader->get_next_chunk(ITEMS_PER_CHUNK, read_stream); int64_t base_tile_idx = 0; - std::optional first_offset; - std::optional last_offset; - if (byte_range.offset() == 0) { first_offset = 0; } + std::optional first_row_offset; + std::optional last_row_offset; + bool found_last_offset = false; + if (byte_range.offset() == 0) { first_row_offset = 0; } std::swap(read_stream, scan_stream); while (chunk->size() > 0) { // if we found the last delimiter, or didn't find delimiters inside the byte range at all: abort - if (last_offset.has_value() or - (not first_offset.has_value() and chunk_offset >= byte_range_end)) { + if (last_row_offset.has_value() or + (not first_row_offset.has_value() and chunk_offset >= byte_range_end)) { break; } auto tiles_in_launch = cudf::util::div_rounding_up_safe(chunk->size(), static_cast(ITEMS_PER_TILE)); - auto offset_output = offset_storage.next_output(scan_stream); + auto row_offsets = row_offset_storage.next_output(scan_stream); // reset the next chunk of tile state multibyte_split_init_kernel<< multibyte_split(cudf::io::text::data_chunk_source scan_stream.value()>>>( // base_tile_idx, chunk_offset, - offset_storage.size(), + row_offset_storage.size(), tile_offsets, delimiter[0], *chunk, - byte_range_end, - offset_output); + row_offsets); } else { multibyte_split_kernel<< multibyte_split(cudf::io::text::data_chunk_source scan_stream.value()>>>( // base_tile_idx, chunk_offset, - offset_storage.size(), + row_offset_storage.size(), tile_multistates, tile_offsets, {device_delim.data(), 
static_cast(device_delim.size())}, *chunk, - byte_range_end, - offset_output); + row_offsets); } // load the next chunk auto next_chunk = reader->get_next_chunk(ITEMS_PER_CHUNK, read_stream); // while that is running, determine how many offsets we output (synchronizes) - auto next_tile_offset = - tile_offsets.get_inclusive_prefix(base_tile_idx + tiles_in_launch - 1, scan_stream); - offset_storage.advance_output(next_tile_offset.offset() - offset_storage.size()); + auto const new_offsets = [&] { + auto const new_offsets_unclamped = + tile_offsets.get_inclusive_prefix(base_tile_idx + tiles_in_launch - 1, scan_stream) - + static_cast(row_offset_storage.size()); + // if we are not in the last chunk, we can use all offsets + if (chunk_offset + static_cast(chunk->size()) < byte_range_end) { + return new_offsets_unclamped; + } + // if we are in the last chunk, we need to find the first out-of-bounds offset + auto const it = thrust::make_counting_iterator(output_offset{}); + auto const end_loc = + *thrust::find_if(rmm::exec_policy_nosync(scan_stream), + it, + it + new_offsets_unclamped, + [row_offsets, byte_range_end] __device__(output_offset i) { + return row_offsets[i] >= byte_range_end; + }); + // if we had no out-of-bounds offset, we copy all offsets + if (end_loc == new_offsets_unclamped) { return end_loc; } + // otherwise we copy only up to (including) the first out-of-bounds delimiter + found_last_offset = true; + return end_loc + 1; + }(); + row_offset_storage.advance_output(new_offsets); // determine if we found the first or last field offset for the byte range - if (next_tile_offset.offset() > 0 and not first_offset) { - first_offset = offset_storage.front_element(scan_stream); + if (new_offsets > 0 and not first_row_offset) { + first_row_offset = row_offset_storage.front_element(scan_stream); } - if (next_tile_offset.is_past_end()) { last_offset = offset_storage.back_element(scan_stream); } + if (found_last_offset) { last_row_offset = 
row_offset_storage.back_element(scan_stream); } // copy over the characters we need, if we already encountered the first field delimiter - if (first_offset.has_value()) { - auto const begin = chunk->data() + std::max(0, *first_offset - chunk_offset); - auto const sentinel = last_offset.value_or(std::numeric_limits::max()); - auto const end = chunk->data() + std::min(sentinel - chunk_offset, chunk->size()); + if (first_row_offset.has_value()) { + auto const begin = chunk->data() + std::max(0, *first_row_offset - chunk_offset); + auto const sentinel = last_row_offset.value_or(std::numeric_limits::max()); + auto const end = + chunk->data() + std::min(sentinel - chunk_offset, chunk->size()); auto const output_size = end - begin; auto char_output = char_storage.next_output(scan_stream); - auto const split = begin + std::min(output_size, char_output.head().size()); + auto const split = begin + std::min(output_size, char_output.head().size()); thrust::copy(rmm::exec_policy_nosync(scan_stream), begin, split, char_output.head().begin()); thrust::copy(rmm::exec_policy_nosync(scan_stream), split, end, char_output.tail().begin()); char_storage.advance_output(output_size); @@ -739,7 +735,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source cudaEventRecord(last_launch_event, scan_stream.value()); std::swap(read_stream, scan_stream); - base_tile_idx += TILES_PER_CHUNK; + base_tile_idx += tiles_in_launch; chunk_offset += chunk->size(); chunk = std::move(next_chunk); } @@ -750,24 +746,27 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source // if the input was empty, we didn't find a delimiter at all, // or the first delimiter was also the last: empty output - if (chunk_offset == 0 or not first_offset.has_value() or first_offset == last_offset) { + if (chunk_offset == 0 or not first_row_offset.has_value() or + first_row_offset == last_row_offset) { return make_empty_column(type_id::STRING); } auto chars = char_storage.gather(stream, mr); - auto 
global_offsets = offset_storage.gather(stream, mr); + auto global_offsets = row_offset_storage.gather(stream, mr); - bool const insert_begin = *first_offset == 0; - bool const insert_end = not last_offset.has_value() or last_offset == chunk_offset; + bool const insert_begin = *first_row_offset == 0; + bool const insert_end = not last_row_offset.has_value() or last_row_offset == chunk_offset; rmm::device_uvector offsets{ global_offsets.size() + insert_begin + insert_end, stream, mr}; if (insert_begin) { offsets.set_element_to_zero_async(0, stream); } - if (insert_end) { offsets.set_element(offsets.size() - 1, chunk_offset - *first_offset, stream); } + if (insert_end) { + offsets.set_element(offsets.size() - 1, chunk_offset - *first_row_offset, stream); + } thrust::transform(rmm::exec_policy(stream), global_offsets.begin(), global_offsets.end(), offsets.begin() + insert_begin, - [baseline = *first_offset] __device__(int64_t global_offset) { + [baseline = *first_row_offset] __device__(byte_offset global_offset) { return static_cast(global_offset - baseline); }); From edc058f0e250e4fe6a1cd3829683c13b6a394373 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 17 Oct 2022 14:48:21 -0500 Subject: [PATCH 040/202] Add `nanosecond` & `microsecond` to `DatetimeProperties` (#11911) This PR: - [x] Implemented `extract_milli_second`, `extract_micro_second` and `extract_nano_second` in libcudf. - [x] Added `nanosecond` and `microsecond` in `DatetimeProperties` & `DatetimeIndex`. 
- [x] Updated docs - [x] Added & modified tests Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - David Wendt (https://github.com/davidwendt) - Matthew Roeschke (https://github.com/mroeschke) - Nghia Truong (https://github.com/ttnghia) - MithunR (https://github.com/mythrocks) - https://github.com/nvdbaranec - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/11911 --- cpp/include/cudf/datetime.hpp | 71 ++++++++++++++++--- cpp/include/cudf/detail/datetime.hpp | 33 +++++++++ cpp/src/datetime/datetime_ops.cu | 76 +++++++++++++++++++-- cpp/tests/datetime/datetime_ops_test.cpp | 54 +++++++++++++++ docs/cudf/source/api_docs/index_objects.rst | 9 ++- docs/cudf/source/api_docs/series.rst | 28 ++++---- python/cudf/cudf/_lib/cpp/datetime.pxd | 9 +++ python/cudf/cudf/_lib/datetime.pyx | 12 ++++ python/cudf/cudf/core/index.py | 50 ++++++++++++++ python/cudf/cudf/core/series.py | 61 ++++++++++++++++- python/cudf/cudf/tests/test_datetime.py | 10 ++- 11 files changed, 377 insertions(+), 36 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index a8955ffb17c..fb04336871f 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -36,7 +36,7 @@ namespace datetime { */ /** - * @brief Extracts year from any date time type and returns an int16_t + * @brief Extracts year from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -50,7 +50,7 @@ std::unique_ptr extract_year( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts month from any date time type and returns an int16_t + * @brief Extracts month from any datetime type and returns an int16_t * cudf::column. 
* * @param column cudf::column_view of the input datetime values @@ -64,7 +64,7 @@ std::unique_ptr extract_month( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts day from any date time type and returns an int16_t + * @brief Extracts day from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -78,7 +78,7 @@ std::unique_ptr extract_day( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts day from any date time type and returns an int16_t + * @brief Extracts weekday from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -92,7 +92,7 @@ std::unique_ptr extract_weekday( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts hour from any date time type and returns an int16_t + * @brief Extracts hour from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -106,7 +106,7 @@ std::unique_ptr extract_hour( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts minute from any date time type and returns an int16_t + * @brief Extracts minute from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -120,7 +120,7 @@ std::unique_ptr extract_minute( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts second from any date time type and returns an int16_t + * @brief Extracts second from any datetime type and returns an int16_t * cudf::column. 
* * @param column cudf::column_view of the input datetime values @@ -133,6 +133,57 @@ std::unique_ptr extract_second( cudf::column_view const& column, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Extracts millisecond fraction from any datetime type and returns an int16_t + * cudf::column. + * + * A millisecond fraction is only the 3 digits that make up the millisecond portion of a duration. + * For example, the millisecond fraction of 1.234567890 seconds is 234. + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column + * + * @returns cudf::column of the extracted int16_t milliseconds + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + */ +std::unique_ptr extract_millisecond_fraction( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Extracts microsecond fraction from any datetime type and returns an int16_t + * cudf::column. + * + * A microsecond fraction is only the 3 digits that make up the microsecond portion of a duration. + * For example, the microsecond fraction of 1.234567890 seconds is 567. + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column + * + * @returns cudf::column of the extracted int16_t microseconds + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + */ +std::unique_ptr extract_microsecond_fraction( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Extracts nanosecond fraction from any datetime type and returns an int16_t + * cudf::column. + * + * A nanosecond fraction is only the 3 digits that make up the nanosecond portion of a duration. 
+ * For example, the nanosecond fraction of 1.234567890 seconds is 890. + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column + * + * @returns cudf::column of the extracted int16_t nanoseconds + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + */ +std::unique_ptr extract_nanosecond_fraction( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group /** * @addtogroup datetime_compute @@ -141,7 +192,7 @@ std::unique_ptr extract_second( */ /** - * @brief Computes the last day of the month in date time type and returns a TIMESTAMP_DAYS + * @brief Computes the last day of the month in datetime type and returns a TIMESTAMP_DAYS * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -169,7 +220,7 @@ std::unique_ptr day_of_year( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Adds or subtracts a number of months from the date time type and returns a + * @brief Adds or subtracts a number of months from the datetime type and returns a * timestamp column that is of the same type as the input `timestamps` column. * * For a given row, if the `timestamps` or the `months` column value is null, @@ -204,7 +255,7 @@ std::unique_ptr add_calendrical_months( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Adds or subtracts a number of months from the date time type and returns a + * @brief Adds or subtracts a number of months from the datetime type and returns a * timestamp column that is of the same type as the input `timestamps` column. * * For a given row, if the `timestamps` value is null, the output for that row is null. 
diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index 7a2545fbdcf..d17e641533e 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -94,6 +94,39 @@ std::unique_ptr extract_second( rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc cudf::extract_millisecond_fraction(cudf::column_view const&, + * rmm::mr::device_memory_resource *) + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr extract_millisecond_fraction( + cudf::column_view const& column, + rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @copydoc cudf::extract_microsecond_fraction(cudf::column_view const&, + * rmm::mr::device_memory_resource *) + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr extract_microsecond_fraction( + cudf::column_view const& column, + rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @copydoc cudf::extract_nanosecond_fraction(cudf::column_view const&, + * rmm::mr::device_memory_resource *) + * + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ */ +std::unique_ptr extract_nanosecond_fraction( + cudf::column_view const& column, + rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::mr::device_memory_resource *) * diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index ee026d6c395..e89792525c9 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -76,9 +76,22 @@ struct extract_component_operator { if (time_since_midnight.count() < 0) { time_since_midnight += days(1); } - auto hrs_ = duration_cast(time_since_midnight); - auto mins_ = duration_cast(time_since_midnight - hrs_); - auto secs_ = duration_cast(time_since_midnight - hrs_ - mins_); + auto const hrs_ = [&] { return duration_cast(time_since_midnight); }; + auto const mins_ = [&] { return duration_cast(time_since_midnight) - hrs_(); }; + auto const secs_ = [&] { + return duration_cast(time_since_midnight) - hrs_() - mins_(); + }; + auto const millisecs_ = [&] { + return duration_cast(time_since_midnight) - hrs_() - mins_() - secs_(); + }; + auto const microsecs_ = [&] { + return duration_cast(time_since_midnight) - hrs_() - mins_() - secs_() - + millisecs_(); + }; + auto const nanosecs_ = [&] { + return duration_cast(time_since_midnight) - hrs_() - mins_() - secs_() - + millisecs_() - microsecs_(); + }; switch (Component) { case datetime_component::YEAR: @@ -89,9 +102,12 @@ struct extract_component_operator { return static_cast(year_month_day(days_since_epoch).day()); case datetime_component::WEEKDAY: return year_month_weekday(days_since_epoch).weekday().iso_encoding(); - case datetime_component::HOUR: return hrs_.count(); - case datetime_component::MINUTE: return mins_.count(); - case datetime_component::SECOND: return secs_.count(); + case datetime_component::HOUR: return hrs_().count(); + case datetime_component::MINUTE: return 
mins_().count(); + case datetime_component::SECOND: return secs_().count(); + case datetime_component::MILLISECOND: return millisecs_().count(); + case datetime_component::MICROSECOND: return microsecs_().count(); + case datetime_component::NANOSECOND: return nanosecs_().count(); default: return 0; } } @@ -495,6 +511,33 @@ std::unique_ptr extract_second(column_view const& column, cudf::type_id::INT16>(column, stream, mr); } +std::unique_ptr extract_millisecond_fraction(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); +} + +std::unique_ptr extract_microsecond_fraction(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); +} + +std::unique_ptr extract_nanosecond_fraction(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); +} + std::unique_ptr last_day_of_month(column_view const& column, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -607,6 +650,27 @@ std::unique_ptr extract_second(column_view const& column, return detail::extract_second(column, cudf::default_stream_value, mr); } +std::unique_ptr extract_millisecond_fraction(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::extract_millisecond_fraction(column, cudf::default_stream_value, mr); +} + +std::unique_ptr extract_microsecond_fraction(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::extract_microsecond_fraction(column, cudf::default_stream_value, mr); +} + +std::unique_ptr 
extract_nanosecond_fraction(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::extract_nanosecond_fraction(column, cudf::default_stream_value, mr); +} + std::unique_ptr last_day_of_month(column_view const& column, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 2898a649e36..c6d36b2aa6e 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -60,6 +60,9 @@ TYPED_TEST(NonTimestampTest, TestThrowsOnNonTimestamp) EXPECT_THROW(extract_hour(col), cudf::logic_error); EXPECT_THROW(extract_minute(col), cudf::logic_error); EXPECT_THROW(extract_second(col), cudf::logic_error); + EXPECT_THROW(extract_millisecond_fraction(col), cudf::logic_error); + EXPECT_THROW(extract_microsecond_fraction(col), cudf::logic_error); + EXPECT_THROW(extract_nanosecond_fraction(col), cudf::logic_error); EXPECT_THROW(last_day_of_month(col), cudf::logic_error); EXPECT_THROW(day_of_year(col), cudf::logic_error); EXPECT_THROW(add_calendrical_months( @@ -97,12 +100,21 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) 1674631932929 // 2023-01-25 07:32:12.929 GMT }; + auto timestamps_ns = + cudf::test::fixed_width_column_wrapper{ + -23324234, // 1969-12-31 23:59:59.976675766 GMT + 23432424, // 1970-01-01 00:00:00.023432424 GMT + 987234623 // 1970-01-01 00:00:00.987234623 GMT + }; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_D), fixed_width_column_wrapper{1965, 2018, 2023}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_s), fixed_width_column_wrapper{1965, 2018, 2023}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_ms), fixed_width_column_wrapper{1965, 2018, 2023}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_ns), + fixed_width_column_wrapper{1969, 1970, 1970}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_D), 
fixed_width_column_wrapper{10, 7, 1}); @@ -110,6 +122,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{10, 7, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_ms), fixed_width_column_wrapper{10, 7, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_ns), + fixed_width_column_wrapper{12, 1, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_D), fixed_width_column_wrapper{26, 4, 25}); @@ -117,6 +131,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{26, 4, 25}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_ms), fixed_width_column_wrapper{26, 4, 25}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_ns), + fixed_width_column_wrapper{31, 1, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_D), fixed_width_column_wrapper{2, 3, 3}); @@ -124,6 +140,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{2, 3, 3}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_ms), fixed_width_column_wrapper{2, 3, 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_ms), + fixed_width_column_wrapper{2, 3, 3}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_D), fixed_width_column_wrapper{0, 0, 0}); @@ -131,6 +149,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{14, 12, 7}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_ms), fixed_width_column_wrapper{14, 12, 7}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_ns), + fixed_width_column_wrapper{23, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_D), fixed_width_column_wrapper{0, 0, 0}); @@ -138,6 +158,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{1, 0, 32}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_ms), fixed_width_column_wrapper{1, 0, 32}); + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_ns), + fixed_width_column_wrapper{59, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps_D), fixed_width_column_wrapper{0, 0, 0}); @@ -145,6 +167,35 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{12, 0, 12}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps_ms), fixed_width_column_wrapper{12, 0, 12}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_ns), + fixed_width_column_wrapper{59, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_D), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_s), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_ms), + fixed_width_column_wrapper{762, 0, 929}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_ns), + fixed_width_column_wrapper{976, 23, 987}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_D), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_s), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_ms), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_ns), + fixed_width_column_wrapper{675, 432, 234}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_D), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_s), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ms), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ns), + fixed_width_column_wrapper{766, 424, 623}); } template @@ 
-175,6 +226,9 @@ TYPED_TEST(TypedDatetimeOpsTest, TestEmptyColumns) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps), int16s); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps), int16s); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps), int16s); } TYPED_TEST(TypedDatetimeOpsTest, TestExtractingGeneratedDatetimeComponents) diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 8e0e3bbd411..6edd15e7176 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -262,12 +262,15 @@ Time/date components DatetimeIndex.hour DatetimeIndex.minute DatetimeIndex.second - DatetimeIndex.dayofweek - DatetimeIndex.dayofyear + DatetimeIndex.microsecond + DatetimeIndex.nanosecond DatetimeIndex.day_of_year + DatetimeIndex.dayofyear + DatetimeIndex.dayofweek DatetimeIndex.weekday - DatetimeIndex.is_leap_year DatetimeIndex.quarter + DatetimeIndex.is_leap_year + DatetimeIndex.isocalendar Time-specific operations diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 53042041f6d..842319338b3 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -260,25 +260,27 @@ Datetime properties .. autosummary:: :toctree: api/ + year + month day - dayofweek - dayofyear - days_in_month - day_of_year hour minute - month second + microsecond + nanosecond + dayofweek weekday - year - is_leap_year + dayofyear + day_of_year + quarter is_month_start is_month_end is_quarter_start is_quarter_end is_year_start is_year_end - quarter + is_leap_year + days_in_month Datetime methods ^^^^^^^^^^^^^^^^ @@ -286,11 +288,11 @@ Datetime methods .. 
autosummary:: :toctree: api/ - strftime isocalendar - ceil - floor + strftime round + floor + ceil Timedelta properties @@ -300,11 +302,11 @@ Timedelta properties .. autosummary:: :toctree: api/ - components days + seconds microseconds nanoseconds - seconds + components .. _api.series.str: .. include:: string_handling.rst diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 74addb87357..d03587745e1 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -15,6 +15,15 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] extract_hour(const column_view& column) except + cdef unique_ptr[column] extract_minute(const column_view& column) except + cdef unique_ptr[column] extract_second(const column_view& column) except + + cdef unique_ptr[column] extract_millisecond_fraction( + const column_view& column + ) except + + cdef unique_ptr[column] extract_microsecond_fraction( + const column_view& column + ) except + + cdef unique_ptr[column] extract_nanosecond_fraction( + const column_view& column + ) except + ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency": DAY "cudf::datetime::rounding_frequency::DAY" diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index e218400a2db..cb0a245b915 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -49,6 +49,18 @@ def extract_datetime_component(Column col, object field): c_result = move(libcudf_datetime.extract_minute(col_view)) elif field == "second": c_result = move(libcudf_datetime.extract_second(col_view)) + elif field == "millisecond": + c_result = move( + libcudf_datetime.extract_millisecond_fraction(col_view) + ) + elif field == "microsecond": + c_result = move( + libcudf_datetime.extract_microsecond_fraction(col_view) + ) + elif field == "nanosecond": + c_result = move( + 
libcudf_datetime.extract_nanosecond_fraction(col_view) + ) elif field == "day_of_year": c_result = move(libcudf_datetime.day_of_year(col_view)) else: diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 5b101f74664..0628497fc29 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2051,6 +2051,56 @@ def second(self): """ return self._get_dt_field("second") + @property # type: ignore + @_cudf_nvtx_annotate + def microsecond(self): + """ + The microseconds of the datetime. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_index = cudf.Index(pd.date_range("2000-01-01", + ... periods=3, freq="us")) + >>> datetime_index + DatetimeIndex([ '2000-01-01 00:00:00', '2000-01-01 00:00:00.000001', + '2000-01-01 00:00:00.000002'], + dtype='datetime64[ns]') + >>> datetime_index.microsecond + Int32Index([0, 1, 2], dtype='int32') + """ # noqa: E501 + return as_index( + ( + self._values.get_dt_field("millisecond") + * cudf.Scalar(1000, dtype="int32") + ) + + self._values.get_dt_field("microsecond"), + name=self.name, + ) + + @property # type: ignore + @_cudf_nvtx_annotate + def nanosecond(self): + """ + The nanoseconds of the datetime. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_index = cudf.Index(pd.date_range("2000-01-01", + ... 
periods=3, freq="ns")) + >>> datetime_index + DatetimeIndex([ '2000-01-01 00:00:00', + '2000-01-01 00:00:00.000000001', + '2000-01-01 00:00:00.000000002'], + dtype='datetime64[ns]') + >>> datetime_index.nanosecond + Int16Index([0, 1, 2], dtype='int16') + """ + return self._get_dt_field("nanosecond") + @property # type: ignore @_cudf_nvtx_annotate def weekday(self): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index f11052096e3..7493202a3d1 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1206,7 +1206,8 @@ def __repr__(self): and not is_decimal_dtype(preprocess.dtype) and not is_struct_dtype(preprocess.dtype) ) or isinstance( - preprocess._column, cudf.core.column.timedelta.TimeDeltaColumn + preprocess._column, + cudf.core.column.timedelta.TimeDeltaColumn, ): output = repr( preprocess.astype("O").fillna(cudf._NA_REP).to_pandas() @@ -3591,6 +3592,64 @@ def second(self): """ return self._get_dt_field("second") + @property # type: ignore + @_cudf_nvtx_annotate + def microsecond(self): + """ + The microseconds of the datetime. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_series = cudf.Series(pd.date_range("2000-01-01", + ... periods=3, freq="us")) + >>> datetime_series + 0 2000-01-01 00:00:00.000000 + 1 2000-01-01 00:00:00.000001 + 2 2000-01-01 00:00:00.000002 + dtype: datetime64[ns] + >>> datetime_series.dt.microsecond + 0 0 + 1 1 + 2 2 + dtype: int32 + """ + return Series( + data=( + self.series._column.get_dt_field("millisecond") + * cudf.Scalar(1000, dtype="int32") + ) + + self.series._column.get_dt_field("microsecond"), + index=self.series._index, + name=self.series.name, + ) + + @property # type: ignore + @_cudf_nvtx_annotate + def nanosecond(self): + """ + The nanoseconds of the datetime. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_series = cudf.Series(pd.date_range("2000-01-01", + ... 
periods=3, freq="ns")) + >>> datetime_series + 0 2000-01-01 00:00:00.000000000 + 1 2000-01-01 00:00:00.000000001 + 2 2000-01-01 00:00:00.000000002 + dtype: datetime64[ns] + >>> datetime_series.dt.nanosecond + 0 0 + 1 1 + 2 2 + dtype: int16 + """ + return self._get_dt_field("nanosecond") + @property # type: ignore @_cudf_nvtx_annotate def weekday(self): diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 800a8aeeab5..bd3b3561701 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -28,7 +28,9 @@ def data1(): def data2(): - return pd.date_range("20010101", "20020215", freq="400h", name="times") + return pd.date_range( + "20010101", freq="243434324423423234N", name="times", periods=10 + ) def timeseries_us_data(): @@ -81,6 +83,8 @@ def numerical_data(): "hour", "minute", "second", + "microsecond", + "nanosecond", "weekday", "dayofweek", "dayofyear", @@ -172,7 +176,7 @@ def test_dt_ops(data): # libcudf doesn't respect timezones -@pytest.mark.parametrize("data", [data1()]) +@pytest.mark.parametrize("data", [data1(), data2()]) @pytest.mark.parametrize("field", fields) def test_dt_series(data, field): pd_data = pd.Series(data.copy()) @@ -182,7 +186,7 @@ def test_dt_series(data, field): assert_eq(base, test) -@pytest.mark.parametrize("data", [data1()]) +@pytest.mark.parametrize("data", [data1(), data2()]) @pytest.mark.parametrize("field", fields) def test_dt_index(data, field): pd_data = data.copy() From afa16b433f0d468d3ee933d93c7945a7078cad52 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 17 Oct 2022 20:24:49 -0500 Subject: [PATCH 041/202] Fix documentation referring to removed as_gpu_matrix method. (#11937) This fixes outdated documentation that refers to the `as_gpu_matrix` method, which was removed. 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/11937 --- docs/cudf/source/user_guide/cupy-interop.ipynb | 2 +- python/cudf/cudf/tests/test_dataframe.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/cudf/source/user_guide/cupy-interop.ipynb b/docs/cudf/source/user_guide/cupy-interop.ipynb index 9fbac3b2578..47c6ba408fb 100644 --- a/docs/cudf/source/user_guide/cupy-interop.ipynb +++ b/docs/cudf/source/user_guide/cupy-interop.ipynb @@ -42,7 +42,7 @@ "\n", "2. We can also use `DataFrame.values`.\n", "\n", - "3. We can also convert via the [CUDA array interface](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html) by using cuDF's `as_gpu_matrix` and CuPy's `asarray` functionality." + "3. We can also convert via the [CUDA array interface](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html) by using cuDF's `to_cupy` functionality." 
] }, { diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index d9e9a4dbba1..1fcfbe5fc91 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -4286,26 +4286,26 @@ def test_series_values_property(data): pytest.param( {"A": [1, None, 3], "B": [1, 2, None]}, marks=pytest.mark.xfail( - reason="Nulls not supported by as_gpu_matrix" + reason="Nulls not supported by values accessor" ), ), pytest.param( {"A": [None, None, None], "B": [None, None, None]}, marks=pytest.mark.xfail( - reason="Nulls not supported by as_gpu_matrix" + reason="Nulls not supported by values accessor" ), ), {"A": [], "B": []}, pytest.param( {"A": [1, 2, 3], "B": ["a", "b", "c"]}, marks=pytest.mark.xfail( - reason="str or categorical not supported by as_gpu_matrix" + reason="str or categorical not supported by values accessor" ), ), pytest.param( {"A": pd.Categorical(["a", "b", "c"]), "B": ["d", "e", "f"]}, marks=pytest.mark.xfail( - reason="str or categorical not supported by as_gpu_matrix" + reason="str or categorical not supported by values accessor" ), ), ], From a926c52d58c08657f8d437210ce31fddeaa868e7 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 17 Oct 2022 21:12:11 -0500 Subject: [PATCH 042/202] Add `.str.find_multiple` API (#11928) Resolves: https://github.com/rapidsai/cudf/issues/10126 This PR adds `.str.find_multiple` API. 
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/11928 --- docs/cudf/source/api_docs/string_handling.rst | 1 + python/cudf/cudf/_lib/strings/__init__.py | 1 + python/cudf/cudf/core/column/string.py | 64 +++++++++++++++++++ python/cudf/cudf/tests/test_string.py | 61 ++++++++++++++++++ 4 files changed, 127 insertions(+) diff --git a/docs/cudf/source/api_docs/string_handling.rst b/docs/cudf/source/api_docs/string_handling.rst index 1496d68db6f..2285bb8fb7a 100644 --- a/docs/cudf/source/api_docs/string_handling.rst +++ b/docs/cudf/source/api_docs/string_handling.rst @@ -28,6 +28,7 @@ strings and apply several methods to it. These can be accessed like filter_tokens find findall + find_multiple get get_json_object hex_to_int diff --git a/python/cudf/cudf/_lib/strings/__init__.py b/python/cudf/cudf/_lib/strings/__init__.py index ff558a06d87..22a5066a20e 100644 --- a/python/cudf/cudf/_lib/strings/__init__.py +++ b/python/cudf/cudf/_lib/strings/__init__.py @@ -61,6 +61,7 @@ startswith, startswith_multiple, ) +from cudf._lib.strings.find_multiple import find_multiple from cudf._lib.strings.findall import findall from cudf._lib.strings.json import GetJsonObjectOptions, get_json_object from cudf._lib.strings.padding import ( diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 287e68531f8..c84e4ff4adb 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -3623,6 +3623,70 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex: data = libstrings.findall(self._column, pat, flags) return self._return_or_inplace(data) + def find_multiple(self, patterns: SeriesOrIndex) -> "cudf.Series": + """ + Find all first occurrences of patterns in the Series/Index. 
+ + Parameters + ---------- + patterns : array-like, Sequence or Series + Patterns to search for in the given Series/Index. + + Returns + ------- + Series + A Series with a list of indices of each pattern's first occurrence. + If a pattern is not found, -1 is returned for that index. + + Examples + -------- + >>> import cudf + >>> s = cudf.Series(["strings", "to", "search", "in"]) + >>> s + 0 strings + 1 to + 2 search + 3 in + dtype: object + >>> t = cudf.Series(["a", "string", "g", "inn", "o", "r", "sea"]) + >>> t + 0 a + 1 string + 2 g + 3 inn + 4 o + 5 r + 6 sea + dtype: object + >>> s.str.find_multiple(t) + 0 [-1, 0, 5, -1, -1, 2, -1] + 1 [-1, -1, -1, -1, 1, -1, -1] + 2 [2, -1, -1, -1, -1, 3, 0] + 3 [-1, -1, -1, -1, -1, -1, -1] + dtype: list + """ + if can_convert_to_column(patterns): + patterns_column = column.as_column(patterns) + else: + raise TypeError( + "patterns should be an array-like or a Series object, " + f"found {type(patterns)}" + ) + + if not isinstance(patterns_column, StringColumn): + raise TypeError( + "patterns can only be of 'string' dtype, " + f"got: {patterns_column.dtype}" + ) + + return cudf.Series( + libstrings.find_multiple(self._column, patterns_column), + index=self._parent.index + if isinstance(self._parent, cudf.Series) + else self._parent, + name=self._parent.name, + ) + def isempty(self) -> SeriesOrIndex: """ Check whether each string is an empty string. 
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 74d602c2cf1..2a43adf5a5c 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -3423,3 +3423,64 @@ def test_str_join_lists(sr, sep, string_na_rep, sep_na_rep, expected): sep=sep, string_na_rep=string_na_rep, sep_na_rep=sep_na_rep ) assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "patterns, expected", + [ + ( + lambda: ["a", "s", "g", "i", "o", "r"], + [ + [-1, 0, 5, 3, -1, 2], + [-1, -1, -1, -1, 1, -1], + [2, 0, -1, -1, -1, 3], + [-1, -1, -1, 0, -1, -1], + ], + ), + ( + lambda: cudf.Series(["a", "string", "g", "inn", "o", "r", "sea"]), + [ + [-1, 0, 5, -1, -1, 2, -1], + [-1, -1, -1, -1, 1, -1, -1], + [2, -1, -1, -1, -1, 3, 0], + [-1, -1, -1, -1, -1, -1, -1], + ], + ), + ], +) +def test_str_find_multiple(patterns, expected): + s = cudf.Series(["strings", "to", "search", "in"]) + t = patterns() + + expected = cudf.Series(expected) + + # We convert to pandas because find_multiple returns ListDtype(int32) + # and expected is ListDtype(int64). + # Currently there is no easy way to type-cast these to match. + assert_eq(s.str.find_multiple(t).to_pandas(), expected.to_pandas()) + + s = cudf.Index(s) + t = cudf.Index(t) + + expected.index = s + + assert_eq(s.str.find_multiple(t).to_pandas(), expected.to_pandas()) + + +def test_str_find_multiple_error(): + s = cudf.Series(["strings", "to", "search", "in"]) + with pytest.raises( + TypeError, + match=re.escape( + "patterns should be an array-like or a Series object, found " + "" + ), + ): + s.str.find_multiple("a") + + t = cudf.Series([1, 2, 3]) + with pytest.raises( + TypeError, + match=re.escape("patterns can only be of 'string' dtype, got: int64"), + ): + s.str.find_multiple(t) From cea10cabd21732fd6334ca9a9956b99acfbf32ec Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 18 Oct 2022 12:07:26 -0500 Subject: [PATCH 043/202] Pin mimesis version in setup.py. 
(#11906) The dependency pinning for `mimesis` in cudf's `setup.py` didn't match the conda environment. It was missing a pinning to `<4.1` from #8745. However, based on the conversation in #8551, this pinning of `<4.1` was only chosen because 4.1.0 wasn't yet available on conda-forge. Since the current version of mimesis is now 6.1.1, this PR updates the mimesis pinning to `>=4.1` and uses `generate_string` instead of `schoice`. I tested this locally with mimesis 6.1.1 and mimesis 4.1.0 and both passed tests. Merge this PR concurrently with https://github.com/rapidsai/integration/pull/547. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/11906 --- conda/environments/cudf_dev_cuda11.5.yml | 2 +- python/cudf/cudf/testing/dataset_generator.py | 4 ++-- python/cudf/setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index 142d3c7d9cb..e27e8557c80 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -63,7 +63,7 @@ dependencies: - myst-nb - scipy - dask-cuda=22.12.* - - mimesis<4.1 + - mimesis>=4.1.0 - packaging - protobuf>=3.20.1,<3.21.0a0 - nvtx>=0.2.1 diff --git a/python/cudf/cudf/testing/dataset_generator.py b/python/cudf/cudf/testing/dataset_generator.py index 4d24e7ff2a2..2867c4d10eb 100644 --- a/python/cudf/cudf/testing/dataset_generator.py +++ b/python/cudf/cudf/testing/dataset_generator.py @@ -502,7 +502,7 @@ def rand_dataframe( cardinality=cardinality, null_frequency=null_frequency, generator=lambda cardinality=cardinality: [ - mimesis.random.random.schoice( + mimesis.random.random.generate_string( string.printable, np.random.randint( low=0, @@ -684,7 +684,7 @@ def get_values_for_nested_data(dtype, lists_max_length=None, size=None): values = 
float_generator(dtype=dtype, size=cardinality)() elif dtype.kind in ("U", "O"): values = [ - mimesis.random.random.schoice( + mimesis.random.random.generate_string( string.printable, 100, ) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 93948afc0f6..3ebb66cb0ad 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -31,7 +31,7 @@ "pytest-benchmark", "pytest-xdist", "hypothesis", - "mimesis", + "mimesis>=4.1.0", "fastavro>=0.22.9", "python-snappy>=0.6.0", "pyorc", From 1effe19ab6384b229fc7e58c8109bbf279e3ac61 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Tue, 18 Oct 2022 13:09:19 -0400 Subject: [PATCH 044/202] Removing int8 column option from parquet byte_array writing (#11539) As suggested in #11526 and captured in issue #11536 the usage of both INT8 and UINT8 as supported types for byte_arrays is unnecessary and adds complexity to the code. This change removes INT8 as an option and only allows UINT8 columns to be written out as byte_arrays. ~~This matches with cudf string columns which contain an INT8 column for data.~~ closes #11536 Authors: - Mike Wilson (https://github.com/hyperbolic2346) Approvers: - Tobias Ribizel (https://github.com/upsj) - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) - MithunR (https://github.com/mythrocks) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/11539 --- cpp/src/io/parquet/parquet_gpu.hpp | 5 +-- cpp/src/io/parquet/writer_impl.cu | 2 +- cpp/src/io/utilities/column_buffer.cpp | 21 ++++++++--- cpp/src/io/utilities/column_utils.cuh | 4 +-- cpp/src/lists/dremel.cu | 3 +- cpp/src/reshape/byte_cast.cu | 16 +++++++-- cpp/tests/io/parquet_test.cpp | 36 +++++++++---------- cpp/tests/reshape/byte_cast_tests.cpp | 4 +-- .../java/ai/rapids/cudf/ColumnVectorTest.java | 2 +- .../test/java/ai/rapids/cudf/TableTest.java | 2 +- 10 files changed, 57 insertions(+), 38 deletions(-) diff --git a/cpp/src/io/parquet/parquet_gpu.hpp 
b/cpp/src/io/parquet/parquet_gpu.hpp index 1a8c0f4cd9e..38a0d70b0f8 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -321,10 +321,7 @@ inline size_type __device__ row_to_value_idx(size_type idx, } else { auto list_col = cudf::detail::lists_column_device_view(col); auto child = list_col.child(); - if (parquet_col.output_as_byte_array && - (child.type().id() == type_id::INT8 || child.type().id() == type_id::UINT8)) { - break; - } + if (parquet_col.output_as_byte_array && child.type().id() == type_id::UINT8) { break; } idx = list_col.offset_at(idx); col = child; } diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 9514b053451..f2089d27a87 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -511,7 +511,7 @@ std::vector construct_schema_tree( if (col->type().id() != type_id::LIST) { return false; } auto const child_col_type = col->children[lists_column_view::child_column_index]->type().id(); - return child_col_type == type_id::INT8 or child_col_type == type_id::UINT8; + return child_col_type == type_id::UINT8; }; // There is a special case for a list column with one byte column child. 
This column can diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp index e2d209a7c0a..de145486662 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -22,6 +22,7 @@ #include "column_buffer.hpp" #include #include +#include namespace cudf { namespace io { @@ -78,7 +79,19 @@ std::unique_ptr make_column(column_buffer& buffer, // convert to binary auto const string_col = make_strings_column(*buffer._strings, stream, mr); auto const num_rows = string_col->size(); - auto col_contest = string_col->release(); + auto col_content = string_col->release(); + + // convert to uint8 column, strings are currently stores as int8 + auto contents = + col_content.children[strings_column_view::chars_column_index].release()->release(); + auto data = contents.data.release(); + auto null_mask = contents.null_mask.release(); + + auto uint8_col = std::make_unique(data_type{type_id::UINT8}, + data->size(), + std::move(*data), + std::move(*null_mask), + UNKNOWN_NULL_COUNT); if (schema_info != nullptr) { schema_info->children.push_back(column_name_info{"offsets"}); @@ -87,10 +100,10 @@ std::unique_ptr make_column(column_buffer& buffer, return make_lists_column( num_rows, - std::move(col_contest.children[strings_column_view::offsets_column_index]), - std::move(col_contest.children[strings_column_view::chars_column_index]), + std::move(col_content.children[strings_column_view::offsets_column_index]), + std::move(uint8_col), UNKNOWN_NULL_COUNT, - std::move(*col_contest.null_mask)); + std::move(*col_content.null_mask)); } case type_id::LIST: { diff --git a/cpp/src/io/utilities/column_utils.cuh b/cpp/src/io/utilities/column_utils.cuh index fbeaaa9c0fc..598c93a1a4f 100644 --- a/cpp/src/io/utilities/column_utils.cuh +++ b/cpp/src/io/utilities/column_utils.cuh @@ -64,7 +64,7 @@ rmm::device_uvector create_leaf_column_device_views( iter, iter + parent_table_device_view.num_columns(), [col_desc, parent_col_view = 
parent_table_device_view, leaf_columns] __device__( - size_type index) mutable { + size_type index) { col_desc[index].parent_column = parent_col_view.begin() + index; column_device_view col = parent_col_view.column(index); // traverse till leaf column @@ -74,7 +74,7 @@ rmm::device_uvector create_leaf_column_device_views( : col.child(0); // stop early if writing a byte array if (col_desc[index].stats_dtype == dtype_byte_array && - (child.type().id() == type_id::INT8 || child.type().id() == type_id::UINT8)) { + child.type().id() == type_id::UINT8) { break; } col = child; diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu index 25094536cce..66134138a5c 100644 --- a/cpp/src/lists/dremel.cu +++ b/cpp/src/lists/dremel.cu @@ -192,8 +192,7 @@ dremel_data get_dremel_data(column_view h_col, } if (curr_col.type().id() == type_id::LIST) { auto child = curr_col.child(lists_column_view::child_column_index); - if ((child.type().id() == type_id::INT8 || child.type().id() == type_id::UINT8) && - output_as_byte_array) { + if (output_as_byte_array && child.type().id() == type_id::UINT8) { // consider this the bottom break; } diff --git a/cpp/src/reshape/byte_cast.cu b/cpp/src/reshape/byte_cast.cu index 639ddb33e9a..3d0510e1e6b 100644 --- a/cpp/src/reshape/byte_cast.cu +++ b/cpp/src/reshape/byte_cast.cu @@ -101,11 +101,21 @@ std::unique_ptr byte_list_conversion::operator()( auto strings_count = input_strings.size(); if (strings_count == 0) return cudf::empty_like(input_column); - auto contents = std::make_unique(input_column, stream, mr)->release(); + auto col_content = std::make_unique(input_column, stream, mr)->release(); + auto contents = + col_content.children[strings_column_view::chars_column_index].release()->release(); + auto data = contents.data.release(); + auto null_mask = contents.null_mask.release(); + auto uint8_col = std::make_unique(data_type{type_id::UINT8}, + data->size(), + std::move(*data), + std::move(*null_mask), + UNKNOWN_NULL_COUNT); + return 
make_lists_column( input_column.size(), - std::move(contents.children[cudf::strings_column_view::offsets_column_index]), - std::move(contents.children[cudf::strings_column_view::chars_column_index]), + std::move(col_content.children[cudf::strings_column_view::offsets_column_index]), + std::move(uint8_col), input_column.null_count(), detail::copy_bitmask(input_column, stream, mr), stream, diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 6f1c5ef7eb1..b13e875eabd 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -718,17 +718,17 @@ TEST_F(ParquetWriterTest, StringsAsBinary) column_wrapper col0{ascii_strings.begin(), ascii_strings.end()}; column_wrapper col1{unicode_strings.begin(), unicode_strings.end()}; column_wrapper col2{ascii_strings.begin(), ascii_strings.end()}; - cudf::test::lists_column_wrapper col3{{'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'M', 'o', 'n', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'u', 'n', 'd', 'a', 'y'}}; - cudf::test::lists_column_wrapper col4{ + cudf::test::lists_column_wrapper col3{{'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'M', 'o', 'n', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'u', 'n', 'd', 'a', 'y'}}; + cudf::test::lists_column_wrapper col4{ {'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', -56, -123, 'd', 'n', -56, -123, 's', 'd', 'a', 'y'}, + {'W', 200, 133, 'd', 'n', 200, 133, 's', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}, {'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}, @@ -4459,13 +4459,13 @@ TEST_F(ParquetReaderTest, BinaryAsStrings) auto seq_col0 = random_values(num_rows); auto seq_col2 = random_values(num_rows); - auto seq_col3 = 
random_values(num_rows); + auto seq_col3 = random_values(num_rows); auto validity = cudf::test::iterators::no_nulls(); column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; column_wrapper string_col{strings.begin(), strings.end()}; column_wrapper float_col{seq_col2.begin(), seq_col2.end(), validity}; - cudf::test::lists_column_wrapper list_int_col{ + cudf::test::lists_column_wrapper list_int_col{ {'M', 'o', 'n', 'd', 'a', 'y'}, {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}, @@ -4526,12 +4526,12 @@ TEST_F(ParquetReaderTest, NestedByteArray) auto seq_col0 = random_values(num_rows); auto seq_col2 = random_values(num_rows); - auto seq_col3 = random_values(num_rows); + auto seq_col3 = random_values(num_rows); auto const validity = cudf::test::iterators::no_nulls(); column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; column_wrapper float_col{seq_col2.begin(), seq_col2.end(), validity}; - cudf::test::lists_column_wrapper list_list_int_col{ + cudf::test::lists_column_wrapper list_list_int_col{ {{'M', 'o', 'n', 'd', 'a', 'y'}, {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}, @@ -4637,12 +4637,12 @@ TEST_F(ParquetReaderTest, StructByteArray) { constexpr auto num_rows = 100; - auto seq_col0 = random_values(num_rows); + auto seq_col0 = random_values(num_rows); auto const validity = cudf::test::iterators::no_nulls(); - column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; - cudf::test::lists_column_wrapper list_of_int{{seq_col0.begin(), seq_col0.begin() + 50}, - {seq_col0.begin() + 50, seq_col0.end()}}; + column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; + cudf::test::lists_column_wrapper list_of_int{{seq_col0.begin(), seq_col0.begin() + 50}, + {seq_col0.begin() + 50, seq_col0.end()}}; auto struct_col = cudf::test::structs_column_wrapper{{list_of_int}, validity}; auto const expected = table_view{{struct_col}}; diff --git 
a/cpp/tests/reshape/byte_cast_tests.cpp b/cpp/tests/reshape/byte_cast_tests.cpp index e5f3b8a1f7f..f29b3a7980f 100644 --- a/cpp/tests/reshape/byte_cast_tests.cpp +++ b/cpp/tests/reshape/byte_cast_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -327,7 +327,7 @@ TEST_F(ByteCastTest, StringValues) { strings_column_wrapper const strings_col( {"", "The quick", " brown fox...", "!\"#$%&\'()*+,-./", "0123456789:;<=>?@", "[\\]^_`{|}~"}); - lists_column_wrapper const strings_expected( + lists_column_wrapper const strings_expected( {{}, {0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b}, {0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f, 0x78, 0x2e, 0x2e, 0x2e}, diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 7afd5abb358..f5c32b0da20 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -3942,7 +3942,7 @@ void testCastStringToByteList() { "\\THE\t8\ud720", "tést strings", "", "éé"); ColumnVector res = cv.asByteList(true); ColumnVector expected = ColumnVector.fromLists(new HostColumnVector.ListType(true, - new HostColumnVector.BasicType(true, DType.INT8)), list1, list2, list3, list4, list5, + new HostColumnVector.BasicType(true, DType.UINT8)), list1, list2, list3, list4, list5, list6, list7, list8)) { assertColumnsAreEqual(expected, res); } diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 4649a0e3507..f31da054091 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -621,7 +621,7 @@ void testParquetWriteToBufferChunkedBinary() { List bin2 = asList(string2); 
try (Table binTable = new Table.TestBuilder() - .column(new ListType(true, new BasicType(false, DType.INT8)), + .column(new ListType(true, new BasicType(false, DType.UINT8)), bin1, bin2) .build(); Table stringTable = new Table.TestBuilder() From 5d5715933c97c118255010b57bb07bd58651e218 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 18 Oct 2022 11:46:57 -0700 Subject: [PATCH 045/202] Initial draft of policies and guidelines for libcudf usage. (#11853) This PR adds a section to the developer documentation about various libcudf design decisions that affect users. These policies are important for us to document and communicate consistently. I am not sure what the best place for this information is, but I think the developer docs are a good place to start since until we address #11481 we don't have a great way to publish any non-API user-facing libcudf documentation. I've created this draft PR to solicit feedback from other libcudf devs about other policies that we should be documenting in a similar manner. Once everyone is happy with the contents, I would suggest that we merge this into the dev docs for now and then revisit a better place once we've tackled #11481. Partly addresses #5505, #1781. Resolves #4511. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Jake Hemstad (https://github.com/jrhemstad) - Bradley Dice (https://github.com/bdice) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/11853 --- .../developer_guide/DEVELOPER_GUIDE.md | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index b3774aeda38..52c443cd764 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -346,6 +346,63 @@ the device view can be obtained via function `column_device_view::create(column_ data, a specialized device view for list columns can be constructed via `lists_column_device_view(column_device_view)`. +# libcudf Policies and Design Principles + +`libcudf` is designed to provide thread-safe, single-GPU accelerated algorithm primitives for solving a wide variety of problems that arise in data science. +APIs are written to execute on the default GPU, which can be controlled by the caller through standard CUDA device APIs or environment variables like `CUDA_VISIBLE_DEVICES`. +Our goal is to enable diverse use cases like Spark or Pandas to benefit from the performance of GPUs, and libcudf relies on these higher-level layers like Spark or Dask to orchestrate multi-GPU tasks. + +To best satisfy these use-cases, libcudf prioritizes performance and flexibility, which sometimes may come at the cost of convenience. +While we welcome users to use libcudf directly, we design with the expectation that most users will be consuming libcudf through higher-level layers like Spark or cuDF Python that handle some of details that direct users of libcudf must handle on their own. +We document these policies and the reasons behind them here. + +## libcudf does not introspect data + +libcudf APIs generally do not perform deep introspection and validation of input data. 
+There are numerous reasons for this: +1. It violates the single responsibility principle: validation is separate from execution. +2. Since libcudf data structures store data on the GPU, any validation incurs _at minimum_ the overhead of a kernel launch, and may in general be prohibitively expensive. +3. API promises around data introspection often significantly complicate implementation. + +Users are therefore responsible for passing valid data into such APIs. +_Note that this policy does not mean that libcudf performs no validation whatsoever_. +libcudf APIs should still perform any validation that does not require introspection. +To give some idea of what should or should not be validated, here are (non-exhaustive) lists of examples. + +**Things that libcudf should validate**: +- Input column/table sizes or dtypes + +**Things that libcudf should not validate**: +- Integer overflow +- Ensuring that outputs will not exceed the 2GB size limit for a given set of inputs + + +## libcudf expects nested types to have sanitized null masks + +Various libcudf APIs accepting columns of nested dtypes (such as `LIST` or `STRUCT`) may assume that these columns have been sanitized. +In this context, sanitization refers to ensuring that the null elements in a column with a nested dtype are compatible with the elements of nested columns. +Specifically: +- Null elements of list columns should also be empty. The starting offset of a null element should be equal to the ending offset. +- Null elements of struct columns should also be null elements in the underlying structs. +- For compound columns, nulls should only be present at the level of the parent column. Child columns should not contain nulls. +- Slice operations on nested columns do not propagate offsets to child columns. + +libcudf APIs _should_ promise to never return "dirty" columns, i.e. columns containing unsanitized data. 
+Therefore, the only problem is if users construct input columns that are not correctly sanitized and then pass those into libcudf APIs. + +## Treat libcudf APIs as if they were asynchronous + +libcudf APIs called on the host do not guarantee that the stream is synchronized before returning. +Work in libcudf occurs on `cudf::get_default_stream().value()`, which defaults to the CUDA default stream (stream 0). +Note that the stream 0 behavior differs if [per-thread default stream is enabled](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) via `CUDF_USE_PER_THREAD_DEFAULT_STREAM`. +Any data provided to or returned by libcudf that uses a separate non-blocking stream requires synchronization with the default libcudf stream to ensure stream safety. + +## libcudf generally does not make ordering guarantees + +Functions like merge or groupby in libcudf make no guarantees about the order of entries in the output. +Promising deterministic ordering is not, in general, conducive to fast parallel algorithms. +Calling code is responsible for performing sorts after the fact if sorted outputs are needed. + # libcudf++ API and Implementation ## Streams From 425fb029057858797a167f237c9dd6d2d93e2645 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 18 Oct 2022 15:47:03 -0500 Subject: [PATCH 046/202] Update flake8 to 5.0.4 and use flake8-force to check Cython. (#11736) Resolves #11684, required for eventually supporting Python 3.10 (which requires flake8 >= 4.0.0). flake8 >= 4.0.0, however, does not support parsing Cython code, even with rule exclusions. This necessitates the flake8-force plugin, which was designed (by a cupy developer) for forcing flake8 to check Cython code with a limited set of rules. Per this comment (https://github.com/rapidsai/cudf/issues/11684#issuecomment-1258747331), this PR removes duplicate pinnings between pre-commit configuration and the developer conda environment. 
Developers should use pre-commit for style checks consistent with the CI environment. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Lawrence Mitchell (https://github.com/wence-) - AJ Schmidt (https://github.com/ajschmidt8) - GALI PREM SAGAR (https://github.com/galipremsagar) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/11736 --- .pre-commit-config.yaml | 6 +- conda/environments/cudf_dev_cuda11.5.yml | 8 +- python/cudf/cudf/_lib/cpp/io/avro.pxd | 26 +- python/cudf/cudf/_lib/cpp/io/csv.pxd | 290 ++++++++++----------- python/cudf/cudf/_lib/cpp/io/json.pxd | 64 ++--- python/cudf/cudf/_lib/cpp/io/orc.pxd | 172 ++++++------ python/cudf/cudf/_lib/cpp/io/parquet.pxd | 62 ++--- python/cudf/cudf/core/dataframe.py | 16 +- python/cudf/cudf/core/indexed_frame.py | 8 +- python/cudf/cudf/core/series.py | 4 +- python/cudf/cudf/utils/hash_vocab_utils.py | 7 +- setup.cfg | 9 +- 12 files changed, 342 insertions(+), 330 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1046f4ebe6f..b4e57947cf9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,12 +18,14 @@ repos: # Explicitly specify the pyproject.toml at the repo root, not per-project. 
args: ["--config", "pyproject.toml"] - repo: https://github.com/PyCQA/flake8 - rev: 3.8.3 + rev: 5.0.4 hooks: - id: flake8 args: ["--config=setup.cfg"] - files: python/.*\.(py|pyx|pxd)$ + files: python/.*$ types: [file] + types_or: [python, cython] + additional_dependencies: ["flake8-force"] - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v0.971' hooks: diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index e27e8557c80..d7178198358 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -38,15 +38,9 @@ dependencies: - ipython - pandoc<=2.0.0 - cudatoolkit=11.5 - - cuda-python >=11.5,<11.7.1 + - cuda-python>=11.5,<11.7.1 - pip - - flake8=3.8.3 - - black=22.3.0 - - isort=5.10.1 - - mypy=0.971 - - types-cachetools - doxygen=1.8.20 - - pydocstyle=6.1.1 - typing_extensions - pre-commit - dask>=2022.9.2 diff --git a/python/cudf/cudf/_lib/cpp/io/avro.pxd b/python/cudf/cudf/_lib/cpp/io/avro.pxd index 6efe42e5208..9b683e5bce3 100644 --- a/python/cudf/cudf/_lib/cpp/io/avro.pxd +++ b/python/cudf/cudf/_lib/cpp/io/avro.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
from libcpp.string cimport string from libcpp.vector cimport vector @@ -11,17 +11,17 @@ cdef extern from "cudf/io/avro.hpp" \ namespace "cudf::io" nogil: cdef cppclass avro_reader_options: - avro_reader_options() except+ - cudf_io_types.source_info get_source() except+ - vector[string] get_columns() except+ - size_type get_skip_rows() except+ - size_type get_num_rows() except+ + avro_reader_options() except + + cudf_io_types.source_info get_source() except + + vector[string] get_columns() except + + size_type get_skip_rows() except + + size_type get_num_rows() except + # setters - void set_columns(vector[string] col_names) except+ - void set_skip_rows(size_type val) except+ - void set_num_rows(size_type val) except+ + void set_columns(vector[string] col_names) except + + void set_skip_rows(size_type val) except + + void set_num_rows(size_type val) except + @staticmethod avro_reader_options_builder builder( @@ -29,13 +29,13 @@ cdef extern from "cudf/io/avro.hpp" \ ) except + cdef cppclass avro_reader_options_builder: - avro_reader_options_builder() except+ + avro_reader_options_builder() except + avro_reader_options_builder( cudf_io_types.source_info src ) except + - avro_reader_options_builder& columns(vector[string] col_names) except+ - avro_reader_options_builder& skip_rows(size_type val) except+ - avro_reader_options_builder& num_rows(size_type val) except+ + avro_reader_options_builder& columns(vector[string] col_names) except + + avro_reader_options_builder& skip_rows(size_type val) except + + avro_reader_options_builder& num_rows(size_type val) except + avro_reader_options build() except + diff --git a/python/cudf/cudf/_lib/cpp/io/csv.pxd b/python/cudf/cudf/_lib/cpp/io/csv.pxd index 4afd8732320..e8064557592 100644 --- a/python/cudf/cudf/_lib/cpp/io/csv.pxd +++ b/python/cudf/cudf/_lib/cpp/io/csv.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
from libc.stdint cimport uint8_t from libcpp cimport bool @@ -20,96 +20,96 @@ cdef extern from "cudf/io/csv.hpp" \ # Getter - cudf_io_types.source_info get_source() except+ + cudf_io_types.source_info get_source() except + # Reader settings - cudf_io_types.compression_type get_compression() except+ - size_t get_byte_range_offset() except+ - size_t get_byte_range_size() except+ - vector[string] get_names() except+ - string get_prefix() except+ - bool is_enabled_mangle_dupe_cols() except+ + cudf_io_types.compression_type get_compression() except + + size_t get_byte_range_offset() except + + size_t get_byte_range_size() except + + vector[string] get_names() except + + string get_prefix() except + + bool is_enabled_mangle_dupe_cols() except + # Filter settings - vector[string] get_use_cols_names() except+ - vector[int] get_use_cols_indexes() except+ - size_type get_nrows() except+ - size_type get_skiprows() except+ - size_type get_skipfooter() except+ - size_type get_header() except+ + vector[string] get_use_cols_names() except + + vector[int] get_use_cols_indexes() except + + size_type get_nrows() except + + size_type get_skiprows() except + + size_type get_skipfooter() except + + size_type get_header() except + # Parsing settings - char get_lineterminator() except+ - char get_delimiter() except+ - char get_thousands() except+ - char get_decimal() except+ - char get_comment() except+ - bool is_enabled_windowslinetermination() except+ - bool is_enabled_delim_whitespace() except+ - bool is_enabled_skipinitialspace() except+ - bool is_enabled_skip_blank_lines() except+ - cudf_io_types.quote_style get_quoting() except+ - char get_quotechar() except+ - bool is_enabled_doublequote() except+ - vector[string] get_parse_dates_names() except+ - vector[int] get_parse_dates_indexes() except+ - vector[string] get_parse_hex_names() except+ - vector[int] get_parse_hex_indexes() except+ + char get_lineterminator() except + + char get_delimiter() except + + char get_thousands() except 
+ + char get_decimal() except + + char get_comment() except + + bool is_enabled_windowslinetermination() except + + bool is_enabled_delim_whitespace() except + + bool is_enabled_skipinitialspace() except + + bool is_enabled_skip_blank_lines() except + + cudf_io_types.quote_style get_quoting() except + + char get_quotechar() except + + bool is_enabled_doublequote() except + + vector[string] get_parse_dates_names() except + + vector[int] get_parse_dates_indexes() except + + vector[string] get_parse_hex_names() except + + vector[int] get_parse_hex_indexes() except + # Conversion settings - vector[string] get_dtype() except+ - vector[string] get_true_values() except+ - vector[string] get_false_values() except+ - vector[string] get_na_values() except+ - bool is_enabled_keep_default_na() except+ - bool is_enabled_na_filter() except+ - bool is_enabled_dayfirst() except+ + vector[string] get_dtype() except + + vector[string] get_true_values() except + + vector[string] get_false_values() except + + vector[string] get_na_values() except + + bool is_enabled_keep_default_na() except + + bool is_enabled_na_filter() except + + bool is_enabled_dayfirst() except + # setter # Reader settings - void set_compression(cudf_io_types.compression_type comp) except+ - void set_byte_range_offset(size_t val) except+ - void set_byte_range_size(size_t val) except+ - void set_names(vector[string] val) except+ - void set_prefix(string pfx) except+ - void set_mangle_dupe_cols(bool val) except+ + void set_compression(cudf_io_types.compression_type comp) except + + void set_byte_range_offset(size_t val) except + + void set_byte_range_size(size_t val) except + + void set_names(vector[string] val) except + + void set_prefix(string pfx) except + + void set_mangle_dupe_cols(bool val) except + # Filter settings - void set_use_cols_names(vector[string] col_names) except+ - void set_use_cols_indexes(vector[int] col_ind) except+ - void set_nrows(size_type n_rows) except+ - void set_skiprows(size_type val) 
except+ - void set_skipfooter(size_type val) except+ - void set_header(size_type hdr) except+ + void set_use_cols_names(vector[string] col_names) except + + void set_use_cols_indexes(vector[int] col_ind) except + + void set_nrows(size_type n_rows) except + + void set_skiprows(size_type val) except + + void set_skipfooter(size_type val) except + + void set_header(size_type hdr) except + # Parsing settings - void set_lineterminator(char val) except+ - void set_delimiter(char val) except+ - void set_thousands(char val) except+ - void set_decimal(char val) except+ - void set_comment(char val) except+ - void enable_windowslinetermination(bool val) except+ - void enable_delim_whitespace(bool val) except+ - void enable_skipinitialspace(bool val) except+ - void enable_skip_blank_lines(bool val) except+ - void set_quoting(cudf_io_types.quote_style style) except+ - void set_quotechar(char val) except+ - void set_doublequote(bool val) except+ - void set_parse_dates(vector[string]) except+ - void set_parse_dates(vector[int]) except+ - void set_parse_hex(vector[string]) except+ - void set_parse_hex(vector[int]) except+ + void set_lineterminator(char val) except + + void set_delimiter(char val) except + + void set_thousands(char val) except + + void set_decimal(char val) except + + void set_comment(char val) except + + void enable_windowslinetermination(bool val) except + + void enable_delim_whitespace(bool val) except + + void enable_skipinitialspace(bool val) except + + void enable_skip_blank_lines(bool val) except + + void set_quoting(cudf_io_types.quote_style style) except + + void set_quotechar(char val) except + + void set_doublequote(bool val) except + + void set_parse_dates(vector[string]) except + + void set_parse_dates(vector[int]) except + + void set_parse_hex(vector[string]) except + + void set_parse_hex(vector[int]) except + # Conversion settings - void set_dtypes(vector[data_type] types) except+ - void set_dtypes(map[string, data_type] types) except+ - void 
set_true_values(vector[string] vals) except+ - void set_false_values(vector[string] vals) except+ - void set_na_values(vector[string] vals) except+ - void enable_keep_default_na(bool val) except+ - void enable_na_filter(bool val) except+ - void enable_dayfirst(bool val) except+ - void set_timestamp_type(data_type type) except+ + void set_dtypes(vector[data_type] types) except + + void set_dtypes(map[string, data_type] types) except + + void set_true_values(vector[string] vals) except + + void set_false_values(vector[string] vals) except + + void set_na_values(vector[string] vals) except + + void enable_keep_default_na(bool val) except + + void enable_na_filter(bool val) except + + void enable_dayfirst(bool val) except + + void set_timestamp_type(data_type type) except + @staticmethod csv_reader_options_builder builder( @@ -125,115 +125,115 @@ cdef extern from "cudf/io/csv.hpp" \ csv_reader_options_builder& source( cudf_io_types.source_info info - ) except+ + ) except + # Reader settings csv_reader_options_builder& compression( cudf_io_types.compression_type comp - ) except+ - csv_reader_options_builder& byte_range_offset(size_t val) except+ - csv_reader_options_builder& byte_range_size(size_t val) except+ - csv_reader_options_builder& names(vector[string] val) except+ - csv_reader_options_builder& prefix(string pfx) except+ - csv_reader_options_builder& mangle_dupe_cols(bool val) except+ + ) except + + csv_reader_options_builder& byte_range_offset(size_t val) except + + csv_reader_options_builder& byte_range_size(size_t val) except + + csv_reader_options_builder& names(vector[string] val) except + + csv_reader_options_builder& prefix(string pfx) except + + csv_reader_options_builder& mangle_dupe_cols(bool val) except + # Filter settings csv_reader_options_builder& use_cols_names( vector[string] col_names - ) except+ + ) except + csv_reader_options_builder& use_cols_indexes( vector[int] col_ind - ) except+ - csv_reader_options_builder& nrows(size_type n_rows) 
except+ - csv_reader_options_builder& skiprows(size_type val) except+ - csv_reader_options_builder& skipfooter(size_type val) except+ - csv_reader_options_builder& header(size_type hdr) except+ + ) except + + csv_reader_options_builder& nrows(size_type n_rows) except + + csv_reader_options_builder& skiprows(size_type val) except + + csv_reader_options_builder& skipfooter(size_type val) except + + csv_reader_options_builder& header(size_type hdr) except + # Parsing settings - csv_reader_options_builder& lineterminator(char val) except+ - csv_reader_options_builder& delimiter(char val) except+ - csv_reader_options_builder& thousands(char val) except+ - csv_reader_options_builder& decimal(char val) except+ - csv_reader_options_builder& comment(char val) except+ - csv_reader_options_builder& windowslinetermination(bool val) except+ - csv_reader_options_builder& delim_whitespace(bool val) except+ - csv_reader_options_builder& skipinitialspace(bool val) except+ - csv_reader_options_builder& skip_blank_lines(bool val) except+ + csv_reader_options_builder& lineterminator(char val) except + + csv_reader_options_builder& delimiter(char val) except + + csv_reader_options_builder& thousands(char val) except + + csv_reader_options_builder& decimal(char val) except + + csv_reader_options_builder& comment(char val) except + + csv_reader_options_builder& windowslinetermination(bool val) except + + csv_reader_options_builder& delim_whitespace(bool val) except + + csv_reader_options_builder& skipinitialspace(bool val) except + + csv_reader_options_builder& skip_blank_lines(bool val) except + csv_reader_options_builder& quoting( cudf_io_types.quote_style style - ) except+ - csv_reader_options_builder& quotechar(char val) except+ - csv_reader_options_builder& doublequote(bool val) except+ - csv_reader_options_builder& parse_dates(vector[string]) except+ - csv_reader_options_builder& parse_dates(vector[int]) except+ + ) except + + csv_reader_options_builder& quotechar(char val) except 
+ + csv_reader_options_builder& doublequote(bool val) except + + csv_reader_options_builder& parse_dates(vector[string]) except + + csv_reader_options_builder& parse_dates(vector[int]) except + # Conversion settings - csv_reader_options_builder& dtypes(vector[string] types) except+ - csv_reader_options_builder& dtypes(vector[data_type] types) except+ + csv_reader_options_builder& dtypes(vector[string] types) except + + csv_reader_options_builder& dtypes(vector[data_type] types) except + csv_reader_options_builder& dtypes( map[string, data_type] types - ) except+ - csv_reader_options_builder& true_values(vector[string] vals) except+ - csv_reader_options_builder& false_values(vector[string] vals) except+ - csv_reader_options_builder& na_values(vector[string] vals) except+ - csv_reader_options_builder& keep_default_na(bool val) except+ - csv_reader_options_builder& na_filter(bool val) except+ - csv_reader_options_builder& dayfirst(bool val) except+ - csv_reader_options_builder& timestamp_type(data_type type) except+ + ) except + + csv_reader_options_builder& true_values(vector[string] vals) except + + csv_reader_options_builder& false_values(vector[string] vals) except + + csv_reader_options_builder& na_values(vector[string] vals) except + + csv_reader_options_builder& keep_default_na(bool val) except + + csv_reader_options_builder& na_filter(bool val) except + + csv_reader_options_builder& dayfirst(bool val) except + + csv_reader_options_builder& timestamp_type(data_type type) except + - csv_reader_options build() except+ + csv_reader_options build() except + cdef cudf_io_types.table_with_metadata read_csv( csv_reader_options &options ) except + cdef cppclass csv_writer_options: - csv_writer_options() except+ - - cudf_io_types.sink_info get_sink() except+ - cudf_table_view.table_view get_table() except+ - cudf_io_types.table_metadata get_metadata() except+ - string get_na_rep() except+ - bool is_enabled_include_header() except+ - size_type get_rows_per_chunk() 
except+ - string get_line_terminator() except+ - char get_inter_column_delimiter() except+ - string get_true_value() except+ - string get_false_value() except+ + csv_writer_options() except + + + cudf_io_types.sink_info get_sink() except + + cudf_table_view.table_view get_table() except + + cudf_io_types.table_metadata get_metadata() except + + string get_na_rep() except + + bool is_enabled_include_header() except + + size_type get_rows_per_chunk() except + + string get_line_terminator() except + + char get_inter_column_delimiter() except + + string get_true_value() except + + string get_false_value() except + # setter - void set_metadata(cudf_io_types.table_metadata* val) except+ - void set_na_rep(string val) except+ - void enable_include_header(bool val) except+ - void set_rows_per_chunk(size_type val) except+ - void set_line_terminator(string term) except+ - void set_inter_column_delimiter(char delim) except+ - void set__true_value(string val) except+ - void set_false_value(string val) except+ + void set_metadata(cudf_io_types.table_metadata* val) except + + void set_na_rep(string val) except + + void enable_include_header(bool val) except + + void set_rows_per_chunk(size_type val) except + + void set_line_terminator(string term) except + + void set_inter_column_delimiter(char delim) except + + void set__true_value(string val) except + + void set_false_value(string val) except + @staticmethod csv_writer_options_builder builder( cudf_io_types.sink_info sink, cudf_table_view.table_view table - ) except+ + ) except + cdef cppclass csv_writer_options_builder: - csv_writer_options_builder() except+ + csv_writer_options_builder() except + csv_writer_options_builder( cudf_io_types.sink_info sink, cudf_table_view.table_view table - ) except+ + ) except + csv_writer_options_builder& metadata( cudf_io_types.table_metadata* val - ) except+ - csv_writer_options_builder& na_rep(string val) except+ - csv_writer_options_builder& include_header(bool val) except+ - 
csv_writer_options_builder& rows_per_chunk(size_type val) except+ - csv_writer_options_builder& line_terminator(string term) except+ - csv_writer_options_builder& inter_column_delimiter(char delim) except+ - csv_writer_options_builder& true_value(string val) except+ - csv_writer_options_builder& false_value(string val) except+ - - csv_writer_options build() except+ + ) except + + csv_writer_options_builder& na_rep(string val) except + + csv_writer_options_builder& include_header(bool val) except + + csv_writer_options_builder& rows_per_chunk(size_type val) except + + csv_writer_options_builder& line_terminator(string term) except + + csv_writer_options_builder& inter_column_delimiter(char delim) except + + csv_writer_options_builder& true_value(string val) except + + csv_writer_options_builder& false_value(string val) except + + + csv_writer_options build() except + cdef void write_csv(csv_writer_options args) except + diff --git a/python/cudf/cudf/_lib/cpp/io/json.pxd b/python/cudf/cudf/_lib/cpp/io/json.pxd index 7333aad7ddf..ab87e2cbb4b 100644 --- a/python/cudf/cudf/_lib/cpp/io/json.pxd +++ b/python/cudf/cudf/_lib/cpp/io/json.pxd @@ -20,71 +20,71 @@ cdef extern from "cudf/io/json.hpp" \ map[string, schema_element] child_types cdef cppclass json_reader_options: - json_reader_options() except+ - cudf_io_types.source_info get_source() except+ - vector[string] get_dtypes() except+ + json_reader_options() except + + cudf_io_types.source_info get_source() except + + vector[string] get_dtypes() except + cudf_io_types.compression_type get_compression() except + - size_type get_byte_range_offset() except+ - size_type get_byte_range_size() except+ - bool is_enabled_lines() except+ - bool is_enabled_dayfirst() except+ - bool is_enabled_experimental() except+ + size_type get_byte_range_offset() except + + size_type get_byte_range_size() except + + bool is_enabled_lines() except + + bool is_enabled_dayfirst() except + + bool is_enabled_experimental() except + # setter - void 
set_dtypes(vector[data_type] types) except+ - void set_dtypes(map[string, schema_element] types) except+ + void set_dtypes(vector[data_type] types) except + + void set_dtypes(map[string, schema_element] types) except + void set_compression( cudf_io_types.compression_type compression - ) except+ - void set_byte_range_offset(size_type offset) except+ - void set_byte_range_size(size_type size) except+ - void enable_lines(bool val) except+ - void enable_dayfirst(bool val) except+ - void enable_experimental(bool val) except+ - void enable_keep_quotes(bool val) except+ + ) except + + void set_byte_range_offset(size_type offset) except + + void set_byte_range_size(size_type size) except + + void enable_lines(bool val) except + + void enable_dayfirst(bool val) except + + void enable_experimental(bool val) except + + void enable_keep_quotes(bool val) except + @staticmethod json_reader_options_builder builder( cudf_io_types.source_info src - ) except+ + ) except + cdef cppclass json_reader_options_builder: - json_reader_options_builder() except+ + json_reader_options_builder() except + json_reader_options_builder( cudf_io_types.source_info src - ) except+ + ) except + json_reader_options_builder& dtypes( vector[string] types - ) except+ + ) except + json_reader_options_builder& dtypes( vector[data_type] types - ) except+ + ) except + json_reader_options_builder& dtypes( map[string, schema_element] types - ) except+ + ) except + json_reader_options_builder& compression( cudf_io_types.compression_type compression - ) except+ + ) except + json_reader_options_builder& byte_range_offset( size_type offset - ) except+ + ) except + json_reader_options_builder& byte_range_size( size_type size - ) except+ + ) except + json_reader_options_builder& lines( bool val - ) except+ + ) except + json_reader_options_builder& dayfirst( bool val - ) except+ + ) except + json_reader_options_builder& experimental( bool val - ) except+ + ) except + json_reader_options_builder& keep_quotes( bool val 
- ) except+ + ) except + - json_reader_options build() except+ + json_reader_options build() except + cdef cudf_io_types.table_with_metadata read_json( - json_reader_options &options) except+ + json_reader_options &options) except + diff --git a/python/cudf/cudf/_lib/cpp/io/orc.pxd b/python/cudf/cudf/_lib/cpp/io/orc.pxd index 3e44ef98348..ec26fff3779 100644 --- a/python/cudf/cudf/_lib/cpp/io/orc.pxd +++ b/python/cudf/cudf/_lib/cpp/io/orc.pxd @@ -16,45 +16,45 @@ cdef extern from "cudf/io/orc.hpp" \ namespace "cudf::io" nogil: cdef cppclass orc_reader_options: - orc_reader_options() except+ - - cudf_io_types.source_info get_source() except+ - vector[vector[size_type]] get_stripes() except+ - size_type get_skip_rows() except+ - size_type get_num_rows() except+ - bool is_enabled_use_index() except+ - bool is_enabled_use_np_dtypes() except+ - data_type get_timestamp_type() except+ - bool is_enabled_decimals_as_float64() except+ - int get_forced_decimals_scale() except+ - - void set_columns(vector[string] col_names) except+ - void set_stripes(vector[vector[size_type]] strps) except+ - void set_skip_rows(size_type rows) except+ - void set_num_rows(size_type nrows) except+ - void enable_use_index(bool val) except+ - void enable_use_np_dtypes(bool val) except+ - void set_timestamp_type(data_type type) except+ + orc_reader_options() except + + + cudf_io_types.source_info get_source() except + + vector[vector[size_type]] get_stripes() except + + size_type get_skip_rows() except + + size_type get_num_rows() except + + bool is_enabled_use_index() except + + bool is_enabled_use_np_dtypes() except + + data_type get_timestamp_type() except + + bool is_enabled_decimals_as_float64() except + + int get_forced_decimals_scale() except + + + void set_columns(vector[string] col_names) except + + void set_stripes(vector[vector[size_type]] strps) except + + void set_skip_rows(size_type rows) except + + void set_num_rows(size_type nrows) except + + void enable_use_index(bool val) except + + 
void enable_use_np_dtypes(bool val) except + + void set_timestamp_type(data_type type) except + @staticmethod orc_reader_options_builder builder( cudf_io_types.source_info src - ) except+ + ) except + cdef cppclass orc_reader_options_builder: - orc_reader_options_builder() except+ - orc_reader_options_builder(cudf_io_types.source_info &src) except+ + orc_reader_options_builder() except + + orc_reader_options_builder(cudf_io_types.source_info &src) except + - orc_reader_options_builder& columns(vector[string] col_names) except+ + orc_reader_options_builder& columns(vector[string] col_names) except + orc_reader_options_builder& \ - stripes(vector[vector[size_type]] strps) except+ - orc_reader_options_builder& skip_rows(size_type rows) except+ - orc_reader_options_builder& num_rows(size_type nrows) except+ - orc_reader_options_builder& use_index(bool val) except+ - orc_reader_options_builder& use_np_dtypes(bool val) except+ - orc_reader_options_builder& timestamp_type(data_type type) except+ + stripes(vector[vector[size_type]] strps) except + + orc_reader_options_builder& skip_rows(size_type rows) except + + orc_reader_options_builder& num_rows(size_type nrows) except + + orc_reader_options_builder& use_index(bool val) except + + orc_reader_options_builder& use_np_dtypes(bool val) except + + orc_reader_options_builder& timestamp_type(data_type type) except + - orc_reader_options build() except+ + orc_reader_options build() except + cdef cudf_io_types.table_with_metadata read_orc( orc_reader_options opts @@ -62,108 +62,110 @@ cdef extern from "cudf/io/orc.hpp" \ cdef cppclass orc_writer_options: orc_writer_options() - cudf_io_types.sink_info get_sink() except+ - cudf_io_types.compression_type get_compression() except+ - bool is_enabled_statistics() except+ - size_t get_stripe_size_bytes() except+ - size_type get_stripe_size_rows() except+ - size_type get_row_index_stride() except+ - cudf_table_view.table_view get_table() except+ - const 
cudf_io_types.table_input_metadata *get_metadata() except+ + cudf_io_types.sink_info get_sink() except + + cudf_io_types.compression_type get_compression() except + + bool is_enabled_statistics() except + + size_t get_stripe_size_bytes() except + + size_type get_stripe_size_rows() except + + size_type get_row_index_stride() except + + cudf_table_view.table_view get_table() except + + const cudf_io_types.table_input_metadata *get_metadata() except + # setter - void set_compression(cudf_io_types.compression_type comp) except+ - void enable_statistics(bool val) except+ - void set_stripe_size_bytes(size_t val) except+ - void set_stripe_size_rows(size_type val) except+ - void set_row_index_stride(size_type val) except+ - void set_table(cudf_table_view.table_view tbl) except+ - void set_metadata(cudf_io_types.table_input_metadata* meta) except+ + void set_compression(cudf_io_types.compression_type comp) except + + void enable_statistics(bool val) except + + void set_stripe_size_bytes(size_t val) except + + void set_stripe_size_rows(size_type val) except + + void set_row_index_stride(size_type val) except + + void set_table(cudf_table_view.table_view tbl) except + + void set_metadata(cudf_io_types.table_input_metadata* meta) except + void set_key_value_metadata(map[string, string] kvm) except + @staticmethod orc_writer_options_builder builder( cudf_io_types.sink_info &sink, cudf_table_view.table_view &tbl - ) except+ + ) except + cdef cppclass orc_writer_options_builder: # setter orc_writer_options_builder& compression( cudf_io_types.compression_type comp - ) except+ - orc_writer_options_builder& enable_statistics(bool val) except+ - orc_writer_options_builder& stripe_size_bytes(size_t val) except+ - orc_writer_options_builder& stripe_size_rows(size_type val) except+ - orc_writer_options_builder& row_index_stride(size_type val) except+ + ) except + + orc_writer_options_builder& enable_statistics(bool val) except + + orc_writer_options_builder& stripe_size_bytes(size_t 
val) except + + orc_writer_options_builder& stripe_size_rows(size_type val) except + + orc_writer_options_builder& row_index_stride(size_type val) except + orc_writer_options_builder& table( cudf_table_view.table_view tbl - ) except+ + ) except + orc_writer_options_builder& metadata( cudf_io_types.table_input_metadata *meta - ) except+ + ) except + orc_writer_options_builder& key_value_metadata( map[string, string] kvm - ) except+ + ) except + - orc_writer_options build() except+ + orc_writer_options build() except + cdef void write_orc(orc_writer_options options) except + cdef cppclass chunked_orc_writer_options: - chunked_orc_writer_options() except+ - cudf_io_types.sink_info get_sink() except+ - cudf_io_types.compression_type get_compression() except+ - bool enable_statistics() except+ - size_t stripe_size_bytes() except+ - size_type stripe_size_rows() except+ - size_type row_index_stride() except+ - cudf_table_view.table_view get_table() except+ + chunked_orc_writer_options() except + + cudf_io_types.sink_info get_sink() except + + cudf_io_types.compression_type get_compression() except + + bool enable_statistics() except + + size_t stripe_size_bytes() except + + size_type stripe_size_rows() except + + size_type row_index_stride() except + + cudf_table_view.table_view get_table() except + const cudf_io_types.table_input_metadata *get_metadata( - ) except+ + ) except + # setter - void set_compression(cudf_io_types.compression_type comp) except+ - void enable_statistics(bool val) except+ - void set_stripe_size_bytes(size_t val) except+ - void set_stripe_size_rows(size_type val) except+ - void set_row_index_stride(size_type val) except+ - void set_table(cudf_table_view.table_view tbl) except+ + void set_compression(cudf_io_types.compression_type comp) except + + void enable_statistics(bool val) except + + void set_stripe_size_bytes(size_t val) except + + void set_stripe_size_rows(size_type val) except + + void set_row_index_stride(size_type val) except + + void 
set_table(cudf_table_view.table_view tbl) except + void set_metadata( cudf_io_types.table_input_metadata* meta - ) except+ + ) except + void set_key_value_metadata(map[string, string] kvm) except + @staticmethod chunked_orc_writer_options_builder builder( cudf_io_types.sink_info &sink - ) except+ + ) except + cdef cppclass chunked_orc_writer_options_builder: # setter chunked_orc_writer_options_builder& compression( cudf_io_types.compression_type comp - ) except+ - chunked_orc_writer_options_builder& enable_statistics(bool val) except+ - orc_writer_options_builder& stripe_size_bytes(size_t val) except+ - orc_writer_options_builder& stripe_size_rows(size_type val) except+ - orc_writer_options_builder& row_index_stride(size_type val) except+ + ) except + + chunked_orc_writer_options_builder& enable_statistics( + bool val + ) except + + orc_writer_options_builder& stripe_size_bytes(size_t val) except + + orc_writer_options_builder& stripe_size_rows(size_type val) except + + orc_writer_options_builder& row_index_stride(size_type val) except + chunked_orc_writer_options_builder& table( cudf_table_view.table_view tbl - ) except+ + ) except + chunked_orc_writer_options_builder& metadata( cudf_io_types.table_input_metadata *meta - ) except+ + ) except + chunked_orc_writer_options_builder& key_value_metadata( map[string, string] kvm - ) except+ + ) except + - chunked_orc_writer_options build() except+ + chunked_orc_writer_options build() except + cdef cppclass orc_chunked_writer: - orc_chunked_writer() except+ - orc_chunked_writer(chunked_orc_writer_options args) except+ + orc_chunked_writer() except + + orc_chunked_writer(chunked_orc_writer_options args) except + orc_chunked_writer& write( cudf_table_view.table_view table_, - ) except+ - void close() except+ + ) except + + void close() except + diff --git a/python/cudf/cudf/_lib/cpp/io/parquet.pxd b/python/cudf/cudf/_lib/cpp/io/parquet.pxd index f388fff3beb..98b839ba9b8 100644 --- a/python/cudf/cudf/_lib/cpp/io/parquet.pxd 
+++ b/python/cudf/cudf/_lib/cpp/io/parquet.pxd @@ -66,11 +66,11 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: cudf_io_types.statistics_freq get_stats_level() except + cudf_table_view.table_view get_table() except + const cudf_io_types.table_input_metadata get_metadata() except + - string get_column_chunks_file_paths() except+ - size_t get_row_group_size_bytes() except+ - size_type get_row_group_size_rows() except+ - size_t get_max_page_size_bytes() except+ - size_type get_max_page_size_rows() except+ + string get_column_chunks_file_paths() except + + size_t get_row_group_size_bytes() except + + size_type get_row_group_size_rows() except + + size_t get_max_page_size_bytes() except + + size_type get_max_page_size_rows() except + void set_partitions( vector[cudf_io_types.partition_info] partitions @@ -90,10 +90,10 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: void set_column_chunks_file_paths( vector[string] column_chunks_file_paths ) except + - void set_row_group_size_bytes(size_t val) except+ - void set_row_group_size_rows(size_type val) except+ - void set_max_page_size_bytes(size_t val) except+ - void set_max_page_size_rows(size_type val) except+ + void set_row_group_size_bytes(size_t val) except + + void set_row_group_size_rows(size_type val) except + + void set_max_page_size_bytes(size_t val) except + + void set_max_page_size_rows(size_type val) except + @staticmethod parquet_writer_options_builder builder( @@ -131,16 +131,16 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: ) except + parquet_writer_options_builder& row_group_size_bytes( size_t val - ) except+ + ) except + parquet_writer_options_builder& row_group_size_rows( size_type val - ) except+ + ) except + parquet_writer_options_builder& max_page_size_bytes( size_t val - ) except+ + ) except + parquet_writer_options_builder& max_page_size_rows( size_type val - ) except+ + ) except + parquet_writer_options build() except + @@ -154,11 
+154,11 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: cudf_io_types.compression_type get_compression() except + cudf_io_types.statistics_freq get_stats_level() except + cudf_io_types.table_input_metadata* get_metadata( - ) except+ - size_t get_row_group_size_bytes() except+ - size_type get_row_group_size_rows() except+ - size_t get_max_page_size_bytes() except+ - size_type get_max_page_size_rows() except+ + ) except + + size_t get_row_group_size_bytes() except + + size_type get_row_group_size_rows() except + + size_t get_max_page_size_bytes() except + + size_type get_max_page_size_rows() except + void set_metadata( cudf_io_types.table_input_metadata *m @@ -172,10 +172,10 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: void set_compression( cudf_io_types.compression_type compression ) except + - void set_row_group_size_bytes(size_t val) except+ - void set_row_group_size_rows(size_type val) except+ - void set_max_page_size_bytes(size_t val) except+ - void set_max_page_size_rows(size_type val) except+ + void set_row_group_size_bytes(size_t val) except + + void set_row_group_size_rows(size_type val) except + + void set_max_page_size_bytes(size_t val) except + + void set_max_page_size_rows(size_type val) except + @staticmethod chunked_parquet_writer_options_builder builder( @@ -201,32 +201,32 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: ) except + chunked_parquet_writer_options_builder& row_group_size_bytes( size_t val - ) except+ + ) except + chunked_parquet_writer_options_builder& row_group_size_rows( size_type val - ) except+ + ) except + chunked_parquet_writer_options_builder& max_page_size_bytes( size_t val - ) except+ + ) except + chunked_parquet_writer_options_builder& max_page_size_rows( size_type val - ) except+ + ) except + chunked_parquet_writer_options build() except + cdef cppclass parquet_chunked_writer: - parquet_chunked_writer() except+ - 
parquet_chunked_writer(chunked_parquet_writer_options args) except+ + parquet_chunked_writer() except + + parquet_chunked_writer(chunked_parquet_writer_options args) except + parquet_chunked_writer& write( cudf_table_view.table_view table_, - ) except+ + ) except + parquet_chunked_writer& write( const cudf_table_view.table_view& table_, const vector[cudf_io_types.partition_info]& partitions, - ) except+ + ) except + unique_ptr[vector[uint8_t]] close( vector[string] column_chunks_file_paths, - ) except+ + ) except + cdef unique_ptr[vector[uint8_t]] merge_row_group_metadata( const vector[unique_ptr[vector[uint8_t]]]& metadata_list diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index f00c7d1f2b5..126da0f883a 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3333,7 +3333,11 @@ def agg(self, aggs, axis=None): @_cudf_nvtx_annotate def nlargest(self, n, columns, keep="first"): - """Get the rows of the DataFrame sorted by the n largest value of *columns* + """Return the first *n* rows ordered by *columns* in descending order. + + Return the first *n* rows with the largest values in *columns*, in + descending order. The columns that are not specified are returned as + well, but not used for ordering. Parameters ---------- @@ -3396,7 +3400,11 @@ def nlargest(self, n, columns, keep="first"): return self._n_largest_or_smallest(True, n, columns, keep) def nsmallest(self, n, columns, keep="first"): - """Get the rows of the DataFrame sorted by the n smallest value of *columns* + """Return the first *n* rows ordered by *columns* in ascending order. + + Return the first *n* rows with the smallest values in *columns*, in + ascending order. The columns that are not specified are returned as + well, but not used for ordering. 
Parameters ---------- @@ -5879,7 +5887,7 @@ def _columns_view(self, columns): @_cudf_nvtx_annotate def select_dtypes(self, include=None, exclude=None): - """Return a subset of the DataFrame’s columns based on the column dtypes. + """Return a subset of the DataFrame's columns based on the column dtypes. Parameters ---------- @@ -5938,7 +5946,7 @@ def select_dtypes(self, include=None, exclude=None): 3 False 2.0 4 True 1.0 5 False 2.0 - """ + """ # noqa: E501 # code modified from: # https://github.com/pandas-dev/pandas/blob/master/pandas/core/frame.py#L3196 diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 0acacc798a1..bbb1c95bef6 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -4719,10 +4719,12 @@ def _drop_rows_by_labels( level: Union[int, str], errors: str, ) -> DataFrameOrSeries: - """Remove rows specified by `labels`. If `errors="raise"`, an error is raised - if some items in `labels` do not exist in `obj._index`. + """Remove rows specified by `labels`. - Will raise if level(int) is greater or equal to index nlevels + If `errors="raise"`, an error is raised if some items in `labels` do not + exist in `obj._index`. + + Will raise if level(int) is greater or equal to index nlevels. """ if isinstance(level, int) and level >= obj.index.nlevels: raise ValueError("Param level out of bounds.") diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 7493202a3d1..07e1782d788 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -889,7 +889,7 @@ def reindex(self, *args, **kwargs): DataFrame, followed by the original Series values. When `drop` is True, a `Series` is returned. In either case, if ``inplace=True``, no value is returned. 
-""", +""", # noqa: E501 example=""" >>> series = cudf.Series(['a', 'b', 'c', 'd'], index=[10, 11, 12, 13]) >>> series @@ -2998,7 +2998,7 @@ def describe( @_cudf_nvtx_annotate def digitize(self, bins, right=False): - """Return the indices of the bins to which each value in series belongs. + """Return the indices of the bins to which each value belongs. Notes ----- diff --git a/python/cudf/cudf/utils/hash_vocab_utils.py b/python/cudf/cudf/utils/hash_vocab_utils.py index cecf0c36bc2..a0915951240 100644 --- a/python/cudf/cudf/utils/hash_vocab_utils.py +++ b/python/cudf/cudf/utils/hash_vocab_utils.py @@ -253,9 +253,10 @@ def hash_vocab( hashed_vocab = {_sdbm_hash(key): value for key, value in vocab.items()} - error_message = """Collision occurred and only sdbm token hash current supported :( - Can be extended to use random hashes if needed""" - + error_message = ( + "A collision occurred and only sdbm token hash is currently " + "supported. This can be extended to use random hashes if needed." + ) assert len(hashed_vocab) == len(vocab), error_message ( diff --git a/setup.cfg b/setup.cfg index d196e8605b2..d810178c44b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,9 @@ # Copyright (c) 2017-2022, NVIDIA CORPORATION. [flake8] -filename = *.py, *.pyx, *.pxd +filename = *.py, *.pyx, *.pxd, *.pxi exclude = __init__.py, *.egg, build, docs, .git +force-check = True ignore = # line break before binary operator W503, @@ -14,11 +15,13 @@ per-file-ignores = # E225: Missing whitespace around operators (breaks cython casting syntax like ) # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) + # E275: Missing whitespace after keyword (Doesn't work with Cython except?) 
# E402: invalid syntax (works for Python, not Cython) # E999: invalid syntax (works for Python, not Cython) # W504: line break after binary operator (breaks lines that end with a pointer) - *.pyx: E211, E225, E226, E227, E402, E999, W504 - *.pxd: E211, E225, E226, E227, E402, E999, W504 + *.pyx: E211, E225, E226, E227, E275, E402, E999, W504 + *.pxd: E211, E225, E226, E227, E275, E402, E999, W504 + *.pxi: E211, E225, E226, E227, E275, E402, E999, W504 [pydocstyle] # Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather From 6ca2ceb8e200d55f1f681a4ca086614a28d67ad1 Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Tue, 18 Oct 2022 17:23:42 -0500 Subject: [PATCH 047/202] Adds retryCount to RmmEventHandler.onAllocFailure (#11940) This adds the method `boolean onAllocFailure(long sizeRequested, int retryCount)` to `RmmEventHandler`, to help handling code keep track of the number of times an allocation failure has been retried. With this code callers can perform extra logic that depends on whether the callback was due to a brand new allocation failure, or one that has failed in the past and is being retried. 
This will be used here: https://github.com/NVIDIA/spark-rapids/issues/6768 Authors: - Alessandro Bellina (https://github.com/abellina) Approvers: - Jason Lowe (https://github.com/jlowe) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/11940 --- .../java/ai/rapids/cudf/RmmEventHandler.java | 23 ++++++++++++-- java/src/main/native/src/RmmJni.cpp | 31 +++++++++++++++---- .../src/test/java/ai/rapids/cudf/RmmTest.java | 17 +++++----- 3 files changed, 56 insertions(+), 15 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java b/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java index 85442402403..19707b85bcb 100644 --- a/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java +++ b/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,9 +22,28 @@ public interface RmmEventHandler { /** * Invoked on a memory allocation failure. * @param sizeRequested number of bytes that failed to allocate + * @deprecated deprecated in favor of onAllocFailure(long, int) * @return true if the memory allocation should be retried or false if it should fail */ - boolean onAllocFailure(long sizeRequested); + default boolean onAllocFailure(long sizeRequested) { + // this should not be called since it was the previous interface, + // and it was abstract before, throwing by default for good measure. + throw new UnsupportedOperationException( + "Unexpected invocation of deprecated onAllocFailure without retry count."); + } + + /** + * Invoked on a memory allocation failure. 
+ * @param sizeRequested number of bytes that failed to allocate + * @param retryCount number of times this allocation has been retried after failure + * @return true if the memory allocation should be retried or false if it should fail + */ + default boolean onAllocFailure(long sizeRequested, int retryCount) { + // newer code should override this implementation of `onAllocFailure` to handle + // `retryCount`. Otherwise, we call the prior implementation to not + // break existing code. + return onAllocFailure(sizeRequested); + } /** * Get the memory thresholds that will trigger {@link #onAllocThreshold(long)} diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp index ce3e6ffb285..2b4c5ae59f5 100644 --- a/java/src/main/native/src/RmmJni.cpp +++ b/java/src/main/native/src/RmmJni.cpp @@ -150,9 +150,15 @@ class java_event_handler_memory_resource final : public device_memory_resource { if (cls == nullptr) { throw cudf::jni::jni_exception("class not found"); } - on_alloc_fail_method = env->GetMethodID(cls, "onAllocFailure", "(J)Z"); + on_alloc_fail_method = env->GetMethodID(cls, "onAllocFailure", "(JI)Z"); if (on_alloc_fail_method == nullptr) { - throw cudf::jni::jni_exception("onAllocFailure method"); + use_old_alloc_fail_interface = true; + on_alloc_fail_method = env->GetMethodID(cls, "onAllocFailure", "(J)Z"); + if (on_alloc_fail_method == nullptr) { + throw cudf::jni::jni_exception("onAllocFailure method"); + } + } else { + use_old_alloc_fail_interface = false; } on_alloc_threshold_method = env->GetMethodID(cls, "onAllocThreshold", "(J)V"); if (on_alloc_threshold_method == nullptr) { @@ -190,6 +196,7 @@ class java_event_handler_memory_resource final : public device_memory_resource { JavaVM *jvm; jobject handler_obj; jmethodID on_alloc_fail_method; + bool use_old_alloc_fail_interface; jmethodID on_alloc_threshold_method; jmethodID on_dealloc_threshold_method; @@ -209,10 +216,18 @@ class java_event_handler_memory_resource final : 
public device_memory_resource { } } - bool on_alloc_fail(std::size_t num_bytes) { + bool on_alloc_fail(std::size_t num_bytes, int retry_count) { JNIEnv *env = cudf::jni::get_jni_env(jvm); - jboolean result = - env->CallBooleanMethod(handler_obj, on_alloc_fail_method, static_cast(num_bytes)); + jboolean result = false; + if (!use_old_alloc_fail_interface) { + result = + env->CallBooleanMethod(handler_obj, on_alloc_fail_method, static_cast(num_bytes), + static_cast(retry_count)); + + } else { + result = + env->CallBooleanMethod(handler_obj, on_alloc_fail_method, static_cast(num_bytes)); + } if (env->ExceptionCheck()) { throw std::runtime_error("onAllocFailure handler threw an exception"); } @@ -240,13 +255,17 @@ class java_event_handler_memory_resource final : public device_memory_resource { void *do_allocate(std::size_t num_bytes, rmm::cuda_stream_view stream) override { std::size_t total_before; void *result; + // a non-zero retry_count signifies that the `on_alloc_fail` + // callback is being invoked while re-attempting an allocation + // that had previously failed. 
+ int retry_count = 0; while (true) { try { total_before = get_total_bytes_allocated(); result = resource->allocate(num_bytes, stream); break; } catch (rmm::out_of_memory const &e) { - if (!on_alloc_fail(num_bytes)) { + if (!on_alloc_fail(num_bytes, retry_count++)) { throw; } } diff --git a/java/src/test/java/ai/rapids/cudf/RmmTest.java b/java/src/test/java/ai/rapids/cudf/RmmTest.java index c56b131de86..09fbedd8a1c 100644 --- a/java/src/test/java/ai/rapids/cudf/RmmTest.java +++ b/java/src/test/java/ai/rapids/cudf/RmmTest.java @@ -73,11 +73,13 @@ public void testTotalAllocated(int rmmAllocMode) { public void testEventHandler(int rmmAllocMode) { AtomicInteger invokedCount = new AtomicInteger(); AtomicLong amountRequested = new AtomicLong(); + AtomicInteger timesRetried = new AtomicInteger(); RmmEventHandler handler = new BaseRmmEventHandler() { @Override - public boolean onAllocFailure(long sizeRequested) { + public boolean onAllocFailure(long sizeRequested, int retryCount) { int count = invokedCount.incrementAndGet(); + timesRetried.set(retryCount); amountRequested.set(sizeRequested); return count != 3; } @@ -100,6 +102,7 @@ public boolean onAllocFailure(long sizeRequested) { } assertEquals(3, invokedCount.get()); + assertEquals(2, timesRetried.get()); assertEquals(requested, amountRequested.get()); // verify after a failure we can still allocate something more reasonable @@ -114,7 +117,7 @@ public void testSetEventHandlerTwice() { // installing an event handler the first time should not be an error Rmm.setEventHandler(new BaseRmmEventHandler() { @Override - public boolean onAllocFailure(long sizeRequested) { + public boolean onAllocFailure(long sizeRequested, int retryCount) { return false; } }); @@ -122,7 +125,7 @@ public boolean onAllocFailure(long sizeRequested) { // installing a second event handler is an error RmmEventHandler otherHandler = new BaseRmmEventHandler() { @Override - public boolean onAllocFailure(long sizeRequested) { + public boolean 
onAllocFailure(long sizeRequested, int retryCount) { return true; } }; @@ -138,7 +141,7 @@ public void testClearEventHandler() { // create an event handler that will always retry RmmEventHandler retryHandler = new BaseRmmEventHandler() { @Override - public boolean onAllocFailure(long sizeRequested) { + public boolean onAllocFailure(long sizeRequested, int retryCount) { return true; } }; @@ -165,7 +168,7 @@ public void testAllocOnlyThresholds() { RmmEventHandler handler = new RmmEventHandler() { @Override - public boolean onAllocFailure(long sizeRequested) { + public boolean onAllocFailure(long sizeRequested, int retryCount) { return false; } @@ -228,7 +231,7 @@ public void testThresholds() { RmmEventHandler handler = new RmmEventHandler() { @Override - public boolean onAllocFailure(long sizeRequested) { + public boolean onAllocFailure(long sizeRequested, int retryCount) { return false; } @@ -308,7 +311,7 @@ public void testExceptionHandling() { RmmEventHandler handler = new RmmEventHandler() { @Override - public boolean onAllocFailure(long sizeRequested) { + public boolean onAllocFailure(long sizeRequested, int retryCount) { throw new AllocFailException(); } From 08e4ec2a64050c8e70b052c4ccf5f59073c77c8c Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 19 Oct 2022 14:55:54 -0400 Subject: [PATCH 048/202] Refactor pad/zfill functions for reuse with strings udf (#11914) Refactors the main device code used for `cudf::strings::pad` and `cudf::strings::zfill` for reuse in strings UDF pad and zfill functions. No new functions or features have been added, updated, or removed. 
The detail functions have mainly just been moved to the new file `cpp/include/cudf/strings/detail/pad_impl.cuh` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Tobias Ribizel (https://github.com/upsj) URL: https://github.com/rapidsai/cudf/pull/11914 --- cpp/include/cudf/strings/detail/pad_impl.cuh | 126 +++++++++++++++++++ cpp/src/strings/padding.cu | 63 +++------- 2 files changed, 144 insertions(+), 45 deletions(-) create mode 100644 cpp/include/cudf/strings/detail/pad_impl.cuh diff --git a/cpp/include/cudf/strings/detail/pad_impl.cuh b/cpp/include/cudf/strings/detail/pad_impl.cuh new file mode 100644 index 00000000000..648c240bfbc --- /dev/null +++ b/cpp/include/cudf/strings/detail/pad_impl.cuh @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace cudf { +namespace strings { +namespace detail { + +/** + * @brief Return the size in bytes of padding d_str to width characters using a fill character + * with byte length of fill_char_size + * + * Pad does not perform truncation. That is, if `d_str.length() > width` then `d_str.size_bytes()` + * is returned. 
+ * + * @param d_str String to pad + * @param width Number of characters for the padded string result + * @param fill_char_size Size of the fill character in bytes + * @return The number of bytes required for the pad + */ +__device__ size_type compute_padded_size(string_view d_str, + size_type width, + size_type fill_char_size) +{ + auto const length = d_str.length(); + auto bytes = d_str.size_bytes(); + if (width > length) // no truncating; + bytes += fill_char_size * (width - length); // add padding + return bytes; +} + +/** + * @brief Pad d_str with fill_char into output up to width characters + * + * Pad does not perform truncation. That is, if `d_str.length() > width` then + * d_str is copied into output. + * + * @tparam side Specifies where fill_char is added to d_str + * @param d_str String to pad + * @param width Number of characters for the padded string result + * @param fill_char Character to use for padding + * @param output Device memory to copy the padded string into + */ +template +__device__ void pad_impl(cudf::string_view d_str, + cudf::size_type width, + cudf::char_utf8 fill_char, + char* output) +{ + auto length = d_str.length(); + if constexpr (side == side_type::LEFT) { + while (length++ < width) { + output += from_char_utf8(fill_char, output); + } + copy_string(output, d_str); + } + if constexpr (side == side_type::RIGHT) { + output = copy_string(output, d_str); + while (length++ < width) { + output += from_char_utf8(fill_char, output); + } + } + if constexpr (side == side_type::BOTH) { + auto const pad_size = width - length; + // an odd width will right-justify + auto right_pad = (width % 2) ? pad_size / 2 : (pad_size - pad_size / 2); + auto left_pad = pad_size - right_pad; // e.g. 
width=7: "++foxx+"; width=6: "+fox++" + while (left_pad-- > 0) { + output += from_char_utf8(fill_char, output); + } + output = copy_string(output, d_str); + while (right_pad-- > 0) { + output += from_char_utf8(fill_char, output); + } + } +} + +/** + * @brief Prepend d_str with '0' into output up to width characters + * + * Pad does not perform truncation. That is, if `d_str.length() > width` then + * d_str is copied into output. + * + * If d_str starts with a sign character ('-' or '+') then '0' padding + * starts after the sign. + * + * @param d_str String to pad + * @param width Number of characters for the padded string result + * @param output Device memory to copy the padded string into + */ +__device__ void zfill_impl(cudf::string_view d_str, cudf::size_type width, char* output) +{ + auto length = d_str.length(); + auto in_ptr = d_str.data(); + // if the string starts with a sign, output the sign first + if (!d_str.empty() && (*in_ptr == '-' || *in_ptr == '+')) { + *output++ = *in_ptr++; + d_str = cudf::string_view{in_ptr, d_str.size_bytes() - 1}; + } + while (length++ < width) + *output++ = '0'; // prepend zero char + copy_string(output, d_str); +} + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/src/strings/padding.cu b/cpp/src/strings/padding.cu index e601eeb6b6e..e4002525af9 100644 --- a/cpp/src/strings/padding.cu +++ b/cpp/src/strings/padding.cu @@ -20,8 +20,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -38,6 +37,7 @@ namespace cudf { namespace strings { namespace detail { namespace { + struct compute_pad_output_length_fn { column_device_view d_strings; size_type width; @@ -47,11 +47,7 @@ struct compute_pad_output_length_fn { { if (d_strings.is_null(idx)) return 0; string_view d_str = d_strings.element(idx); - size_type bytes = d_str.size_bytes(); - size_type length = d_str.length(); - if (width > length) // no truncating - bytes += fill_char_size * (width - length); 
// add padding - return bytes; + return compute_padded_size(d_str, width, fill_char_size); } }; @@ -96,13 +92,10 @@ std::unique_ptr pad( thrust::make_counting_iterator(0), strings_count, [d_strings, width, d_fill_char, d_offsets, d_chars] __device__(size_type idx) { - if (d_strings.is_null(idx)) return; - string_view d_str = d_strings.element(idx); - auto length = d_str.length(); - char* ptr = d_chars + d_offsets[idx]; - while (length++ < width) - ptr += from_char_utf8(d_fill_char, ptr); - copy_string(ptr, d_str); + if (d_strings.is_valid(idx)) { + pad_impl( + d_strings.element(idx), width, d_fill_char, d_chars + d_offsets[idx]); + } }); } else if (side == side_type::RIGHT) { thrust::for_each_n( @@ -110,13 +103,10 @@ std::unique_ptr pad( thrust::make_counting_iterator(0), strings_count, [d_strings, width, d_fill_char, d_offsets, d_chars] __device__(size_type idx) { - if (d_strings.is_null(idx)) return; - string_view d_str = d_strings.element(idx); - auto length = d_str.length(); - char* ptr = d_chars + d_offsets[idx]; - ptr = copy_string(ptr, d_str); - while (length++ < width) - ptr += from_char_utf8(d_fill_char, ptr); + if (d_strings.is_valid(idx)) { + pad_impl( + d_strings.element(idx), width, d_fill_char, d_chars + d_offsets[idx]); + } }); } else if (side == side_type::BOTH) { thrust::for_each_n( @@ -124,18 +114,10 @@ std::unique_ptr pad( thrust::make_counting_iterator(0), strings_count, [d_strings, width, d_fill_char, d_offsets, d_chars] __device__(size_type idx) { - if (d_strings.is_null(idx)) return; - string_view d_str = d_strings.element(idx); - char* ptr = d_chars + d_offsets[idx]; - auto pad = static_cast(width - d_str.length()); - auto right_pad = (width & 1) ? pad / 2 : (pad - pad / 2); // odd width = right-justify - auto left_pad = - pad - right_pad; // e.g. 
width=7 gives "++foxx+" while width=6 gives "+fox++" - while (left_pad-- > 0) - ptr += from_char_utf8(d_fill_char, ptr); - ptr = copy_string(ptr, d_str); - while (right_pad-- > 0) - ptr += from_char_utf8(d_fill_char, ptr); + if (d_strings.is_valid(idx)) { + pad_impl( + d_strings.element(idx), width, d_fill_char, d_chars + d_offsets[idx]); + } }); } @@ -174,19 +156,10 @@ std::unique_ptr zfill( thrust::make_counting_iterator(0), input.size(), [d_strings, width, d_offsets, d_chars] __device__(size_type idx) { - if (d_strings.is_null(idx)) return; - auto d_str = d_strings.element(idx); - auto length = d_str.length(); - auto in_ptr = d_str.data(); - auto out_ptr = d_chars + d_offsets[idx]; - // if the string starts with a sign, output the sign first - if (!d_str.empty() && (*in_ptr == '-' || *in_ptr == '+')) { - *out_ptr++ = *in_ptr++; - d_str = string_view{in_ptr, d_str.size_bytes() - 1}; + if (d_strings.is_valid(idx)) { + zfill_impl( + d_strings.element(idx), width, d_chars + d_offsets[idx]); } - while (length++ < width) - *out_ptr++ = '0'; // prepend zero char - copy_string(out_ptr, d_str); }); return make_strings_column(input.size(), From 08ffeccca565aff25f7ca0e718bde8de99dffd35 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 19 Oct 2022 16:12:10 -0400 Subject: [PATCH 049/202] Fix some gtests incorrectly coded in namespace cudf::test (part I) (#11917) Fixes a few simple gtests that may not get touched in the course of other PRs. This removes the `using namespace cudf::test` or similar declaration from gtests where it is improperly used. No code logic has changed just variable declarations and function calls. 
Reference #11734 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Tobias Ribizel (https://github.com/upsj) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/11917 --- cpp/tests/bitmask/is_element_valid_tests.cpp | 43 +-- cpp/tests/hashing/hash_test.cpp | 377 ++++++++++--------- cpp/tests/interop/dlpack_test.cpp | 68 ++-- cpp/tests/lists/explode_tests.cpp | 87 +++-- cpp/tests/reshape/byte_cast_tests.cpp | 181 ++++----- cpp/tests/reshape/tile_tests.cpp | 23 +- 6 files changed, 401 insertions(+), 378 deletions(-) diff --git a/cpp/tests/bitmask/is_element_valid_tests.cpp b/cpp/tests/bitmask/is_element_valid_tests.cpp index 383448c0dd8..888d0103f03 100644 --- a/cpp/tests/bitmask/is_element_valid_tests.cpp +++ b/cpp/tests/bitmask/is_element_valid_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,15 +23,12 @@ #include -namespace cudf { -namespace test { - -struct IsElementValidTest : public BaseFixture { +struct IsElementValidTest : public cudf::test::BaseFixture { }; TEST_F(IsElementValidTest, IsElementValidBasic) { - fixed_width_column_wrapper col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1}); EXPECT_TRUE(cudf::detail::is_element_valid_sync(col, 0)); EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 1)); EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 2)); @@ -41,12 +38,12 @@ TEST_F(IsElementValidTest, IsElementValidBasic) TEST_F(IsElementValidTest, IsElementValidLarge) { - auto filter = [](auto i) { return static_cast(i % 3); }; - auto val = thrust::make_counting_iterator(0); - auto valid = cudf::detail::make_counting_transform_iterator(0, filter); - size_type num_rows = 1000; + auto filter = [](auto i) { return static_cast(i % 3); }; + auto val = thrust::make_counting_iterator(0); + auto valid = cudf::detail::make_counting_transform_iterator(0, filter); + cudf::size_type num_rows = 1000; - fixed_width_column_wrapper col(val, val + num_rows, valid); + cudf::test::fixed_width_column_wrapper col(val, val + num_rows, valid); for (int i = 0; i < num_rows; i++) { EXPECT_EQ(cudf::detail::is_element_valid_sync(col, i), filter(i)); @@ -55,16 +52,16 @@ TEST_F(IsElementValidTest, IsElementValidLarge) TEST_F(IsElementValidTest, IsElementValidOffset) { - fixed_width_column_wrapper col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1}); { - auto offset_col = slice(col, {1, 5}).front(); + auto offset_col = cudf::slice(col, {1, 5}).front(); EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0)); EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1)); EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 2)); EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 3)); } { - auto offset_col = 
slice(col, {2, 5}).front(); + auto offset_col = cudf::slice(col, {2, 5}).front(); EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0)); EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1)); EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 2)); @@ -73,20 +70,16 @@ TEST_F(IsElementValidTest, IsElementValidOffset) TEST_F(IsElementValidTest, IsElementValidOffsetLarge) { - auto filter = [](auto i) { return static_cast(i % 3); }; - size_type offset = 37; - auto val = thrust::make_counting_iterator(0); - auto valid = cudf::detail::make_counting_transform_iterator(0, filter); - size_type num_rows = 1000; + auto filter = [](auto i) { return static_cast(i % 3); }; + cudf::size_type offset = 37; + auto val = thrust::make_counting_iterator(0); + auto valid = cudf::detail::make_counting_transform_iterator(0, filter); + cudf::size_type num_rows = 1000; - fixed_width_column_wrapper col(val, val + num_rows, valid); - auto offset_col = slice(col, {offset, num_rows}).front(); + cudf::test::fixed_width_column_wrapper col(val, val + num_rows, valid); + auto offset_col = cudf::slice(col, {offset, num_rows}).front(); for (int i = 0; i < offset_col.size(); i++) { EXPECT_EQ(cudf::detail::is_element_valid_sync(offset_col, i), filter(i + offset)); } } - -} // namespace test - -} // namespace cudf diff --git a/cpp/tests/hashing/hash_test.cpp b/cpp/tests/hashing/hash_test.cpp index baa7ba07ee4..c1a73761e8d 100644 --- a/cpp/tests/hashing/hash_test.cpp +++ b/cpp/tests/hashing/hash_test.cpp @@ -24,37 +24,35 @@ #include #include -using cudf::test::fixed_width_column_wrapper; -using cudf::test::strings_column_wrapper; -using namespace cudf::test; -using namespace cudf::test::iterators; - -constexpr debug_output_level verbosity{debug_output_level::ALL_ERRORS}; +constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; class HashTest : public cudf::test::BaseFixture { }; TEST_F(HashTest, MultiValue) { - strings_column_wrapper 
const strings_col({"", - "The quick brown fox", - "jumps over the lazy dog.", - "All work and no play makes Jack a dull boy", - R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}); + cudf::test::strings_column_wrapper const strings_col( + {"", + "The quick brown fox", + "jumps over the lazy dog.", + "All work and no play makes Jack a dull boy", + R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}); using limits = std::numeric_limits; - fixed_width_column_wrapper const ints_col({0, 100, -100, limits::min(), limits::max()}); + cudf::test::fixed_width_column_wrapper const ints_col( + {0, 100, -100, limits::min(), limits::max()}); // Different truth values should be equal - fixed_width_column_wrapper const bools_col1({0, 1, 1, 1, 0}); - fixed_width_column_wrapper const bools_col2({0, 1, 2, 255, 0}); + cudf::test::fixed_width_column_wrapper const bools_col1({0, 1, 1, 1, 0}); + cudf::test::fixed_width_column_wrapper const bools_col2({0, 1, 2, 255, 0}); using ts = cudf::timestamp_s; - fixed_width_column_wrapper const secs_col({ts::duration::zero(), - static_cast(100), - static_cast(-100), - ts::duration::min(), - ts::duration::max()}); + cudf::test::fixed_width_column_wrapper const secs_col( + {ts::duration::zero(), + static_cast(100), + static_cast(-100), + ts::duration::min(), + ts::duration::max()}); auto const input1 = cudf::table_view({strings_col, ints_col, bools_col1, secs_col}); auto const input2 = cudf::table_view({strings_col, ints_col, bools_col2, secs_col}); @@ -69,45 +67,49 @@ TEST_F(HashTest, MultiValue) TEST_F(HashTest, MultiValueNulls) { // Nulls with different values should be equal - strings_column_wrapper const strings_col1({"", - "The quick brown fox", - "jumps over the lazy dog.", - "All work and no play makes Jack a dull boy", - R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}, - {0, 1, 1, 0, 1}); - strings_column_wrapper const strings_col2({"different but null", - "The quick brown fox", - "jumps over the lazy dog.", - "I am Jack's complete lack of null 
value", - R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}, - {0, 1, 1, 0, 1}); + cudf::test::strings_column_wrapper const strings_col1( + {"", + "The quick brown fox", + "jumps over the lazy dog.", + "All work and no play makes Jack a dull boy", + R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}, + {0, 1, 1, 0, 1}); + cudf::test::strings_column_wrapper const strings_col2( + {"different but null", + "The quick brown fox", + "jumps over the lazy dog.", + "I am Jack's complete lack of null value", + R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}, + {0, 1, 1, 0, 1}); // Nulls with different values should be equal using limits = std::numeric_limits; - fixed_width_column_wrapper const ints_col1({0, 100, -100, limits::min(), limits::max()}, - {1, 0, 0, 1, 1}); - fixed_width_column_wrapper const ints_col2({0, -200, 200, limits::min(), limits::max()}, - {1, 0, 0, 1, 1}); + cudf::test::fixed_width_column_wrapper const ints_col1( + {0, 100, -100, limits::min(), limits::max()}, {1, 0, 0, 1, 1}); + cudf::test::fixed_width_column_wrapper const ints_col2( + {0, -200, 200, limits::min(), limits::max()}, {1, 0, 0, 1, 1}); // Nulls with different values should be equal // Different truth values should be equal - fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1}); - fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 0, 1}); // Nulls with different values should be equal using ts = cudf::timestamp_s; - fixed_width_column_wrapper const secs_col1({ts::duration::zero(), - static_cast(100), - static_cast(-100), - ts::duration::min(), - ts::duration::max()}, - {1, 0, 0, 1, 1}); - fixed_width_column_wrapper const secs_col2({ts::duration::zero(), - static_cast(-200), - static_cast(200), - ts::duration::min(), - ts::duration::max()}, - {1, 0, 0, 1, 1}); + 
cudf::test::fixed_width_column_wrapper const secs_col1( + {ts::duration::zero(), + static_cast(100), + static_cast(-100), + ts::duration::min(), + ts::duration::max()}, + {1, 0, 0, 1, 1}); + cudf::test::fixed_width_column_wrapper const secs_col2( + {ts::duration::zero(), + static_cast(-200), + static_cast(200), + ts::duration::min(), + ts::duration::max()}, + {1, 0, 0, 1, 1}); auto const input1 = cudf::table_view({strings_col1, ints_col1, bools_col1, secs_col1}); auto const input2 = cudf::table_view({strings_col2, ints_col2, bools_col2, secs_col2}); @@ -355,7 +357,7 @@ TYPED_TEST_SUITE(HashTestTyped, cudf::test::FixedWidthTypes); TYPED_TEST(HashTestTyped, Equality) { - fixed_width_column_wrapper const col{0, 127, 1, 2, 8}; + cudf::test::fixed_width_column_wrapper const col{0, 127, 1, 2, 8}; auto const input = cudf::table_view({col}); // Hash of same input should be equal @@ -377,8 +379,8 @@ TYPED_TEST(HashTestTyped, EqualityNulls) using T = TypeParam; // Nulls with different values should be equal - fixed_width_column_wrapper const col1({0, 127, 1, 2, 8}, {0, 1, 1, 1, 1}); - fixed_width_column_wrapper const col2({1, 127, 1, 2, 8}, {0, 1, 1, 1, 1}); + cudf::test::fixed_width_column_wrapper const col1({0, 127, 1, 2, 8}, {0, 1, 1, 1, 1}); + cudf::test::fixed_width_column_wrapper const col2({1, 127, 1, 2, 8}, {0, 1, 1, 1, 1}); auto const input1 = cudf::table_view({col1}); auto const input2 = cudf::table_view({col2}); @@ -410,10 +412,11 @@ TYPED_TEST(HashTestFloatTyped, TestExtremes) T nan = std::numeric_limits::quiet_NaN(); T inf = std::numeric_limits::infinity(); - fixed_width_column_wrapper const col({T(0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf}); - fixed_width_column_wrapper const col_neg_zero( + cudf::test::fixed_width_column_wrapper const col( + {T(0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf}); + cudf::test::fixed_width_column_wrapper const col_neg_zero( {T(-0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf}); - fixed_width_column_wrapper const 
col_neg_nan( + cudf::test::fixed_width_column_wrapper const col_neg_nan( {T(0.0), T(100.0), T(-100.0), min, max, -nan, inf, -inf}); auto const table_col = cudf::table_view({col}); @@ -505,76 +508,77 @@ TEST_F(SparkMurmurHash3Test, MultiValueWithSeeds) println(s"combined => ${df.select(hash(col("*"))).collect.mkString(",")}") */ - fixed_width_column_wrapper const hash_structs_expected( + cudf::test::fixed_width_column_wrapper const hash_structs_expected( {-105406170, 90479889, -678041645, 1667387937, 301478567}); - fixed_width_column_wrapper const hash_strings_expected( + cudf::test::fixed_width_column_wrapper const hash_strings_expected( {142593372, 1217302703, -715697185, -2061143941, -111635966}); - fixed_width_column_wrapper const hash_doubles_expected( + cudf::test::fixed_width_column_wrapper const hash_doubles_expected( {-1670924195, -853646085, -1281358385, 1897734433, -508695674}); - fixed_width_column_wrapper const hash_timestamps_expected( + cudf::test::fixed_width_column_wrapper const hash_timestamps_expected( {-1670924195, 1114849490, 904948192, -1832979433, 1752430209}); - fixed_width_column_wrapper const hash_decimal64_expected( + cudf::test::fixed_width_column_wrapper const hash_decimal64_expected( {-1670924195, 1114849490, 904948192, 1962370902, -1795328666}); - fixed_width_column_wrapper const hash_longs_expected( + cudf::test::fixed_width_column_wrapper const hash_longs_expected( {-1670924195, 1114849490, 904948192, -853646085, -1604625029}); - fixed_width_column_wrapper const hash_floats_expected( + cudf::test::fixed_width_column_wrapper const hash_floats_expected( {933211791, 723455942, -349261430, -1225560532, -338752985}); - fixed_width_column_wrapper const hash_dates_expected( + cudf::test::fixed_width_column_wrapper const hash_dates_expected( {933211791, 751823303, -1080202046, -1906567553, -1503850410}); - fixed_width_column_wrapper const hash_decimal32_expected( + cudf::test::fixed_width_column_wrapper const hash_decimal32_expected( 
{-1670924195, 1114849490, 904948192, -1454351396, -193774131}); - fixed_width_column_wrapper const hash_ints_expected( + cudf::test::fixed_width_column_wrapper const hash_ints_expected( {933211791, 751823303, -1080202046, 723455942, 133916647}); - fixed_width_column_wrapper const hash_shorts_expected( + cudf::test::fixed_width_column_wrapper const hash_shorts_expected( {933211791, 751823303, -1080202046, -1871935946, 1249274084}); - fixed_width_column_wrapper const hash_bytes_expected( + cudf::test::fixed_width_column_wrapper const hash_bytes_expected( {933211791, 751823303, -1080202046, 1110053733, 1135925485}); - fixed_width_column_wrapper const hash_bools_expected( + cudf::test::fixed_width_column_wrapper const hash_bools_expected( {933211791, -559580957, -559580957, -559580957, 933211791}); - fixed_width_column_wrapper const hash_decimal128_expected( + cudf::test::fixed_width_column_wrapper const hash_decimal128_expected( {-783713497, -295670906, 1398487324, -52622807, -1359749815}); - fixed_width_column_wrapper const hash_combined_expected( + cudf::test::fixed_width_column_wrapper const hash_combined_expected( {401603227, 588162166, 552160517, 1132537411, -326043017}); using double_limits = std::numeric_limits; using long_limits = std::numeric_limits; using float_limits = std::numeric_limits; using int_limits = std::numeric_limits; - fixed_width_column_wrapper a_col{0, 100, -100, 0x1234'5678, -0x7654'3210}; - strings_column_wrapper b_col{"a", "bc", "def", "ghij", "klmno"}; - fixed_width_column_wrapper x_col{ + cudf::test::fixed_width_column_wrapper a_col{0, 100, -100, 0x1234'5678, -0x7654'3210}; + cudf::test::strings_column_wrapper b_col{"a", "bc", "def", "ghij", "klmno"}; + cudf::test::fixed_width_column_wrapper x_col{ 0.f, 100.f, -100.f, float_limits::infinity(), -float_limits::infinity()}; - fixed_width_column_wrapper y_col{ + cudf::test::fixed_width_column_wrapper y_col{ 0L, 100L, -100L, 0x0123'4567'89ab'cdefL, -0x0123'4567'89ab'cdefL}; - 
structs_column_wrapper c_col{{x_col, y_col}}; - structs_column_wrapper const structs_col{{a_col, b_col, c_col}}; - - strings_column_wrapper const strings_col({"", - "The quick brown fox", - "jumps over the lazy dog.", - "All work and no play makes Jack a dull boy", - "!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\ud720\ud721"}); - fixed_width_column_wrapper const doubles_col( + cudf::test::structs_column_wrapper c_col{{x_col, y_col}}; + cudf::test::structs_column_wrapper const structs_col{{a_col, b_col, c_col}}; + + cudf::test::strings_column_wrapper const strings_col( + {"", + "The quick brown fox", + "jumps over the lazy dog.", + "All work and no play makes Jack a dull boy", + "!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\ud720\ud721"}); + cudf::test::fixed_width_column_wrapper const doubles_col( {0., -0., -double_limits::quiet_NaN(), double_limits::lowest(), double_limits::max()}); - fixed_width_column_wrapper const timestamps_col( - {0L, 100L, -100L, long_limits::min() / 1000000, long_limits::max() / 1000000}); - fixed_point_column_wrapper const decimal64_col( + cudf::test::fixed_width_column_wrapper const + timestamps_col({0L, 100L, -100L, long_limits::min() / 1000000, long_limits::max() / 1000000}); + cudf::test::fixed_point_column_wrapper const decimal64_col( {0L, 100L, -100L, -999999999999999999L, 999999999999999999L}, numeric::scale_type{-7}); - fixed_width_column_wrapper const longs_col( + cudf::test::fixed_width_column_wrapper const longs_col( {0L, 100L, -100L, long_limits::min(), long_limits::max()}); - fixed_width_column_wrapper const floats_col( + cudf::test::fixed_width_column_wrapper const floats_col( {0.f, -0.f, -float_limits::quiet_NaN(), float_limits::lowest(), float_limits::max()}); - fixed_width_column_wrapper dates_col( + cudf::test::fixed_width_column_wrapper dates_col( {0, 100, -100, int_limits::min() / 100, int_limits::max() / 100}); - fixed_point_column_wrapper const decimal32_col({0, 100, -100, -999999999, 999999999}, - numeric::scale_type{-3}); - 
fixed_width_column_wrapper const ints_col( + cudf::test::fixed_point_column_wrapper const decimal32_col( + {0, 100, -100, -999999999, 999999999}, numeric::scale_type{-3}); + cudf::test::fixed_width_column_wrapper const ints_col( {0, 100, -100, int_limits::min(), int_limits::max()}); - fixed_width_column_wrapper const shorts_col({0, 100, -100, -32768, 32767}); - fixed_width_column_wrapper const bytes_col({0, 100, -100, -128, 127}); - fixed_width_column_wrapper const bools_col1({0, 1, 1, 1, 0}); - fixed_width_column_wrapper const bools_col2({0, 1, 2, 255, 0}); - fixed_point_column_wrapper<__int128_t> const decimal128_col( + cudf::test::fixed_width_column_wrapper const shorts_col({0, 100, -100, -32768, 32767}); + cudf::test::fixed_width_column_wrapper const bytes_col({0, 100, -100, -128, 127}); + cudf::test::fixed_width_column_wrapper const bools_col1({0, 1, 1, 1, 0}); + cudf::test::fixed_width_column_wrapper const bools_col2({0, 1, 2, 255, 0}); + cudf::test::fixed_point_column_wrapper<__int128_t> const decimal128_col( {static_cast<__int128>(0), static_cast<__int128>(100), static_cast<__int128>(-1), @@ -644,14 +648,15 @@ TEST_F(SparkMurmurHash3Test, StringsWithSeed) // .map(org.apache.spark.sql.catalyst.expressions.Murmur3HashFunction.hash( // _, org.apache.spark.sql.types.StringType, 314))) - fixed_width_column_wrapper const hash_strings_expected_seed_314( + cudf::test::fixed_width_column_wrapper const hash_strings_expected_seed_314( {1467149710, 723257560, -1620282500, -2001858707, 1588473657}); - strings_column_wrapper const strings_col({"", - "The quick brown fox", - "jumps over the lazy dog.", - "All work and no play makes Jack a dull boy", - "!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\ud720\ud721"}); + cudf::test::strings_column_wrapper const strings_col( + {"", + "The quick brown fox", + "jumps over the lazy dog.", + "All work and no play makes Jack a dull boy", + "!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\ud720\ud721"}); constexpr auto hasher = 
cudf::hash_id::HASH_SPARK_MURMUR3; auto const hash_strings = cudf::hash(cudf::table_view({strings_col}), hasher, 314); @@ -691,27 +696,28 @@ TEST_F(SparkMurmurHash3Test, ListValues) df2.show(false) */ - auto const null = -1; - auto nested_list = cudf::test::lists_column_wrapper({{}, - {1}, - {1, 2}, - {1, 2, 3}, - {1, 2}, - {3}, - {1}, - {2, 3}, - {1}, - {{null, 2, 3}, nulls_at({0})}, - {1, 2}, - {3}, - {{null}, nulls_at({0})}, - {1, 2}, - {}, - {3}}, - nulls_at({0, 14})); + auto const null = -1; + auto nested_list = + cudf::test::lists_column_wrapper({{}, + {1}, + {1, 2}, + {1, 2, 3}, + {1, 2}, + {3}, + {1}, + {2, 3}, + {1}, + {{null, 2, 3}, cudf::test::iterators::nulls_at({0})}, + {1, 2}, + {3}, + {{null}, cudf::test::iterators::nulls_at({0})}, + {1, 2}, + {}, + {3}}, + cudf::test::iterators::nulls_at({0, 14})); auto offsets = cudf::test::fixed_width_column_wrapper{0, 0, 0, 1, 2, 3, 4, 6, 8, 10, 13, 16}; - auto list_validity = nulls_at({0}); + auto list_validity = cudf::test::iterators::nulls_at({0}); auto list_validity_buffer = cudf::test::detail::make_null_mask(list_validity, list_validity + 11); auto list_column = cudf::make_lists_column(11, offsets.release(), @@ -766,11 +772,18 @@ TEST_F(SparkMurmurHash3Test, StructOfListValues) */ auto const null = -1; - auto col1 = cudf::test::lists_column_wrapper( - {{}, {0}, {{1, null}, nulls_at({1})}, {{1, null}, nulls_at({1})}, {}, {} /*NULL*/, {2, 3}}, - nulls_at({5})); + auto col1 = + cudf::test::lists_column_wrapper({{}, + {0}, + {{1, null}, cudf::test::iterators::nulls_at({1})}, + {{1, null}, cudf::test::iterators::nulls_at({1})}, + {}, + {} /*NULL*/, + {2, 3}}, + cudf::test::iterators::nulls_at({5})); auto col2 = cudf::test::lists_column_wrapper( - {{}, {0}, {} /*NULL*/, {}, {{null, 1}, nulls_at({0})}, {1}, {4, 5}}, nulls_at({2})); + {{}, {0}, {} /*NULL*/, {}, {{null, 1}, cudf::test::iterators::nulls_at({0})}, {1}, {4, 5}}, + cudf::test::iterators::nulls_at({2})); auto struct_column = 
cudf::test::structs_column_wrapper{{col1, col2}}; auto expect = cudf::test::fixed_width_column_wrapper{ @@ -813,12 +826,15 @@ TEST_F(SparkMurmurHash3Test, ListOfStructValues) */ auto const null = -1; - auto col1 = fixed_width_column_wrapper({0, null, null, 1, null, null, 2, 2, null, 2, 4}, - nulls_at({1, 2, 4, 5, 8})); - auto col2 = fixed_width_column_wrapper({0, null, null, null, 1, 1, 3, 3, null, 3, 5}, - nulls_at({1, 2, 3, 8})); - auto struct_column = structs_column_wrapper{{col1, col2}, {1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1}}; - auto offsets = fixed_width_column_wrapper{0, 1, 2, 3, 4, 5, 7, 9, 11}; + auto col1 = cudf::test::fixed_width_column_wrapper( + {0, null, null, 1, null, null, 2, 2, null, 2, 4}, + cudf::test::iterators::nulls_at({1, 2, 4, 5, 8})); + auto col2 = cudf::test::fixed_width_column_wrapper( + {0, null, null, null, 1, 1, 3, 3, null, 3, 5}, cudf::test::iterators::nulls_at({1, 2, 3, 8})); + auto struct_column = + cudf::test::structs_column_wrapper{{col1, col2}, {1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1}}; + auto offsets = + cudf::test::fixed_width_column_wrapper{0, 1, 2, 3, 4, 5, 7, 9, 11}; auto list_nullmask = std::vector(1, 8); auto list_validity_buffer = cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); @@ -847,7 +863,7 @@ class MD5HashTest : public cudf::test::BaseFixture { TEST_F(MD5HashTest, MultiValue) { - strings_column_wrapper const strings_col( + cudf::test::strings_column_wrapper const strings_col( {"", "A 60 character string to test MD5's message padding algorithm", "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the " @@ -855,24 +871,27 @@ TEST_F(MD5HashTest, MultiValue) "All work and no play makes Jack a dull boy", R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}); - strings_column_wrapper const md5_string_results1({"d41d8cd98f00b204e9800998ecf8427e", - "682240021651ae166d08fe2a014d5c09", - "3669d5225fddbb34676312ca3b78bbd9", - "c61a4185135eda043f35e92c3505e180", - 
"52da74c75cb6575d25be29e66bd0adde"}); + cudf::test::strings_column_wrapper const md5_string_results1( + {"d41d8cd98f00b204e9800998ecf8427e", + "682240021651ae166d08fe2a014d5c09", + "3669d5225fddbb34676312ca3b78bbd9", + "c61a4185135eda043f35e92c3505e180", + "52da74c75cb6575d25be29e66bd0adde"}); - strings_column_wrapper const md5_string_results2({"d41d8cd98f00b204e9800998ecf8427e", - "e5a5682e82278e78dbaad9a689df7a73", - "4121ab1bb6e84172fd94822645862ae9", - "28970886501efe20164213855afe5850", - "6bc1b872103cc6a02d882245b8516e2e"}); + cudf::test::strings_column_wrapper const md5_string_results2( + {"d41d8cd98f00b204e9800998ecf8427e", + "e5a5682e82278e78dbaad9a689df7a73", + "4121ab1bb6e84172fd94822645862ae9", + "28970886501efe20164213855afe5850", + "6bc1b872103cc6a02d882245b8516e2e"}); using limits = std::numeric_limits; - fixed_width_column_wrapper const ints_col({0, 100, -100, limits::min(), limits::max()}); + cudf::test::fixed_width_column_wrapper const ints_col( + {0, 100, -100, limits::min(), limits::max()}); // Different truth values should be equal - fixed_width_column_wrapper const bools_col1({0, 1, 1, 1, 0}); - fixed_width_column_wrapper const bools_col2({0, 1, 2, 255, 0}); + cudf::test::fixed_width_column_wrapper const bools_col1({0, 1, 1, 1, 0}); + cudf::test::fixed_width_column_wrapper const bools_col2({0, 1, 2, 255, 0}); auto const string_input1 = cudf::table_view({strings_col}); auto const string_input2 = cudf::table_view({strings_col, strings_col}); @@ -894,7 +913,7 @@ TEST_F(MD5HashTest, MultiValue) TEST_F(MD5HashTest, MultiValueNulls) { // Nulls with different values should be equal - strings_column_wrapper const strings_col1( + cudf::test::strings_column_wrapper const strings_col1( {"", "Different but null!", "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the " @@ -902,7 +921,7 @@ TEST_F(MD5HashTest, MultiValueNulls) "All work and no play makes Jack a dull boy", 
R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}, {1, 0, 0, 1, 0}); - strings_column_wrapper const strings_col2( + cudf::test::strings_column_wrapper const strings_col2( {"", "A 60 character string to test MD5's message padding algorithm", "Very different... but null", @@ -912,15 +931,15 @@ TEST_F(MD5HashTest, MultiValueNulls) // Nulls with different values should be equal using limits = std::numeric_limits; - fixed_width_column_wrapper const ints_col1({0, 100, -100, limits::min(), limits::max()}, - {1, 0, 0, 1, 1}); - fixed_width_column_wrapper const ints_col2({0, -200, 200, limits::min(), limits::max()}, - {1, 0, 0, 1, 1}); + cudf::test::fixed_width_column_wrapper const ints_col1( + {0, 100, -100, limits::min(), limits::max()}, {1, 0, 0, 1, 1}); + cudf::test::fixed_width_column_wrapper const ints_col2( + {0, -200, 200, limits::min(), limits::max()}, {1, 0, 0, 1, 1}); // Nulls with different values should be equal // Different truth values should be equal - fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1}); - fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 0, 1}); auto const input1 = cudf::table_view({strings_col1, ints_col1, bools_col1}); auto const input2 = cudf::table_view({strings_col2, ints_col2, bools_col2}); @@ -936,7 +955,7 @@ TEST_F(MD5HashTest, StringListsNulls) { auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; }); - strings_column_wrapper const strings_col( + cudf::test::strings_column_wrapper const strings_col( {"", "A 60 character string to test MD5's message padding algorithm", "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the " @@ -944,7 +963,7 @@ TEST_F(MD5HashTest, StringListsNulls) "All work and no play makes Jack a dull 
boy", R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}); - lists_column_wrapper strings_list_col( + cudf::test::lists_column_wrapper strings_list_col( {{""}, {{"NULL", "A 60 character string to test MD5's message padding algorithm"}, validity}, {"A very long (greater than 128 bytes/char string) to test a multi hash-step data point in " @@ -971,7 +990,7 @@ TYPED_TEST_SUITE(MD5HashTestTyped, cudf::test::NumericTypes); TYPED_TEST(MD5HashTestTyped, Equality) { - fixed_width_column_wrapper const col({0, 127, 1, 2, 8}); + cudf::test::fixed_width_column_wrapper const col({0, 127, 1, 2, 8}); auto const input = cudf::table_view({col}); // Hash of same input should be equal @@ -987,8 +1006,8 @@ TYPED_TEST(MD5HashTestTyped, EqualityNulls) using T = TypeParam; // Nulls with different values should be equal - fixed_width_column_wrapper const col1({0, 127, 1, 2, 8}, {0, 1, 1, 1, 1}); - fixed_width_column_wrapper const col2({1, 127, 1, 2, 8}, {0, 1, 1, 1, 1}); + cudf::test::fixed_width_column_wrapper const col1({0, 127, 1, 2, 8}, {0, 1, 1, 1, 1}); + cudf::test::fixed_width_column_wrapper const col2({1, 127, 1, 2, 8}, {0, 1, 1, 1, 1}); auto const input1 = cudf::table_view({col1}); auto const input2 = cudf::table_view({col2}); @@ -1002,15 +1021,15 @@ TYPED_TEST(MD5HashTestTyped, EqualityNulls) TEST_F(MD5HashTest, TestBoolListsWithNulls) { - fixed_width_column_wrapper const col1({0, 255, 255, 16, 27, 18, 100, 1, 2}, - {1, 0, 0, 0, 1, 1, 1, 0, 0}); - fixed_width_column_wrapper const col2({0, 255, 255, 32, 81, 68, 3, 101, 4}, - {1, 0, 0, 1, 0, 1, 0, 1, 0}); - fixed_width_column_wrapper const col3({0, 255, 255, 64, 49, 42, 5, 6, 102}, - {1, 0, 0, 1, 1, 0, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper const col1({0, 255, 255, 16, 27, 18, 100, 1, 2}, + {1, 0, 0, 0, 1, 1, 1, 0, 0}); + cudf::test::fixed_width_column_wrapper const col2({0, 255, 255, 32, 81, 68, 3, 101, 4}, + {1, 0, 0, 1, 0, 1, 0, 1, 0}); + cudf::test::fixed_width_column_wrapper const col3({0, 255, 255, 64, 49, 42, 5, 
6, 102}, + {1, 0, 0, 1, 1, 0, 0, 0, 1}); auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; }); - lists_column_wrapper const list_col( + cudf::test::lists_column_wrapper const list_col( {{0, 0, 0}, {1}, {}, {{1, 1, 1}, validity}, {1, 1}, {1, 1}, {1}, {1}, {1}}, validity); auto const input1 = cudf::table_view({col1, col2, col3}); @@ -1027,22 +1046,23 @@ template class MD5HashListTestTyped : public cudf::test::BaseFixture { }; -using NumericTypesNoBools = Concat; +using NumericTypesNoBools = + cudf::test::Concat; TYPED_TEST_SUITE(MD5HashListTestTyped, NumericTypesNoBools); TYPED_TEST(MD5HashListTestTyped, TestListsWithNulls) { using T = TypeParam; - fixed_width_column_wrapper const col1({0, 255, 255, 16, 27, 18, 100, 1, 2}, - {1, 0, 0, 0, 1, 1, 1, 0, 0}); - fixed_width_column_wrapper const col2({0, 255, 255, 32, 81, 68, 3, 101, 4}, - {1, 0, 0, 1, 0, 1, 0, 1, 0}); - fixed_width_column_wrapper const col3({0, 255, 255, 64, 49, 42, 5, 6, 102}, - {1, 0, 0, 1, 1, 0, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper const col1({0, 255, 255, 16, 27, 18, 100, 1, 2}, + {1, 0, 0, 0, 1, 1, 1, 0, 0}); + cudf::test::fixed_width_column_wrapper const col2({0, 255, 255, 32, 81, 68, 3, 101, 4}, + {1, 0, 0, 1, 0, 1, 0, 1, 0}); + cudf::test::fixed_width_column_wrapper const col3({0, 255, 255, 64, 49, 42, 5, 6, 102}, + {1, 0, 0, 1, 1, 0, 0, 0, 1}); auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; }); - lists_column_wrapper const list_col( + cudf::test::lists_column_wrapper const list_col( {{0, 0, 0}, {127}, {}, {{32, 127, 64}, validity}, {27, 49}, {18, 68}, {100}, {101}, {102}}, validity); @@ -1070,8 +1090,9 @@ TYPED_TEST(MD5HashTestFloatTyped, TestExtremes) T nan = std::numeric_limits::quiet_NaN(); T inf = std::numeric_limits::infinity(); - fixed_width_column_wrapper const col1({T(0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf}); - fixed_width_column_wrapper const col2( + 
cudf::test::fixed_width_column_wrapper const col1( + {T(0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf}); + cudf::test::fixed_width_column_wrapper const col2( {T(-0.0), T(100.0), T(-100.0), min, max, -nan, inf, -inf}); auto const input1 = cudf::table_view({col1}); @@ -1091,9 +1112,9 @@ TYPED_TEST(MD5HashTestFloatTyped, TestListExtremes) T nan = std::numeric_limits::quiet_NaN(); T inf = std::numeric_limits::infinity(); - lists_column_wrapper const col1( + cudf::test::lists_column_wrapper const col1( {{T(0.0)}, {T(100.0), T(-100.0)}, {min, max, nan}, {inf, -inf}}); - lists_column_wrapper const col2( + cudf::test::lists_column_wrapper const col2( {{T(-0.0)}, {T(100.0), T(-100.0)}, {min, max, -nan}, {inf, -inf}}); auto const input1 = cudf::table_view({col1}); diff --git a/cpp/tests/interop/dlpack_test.cpp b/cpp/tests/interop/dlpack_test.cpp index da9f80cf3d7..2862590d05f 100644 --- a/cpp/tests/interop/dlpack_test.cpp +++ b/cpp/tests/interop/dlpack_test.cpp @@ -24,8 +24,6 @@ #include -using namespace cudf::test; - struct dlpack_deleter { void operator()(DLManagedTensor* tensor) { tensor->deleter(tensor); } }; @@ -61,7 +59,7 @@ void validate_dtype(DLDataType const& dtype) EXPECT_EQ(sizeof(T) * 8, dtype.bits); } -class DLPackUntypedTests : public BaseFixture { +class DLPackUntypedTests : public cudf::test::BaseFixture { }; TEST_F(DLPackUntypedTests, EmptyTableToDlpack) @@ -73,8 +71,8 @@ TEST_F(DLPackUntypedTests, EmptyTableToDlpack) TEST_F(DLPackUntypedTests, EmptyColsToDlpack) { - fixed_width_column_wrapper col1({}); - fixed_width_column_wrapper col2({}); + cudf::test::fixed_width_column_wrapper col1({}); + cudf::test::fixed_width_column_wrapper col2({}); cudf::table_view input({col1, col2}); unique_managed_tensor tensor(cudf::to_dlpack(input)); validate_dtype(tensor->dl_tensor.dtype); @@ -97,30 +95,30 @@ TEST_F(DLPackUntypedTests, NullTensorFromDlpack) TEST_F(DLPackUntypedTests, MultipleTypesToDlpack) { - fixed_width_column_wrapper col1({1, 2, 3, 4}); - 
fixed_width_column_wrapper col2({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col1({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col2({1, 2, 3, 4}); cudf::table_view input({col1, col2}); EXPECT_THROW(cudf::to_dlpack(input), cudf::logic_error); } TEST_F(DLPackUntypedTests, InvalidNullsToDlpack) { - fixed_width_column_wrapper col1({1, 2, 3, 4}); - fixed_width_column_wrapper col2({1, 2, 3, 4}, {1, 0, 1, 1}); + cudf::test::fixed_width_column_wrapper col1({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col2({1, 2, 3, 4}, {1, 0, 1, 1}); cudf::table_view input({col1, col2}); EXPECT_THROW(cudf::to_dlpack(input), cudf::logic_error); } TEST_F(DLPackUntypedTests, StringTypeToDlpack) { - strings_column_wrapper col({"foo", "bar", "baz"}); + cudf::test::strings_column_wrapper col({"foo", "bar", "baz"}); cudf::table_view input({col}); EXPECT_THROW(cudf::to_dlpack(input), cudf::logic_error); } TEST_F(DLPackUntypedTests, UnsupportedDeviceTypeFromDlpack) { - fixed_width_column_wrapper col({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4}); cudf::table_view input({col}); unique_managed_tensor tensor(cudf::to_dlpack(input)); @@ -131,7 +129,7 @@ TEST_F(DLPackUntypedTests, UnsupportedDeviceTypeFromDlpack) TEST_F(DLPackUntypedTests, InvalidDeviceIdFromDlpack) { - fixed_width_column_wrapper col({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4}); cudf::table_view input({col}); unique_managed_tensor tensor(cudf::to_dlpack(input)); @@ -142,7 +140,7 @@ TEST_F(DLPackUntypedTests, InvalidDeviceIdFromDlpack) TEST_F(DLPackUntypedTests, UnsupportedDimsFromDlpack) { - fixed_width_column_wrapper col({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4}); cudf::table_view input({col}); unique_managed_tensor tensor(cudf::to_dlpack(input)); @@ -153,7 +151,7 @@ TEST_F(DLPackUntypedTests, UnsupportedDimsFromDlpack) TEST_F(DLPackUntypedTests, TooManyRowsFromDlpack) { - fixed_width_column_wrapper col({1, 2, 3, 4}); 
+ cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4}); cudf::table_view input({col}); unique_managed_tensor tensor(cudf::to_dlpack(input)); @@ -165,8 +163,8 @@ TEST_F(DLPackUntypedTests, TooManyRowsFromDlpack) TEST_F(DLPackUntypedTests, TooManyColsFromDlpack) { - fixed_width_column_wrapper col1({1, 2, 3, 4}); - fixed_width_column_wrapper col2({5, 6, 7, 8}); + cudf::test::fixed_width_column_wrapper col1({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col2({5, 6, 7, 8}); cudf::table_view input({col1, col2}); unique_managed_tensor tensor(cudf::to_dlpack(input)); @@ -178,7 +176,7 @@ TEST_F(DLPackUntypedTests, TooManyColsFromDlpack) TEST_F(DLPackUntypedTests, InvalidTypeFromDlpack) { - fixed_width_column_wrapper col({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4}); cudf::table_view input({col}); unique_managed_tensor tensor(cudf::to_dlpack(input)); @@ -189,7 +187,7 @@ TEST_F(DLPackUntypedTests, InvalidTypeFromDlpack) TEST_F(DLPackUntypedTests, UnsupportedIntBitsizeFromDlpack) { - fixed_width_column_wrapper col({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4}); cudf::table_view input({col}); unique_managed_tensor tensor(cudf::to_dlpack(input)); @@ -200,7 +198,7 @@ TEST_F(DLPackUntypedTests, UnsupportedIntBitsizeFromDlpack) TEST_F(DLPackUntypedTests, UnsupportedFloatBitsizeFromDlpack) { - fixed_width_column_wrapper col({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4}); cudf::table_view input({col}); unique_managed_tensor tensor(cudf::to_dlpack(input)); @@ -211,7 +209,7 @@ TEST_F(DLPackUntypedTests, UnsupportedFloatBitsizeFromDlpack) TEST_F(DLPackUntypedTests, UnsupportedLanesFromDlpack) { - fixed_width_column_wrapper col({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4}); cudf::table_view input({col}); unique_managed_tensor tensor(cudf::to_dlpack(input)); @@ -335,20 +333,20 @@ TEST_F(DLPackUntypedTests, UnsupportedStridedColMajor2DTensorFromDlpack) } template 
-class DLPackTimestampTests : public BaseFixture { +class DLPackTimestampTests : public cudf::test::BaseFixture { }; -TYPED_TEST_SUITE(DLPackTimestampTests, ChronoTypes); +TYPED_TEST_SUITE(DLPackTimestampTests, cudf::test::ChronoTypes); TYPED_TEST(DLPackTimestampTests, ChronoTypesToDlpack) { - fixed_width_column_wrapper col({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4}); cudf::table_view input({col}); EXPECT_THROW(cudf::to_dlpack(input), cudf::logic_error); } template -class DLPackNumericTests : public BaseFixture { +class DLPackNumericTests : public cudf::test::BaseFixture { }; // The list of supported types comes from DLDataType_to_data_type() in cpp/src/dlpack/dlpack.cpp @@ -360,7 +358,7 @@ TYPED_TEST_SUITE(DLPackNumericTests, SupportedTypes); TYPED_TEST(DLPackNumericTests, ToDlpack1D) { // Test nullable column with no nulls - fixed_width_column_wrapper col({1, 2, 3, 4}, {1, 1, 1, 1}); + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4}, {1, 1, 1, 1}); auto const col_view = static_cast(col); EXPECT_FALSE(col_view.has_nulls()); EXPECT_TRUE(col_view.nullable()); @@ -389,9 +387,11 @@ TYPED_TEST(DLPackNumericTests, ToDlpack2D) using T = TypeParam; auto const col1_tmp = cudf::test::make_type_param_vector({1, 2, 3, 4}); auto const col2_tmp = cudf::test::make_type_param_vector({4, 5, 6, 7}); - std::vector> cols; - cols.push_back(fixed_width_column_wrapper(col1_tmp.cbegin(), col1_tmp.cend())); - cols.push_back(fixed_width_column_wrapper(col2_tmp.cbegin(), col2_tmp.cend())); + std::vector> cols; + cols.push_back( + cudf::test::fixed_width_column_wrapper(col1_tmp.cbegin(), col1_tmp.cend())); + cols.push_back( + cudf::test::fixed_width_column_wrapper(col2_tmp.cbegin(), col2_tmp.cend())); std::vector col_views; std::transform(cols.begin(), cols.end(), std::back_inserter(col_views), [](auto const& col) { @@ -427,7 +427,7 @@ TYPED_TEST(DLPackNumericTests, ToDlpack2D) TYPED_TEST(DLPackNumericTests, FromDlpack1D) { // Use to_dlpack to generate 
an input tensor - fixed_width_column_wrapper col({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4}); cudf::table_view input({col}); unique_managed_tensor tensor(cudf::to_dlpack(input)); @@ -442,9 +442,9 @@ TYPED_TEST(DLPackNumericTests, FromDlpack2D) using T = TypeParam; auto const col1 = cudf::test::make_type_param_vector({1, 2, 3, 4}); auto const col2 = cudf::test::make_type_param_vector({4, 5, 6, 7}); - std::vector> cols; - cols.push_back(fixed_width_column_wrapper(col1.cbegin(), col1.cend())); - cols.push_back(fixed_width_column_wrapper(col2.cbegin(), col2.cend())); + std::vector> cols; + cols.push_back(cudf::test::fixed_width_column_wrapper(col1.cbegin(), col1.cend())); + cols.push_back(cudf::test::fixed_width_column_wrapper(col2.cbegin(), col2.cend())); std::vector col_views; std::transform(cols.begin(), cols.end(), std::back_inserter(col_views), [](auto const& col) { @@ -479,8 +479,8 @@ TYPED_TEST(DLPackNumericTests, FromDlpackCpu) thrust::host_vector host_vector(data.begin(), data.end()); tensor.dl_tensor.data = host_vector.data(); - fixed_width_column_wrapper col1({1, 2, 3, 4}); - fixed_width_column_wrapper col2({5, 6, 7, 8}); + cudf::test::fixed_width_column_wrapper col1({1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper col2({5, 6, 7, 8}); cudf::table_view expected({col1, col2}); auto result = cudf::from_dlpack(&tensor); diff --git a/cpp/tests/lists/explode_tests.cpp b/cpp/tests/lists/explode_tests.cpp index fd22932916f..1a20a88df96 100644 --- a/cpp/tests/lists/explode_tests.cpp +++ b/cpp/tests/lists/explode_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,9 +22,8 @@ #include #include -using namespace cudf::test; -using FCW = fixed_width_column_wrapper; -using LCW = lists_column_wrapper; +using FCW = cudf::test::fixed_width_column_wrapper; +using LCW = cudf::test::lists_column_wrapper; class ExplodeTest : public cudf::test::BaseFixture { }; @@ -78,11 +77,11 @@ TEST_F(ExplodeTest, Basics) FCW a{100, 200, 300}; LCW b{LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}; - strings_column_wrapper c{"string0", "string1", "string2"}; + cudf::test::strings_column_wrapper c{"string0", "string1", "string2"}; FCW expected_a{100, 100, 100, 200, 200, 300, 300}; FCW expected_b{1, 2, 7, 5, 6, 0, 3}; - strings_column_wrapper expected_c{ + cudf::test::strings_column_wrapper expected_c{ "string0", "string0", "string0", "string1", "string1", "string2", "string2"}; cudf::table_view t({a, b, c}); @@ -347,14 +346,14 @@ TEST_F(ExplodeTest, NestedStructs) LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}}); FCW b1({100, 200, 300}); - strings_column_wrapper b2{"100", "200", "300"}; - structs_column_wrapper b({b1, b2}); + cudf::test::strings_column_wrapper b2{"100", "200", "300"}; + cudf::test::structs_column_wrapper b({b1, b2}); LCW expected_a{ LCW({1, null}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}; FCW expected_b1{100, 100, 200, 300, 300, 300}; - strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"}; - structs_column_wrapper expected_b({expected_b1, expected_b2}); + cudf::test::strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"}; + cudf::test::structs_column_wrapper expected_b({expected_b1, expected_b2}); cudf::table_view t({a, b}); cudf::table_view expected({expected_a, expected_b}); @@ -435,15 +434,16 @@ TEST_F(ExplodeTest, ListOfStructsWithEmpties) // concatenated auto final_col = cudf::concatenate(std::vector({*row0, *row1, *row2, *row3, *row4})); - auto s = strings_column_wrapper({"a", "b", "c", "d", "e"}).release(); + auto s = 
cudf::test::strings_column_wrapper({"a", "b", "c", "d", "e"}).release(); cudf::table_view t({final_col->view(), s->view()}); - auto ret = cudf::explode(t, 0); - auto expected_numeric_col = fixed_width_column_wrapper{{1, null, null}, {1, 0, 0}}; + auto ret = cudf::explode(t, 0); + auto expected_numeric_col = + cudf::test::fixed_width_column_wrapper{{1, null, null}, {1, 0, 0}}; - auto expected_a = structs_column_wrapper{{expected_numeric_col}, {1, 1, 0}}.release(); - auto expected_b = strings_column_wrapper({"a", "b", "c"}).release(); + auto expected_a = cudf::test::structs_column_wrapper{{expected_numeric_col}, {1, 1, 0}}.release(); + auto expected_b = cudf::test::strings_column_wrapper({"a", "b", "c"}).release(); cudf::table_view expected({expected_a->view(), expected_b->view()}); @@ -464,10 +464,11 @@ TYPED_TEST(ExplodeTypedTest, ListOfStructs) // [{25, "25"}, {30, "30"}] 400 // [{15, "15"}, {20, "20"}] 500 - auto numeric_col = - fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; - strings_column_wrapper string_col{"70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; - auto struct_col = structs_column_wrapper{{numeric_col, string_col}}.release(); + auto numeric_col = cudf::test::fixed_width_column_wrapper{ + {70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; + cudf::test::strings_column_wrapper string_col{ + "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; + auto struct_col = cudf::test::structs_column_wrapper{{numeric_col, string_col}}.release(); auto a = cudf::make_lists_column( 5, FCW{0, 2, 4, 6, 8, 10}.release(), std::move(struct_col), cudf::UNKNOWN_NULL_COUNT, {}); @@ -476,12 +477,13 @@ TYPED_TEST(ExplodeTypedTest, ListOfStructs) cudf::table_view t({a->view(), b}); auto ret = cudf::explode(t, 0); - auto expected_numeric_col = - fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; - strings_column_wrapper expected_string_col{ + auto expected_numeric_col = cudf::test::fixed_width_column_wrapper{ + {70, 75, 50, 55, 
35, 45, 25, 30, 15, 20}}; + cudf::test::strings_column_wrapper expected_string_col{ "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; - auto expected_a = structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release(); + auto expected_a = + cudf::test::structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release(); FCW expected_b{100, 100, 200, 200, 300, 300, 400, 400, 500, 500}; cudf::table_view expected({expected_a->view(), expected_b}); @@ -570,11 +572,11 @@ TEST_F(ExplodeOuterTest, Basics) FCW a{100, 200, 300}; LCW b{LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}; - strings_column_wrapper c{"string0", "string1", "string2"}; + cudf::test::strings_column_wrapper c{"string0", "string1", "string2"}; FCW expected_a{100, 100, 100, 200, 200, 300, 300}; FCW expected_b{1, 2, 7, 5, 6, 0, 3}; - strings_column_wrapper expected_c{ + cudf::test::strings_column_wrapper expected_c{ "string0", "string0", "string0", "string1", "string1", "string2", "string2"}; cudf::table_view t({a, b, c}); @@ -992,14 +994,14 @@ TEST_F(ExplodeOuterTest, NestedStructs) LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}}); FCW b1({100, 200, 300}); - strings_column_wrapper b2{"100", "200", "300"}; - structs_column_wrapper b({b1, b2}); + cudf::test::strings_column_wrapper b2{"100", "200", "300"}; + cudf::test::structs_column_wrapper b({b1, b2}); LCW expected_a{ LCW({1, null}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}; FCW expected_b1{100, 100, 200, 300, 300, 300}; - strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"}; - structs_column_wrapper expected_b({expected_b1, expected_b2}); + cudf::test::strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"}; + cudf::test::structs_column_wrapper expected_b({expected_b1, expected_b2}); cudf::table_view t({a, b}); cudf::table_view expected({expected_a, expected_b}); @@ -1080,17 +1082,18 @@ TEST_F(ExplodeOuterTest, ListOfStructsWithEmpties) 
// concatenated auto final_col = cudf::concatenate(std::vector({*row0, *row1, *row2, *row3, *row4})); - auto s = strings_column_wrapper({"a", "b", "c", "d", "e"}).release(); + auto s = cudf::test::strings_column_wrapper({"a", "b", "c", "d", "e"}).release(); cudf::table_view t({final_col->view(), s->view()}); auto ret = cudf::explode_outer(t, 0); auto expected_numeric_col = - fixed_width_column_wrapper{{1, null, null, null, null}, {1, 0, 0, 0, 0}}; + cudf::test::fixed_width_column_wrapper{{1, null, null, null, null}, {1, 0, 0, 0, 0}}; - auto expected_a = structs_column_wrapper{{expected_numeric_col}, {1, 1, 0, 0, 0}}.release(); - auto expected_b = strings_column_wrapper({"a", "b", "c", "d", "e"}).release(); + auto expected_a = + cudf::test::structs_column_wrapper{{expected_numeric_col}, {1, 1, 0, 0, 0}}.release(); + auto expected_b = cudf::test::strings_column_wrapper({"a", "b", "c", "d", "e"}).release(); cudf::table_view expected({expected_a->view(), expected_b->view()}); @@ -1111,10 +1114,11 @@ TYPED_TEST(ExplodeOuterTypedTest, ListOfStructs) // [{25, "25"}, {30, "30"}] 400 // [{15, "15"}, {20, "20"}] 500 - auto numeric_col = - fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; - strings_column_wrapper string_col{"70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; - auto struct_col = structs_column_wrapper{{numeric_col, string_col}}.release(); + auto numeric_col = cudf::test::fixed_width_column_wrapper{ + {70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; + cudf::test::strings_column_wrapper string_col{ + "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; + auto struct_col = cudf::test::structs_column_wrapper{{numeric_col, string_col}}.release(); auto a = cudf::make_lists_column( 5, FCW{0, 2, 4, 6, 8, 10}.release(), std::move(struct_col), cudf::UNKNOWN_NULL_COUNT, {}); @@ -1123,12 +1127,13 @@ TYPED_TEST(ExplodeOuterTypedTest, ListOfStructs) cudf::table_view t({a->view(), b}); auto ret = cudf::explode_outer(t, 0); - auto 
expected_numeric_col = - fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; - strings_column_wrapper expected_string_col{ + auto expected_numeric_col = cudf::test::fixed_width_column_wrapper{ + {70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; + cudf::test::strings_column_wrapper expected_string_col{ "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; - auto expected_a = structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release(); + auto expected_a = + cudf::test::structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release(); FCW expected_b{100, 100, 200, 200, 300, 300, 400, 400, 500, 500}; cudf::table_view expected({expected_a->view(), expected_b}); diff --git a/cpp/tests/reshape/byte_cast_tests.cpp b/cpp/tests/reshape/byte_cast_tests.cpp index f29b3a7980f..6eafc9a2759 100644 --- a/cpp/tests/reshape/byte_cast_tests.cpp +++ b/cpp/tests/reshape/byte_cast_tests.cpp @@ -21,21 +21,20 @@ #include #include -using namespace cudf::test; - class ByteCastTest : public cudf::test::BaseFixture { }; TEST_F(ByteCastTest, int16ValuesWithSplit) { using limits = std::numeric_limits; - fixed_width_column_wrapper const int16_col( + cudf::test::fixed_width_column_wrapper const int16_col( {short(0), short(100), short(-100), limits::min(), limits::max()}); - lists_column_wrapper const int16_expected( + cudf::test::lists_column_wrapper const int16_expected( {{0x00, 0x00}, {0x64, 0x00}, {0x9c, 0xff}, {0x00, 0x80}, {0xff, 0x7f}}); - lists_column_wrapper const int16_expected_slice1( + cudf::test::lists_column_wrapper const int16_expected_slice1( {{0x00, 0x00}, {0x00, 0x64}, {0xff, 0x9c}}); - lists_column_wrapper const int16_expected_slice2({{0x80, 0x00}, {0x7f, 0xff}}); + cudf::test::lists_column_wrapper const int16_expected_slice2( + {{0x80, 0x00}, {0x7f, 0xff}}); std::vector splits({3}); std::vector split_column = cudf::split(int16_col, splits); @@ -54,23 +53,23 @@ TEST_F(ByteCastTest, int16ValuesWithNulls) auto odd_validity = 
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - fixed_width_column_wrapper const int16_col( + cudf::test::fixed_width_column_wrapper const int16_col( {short(0), short(100), short(-100), limits::min(), limits::max()}, {0, 1, 0, 1, 0}); /* CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT compares underlying values even when specified as null, * resulting in erroneous test failures. The commented out data tests the case where underlying * values are different, but are both null. */ // auto int16_data = - // fixed_width_column_wrapper{0xee, 0xff, 0x00, 0x64, 0xee, 0xff, 0x80, 0x00, 0xee, - // 0xff}; - auto int16_data = - fixed_width_column_wrapper{0x00, 0x00, 0x00, 0x64, 0xff, 0x9c, 0x80, 0x00, 0x7f, 0xff}; + // cudf::test::fixed_width_column_wrapper{0xee, 0xff, 0x00, 0x64, 0xee, 0xff, 0x80, + // 0x00, 0xee, 0xff}; + auto int16_data = cudf::test::fixed_width_column_wrapper{ + 0x00, 0x00, 0x00, 0x64, 0xff, 0x9c, 0x80, 0x00, 0x7f, 0xff}; auto int16_expected = cudf::make_lists_column( 5, - std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), + std::move(cudf::test::fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), std::move(int16_data.release()), 3, - detail::make_null_mask(odd_validity, odd_validity + 5)); + cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5)); auto const output_int16 = cudf::byte_cast(int16_col, cudf::flip_endianness::YES); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_int16->view(), int16_expected->view()); @@ -79,17 +78,19 @@ TEST_F(ByteCastTest, int16ValuesWithNulls) TEST_F(ByteCastTest, int32Values) { using limits = std::numeric_limits; - fixed_width_column_wrapper const int32_col({0, 100, -100, limits::min(), limits::max()}); - lists_column_wrapper const int32_expected_flipped({{0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x64}, - {0xff, 0xff, 0xff, 0x9c}, - {0x80, 0x00, 0x00, 0x00}, - {0x7f, 0xff, 0xff, 0xff}}); - lists_column_wrapper const int32_expected({{0x00, 0x00, 0x00, 0x00}, - 
{0x64, 0x00, 0x00, 0x00}, - {0x9c, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0x80}, - {0xff, 0xff, 0xff, 0x7f}}); + cudf::test::fixed_width_column_wrapper const int32_col( + {0, 100, -100, limits::min(), limits::max()}); + cudf::test::lists_column_wrapper const int32_expected_flipped( + {{0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x64}, + {0xff, 0xff, 0xff, 0x9c}, + {0x80, 0x00, 0x00, 0x00}, + {0x7f, 0xff, 0xff, 0xff}}); + cudf::test::lists_column_wrapper const int32_expected({{0x00, 0x00, 0x00, 0x00}, + {0x64, 0x00, 0x00, 0x00}, + {0x9c, 0xff, 0xff, 0xff}, + {0x00, 0x00, 0x00, 0x80}, + {0xff, 0xff, 0xff, 0x7f}}); auto const output_int32_flipped = cudf::byte_cast(int32_col, cudf::flip_endianness::YES); auto const output_int32 = cudf::byte_cast(int32_col, cudf::flip_endianness::NO); @@ -103,23 +104,24 @@ TEST_F(ByteCastTest, int32ValuesWithNulls) auto even_validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i + 1) % 2; }); - fixed_width_column_wrapper const int32_col({0, 100, -100, limits::min(), limits::max()}, - {1, 0, 1, 0, 1}); + cudf::test::fixed_width_column_wrapper const int32_col( + {0, 100, -100, limits::min(), limits::max()}, {1, 0, 1, 0, 1}); /* Data commented out below explained by comment in int16ValuesWithNulls test */ // auto int32_data = - // fixed_width_column_wrapper{0x00, 0x00, 0x00, 0x00, 0xcc, 0xdd, 0xee, 0xff, 0xff, - // 0xff, + // cudf::test::fixed_width_column_wrapper{0x00, 0x00, 0x00, 0x00, 0xcc, 0xdd, 0xee, + // 0xff, 0xff, 0xff, // 0xff, 0x9c, 0xcc, 0xdd, 0xee, 0xff, 0x7f, 0xff, 0xff, // 0xff}; - auto int32_data = - fixed_width_column_wrapper{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff, - 0xff, 0x9c, 0x80, 0x00, 0x00, 0x00, 0x7f, 0xff, 0xff, 0xff}; + auto int32_data = cudf::test::fixed_width_column_wrapper{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff, + 0xff, 0x9c, 0x80, 0x00, 0x00, 0x00, 0x7f, 0xff, 0xff, 0xff}; auto int32_expected = cudf::make_lists_column( 5, - 
std::move(fixed_width_column_wrapper{0, 4, 8, 12, 16, 20}.release()), + std::move( + cudf::test::fixed_width_column_wrapper{0, 4, 8, 12, 16, 20}.release()), std::move(int32_data.release()), 2, - detail::make_null_mask(even_validity, even_validity + 5)); + cudf::test::detail::make_null_mask(even_validity, even_validity + 5)); auto const output_int32 = cudf::byte_cast(int32_col, cudf::flip_endianness::YES); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_int32->view(), int32_expected->view()); @@ -128,19 +130,19 @@ TEST_F(ByteCastTest, int32ValuesWithNulls) TEST_F(ByteCastTest, int64ValuesWithSplit) { using limits = std::numeric_limits; - fixed_width_column_wrapper const int64_col( + cudf::test::fixed_width_column_wrapper const int64_col( {long(0), long(100), long(-100), limits::min(), limits::max()}); - lists_column_wrapper const int64_expected_flipped( + cudf::test::lists_column_wrapper const int64_expected_flipped( {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64}, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9c}, {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, {0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}); - lists_column_wrapper const int64_expected_slice1( + cudf::test::lists_column_wrapper const int64_expected_slice1( {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, {0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, {0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}); - lists_column_wrapper const int64_expected_slice2( + cudf::test::lists_column_wrapper const int64_expected_slice2( {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f}}); @@ -161,23 +163,24 @@ TEST_F(ByteCastTest, int64ValuesWithNulls) auto odd_validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - fixed_width_column_wrapper const int64_col( + cudf::test::fixed_width_column_wrapper const int64_col( {long(0), long(100), long(-100), limits::min(), limits::max()}, 
{0, 1, 0, 1, 0}); /* Data commented out below explained by comment in int16ValuesWithNulls test */ - // auto int64_data = fixed_width_column_wrapper{ + // auto int64_data = cudf::test::fixed_width_column_wrapper{ // 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x64, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x80, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; - auto int64_data = fixed_width_column_wrapper{ + auto int64_data = cudf::test::fixed_width_column_wrapper{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9c, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; auto int64_expected = cudf::make_lists_column( 5, - std::move(fixed_width_column_wrapper{0, 8, 16, 24, 32, 40}.release()), + std::move( + cudf::test::fixed_width_column_wrapper{0, 8, 16, 24, 32, 40}.release()), std::move(int64_data.release()), 3, - detail::make_null_mask(odd_validity, odd_validity + 5)); + cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5)); auto const output_int64 = cudf::byte_cast(int64_col, cudf::flip_endianness::YES); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_int64->view(), int64_expected->view()); @@ -188,26 +191,26 @@ TEST_F(ByteCastTest, fp32ValuesWithSplit) using limits = std::numeric_limits; float nan = limits::quiet_NaN(); float inf = limits::infinity(); - fixed_width_column_wrapper const fp32_col( + cudf::test::fixed_width_column_wrapper const fp32_col( {float(0.0), float(100.0), float(-100.0), limits::min(), limits::max(), nan, -nan, inf, -inf}); - lists_column_wrapper const fp32_expected({{0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0xc8, 0x42}, - {0x00, 0x00, 0xc8, 0xc2}, - {0x00, 0x00, 0x80, 0x00}, - {0xff, 0xff, 0x7f, 0x7f}, - {0x00, 0x00, 0xc0, 0x7f}, - {0x00, 0x00, 0xc0, 0xff}, - {0x00, 0x00, 0x80, 0x7f}, - {0x00, 0x00, 
0x80, 0xff}}); - lists_column_wrapper const fp32_expected_slice1({{0x00, 0x00, 0x00, 0x00}, - {0x42, 0xc8, 0x00, 0x00}, - {0xc2, 0xc8, 0x00, 0x00}, - {0x00, 0x80, 0x00, 0x00}, - {0x7f, 0x7f, 0xff, 0xff}}); - lists_column_wrapper const fp32_expected_slice2({{0x7f, 0xc0, 0x00, 0x00}, - {0xff, 0xc0, 0x00, 0x00}, - {0x7f, 0x80, 0x00, 0x00}, - {0xff, 0x80, 0x00, 0x00}}); + cudf::test::lists_column_wrapper const fp32_expected({{0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0xc8, 0x42}, + {0x00, 0x00, 0xc8, 0xc2}, + {0x00, 0x00, 0x80, 0x00}, + {0xff, 0xff, 0x7f, 0x7f}, + {0x00, 0x00, 0xc0, 0x7f}, + {0x00, 0x00, 0xc0, 0xff}, + {0x00, 0x00, 0x80, 0x7f}, + {0x00, 0x00, 0x80, 0xff}}); + cudf::test::lists_column_wrapper const fp32_expected_slice1({{0x00, 0x00, 0x00, 0x00}, + {0x42, 0xc8, 0x00, 0x00}, + {0xc2, 0xc8, 0x00, 0x00}, + {0x00, 0x80, 0x00, 0x00}, + {0x7f, 0x7f, 0xff, 0xff}}); + cudf::test::lists_column_wrapper const fp32_expected_slice2({{0x7f, 0xc0, 0x00, 0x00}, + {0xff, 0xc0, 0x00, 0x00}, + {0x7f, 0x80, 0x00, 0x00}, + {0xff, 0x80, 0x00, 0x00}}); std::vector splits({5}); std::vector split_column = cudf::split(fp32_col, splits); @@ -226,23 +229,24 @@ TEST_F(ByteCastTest, fp32ValuesWithNulls) auto even_validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i + 1) % 2; }); - fixed_width_column_wrapper const fp32_col( + cudf::test::fixed_width_column_wrapper const fp32_col( {float(0.0), float(100.0), float(-100.0), limits::min(), limits::max()}, {1, 0, 1, 0, 1}); /* Data commented out below explained by comment in int16ValuesWithNulls test */ // auto fp32_data = - // fixed_width_column_wrapper{0x00, 0x00, 0x00, 0x00, 0xcc, 0xdd, 0xee, 0xff, 0xc2, - // 0xc8, + // cudf::test::fixed_width_column_wrapper{0x00, 0x00, 0x00, 0x00, 0xcc, 0xdd, 0xee, + // 0xff, 0xc2, 0xc8, // 0x00, 0x00, 0xcc, 0xdd, 0xee, 0xff, 0x7f, 0x7f, 0xff, // 0xff}; - auto fp32_data = - fixed_width_column_wrapper{0x00, 0x00, 0x00, 0x00, 0x42, 0xc8, 0x00, 0x00, 0xc2, 0xc8, - 0x00, 0x00, 
0x00, 0x80, 0x00, 0x00, 0x7f, 0x7f, 0xff, 0xff}; + auto fp32_data = cudf::test::fixed_width_column_wrapper{ + 0x00, 0x00, 0x00, 0x00, 0x42, 0xc8, 0x00, 0x00, 0xc2, 0xc8, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x7f, 0x7f, 0xff, 0xff}; auto fp32_expected = cudf::make_lists_column( 5, - std::move(fixed_width_column_wrapper{0, 4, 8, 12, 16, 20}.release()), + std::move( + cudf::test::fixed_width_column_wrapper{0, 4, 8, 12, 16, 20}.release()), std::move(fp32_data.release()), 2, - detail::make_null_mask(even_validity, even_validity + 5)); + cudf::test::detail::make_null_mask(even_validity, even_validity + 5)); auto const output_fp32 = cudf::byte_cast(fp32_col, cudf::flip_endianness::YES); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_fp32->view(), fp32_expected->view()); @@ -253,16 +257,16 @@ TEST_F(ByteCastTest, fp64ValuesWithSplit) using limits = std::numeric_limits; double nan = limits::quiet_NaN(); double inf = limits::infinity(); - fixed_width_column_wrapper const fp64_col({double(0.0), - double(100.0), - double(-100.0), - limits::min(), - limits::max(), - nan, - -nan, - inf, - -inf}); - lists_column_wrapper const fp64_flipped_expected( + cudf::test::fixed_width_column_wrapper const fp64_col({double(0.0), + double(100.0), + double(-100.0), + limits::min(), + limits::max(), + nan, + -nan, + inf, + -inf}); + cudf::test::lists_column_wrapper const fp64_flipped_expected( {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, {0x40, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, {0xc0, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, @@ -272,13 +276,13 @@ TEST_F(ByteCastTest, fp64ValuesWithSplit) {0xff, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, {0x7f, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, {0xff, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}); - lists_column_wrapper const fp64_expected_slice1( + cudf::test::lists_column_wrapper const fp64_expected_slice1( {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x40}, {0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x59, 0xc0}, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00}, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0x7f}}); - lists_column_wrapper const fp64_expected_slice2( + cudf::test::lists_column_wrapper const fp64_expected_slice2( {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f}, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0xff}, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x7f}, @@ -301,23 +305,24 @@ TEST_F(ByteCastTest, fp64ValuesWithNulls) auto odd_validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - fixed_width_column_wrapper const fp64_col( + cudf::test::fixed_width_column_wrapper const fp64_col( {double(0.0), double(100.0), double(-100.0), limits::min(), limits::max()}, {0, 1, 0, 1, 0}); /* Data commented out below explained by comment in int16ValuesWithNulls test */ - // auto fp64_data = fixed_width_column_wrapper{ + // auto fp64_data = cudf::test::fixed_width_column_wrapper{ // 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x40, 0x59, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00, 0x10, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; - auto fp64_data = fixed_width_column_wrapper{ + auto fp64_data = cudf::test::fixed_width_column_wrapper{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; auto fp64_expected = cudf::make_lists_column( 5, - std::move(fixed_width_column_wrapper{0, 8, 16, 24, 32, 40}.release()), + std::move( + cudf::test::fixed_width_column_wrapper{0, 8, 16, 24, 32, 40}.release()), std::move(fp64_data.release()), 3, - detail::make_null_mask(odd_validity, odd_validity + 5)); + cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5)); auto const output_fp64 = cudf::byte_cast(fp64_col, cudf::flip_endianness::YES); 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_fp64->view(), fp64_expected->view()); @@ -325,9 +330,9 @@ TEST_F(ByteCastTest, fp64ValuesWithNulls) TEST_F(ByteCastTest, StringValues) { - strings_column_wrapper const strings_col( + cudf::test::strings_column_wrapper const strings_col( {"", "The quick", " brown fox...", "!\"#$%&\'()*+,-./", "0123456789:;<=>?@", "[\\]^_`{|}~"}); - lists_column_wrapper const strings_expected( + cudf::test::lists_column_wrapper const strings_expected( {{}, {0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b}, {0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f, 0x78, 0x2e, 0x2e, 0x2e}, diff --git a/cpp/tests/reshape/tile_tests.cpp b/cpp/tests/reshape/tile_tests.cpp index e605fd7a84b..86dcc431633 100644 --- a/cpp/tests/reshape/tile_tests.cpp +++ b/cpp/tests/reshape/tile_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,10 +24,8 @@ #include #include -using namespace cudf::test; - template -struct TileTest : public BaseFixture { +struct TileTest : public cudf::test::BaseFixture { }; TYPED_TEST_SUITE(TileTest, cudf::test::AllTypes); @@ -47,7 +45,7 @@ TYPED_TEST(TileTest, NoRows) { using T = TypeParam; - fixed_width_column_wrapper in_a({}); + cudf::test::fixed_width_column_wrapper in_a({}); cudf::table_view in(std::vector{in_a}); auto expected = in; @@ -61,10 +59,10 @@ TYPED_TEST(TileTest, OneColumn) { using T = TypeParam; - fixed_width_column_wrapper in_a({-1, 0, 1}); + cudf::test::fixed_width_column_wrapper in_a({-1, 0, 1}); cudf::table_view in(std::vector{in_a}); - fixed_width_column_wrapper expected_a({-1, 0, 1, -1, 0, 1}); + cudf::test::fixed_width_column_wrapper expected_a({-1, 0, 1, -1, 0, 1}); cudf::table_view expected(std::vector{expected_a}); auto actual = cudf::tile(in, 2); @@ -76,10 +74,11 @@ TYPED_TEST(TileTest, OneColumnNullable) { using T = TypeParam; - fixed_width_column_wrapper in_a({-1, 0, 1}, {1, 0, 0}); + cudf::test::fixed_width_column_wrapper in_a({-1, 0, 1}, {1, 0, 0}); cudf::table_view in(std::vector{in_a}); - fixed_width_column_wrapper expected_a({-1, 0, 1, -1, 0, 1}, {1, 0, 0, 1, 0, 0}); + cudf::test::fixed_width_column_wrapper expected_a({-1, 0, 1, -1, 0, 1}, + {1, 0, 0, 1, 0, 0}); cudf::table_view expected(std::vector{expected_a}); auto actual = cudf::tile(in, 2); @@ -91,7 +90,7 @@ TYPED_TEST(TileTest, OneColumnNegativeCount) { using T = TypeParam; - fixed_width_column_wrapper in_a({-1, 0, 1}, {1, 0, 0}); + cudf::test::fixed_width_column_wrapper in_a({-1, 0, 1}, {1, 0, 0}); cudf::table_view in(std::vector{in_a}); EXPECT_THROW(cudf::tile(in, -1), cudf::logic_error); @@ -101,13 +100,13 @@ TYPED_TEST(TileTest, OneColumnZeroCount) { using T = TypeParam; - fixed_width_column_wrapper in_a({-1, 0, 1}, {1, 0, 0}); + cudf::test::fixed_width_column_wrapper in_a({-1, 0, 1}, {1, 0, 0}); cudf::table_view in(std::vector{in_a}); std::vector vals{}; std::vector 
mask{}; - fixed_width_column_wrapper expected_a(vals.begin(), vals.end(), mask.begin()); + cudf::test::fixed_width_column_wrapper expected_a(vals.begin(), vals.end(), mask.begin()); cudf::table_view expected(std::vector{expected_a}); From 416d4d5d194bf42ffe7ca0c0090860f3c91f90aa Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Thu, 20 Oct 2022 08:20:21 -0500 Subject: [PATCH 050/202] Enable backend dispatching for Dask-DataFrame creation (#11920) This PR depends on https://github.com/dask/dask/pull/9475 (**Now Merged**) After dask#9475, external libraries are now able to implement (and expose) their own `DataFrameBackendEntrypoint` definitions to specify custom creation functions for DataFrame collections. This PR introduces the `CudfBackendEntrypoint` class to create `dask_cudf.DataFrame` collections using the `dask.dataframe` API. By installing `dask_cudf` with this entrypoint definition in place, you get the following behavior in `dask.dataframe`: ```python import dask.dataframe as dd import dask # Tell Dask that you want to create DataFrame collections # with the "cudf" backend (for supported creation functions). # This can also be used in a context, or set in a yaml file dask.config.set({"dataframe.backend": "cudf"}) ddf = dd.from_dict({"a": range(10)}, npartitions=2) type(ddf) # dask_cudf.core.DataFrame ``` Note that the code snippet above does not require an explicit import of `cudf` or `dask_cudf`. 
The following creation functions will support backend dispatching after dask#9475: - `from_dict` - `read_parquet` - `read_json` - `read_orc` - `read_csv` - `read_hdf` See also: https://github.com/dask/design-docs/pull/1 Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/11920 --- python/dask_cudf/dask_cudf/backends.py | 110 ++++++++++++++++++ python/dask_cudf/dask_cudf/core.py | 23 ++-- .../dask_cudf/dask_cudf/io/tests/test_csv.py | 16 +++ .../dask_cudf/dask_cudf/io/tests/test_json.py | 17 +++ .../dask_cudf/dask_cudf/io/tests/test_orc.py | 16 ++- .../dask_cudf/io/tests/test_parquet.py | 14 +++ python/dask_cudf/dask_cudf/tests/test_core.py | 17 +++ python/dask_cudf/setup.cfg | 4 + 8 files changed, 204 insertions(+), 13 deletions(-) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 2584ac47878..f02c75eb3e8 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -1,5 +1,6 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. +import warnings from collections.abc import Iterator import cupy as cp @@ -8,6 +9,8 @@ import pyarrow as pa from pandas.api.types import is_scalar +import dask.dataframe as dd +from dask import config from dask.dataframe.core import get_parallel_type, meta_nonempty from dask.dataframe.dispatch import ( categorical_dtype_dispatch, @@ -426,3 +429,110 @@ def sizeof_cudf_dataframe(df): @_dask_cudf_nvtx_annotate def sizeof_cudf_series_index(obj): return obj.memory_usage() + + +def _default_backend(func, *args, **kwargs): + # Utility to call a dask.dataframe function with + # the default ("pandas") backend + + # NOTE: Some `CudfBackendEntrypoint` methods need to + # invoke the "pandas"-version of the same method, but + # with custom kwargs (e.g. `engine`).
In these cases, + # an explicit "pandas" config context is needed to + # avoid a recursive loop + with config.set({"dataframe.backend": "pandas"}): + return func(*args, **kwargs) + + +try: + + # Define "cudf" backend engine to be registered with Dask + from dask.dataframe.backends import DataFrameBackendEntrypoint + + class CudfBackendEntrypoint(DataFrameBackendEntrypoint): + """Backend-entrypoint class for Dask-DataFrame + + This class is registered under the name "cudf" for the + ``dask.dataframe.backends`` entrypoint in ``setup.cfg``. + Dask-DataFrame will use the methods defined in this class + in place of ``dask.dataframe.`` when the + "dataframe.backend" configuration is set to "cudf": + + Examples + -------- + >>> import dask + >>> import dask.dataframe as dd + >>> with dask.config.set({"dataframe.backend": "cudf"}): + ... ddf = dd.from_dict({"a": range(10)}) + >>> type(ddf) + + """ + + @staticmethod + def from_dict(data, npartitions, orient="columns", **kwargs): + from dask_cudf import from_cudf + + if orient != "columns": + raise ValueError(f"orient={orient} is not supported") + # TODO: Use cudf.from_dict + # (See: https://github.com/rapidsai/cudf/issues/11934) + return from_cudf( + cudf.DataFrame(data), + npartitions=npartitions, + ) + + @staticmethod + def read_parquet(*args, engine=None, **kwargs): + from dask_cudf.io.parquet import CudfEngine + + return _default_backend( + dd.read_parquet, + *args, + engine=CudfEngine, + **kwargs, + ) + + @staticmethod + def read_json(*args, engine=None, **kwargs): + return _default_backend( + dd.read_json, + *args, + engine=cudf.read_json, + **kwargs, + ) + + @staticmethod + def read_orc(*args, **kwargs): + from dask_cudf.io import read_orc + + return read_orc(*args, **kwargs) + + @staticmethod + def read_csv(*args, **kwargs): + from dask_cudf.io import read_csv + + chunksize = kwargs.pop("chunksize", None) + blocksize = kwargs.pop("blocksize", "default") + if chunksize is None and blocksize != "default": + chunksize 
= blocksize + return read_csv( + *args, + chunksize=chunksize, + **kwargs, + ) + + @staticmethod + def read_hdf(*args, **kwargs): + from dask_cudf import from_dask_dataframe + + # HDF5 reader not yet implemented in cudf + warnings.warn( + "read_hdf is not yet implemented in cudf/dask_cudf. " + "Moving to cudf from pandas. Expect poor performance!" + ) + return from_dask_dataframe( + _default_backend(dd.read_hdf, *args, **kwargs) + ) + +except ImportError: + pass diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index 0bf39df313a..76705e7cbf1 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -2,10 +2,10 @@ import math import warnings -from distutils.version import LooseVersion import numpy as np import pandas as pd +from packaging.version import parse as parse_version from tlz import partition_all import dask @@ -31,7 +31,11 @@ from dask_cudf.accessors import ListMethods, StructMethods from dask_cudf.sorting import _get_shuffle_type -DASK_VERSION = LooseVersion(dask.__version__) +DASK_BACKEND_SUPPORT = parse_version(dask.__version__) >= parse_version( + "2022.10.0" +) +# TODO: Remove DASK_BACKEND_SUPPORT throughout codebase +# when dask_cudf is pinned to dask>=2022.10.0 class _Frame(dd.core._Frame, OperatorMethodMixin): @@ -736,7 +740,7 @@ def from_dask_dataframe(df): return df.map_partitions(cudf.from_pandas) -for name in [ +for name in ( "add", "sub", "mul", @@ -751,16 +755,13 @@ def from_dask_dataframe(df): "rfloordiv", "rmod", "rpow", -]: +): meth = getattr(cudf.DataFrame, name) - kwargs = {"original": cudf.DataFrame} if DASK_VERSION >= "2.11.1" else {} - DataFrame._bind_operator_method(name, meth, **kwargs) + DataFrame._bind_operator_method(name, meth, original=cudf.Series) meth = getattr(cudf.Series, name) - kwargs = {"original": cudf.Series} if DASK_VERSION >= "2.11.1" else {} - Series._bind_operator_method(name, meth, **kwargs) + Series._bind_operator_method(name, meth, 
original=cudf.Series) -for name in ["lt", "gt", "le", "ge", "ne", "eq"]: +for name in ("lt", "gt", "le", "ge", "ne", "eq"): meth = getattr(cudf.Series, name) - kwargs = {"original": cudf.Series} if DASK_VERSION >= "2.11.1" else {} - Series._bind_comparison_method(name, meth, **kwargs) + Series._bind_comparison_method(name, meth, original=cudf.Series) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_csv.py b/python/dask_cudf/dask_cudf/io/tests/test_csv.py index 564a719fb86..7f69e208b5a 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_csv.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_csv.py @@ -16,6 +16,22 @@ import dask_cudf +@pytest.mark.skipif( + not dask_cudf.core.DASK_BACKEND_SUPPORT, + reason="No backend-dispatch support", +) +def test_csv_roundtrip_backend_dispatch(tmp_path): + # Test ddf.read_csv cudf-backend dispatch + df = cudf.DataFrame({"x": [1, 2, 3, 4], "id": ["a", "b", "c", "d"]}) + ddf = dask_cudf.from_cudf(df, npartitions=2) + csv_path = str(tmp_path / "data-*.csv") + ddf.to_csv(csv_path, index=False) + with dask.config.set({"dataframe.backend": "cudf"}): + ddf2 = dd.read_csv(csv_path) + assert isinstance(ddf2, dask_cudf.DataFrame) + dd.assert_eq(ddf, ddf2, check_divisions=False, check_index=False) + + def test_csv_roundtrip(tmp_path): df = cudf.DataFrame({"x": [1, 2, 3, 4], "id": ["a", "b", "c", "d"]}) ddf = dask_cudf.from_cudf(df, npartitions=2) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_json.py b/python/dask_cudf/dask_cudf/io/tests/test_json.py index 3f854bb343b..d19f7736e8e 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_json.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_json.py @@ -12,6 +12,23 @@ import dask_cudf +@pytest.mark.skipif( + not dask_cudf.core.DASK_BACKEND_SUPPORT, + reason="No backend-dispatch support", +) +def test_read_json_backend_dispatch(tmp_path): + # Test ddf.read_json cudf-backend dispatch + df1 = dask.datasets.timeseries( + dtypes={"x": int, "y": int}, freq="120s" + 
).reset_index(drop=True) + json_path = str(tmp_path / "data-*.json") + df1.to_json(json_path) + with dask.config.set({"dataframe.backend": "cudf"}): + df2 = dd.read_json(json_path) + assert isinstance(df2, dask_cudf.DataFrame) + dd.assert_eq(df1, df2) + + def test_read_json(tmp_path): df1 = dask.datasets.timeseries( dtypes={"x": int, "y": int}, freq="120s" diff --git a/python/dask_cudf/dask_cudf/io/tests/test_orc.py b/python/dask_cudf/dask_cudf/io/tests/test_orc.py index f19396a9b37..2291dfba536 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_orc.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_orc.py @@ -6,18 +6,30 @@ import pytest +import dask from dask import dataframe as dd import cudf import dask_cudf -# import pyarrow.orc as orc - cur_dir = os.path.dirname(__file__) sample_orc = os.path.join(cur_dir, "data/orc/sample.orc") +@pytest.mark.skipif( + not dask_cudf.core.DASK_BACKEND_SUPPORT, + reason="No backend-dispatch support", +) +def test_read_orc_backend_dispatch(): + # Test ddf.read_orc cudf-backend dispatch + df1 = cudf.read_orc(sample_orc) + with dask.config.set({"dataframe.backend": "cudf"}): + df2 = dd.read_orc(sample_orc) + assert isinstance(df2, dask_cudf.DataFrame) + dd.assert_eq(df1, df2, check_index=False) + + def test_read_orc_defaults(): df1 = cudf.read_orc(sample_orc) df2 = dask_cudf.read_orc(sample_orc) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index ef5741b0539..7b9f926da3f 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -46,6 +46,20 @@ def _divisions(setting): return {"gather_statistics": setting} +@pytest.mark.skipif( + not dask_cudf.core.DASK_BACKEND_SUPPORT, + reason="No backend-dispatch support", +) +def test_roundtrip_backend_dispatch(tmpdir): + # Test ddf.read_parquet cudf-backend dispatch + tmpdir = str(tmpdir) + ddf.to_parquet(tmpdir, engine="pyarrow") + with 
dask.config.set({"dataframe.backend": "cudf"}): + ddf2 = dd.read_parquet(tmpdir, index=False) + assert isinstance(ddf2, dask_cudf.DataFrame) + dd.assert_eq(ddf.reset_index(drop=False), ddf2) + + @pytest.mark.parametrize("write_metadata_file", [True, False]) @pytest.mark.parametrize("divisions", [True, False]) def test_roundtrip_from_dask(tmpdir, divisions, write_metadata_file): diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index 40041fd5c0e..f7c46466705 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -17,6 +17,23 @@ import dask_cudf as dgd +@pytest.mark.skipif( + not dgd.core.DASK_BACKEND_SUPPORT, reason="No backend-dispatch support" +) +def test_from_dict_backend_dispatch(): + # Test ddf.from_dict cudf-backend dispatch + np.random.seed(0) + data = { + "x": np.random.randint(0, 5, size=10000), + "y": np.random.normal(size=10000), + } + expect = cudf.DataFrame(data) + with dask.config.set({"dataframe.backend": "cudf"}): + ddf = dd.from_dict(data, npartitions=2) + assert isinstance(ddf, dgd.DataFrame) + dd.assert_eq(expect, ddf) + + def test_from_cudf(): np.random.seed(0) diff --git a/python/dask_cudf/setup.cfg b/python/dask_cudf/setup.cfg index 8f4c2029a87..f45bdf00430 100644 --- a/python/dask_cudf/setup.cfg +++ b/python/dask_cudf/setup.cfg @@ -38,3 +38,7 @@ skip= buck-out build dist + +[options.entry_points] +dask.dataframe.backends = + cudf = dask_cudf.backends:CudfBackendEntrypoint From ff41841cee58a2e945d39dfb1d11d823393814ed Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 20 Oct 2022 09:04:36 -0700 Subject: [PATCH 051/202] Remove validation that requires introspection (#11938) This PR removes optional validation for some APIs. Performing these validations requires data introspection, which we do not want. This PR resolves #5505. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) - David Wendt (https://github.com/davidwendt) Approvers: - Mark Harris (https://github.com/harrism) - GALI PREM SAGAR (https://github.com/galipremsagar) - Matthew Roeschke (https://github.com/mroeschke) - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/11938 --- cpp/benchmarks/lists/copying/scatter_lists.cu | 2 +- cpp/include/cudf/copying.hpp | 18 ++-- cpp/include/cudf/detail/scatter.cuh | 11 --- cpp/include/cudf/detail/scatter.hpp | 18 +--- cpp/include/cudf/filling.hpp | 8 +- cpp/src/copying/copy.cu | 9 +- cpp/src/copying/scatter.cu | 32 ++----- cpp/src/filling/repeat.cu | 11 +-- cpp/src/groupby/sort/scan.cpp | 6 +- cpp/src/groupby/sort/sort_helper.cu | 1 - cpp/src/partitioning/partitioning.cu | 4 +- cpp/src/rolling/detail/lead_lag_nested.cuh | 1 - .../copying/scatter_list_scalar_tests.cpp | 4 +- .../copying/scatter_struct_scalar_tests.cpp | 4 +- cpp/tests/copying/scatter_tests.cpp | 87 ++++++------------- cpp/tests/filling/repeat_tests.cpp | 23 +---- java/src/main/java/ai/rapids/cudf/Table.java | 38 ++------ java/src/main/native/src/TableJni.cpp | 15 ++-- .../test/java/ai/rapids/cudf/TableTest.java | 15 +--- python/cudf/cudf/_lib/copying.pyx | 22 +++-- python/cudf/cudf/_lib/cpp/copying.pxd | 2 - python/cudf/cudf/_lib/cpp/filling.pxd | 3 +- python/cudf/cudf/_lib/filling.pyx | 9 +- python/cudf/cudf/core/column/column.py | 23 ++--- 24 files changed, 99 insertions(+), 267 deletions(-) diff --git a/cpp/benchmarks/lists/copying/scatter_lists.cu b/cpp/benchmarks/lists/copying/scatter_lists.cu index 823693721a0..d86fb0578e5 100644 --- a/cpp/benchmarks/lists/copying/scatter_lists.cu +++ b/cpp/benchmarks/lists/copying/scatter_lists.cu @@ -108,7 +108,7 @@ void BM_lists_scatter(::benchmark::State& state) for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 
- scatter(table_view{{*source}}, *scatter_map, table_view{{*target}}, false, mr); + scatter(table_view{{*source}}, *scatter_map, table_view{{*target}}, mr); } state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0) * 2 * diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 1c3ca179d17..79dcaaaf00b 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -140,13 +140,12 @@ std::unique_ptr reverse( * If the same index appears more than once in the scatter map, the result is * undefined. * + * If any values in `scatter_map` are outside of the interval [-n, n) where `n` + * is the number of rows in the `target` table, behavior is undefined. + * * A negative value `i` in the `scatter_map` is interpreted as `i+n`, where `n` * is the number of rows in the `target` table. * - * @throws cudf::logic_error if `check_bounds == true` and an index exists in - * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in - * the target table. If `check_bounds == false`, the behavior is undefined. - * * @param source The input columns containing values to be scattered into the * target columns * @param scatter_map A non-nullable column of integral indices that maps the @@ -154,8 +153,6 @@ std::unique_ptr reverse( * to or less than the number of elements in the source columns. * @param target The set of columns into which values from the source_table * are to be scattered - * @param check_bounds Optionally perform bounds checking on the values of - * `scatter_map` and throw an error if any of its values are out of bounds. * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target */ @@ -163,7 +160,6 @@ std::unique_ptr
scatter( table_view const& source, column_view const& scatter_map, table_view const& target, - bool check_bounds = false, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -184,9 +180,8 @@ std::unique_ptr
scatter( * If the same index appears more than once in the scatter map, the result is * undefined. * - * @throws cudf::logic_error if `check_bounds == true` and an index exists in - * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in - * the target table. If `check_bounds == false`, the behavior is undefined. + * If any values in `scatter_map` are outside of the interval [-n, n) where `n` + * is the number of rows in the `target` table, behavior is undefined. * * @param source The input scalars containing values to be scattered into the * target columns @@ -194,8 +189,6 @@ std::unique_ptr
scatter( * the rows in the target table to be replaced by source. * @param target The set of columns into which values from the source_table * are to be scattered - * @param check_bounds Optionally perform bounds checking on the values of - * `scatter_map` and throw an error if any of its values are out of bounds. * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target */ @@ -203,7 +196,6 @@ std::unique_ptr
scatter( std::vector> const& source, column_view const& indices, table_view const& target, - bool check_bounds = false, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 09b16b11a73..413f4c4dae4 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -390,7 +390,6 @@ std::unique_ptr
scatter( MapIterator scatter_map_begin, MapIterator scatter_map_end, table_view const& target, - bool check_bounds = false, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { @@ -398,16 +397,6 @@ std::unique_ptr
scatter( using MapType = typename thrust::iterator_traits::value_type; - if (check_bounds) { - auto const begin = -target.num_rows(); - auto const end = target.num_rows(); - auto bounds = bounds_checker{begin, end}; - CUDF_EXPECTS( - std::distance(scatter_map_begin, scatter_map_end) == - thrust::count_if(rmm::exec_policy(stream), scatter_map_begin, scatter_map_end, bounds), - "Scatter map index out of bounds"); - } - CUDF_EXPECTS(std::distance(scatter_map_begin, scatter_map_end) <= source.num_rows(), "scatter map size should be <= to number of rows in source"); diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp index 8c993368ff2..801088b803c 100644 --- a/cpp/include/cudf/detail/scatter.hpp +++ b/cpp/include/cudf/detail/scatter.hpp @@ -45,10 +45,8 @@ namespace detail { * * If the same index appears more than once in the scatter map, the result is * undefined. - * - * @throws cudf::logic_error if `check_bounds == true` and an index exists in - * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in - * the target table. If `check_bounds == false`, the behavior is undefined. + * If any values in `scatter_map` are outside of the interval [-n, n) where `n` + * is the number of rows in the `target` table, behavior is undefined. * * @param source The input columns containing values to be scattered into the * target columns @@ -57,8 +55,6 @@ namespace detail { * to or less than the number of elements in the source columns. * @param target The set of columns into which values from the source_table * are to be scattered - * @param check_bounds Optionally perform bounds checking on the values of - * `scatter_map` and throw an error if any of its values are out of bounds. * @param stream CUDA stream used for device memory operations and kernel launches. 
* @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target @@ -67,7 +63,6 @@ std::unique_ptr
scatter( table_view const& source, column_view const& scatter_map, table_view const& target, - bool check_bounds = false, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -81,7 +76,6 @@ std::unique_ptr
scatter( table_view const& source, device_span const scatter_map, table_view const& target, - bool check_bounds = false, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -101,9 +95,8 @@ std::unique_ptr
scatter( * If the same index appears more than once in the scatter map, the result is * undefined. * - * @throws cudf::logic_error if `check_bounds == true` and an index exists in - * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in - * the target table. If `check_bounds == false`, the behavior is undefined. + * If any values in `indices` are outside of the interval [-n, n) where `n` + * is the number of rows in the `target` table, behavior is undefined. * * @param source The input scalars containing values to be scattered into the * target columns @@ -111,8 +104,6 @@ std::unique_ptr
scatter( * the rows in the target table to be replaced by source. * @param target The set of columns into which values from the source_table * are to be scattered - * @param check_bounds Optionally perform bounds checking on the values of - * `scatter_map` and throw an error if any of its values are out of bounds. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target @@ -121,7 +112,6 @@ std::unique_ptr
scatter( std::vector> const& source, column_view const& indices, table_view const& target, - bool check_bounds = false, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp index 5f9d13f9a2c..8688e97ab7e 100644 --- a/cpp/include/cudf/filling.hpp +++ b/cpp/include/cudf/filling.hpp @@ -103,26 +103,22 @@ std::unique_ptr fill( * ``` * @p count should not have null values; should not contain negative values; * and the sum of count elements should not overflow the size_type's limit. - * It is undefined behavior if @p count has negative values or the sum overflows - * and @p check_count is set to false. + * The behavior of this function is undefined if @p count has negative values + * or the sum overflows. * * @throws cudf::logic_error if the data type of @p count is not size_type. * @throws cudf::logic_error if @p input_table and @p count have different * number of rows. * @throws cudf::logic_error if @p count has null values. - * @throws cudf::logic_error if @p check_count is set to true and @p count - * has negative values or the sum of @p count elements overflows. * * @param input_table Input table * @param count Non-nullable column of an integral type - * @param check_count Whether to check count (negative values and overflow) * @param mr Device memory resource used to allocate the returned table's device memory * @return The result table containing the repetitions */ std::unique_ptr
repeat( table_view const& input_table, column_view const& count, - bool check_count = false, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 5585eac923c..7e5b9288628 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -180,7 +180,6 @@ std::unique_ptr scatter_gather_based_if_else(cudf::column_view const& lh table_view{std::vector{scatter_src_lhs->get_column(0).view()}}, gather_map, table_view{std::vector{rhs}}, - false, stream, mr); @@ -208,12 +207,8 @@ std::unique_ptr scatter_gather_based_if_else(cudf::scalar const& lhs, static_cast(scatter_map_size), scatter_map.begin()}; - auto result = cudf::detail::scatter(scatter_source, - scatter_map_column_view, - table_view{std::vector{rhs}}, - false, - stream, - mr); + auto result = cudf::detail::scatter( + scatter_source, scatter_map_column_view, table_view{std::vector{rhs}}, stream, mr); return std::move(result->release()[0]); } diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 79c27816009..63711a43c3b 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -285,7 +285,6 @@ struct column_scalar_scatterer_impl { std::unique_ptr
scatter(table_view const& source, column_view const& scatter_map, table_view const& target, - bool check_bounds, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -307,13 +306,12 @@ std::unique_ptr
scatter(table_view const& source, // create index type normalizing iterator for the scatter_map auto map_begin = indexalator_factory::make_input_iterator(scatter_map); auto map_end = map_begin + scatter_map.size(); - return detail::scatter(source, map_begin, map_end, target, check_bounds, stream, mr); + return detail::scatter(source, map_begin, map_end, target, stream, mr); } std::unique_ptr
scatter(table_view const& source, device_span const scatter_map, table_view const& target, - bool check_bounds, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -322,13 +320,12 @@ std::unique_ptr
scatter(table_view const& source, auto map_col = column_view(data_type{type_to_id()}, static_cast(scatter_map.size()), scatter_map.data()); - return scatter(source, map_col, target, check_bounds, stream, mr); + return scatter(source, map_col, target, stream, mr); } std::unique_ptr
scatter(std::vector> const& source, column_view const& indices, table_view const& target, - bool check_bounds, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -340,20 +337,9 @@ std::unique_ptr
scatter(std::vector> // Create normalizing iterator for indices column auto map_begin = indexalator_factory::make_input_iterator(indices); - auto map_end = map_begin + indices.size(); // Optionally check map index values are within the number of target rows. auto const n_rows = target.num_rows(); - if (check_bounds) { - CUDF_EXPECTS( - indices.size() == thrust::count_if(rmm::exec_policy(stream), - map_begin, - map_end, - [n_rows] __device__(size_type index) { - return ((index >= -n_rows) && (index < n_rows)); - }), - "Scatter map index out of bounds"); - } // Transform negative indices to index + target size auto scatter_rows = indices.size(); @@ -404,12 +390,8 @@ std::unique_ptr boolean_mask_scatter(column_view const& input, // The scatter map is actually a table with only one column, which is scatter map. auto scatter_map = detail::apply_boolean_mask(table_view{{indices->view()}}, boolean_mask, stream); - auto output_table = detail::scatter(table_view{{input}}, - scatter_map->get_column(0).view(), - table_view{{target}}, - false, - stream, - mr); + auto output_table = detail::scatter( + table_view{{input}}, scatter_map->get_column(0).view(), table_view{{target}}, stream, mr); // There is only one column in output_table return std::make_unique(std::move(output_table->get_column(0))); @@ -505,21 +487,19 @@ std::unique_ptr
boolean_mask_scatter( std::unique_ptr
scatter(table_view const& source, column_view const& scatter_map, table_view const& target, - bool check_bounds, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::scatter(source, scatter_map, target, check_bounds, cudf::default_stream_value, mr); + return detail::scatter(source, scatter_map, target, cudf::default_stream_value, mr); } std::unique_ptr
scatter(std::vector> const& source, column_view const& indices, table_view const& target, - bool check_bounds, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::scatter(source, indices, target, check_bounds, cudf::default_stream_value, mr); + return detail::scatter(source, indices, target, cudf::default_stream_value, mr); } std::unique_ptr
boolean_mask_scatter(table_view const& input, diff --git a/cpp/src/filling/repeat.cu b/cpp/src/filling/repeat.cu index 90c644933ec..b2587e67350 100644 --- a/cpp/src/filling/repeat.cu +++ b/cpp/src/filling/repeat.cu @@ -103,7 +103,6 @@ namespace cudf { namespace detail { std::unique_ptr
repeat(table_view const& input_table, column_view const& count, - bool check_count, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -112,19 +111,12 @@ std::unique_ptr
repeat(table_view const& input_table, if (input_table.num_rows() == 0) { return cudf::empty_like(input_table); } - if (check_count) { cudf::type_dispatcher(count.type(), count_checker{count}, stream); } - auto count_iter = cudf::detail::indexalator_factory::make_input_iterator(count); rmm::device_uvector offsets(count.size(), stream); thrust::inclusive_scan( rmm::exec_policy(stream), count_iter, count_iter + count.size(), offsets.begin()); - if (check_count) { - CUDF_EXPECTS(thrust::is_sorted(rmm::exec_policy(stream), offsets.begin(), offsets.end()), - "count has negative values or the resulting table has too many rows."); - } - size_type output_size{offsets.back_element(stream)}; rmm::device_uvector indices(output_size, stream); thrust::upper_bound(rmm::exec_policy(stream), @@ -162,11 +154,10 @@ std::unique_ptr
repeat(table_view const& input_table, std::unique_ptr
repeat(table_view const& input_table, column_view const& count, - bool check_count, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat(input_table, count, check_count, cudf::default_stream_value, mr); + return detail::repeat(input_table, count, cudf::default_stream_value, mr); } std::unique_ptr
repeat(table_view const& input_table, diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 5d345273782..743ca5e8065 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -178,9 +178,9 @@ void scan_result_functor::operator()(aggregation const& agg) stream, mr); } - result = std::move(cudf::detail::scatter( - table_view{{*result}}, *gather_map, table_view{{*result}}, false, stream, mr) - ->release()[0]); + result = std::move( + cudf::detail::scatter(table_view{{*result}}, *gather_map, table_view{{*result}}, stream, mr) + ->release()[0]); if (rank_agg._null_handling == null_policy::EXCLUDE) { result->set_null_mask(cudf::detail::copy_bitmask(get_grouped_values(), stream, mr)); } diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index a0abaf71160..53ab65e9be7 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -244,7 +244,6 @@ column_view sort_groupby_helper::unsorted_keys_labels(rmm::cuda_stream_view stre cudf::detail::scatter(table_view({group_labels_view}), scatter_map, table_view({temp_labels->view()}), - false, stream, rmm::mr::get_current_device_resource()); diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 3e0cc26dcdd..296a9f40fbb 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -610,7 +610,7 @@ std::pair, std::vector> hash_partition_table( // Use the resulting scatter map to materialize the output auto output = detail::scatter( - input, row_partition_numbers.begin(), row_partition_numbers.end(), input, false, stream, mr); + input, row_partition_numbers.begin(), row_partition_numbers.end(), input, stream, mr); stream.synchronize(); // Async D2H copy must finish before returning host vec return std::pair(std::move(output), std::move(partition_offsets)); @@ -698,7 +698,7 @@ struct dispatch_map_type { // Scatter the rows into their partitions 
auto scattered = - cudf::detail::scatter(t, scatter_map.begin(), scatter_map.end(), t, false, stream, mr); + cudf::detail::scatter(t, scatter_map.begin(), scatter_map.end(), t, stream, mr); return std::pair(std::move(scattered), std::move(partition_offsets)); } diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh index a23786ec7f3..859ed7e5d53 100644 --- a/cpp/src/rolling/detail/lead_lag_nested.cuh +++ b/cpp/src/rolling/detail/lead_lag_nested.cuh @@ -198,7 +198,6 @@ std::unique_ptr compute_lead_lag_for_nested(aggregation::Kind op, table_view{std::vector{gathered_defaults->release()[0]->view()}}, scatter_map, table_view{std::vector{output_with_nulls->release()[0]->view()}}, - false, stream, mr); return std::move(scattered_results->release()[0]); diff --git a/cpp/tests/copying/scatter_list_scalar_tests.cpp b/cpp/tests/copying/scatter_list_scalar_tests.cpp index 7d3de9b6c15..40b5dcff7b6 100644 --- a/cpp/tests/copying/scatter_list_scalar_tests.cpp +++ b/cpp/tests/copying/scatter_list_scalar_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ std::unique_ptr single_scalar_scatter(column_view const& target, { std::vector> slrs{slr}; table_view targets{{target}}; - auto result = scatter(slrs, scatter_map, targets, true); + auto result = scatter(slrs, scatter_map, targets); return std::move(result->release()[0]); } diff --git a/cpp/tests/copying/scatter_struct_scalar_tests.cpp b/cpp/tests/copying/scatter_struct_scalar_tests.cpp index 44e65110f33..62201224893 100644 --- a/cpp/tests/copying/scatter_struct_scalar_tests.cpp +++ b/cpp/tests/copying/scatter_struct_scalar_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,7 +41,7 @@ TYPED_TEST_SUITE(TypedStructScalarScatterTest, FixedWidthTypes); column scatter_single_scalar(scalar const& slr, column_view scatter_map, column_view target) { - auto result = scatter({slr}, scatter_map, table_view{{target}}, false); + auto result = scatter({slr}, scatter_map, table_view{{target}}); return result->get_column(0); } diff --git a/cpp/tests/copying/scatter_tests.cpp b/cpp/tests/copying/scatter_tests.cpp index 306ab8a3d5c..f853920e24c 100644 --- a/cpp/tests/copying/scatter_tests.cpp +++ b/cpp/tests/copying/scatter_tests.cpp @@ -39,7 +39,7 @@ TEST_F(ScatterUntypedTests, ScatterMapTooLong) auto const source_table = cudf::table_view({source, source}); auto const target_table = cudf::table_view({target, target}); - EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error); } // Throw logic error if scatter map has nulls @@ -54,7 +54,7 @@ TEST_F(ScatterUntypedTests, ScatterMapNulls) auto const source_table = cudf::table_view({source, source}); auto const target_table = cudf::table_view({target, target}); - EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error); } // Throw logic error if scatter map has nulls @@ -72,7 +72,7 @@ TEST_F(ScatterUntypedTests, ScatterScalarMapNulls) auto const target_table = cudf::table_view({target}); - EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error); } // Throw logic error if source and target have different number of columns @@ -87,7 +87,7 @@ TEST_F(ScatterUntypedTests, 
ScatterColumnNumberMismatch) auto const source_table = cudf::table_view({source}); auto const target_table = cudf::table_view({target, target}); - EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error); } // Throw logic error if number of scalars doesn't match number of columns @@ -105,7 +105,7 @@ TEST_F(ScatterUntypedTests, ScatterScalarColumnNumberMismatch) auto const target_table = cudf::table_view({target, target}); - EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error); } // Throw logic error if source and target have different data types @@ -120,7 +120,7 @@ TEST_F(ScatterUntypedTests, ScatterDataTypeMismatch) auto const source_table = cudf::table_view({source}); auto const target_table = cudf::table_view({target}); - EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error); } // Throw logic error if source and target have different data types @@ -138,7 +138,7 @@ TEST_F(ScatterUntypedTests, ScatterScalarDataTypeMismatch) auto const target_table = cudf::table_view({target}); - EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error); } template @@ -148,43 +148,6 @@ class ScatterIndexTypeTests : public cudf::test::BaseFixture { using IndexTypes = cudf::test::Types; TYPED_TEST_SUITE(ScatterIndexTypeTests, IndexTypes); -// Throw logic error if check_bounds is set and index is out of bounds -TYPED_TEST(ScatterIndexTypeTests, ScatterOutOfBounds) -{ - using cudf::test::fixed_width_column_wrapper; - - fixed_width_column_wrapper source({1, 2, 3, 4, 5, 6}); 
- fixed_width_column_wrapper target({10, 20, 30, 40, 50, 60, 70, 80}); - fixed_width_column_wrapper upper_bound({-3, 3, 1, 8}); - fixed_width_column_wrapper lower_bound({-3, 3, 1, -9}); - - auto const source_table = cudf::table_view({source, source}); - auto const target_table = cudf::table_view({target, target}); - - EXPECT_THROW(cudf::scatter(source_table, upper_bound, target_table, true), cudf::logic_error); - EXPECT_THROW(cudf::scatter(source_table, lower_bound, target_table, true), cudf::logic_error); -} - -// Throw logic error if check_bounds is set and index is out of bounds -TYPED_TEST(ScatterIndexTypeTests, ScatterScalarOutOfBounds) -{ - using cudf::scalar_type_t; - using cudf::test::fixed_width_column_wrapper; - - auto const source = scalar_type_t(100, true); - std::reference_wrapper slr_ref{source}; - std::vector> source_vector{slr_ref}; - - fixed_width_column_wrapper target({10, 20, 30, 40, 50, 60, 70, 80}); - fixed_width_column_wrapper upper_bound({-3, 3, 1, 8}); - fixed_width_column_wrapper lower_bound({-3, 3, 1, -9}); - - auto const target_table = cudf::table_view({target}); - - EXPECT_THROW(cudf::scatter(source_vector, upper_bound, target_table, true), cudf::logic_error); - EXPECT_THROW(cudf::scatter(source_vector, lower_bound, target_table, true), cudf::logic_error); -} - // Validate that each of the index types work TYPED_TEST(ScatterIndexTypeTests, ScatterIndexType) { @@ -199,7 +162,7 @@ TYPED_TEST(ScatterIndexTypeTests, ScatterIndexType) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -221,7 +184,7 @@ TYPED_TEST(ScatterIndexTypeTests, ScatterScalarIndexType) auto const target_table = cudf::table_view({target}); auto const expected_table = 
cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -248,7 +211,7 @@ TYPED_TEST(ScatterInvalidIndexTypeTests, ScatterInvalidIndexType) auto const source_table = cudf::table_view({source, source}); auto const target_table = cudf::table_view({target, target}); - EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error); } // Throw logic error if scatter map column has invalid data type @@ -266,7 +229,7 @@ TYPED_TEST(ScatterInvalidIndexTypeTests, ScatterScalarInvalidIndexType) auto const target_table = cudf::table_view({target}); - EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error); } template @@ -287,7 +250,7 @@ TYPED_TEST(ScatterDataTypeTests, EmptyScatterMap) auto const source_table = cudf::table_view({source, source}); auto const target_table = cudf::table_view({target, target}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); // Expect a copy of the input table CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), target_table); @@ -309,7 +272,7 @@ TYPED_TEST(ScatterDataTypeTests, EmptyScalarScatterMap) auto const target_table = cudf::table_view({target}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); // Expect a copy of the input table CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), target_table); @@ -328,7 +291,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterNoNulls) auto const 
target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -348,7 +311,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterBothNulls) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -367,7 +330,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterSourceNulls) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -387,7 +350,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterTargetNulls) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -409,7 +372,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarNoNulls) auto const target_table = cudf::table_view({target}); auto const expected_table = cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); 
CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -433,7 +396,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarTargetNulls) auto const target_table = cudf::table_view({target}); auto const expected_table = cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -457,7 +420,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarSourceNulls) auto const target_table = cudf::table_view({target}); auto const expected_table = cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -482,7 +445,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarBothNulls) auto const target_table = cudf::table_view({target}); auto const expected_table = cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -510,7 +473,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterSourceNullsLarge) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -540,7 +503,7 @@ TEST_F(ScatterStringsTests, ScatterNoNulls) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = 
cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -568,7 +531,7 @@ TEST_F(ScatterStringsTests, ScatterScalarNoNulls) auto const target_table = cudf::table_view({target}); auto const expected_table = cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -937,7 +900,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointScatter) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table, result->view()); } diff --git a/cpp/tests/filling/repeat_tests.cpp b/cpp/tests/filling/repeat_tests.cpp index 7d30298b1bd..df8dceb0f8d 100644 --- a/cpp/tests/filling/repeat_tests.cpp +++ b/cpp/tests/filling/repeat_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -272,24 +272,3 @@ TEST_F(RepeatErrorTestFixture, CountHasNulls) // input_table.has_nulls() == true EXPECT_THROW(auto ret = cudf::repeat(input_table, count), cudf::logic_error); } - -TEST_F(RepeatErrorTestFixture, NegativeCountOrOverflow) -{ - auto input = cudf::test::fixed_width_column_wrapper( - thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + 100); - - auto count_neg = cudf::test::fixed_width_column_wrapper( - thrust::make_constant_iterator(-1, 0), thrust::make_constant_iterator(-1, 100)); - - auto value = std::numeric_limits::max() / 10; - auto count_overflow = cudf::test::fixed_width_column_wrapper( - thrust::make_constant_iterator(value, 0), thrust::make_constant_iterator(value, 100)); - - cudf::table_view input_table{{input}}; - - // negative - EXPECT_THROW(auto p_ret = cudf::repeat(input_table, count_neg, true), cudf::logic_error); - - // overflow - EXPECT_THROW(auto p_ret = cudf::repeat(input_table, count_overflow, true), cudf::logic_error); -} diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index dbc2a28c38c..c6f606e971d 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -706,10 +706,10 @@ private static native long[] dropDuplicates(long nativeHandle, int[] keyColumns, private static native long[] gather(long tableHandle, long gatherView, boolean checkBounds); private static native long[] scatterTable(long srcTableHandle, long scatterView, - long targetTableHandle, boolean checkBounds) + long targetTableHandle) throws CudfException; private static native long[] scatterScalars(long[] srcScalarHandles, long scatterView, - long targetTableHandle, boolean checkBounds) + long targetTableHandle) throws CudfException; private static native long[] convertToRows(long nativeHandle); @@ -723,8 +723,7 @@ private static native long[] scatterScalars(long[] srcScalarHandles, long scatte private static native long[] 
repeatStaticCount(long tableHandle, int count); private static native long[] repeatColumnCount(long tableHandle, - long columnHandle, - boolean checkCount); + long columnHandle); private static native long rowBitCount(long tableHandle) throws CudfException; @@ -1686,22 +1685,7 @@ public Table repeat(int count) { * @throws CudfException on any error. */ public Table repeat(ColumnView counts) { - return repeat(counts, true); - } - - /** - * Create a new table by repeating each row of this table. The number of - * repetitions of each row is defined by the corresponding value in counts. - * @param counts the number of times to repeat each row. Cannot have nulls, must be an - * Integer type, and must have one entry for each row in the table. - * @param checkCount should counts be checked for errors before processing. Be careful if you - * disable this because if you pass in bad data you might just get back an - * empty table or bad data. - * @return the new Table. - * @throws CudfException on any error. - */ - public Table repeat(ColumnView counts, boolean checkCount) { - return new Table(repeatColumnCount(this.nativeHandle, counts.getNativeView(), checkCount)); + return new Table(repeatColumnCount(this.nativeHandle, counts.getNativeView())); } /** @@ -2349,14 +2333,11 @@ public Table gather(ColumnView gatherMap, OutOfBoundsPolicy outOfBoundsPolicy) { * * @param scatterMap The map of indexes. Must be non-nullable and integral type. * @param target The table into which rows from the current table are to be scattered out-of-place. - * @param checkBounds Optionally perform bounds checking on the values of`scatterMap` and throw - * an exception if any of its values are out of bounds. * @return A new table which is the result of out-of-place scattering the source table into the * target table. 
*/ - public Table scatter(ColumnView scatterMap, Table target, boolean checkBounds) { - return new Table(scatterTable(nativeHandle, scatterMap.getNativeView(), target.getNativeView(), - checkBounds)); + public Table scatter(ColumnView scatterMap, Table target) { + return new Table(scatterTable(nativeHandle, scatterMap.getNativeView(), target.getNativeView())); } /** @@ -2376,20 +2357,17 @@ public Table scatter(ColumnView scatterMap, Table target, boolean checkBounds) { * @param source The input scalars containing values to be scattered into the target table. * @param scatterMap The map of indexes. Must be non-nullable and integral type. * @param target The table into which the values from source are to be scattered out-of-place. - * @param checkBounds Optionally perform bounds checking on the values of`scatterMap` and throw - * an exception if any of its values are out of bounds. * @return A new table which is the result of out-of-place scattering the source values into the * target table. 
*/ - public static Table scatter(Scalar[] source, ColumnView scatterMap, Table target, - boolean checkBounds) { + public static Table scatter(Scalar[] source, ColumnView scatterMap, Table target) { long[] srcScalarHandles = new long[source.length]; for(int i = 0; i < source.length; ++i) { assert source[i] != null : "Scalar vectors passed in should not contain null"; srcScalarHandles[i] = source[i].getScalarHandle(); } return new Table(scatterScalars(srcScalarHandles, scatterMap.getNativeView(), - target.getNativeView(), checkBounds)); + target.getNativeView())); } private static GatherMap[] buildJoinGatherMaps(long[] gatherMapData) { diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index c23c5a3ccb2..cbd0aee335e 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -2979,8 +2979,7 @@ Java_ai_rapids_cudf_Table_convertToRowsFixedWidthOptimized(JNIEnv *env, jclass, JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterTable(JNIEnv *env, jclass, jlong j_input, jlong j_map, - jlong j_target, - jboolean check_bounds) { + jlong j_target) { JNI_NULL_CHECK(env, j_input, "input table is null", 0); JNI_NULL_CHECK(env, j_map, "map column is null", 0); JNI_NULL_CHECK(env, j_target, "target table is null", 0); @@ -2989,15 +2988,14 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterTable(JNIEnv *env, auto const input = reinterpret_cast(j_input); auto const map = reinterpret_cast(j_map); auto const target = reinterpret_cast(j_target); - return convert_table_for_return(env, cudf::scatter(*input, *map, *target, check_bounds)); + return convert_table_for_return(env, cudf::scatter(*input, *map, *target)); } CATCH_STD(env, 0); } JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterScalars(JNIEnv *env, jclass, jlongArray j_input, - jlong j_map, jlong j_target, - jboolean check_bounds) { + jlong j_map, jlong j_target) { JNI_NULL_CHECK(env, j_input, "input scalars array is 
null", 0); JNI_NULL_CHECK(env, j_map, "map column is null", 0); JNI_NULL_CHECK(env, j_target, "target table is null", 0); @@ -3009,7 +3007,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterScalars(JNIEnv *en [](auto &scalar) { return std::ref(*scalar); }); auto const map = reinterpret_cast(j_map); auto const target = reinterpret_cast(j_target); - return convert_table_for_return(env, cudf::scatter(input, *map, *target, check_bounds)); + return convert_table_for_return(env, cudf::scatter(input, *map, *target)); } CATCH_STD(env, 0); } @@ -3094,15 +3092,14 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_repeatStaticCount(JNIEnv JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_repeatColumnCount(JNIEnv *env, jclass, jlong input_jtable, - jlong count_jcol, - jboolean check_count) { + jlong count_jcol) { JNI_NULL_CHECK(env, input_jtable, "input table is null", 0); JNI_NULL_CHECK(env, count_jcol, "count column is null", 0); try { cudf::jni::auto_set_device(env); auto const input = reinterpret_cast(input_jtable); auto const count = reinterpret_cast(count_jcol); - return convert_table_for_return(env, cudf::repeat(*input, *count, check_count)); + return convert_table_for_return(env, cudf::repeat(*input, *count)); } CATCH_STD(env, 0); } diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index f31da054091..f564a55463b 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -2689,17 +2689,6 @@ void testRepeatColumn() { } } - @Test - void testRepeatColumnBad() { - try (Table t = new Table.TestBuilder() - .column(1, 2) - .column("a", "b") - .build(); - ColumnVector repeats = ColumnVector.fromBytes((byte)2, (byte)-1)) { - assertThrows(CudfException.class, () -> t.repeat(repeats)); - } - } - @Test void testInterleaveIntColumns() { try (Table t = new Table.TestBuilder() @@ -6963,7 +6952,7 @@ void testScatterTable() { 
.decimal32Column(-3, 1, -2, 2, 4, 3) .decimal64Column(-8, 100001L, -200002L, 200002L, 400004L, 300003L) .build(); - Table result = srcTable.scatter(scatterMap, targetTable, false)) { + Table result = srcTable.scatter(scatterMap, targetTable)) { assertTablesAreEqual(expected, result); } } @@ -6981,7 +6970,7 @@ void testScatterScalars() { .column(0, -2, 0, -4, 0) .column("A", "BB", "A", "BBBB", "A") .build(); - Table result = Table.scatter(new Scalar[] { s1, s2 }, scatterMap, targetTable, false)) { + Table result = Table.scatter(new Scalar[] { s1, s2 }, scatterMap, targetTable)) { assertTablesAreEqual(expected, result); } } diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index f1183e008f8..a9cfbbbe223 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -194,8 +194,7 @@ def gather( cdef scatter_scalar(list source_device_slrs, column_view scatter_map, - table_view target_table, - bool bounds_check): + table_view target_table): cdef vector[reference_wrapper[constscalar]] c_source cdef DeviceScalar d_slr cdef unique_ptr[table] c_result @@ -212,7 +211,6 @@ cdef scatter_scalar(list source_device_slrs, c_source, scatter_map, target_table, - bounds_check ) ) @@ -221,8 +219,7 @@ cdef scatter_scalar(list source_device_slrs, cdef scatter_column(list source_columns, column_view scatter_map, - table_view target_table, - bool bounds_check): + table_view target_table): cdef table_view c_source = table_view_from_columns(source_columns) cdef unique_ptr[table] c_result @@ -232,7 +229,6 @@ cdef scatter_column(list source_columns, c_source, scatter_map, target_table, - bounds_check ) ) return columns_from_unique_ptr(move(c_result)) @@ -257,14 +253,24 @@ def scatter(list sources, Column scatter_map, list target_columns, cdef column_view scatter_map_view = scatter_map.view() cdef table_view target_table_view = table_view_from_columns(target_columns) + if bounds_check: + n_rows = len(target_columns[0]) + if not ( + 
(scatter_map >= -n_rows).all() + and (scatter_map < n_rows).all() + ): + raise IndexError( + f"index out of bounds for column of size {n_rows}" + ) + if isinstance(sources[0], Column): return scatter_column( - sources, scatter_map_view, target_table_view, bounds_check + sources, scatter_map_view, target_table_view ) else: source_scalars = [as_device_scalar(slr) for slr in sources] return scatter_scalar( - source_scalars, scatter_map_view, target_table_view, bounds_check + source_scalars, scatter_map_view, target_table_view ) diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd index a1c433774b5..bc89d364004 100644 --- a/python/cudf/cudf/_lib/cpp/copying.pxd +++ b/python/cudf/cudf/_lib/cpp/copying.pxd @@ -44,14 +44,12 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: table_view source_table, column_view scatter_map, table_view target_table, - bool bounds_check ) except + cdef unique_ptr[table] scatter ( vector[reference_wrapper[constscalar]] source_scalars, column_view indices, table_view target, - bool bounds_check ) except + ctypedef enum mask_allocation_policy: diff --git a/python/cudf/cudf/_lib/cpp/filling.pxd b/python/cudf/cudf/_lib/cpp/filling.pxd index 4233ab60ff2..e412f294537 100644 --- a/python/cudf/cudf/_lib/cpp/filling.pxd +++ b/python/cudf/cudf/_lib/cpp/filling.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -29,7 +29,6 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: cdef unique_ptr[table] repeat( const table_view & input, const column_view & count, - bool check_count ) except + cdef unique_ptr[table] repeat( diff --git a/python/cudf/cudf/_lib/filling.pyx b/python/cudf/cudf/_lib/filling.pyx index 7de63def6a6..592d56158a1 100644 --- a/python/cudf/cudf/_lib/filling.pyx +++ b/python/cudf/cudf/_lib/filling.pyx @@ -54,24 +54,23 @@ def fill(Column destination, int begin, int end, DeviceScalar value): return Column.from_unique_ptr(move(c_result)) -def repeat(list inp, object count, bool check_count=False): +def repeat(list inp, object count): if isinstance(count, Column): - return _repeat_via_column(inp, count, check_count) + return _repeat_via_column(inp, count) else: return _repeat_via_size_type(inp, count) -def _repeat_via_column(list inp, Column count, bool check_count): +def _repeat_via_column(list inp, Column count): cdef table_view c_inp = table_view_from_columns(inp) cdef column_view c_count = count.view() - cdef bool c_check_count = check_count + cdef bool c_check_count = False cdef unique_ptr[table] c_result with nogil: c_result = move(cpp_filling.repeat( c_inp, c_count, - c_check_count )) return columns_from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 66ae984ee81..7291b695312 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -571,21 +571,14 @@ def _scatter_by_column( self._check_scatter_key_length(num_keys, value) - try: - if is_bool_dtype(key.dtype): - return libcudf.copying.boolean_mask_scatter( - [value], [self], key - )[0]._with_type_metadata(self.dtype) - else: - return libcudf.copying.scatter([value], key, [self])[ - 0 - ]._with_type_metadata(self.dtype) - except RuntimeError as e: - if "out of bounds" in str(e): - raise IndexError( - f"index out of 
bounds for column of size {len(self)}" - ) from e - raise + if is_bool_dtype(key.dtype): + return libcudf.copying.boolean_mask_scatter([value], [self], key)[ + 0 + ]._with_type_metadata(self.dtype) + else: + return libcudf.copying.scatter([value], key, [self])[ + 0 + ]._with_type_metadata(self.dtype) def _check_scatter_key_length( self, num_keys: int, value: Union[cudf.core.scalar.Scalar, ColumnBase] From 536ddd08fd2e86940c55dab178f4a1a1e9361539 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 20 Oct 2022 12:47:32 -0400 Subject: [PATCH 052/202] Tell jitify_preprocess where to search for libnvrtc (#11787) On machines with multiple CUDA Toolkits installed it is possible to have a mismatch between the version of `nvcc` used to compile code and the version of `libnvrtc` used for the JIT code. This generally occurs when `LD_LIBRARY_PATH` points to a different version of the CUDA Toolkit. We now explicitly specify which toolkit library directory to search when preprocessing JIT code during libcudf compilation.
Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/11787 --- cpp/cmake/Modules/JitifyPreprocessKernels.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index 8ce98c6d582..0d52a064761 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -39,7 +39,8 @@ function(jit_preprocess_files) VERBATIM COMMAND ${CMAKE_COMMAND} -E make_directory "${jit_output_directory}" COMMAND - jitify_preprocess ${ARG_FILE} -o + "${CMAKE_COMMAND}" -E env LD_LIBRARY_PATH="${CUDAToolkit_LIBRARY_DIR}" + $<TARGET_FILE:jitify_preprocess> ${ARG_FILE} -o ${CUDF_GENERATED_INCLUDE_DIR}/include/jit_preprocessed_files -i -m -std=c++17 -remove-unused-globals -D_FILE_OFFSET_BITS=64 -D__CUDACC_RTC__ -I${CUDF_SOURCE_DIR}/include -I${CUDF_SOURCE_DIR}/src ${libcudacxx_includes} -I${CUDAToolkit_INCLUDE_DIRS} From 98185fef48070ed916a52b86bd330cfcb83d827b Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Thu, 20 Oct 2022 12:18:56 -0700 Subject: [PATCH 053/202] Fix writing of Parquet files with many fragments (#11869) This PR fixes an error that can occur when very small page sizes are used when writing Parquet files. #11551 changed from fixed 5000 row page fragments to a scaled value based on the requested max page size. For small page sizes, the number of fragments to process can exceed 64k. The number of fragments is used as the `y` dimension when calling `gpuInitPageFragments`, and when it exceeds 64k the kernel fails to launch, ultimately leading to an invalid memory access.
Authors: - Ed Seidl (https://github.com/etseidl) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Bradley Dice (https://github.com/bdice) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/11869 --- cpp/src/io/parquet/page_enc.cu | 184 ++++++++++++++++++--------------- cpp/tests/io/parquet_test.cpp | 18 ++++ 2 files changed, 117 insertions(+), 85 deletions(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index cdee066a06a..7c5651b1ef8 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -61,6 +61,12 @@ constexpr int32_t NO_TRUNC_STATS = 0; // minimum scratch space required for encoding statistics constexpr size_t MIN_STATS_SCRATCH_SIZE = sizeof(__int128_t); +// mask to determine lane id +constexpr uint32_t WARP_MASK = cudf::detail::warp_size - 1; + +// currently 64k - 1 +constexpr uint32_t MAX_GRID_Y_SIZE = (1 << 16) - 1; + struct frag_init_state_s { parquet_column_device_view col; PageFragment frag; @@ -116,82 +122,87 @@ __global__ void __launch_bounds__(block_size) using block_reduce = cub::BlockReduce; __shared__ typename block_reduce::TempStorage reduce_storage; - frag_init_state_s* const s = &state_g; - uint32_t t = threadIdx.x; - int frag_y = blockIdx.y; - auto const physical_type = col_desc[blockIdx.x].physical_type; + frag_init_state_s* const s = &state_g; + uint32_t const t = threadIdx.x; + auto const physical_type = col_desc[blockIdx.x].physical_type; + uint32_t const num_fragments_per_column = frag.size().second; - if (t == 0) s->col = col_desc[blockIdx.x]; + if (t == 0) { s->col = col_desc[blockIdx.x]; } __syncthreads(); - if (!t) { - // Find which partition this fragment came from - auto it = - thrust::upper_bound(thrust::seq, part_frag_offset.begin(), part_frag_offset.end(), frag_y); - int p = it - part_frag_offset.begin() - 1; - int part_end_row = partitions[p].start_row + partitions[p].num_rows; - s->frag.start_row = (frag_y - 
part_frag_offset[p]) * fragment_size + partitions[p].start_row; - - // frag.num_rows = fragment_size except for the last fragment in partition which can be smaller. - // num_rows is fixed but fragment size could be larger if the data is strings or nested. - s->frag.num_rows = min(fragment_size, part_end_row - s->frag.start_row); - s->frag.num_dict_vals = 0; - s->frag.fragment_data_size = 0; - s->frag.dict_data_size = 0; - - s->frag.start_value_idx = row_to_value_idx(s->frag.start_row, s->col); - size_type end_value_idx = row_to_value_idx(s->frag.start_row + s->frag.num_rows, s->col); - s->frag.num_leaf_values = end_value_idx - s->frag.start_value_idx; - - if (s->col.level_offsets != nullptr) { - // For nested schemas, the number of values in a fragment is not directly related to the - // number of encoded data elements or the number of rows. It is simply the number of - // repetition/definition values which together encode validity and nesting information. - size_type first_level_val_idx = s->col.level_offsets[s->frag.start_row]; - size_type last_level_val_idx = s->col.level_offsets[s->frag.start_row + s->frag.num_rows]; - s->frag.num_values = last_level_val_idx - first_level_val_idx; - } else { - s->frag.num_values = s->frag.num_rows; - } - } + auto const leaf_type = s->col.leaf_column->type().id(); auto const dtype_len = physical_type_len(physical_type, leaf_type); - __syncthreads(); - size_type nvals = s->frag.num_leaf_values; - size_type start_value_idx = s->frag.start_value_idx; - - for (uint32_t i = 0; i < nvals; i += block_size) { - uint32_t val_idx = start_value_idx + i + t; - uint32_t is_valid = (i + t < nvals && val_idx < s->col.leaf_column->size()) - ? 
s->col.leaf_column->is_valid(val_idx) - : 0; - uint32_t len; - if (is_valid) { - len = dtype_len; - if (physical_type == BYTE_ARRAY) { - switch (leaf_type) { - case type_id::STRING: { - auto str = s->col.leaf_column->element(val_idx); - len += str.size_bytes(); - } break; - case type_id::LIST: { - auto list_element = - get_element(*s->col.leaf_column, val_idx); - len += list_element.size_bytes(); - } break; - default: CUDF_UNREACHABLE("Unsupported data type for leaf column"); - } + for (uint32_t frag_y = blockIdx.y; frag_y < num_fragments_per_column; frag_y += gridDim.y) { + if (t == 0) { + // Find which partition this fragment came from + auto it = + thrust::upper_bound(thrust::seq, part_frag_offset.begin(), part_frag_offset.end(), frag_y); + int p = it - part_frag_offset.begin() - 1; + int part_end_row = partitions[p].start_row + partitions[p].num_rows; + s->frag.start_row = (frag_y - part_frag_offset[p]) * fragment_size + partitions[p].start_row; + + // frag.num_rows = fragment_size except for the last fragment in partition which can be + // smaller. num_rows is fixed but fragment size could be larger if the data is strings or + // nested. + s->frag.num_rows = min(fragment_size, part_end_row - s->frag.start_row); + s->frag.num_dict_vals = 0; + s->frag.fragment_data_size = 0; + s->frag.dict_data_size = 0; + + s->frag.start_value_idx = row_to_value_idx(s->frag.start_row, s->col); + size_type end_value_idx = row_to_value_idx(s->frag.start_row + s->frag.num_rows, s->col); + s->frag.num_leaf_values = end_value_idx - s->frag.start_value_idx; + + if (s->col.level_offsets != nullptr) { + // For nested schemas, the number of values in a fragment is not directly related to the + // number of encoded data elements or the number of rows. It is simply the number of + // repetition/definition values which together encode validity and nesting information. 
+ size_type first_level_val_idx = s->col.level_offsets[s->frag.start_row]; + size_type last_level_val_idx = s->col.level_offsets[s->frag.start_row + s->frag.num_rows]; + s->frag.num_values = last_level_val_idx - first_level_val_idx; + } else { + s->frag.num_values = s->frag.num_rows; } - } else { - len = 0; } + __syncthreads(); - len = block_reduce(reduce_storage).Sum(len); - if (!t) { s->frag.fragment_data_size += len; } + size_type nvals = s->frag.num_leaf_values; + size_type start_value_idx = s->frag.start_value_idx; + + for (uint32_t i = 0; i < nvals; i += block_size) { + uint32_t val_idx = start_value_idx + i + t; + uint32_t is_valid = (i + t < nvals && val_idx < s->col.leaf_column->size()) + ? s->col.leaf_column->is_valid(val_idx) + : 0; + uint32_t len; + if (is_valid) { + len = dtype_len; + if (physical_type == BYTE_ARRAY) { + switch (leaf_type) { + case type_id::STRING: { + auto str = s->col.leaf_column->element(val_idx); + len += str.size_bytes(); + } break; + case type_id::LIST: { + auto list_element = + get_element(*s->col.leaf_column, val_idx); + len += list_element.size_bytes(); + } break; + default: CUDF_UNREACHABLE("Unsupported data type for leaf column"); + } + } + } else { + len = 0; + } + + len = block_reduce(reduce_storage).Sum(len); + if (t == 0) { s->frag.fragment_data_size += len; } + __syncthreads(); + } __syncthreads(); + if (t == 0) { frag[blockIdx.x][frag_y] = s->frag; } } - __syncthreads(); - if (t == 0) frag[blockIdx.x][blockIdx.y] = s->frag; } // blockDim {128,1,1} @@ -200,21 +211,21 @@ __global__ void __launch_bounds__(128) device_2dspan fragments, device_span col_desc) { - // TODO: why not 1 block per warp? 
- __shared__ __align__(8) statistics_group group_g[4]; - - uint32_t lane_id = threadIdx.x & 0x1f; - uint32_t frag_id = blockIdx.y * 4 + (threadIdx.x >> 5); - uint32_t column_id = blockIdx.x; - auto num_fragments_per_column = fragments.size().second; - statistics_group* const g = &group_g[threadIdx.x >> 5]; - if (!lane_id && frag_id < num_fragments_per_column) { - g->col = &col_desc[column_id]; - g->start_row = fragments[column_id][frag_id].start_value_idx; - g->num_rows = fragments[column_id][frag_id].num_leaf_values; + uint32_t const lane_id = threadIdx.x & WARP_MASK; + uint32_t const column_id = blockIdx.x; + uint32_t const num_fragments_per_column = fragments.size().second; + + uint32_t frag_id = blockIdx.y * 4 + (threadIdx.x / cudf::detail::warp_size); + while (frag_id < num_fragments_per_column) { + if (lane_id == 0) { + statistics_group g; + g.col = &col_desc[column_id]; + g.start_row = fragments[column_id][frag_id].start_value_idx; + g.num_rows = fragments[column_id][frag_id].num_leaf_values; + groups[column_id][frag_id] = g; + } + frag_id += gridDim.y * 4; } - __syncthreads(); - if (frag_id < num_fragments_per_column and lane_id == 0) groups[column_id][frag_id] = *g; } // blockDim {128,1,1} @@ -2017,9 +2028,10 @@ void InitPageFragments(device_2dspan frag, uint32_t fragment_size, rmm::cuda_stream_view stream) { - auto num_columns = frag.size().first; - auto num_fragments_per_column = frag.size().second; - dim3 dim_grid(num_columns, num_fragments_per_column); // 1 threadblock per fragment + auto const num_columns = frag.size().first; + auto const num_fragments_per_column = frag.size().second; + auto const grid_y = std::min(static_cast(num_fragments_per_column), MAX_GRID_Y_SIZE); + dim3 const dim_grid(num_columns, grid_y); // 1 threadblock per fragment gpuInitPageFragments<512><<>>( frag, col_desc, partitions, part_frag_offset, fragment_size); } @@ -2031,8 +2043,10 @@ void InitFragmentStatistics(device_2dspan groups, { int const num_columns = col_desc.size(); 
int const num_fragments_per_column = fragments.size().second; - auto grid_y = util::div_rounding_up_safe(num_fragments_per_column, 128 / cudf::detail::warp_size); - dim3 dim_grid(num_columns, grid_y); // 1 warp per fragment + auto const y_dim = + util::div_rounding_up_safe(num_fragments_per_column, 128 / cudf::detail::warp_size); + auto const grid_y = std::min(static_cast(y_dim), MAX_GRID_Y_SIZE); + dim3 const dim_grid(num_columns, grid_y); // 1 warp per fragment gpuInitFragmentStats<<>>(groups, fragments, col_desc); } diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index b13e875eabd..9bb2aa207e4 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -1048,6 +1048,24 @@ TEST_F(ParquetWriterTest, HostBuffer) cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } +TEST_F(ParquetWriterTest, ManyFragments) +{ + srand(31337); + auto const expected = create_random_fixed_table(10, 6'000'000, false); + + auto const filepath = temp_env->get_temp_filepath("ManyFragments.parquet"); + cudf::io::parquet_writer_options const args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected) + .max_page_size_bytes(8 * 1024); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + TEST_F(ParquetWriterTest, NonNullable) { srand(31337); From ee9ffd04acf9ab05af2a510dda50d73574c63569 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 20 Oct 2022 17:21:20 -0500 Subject: [PATCH 054/202] Default to equal NaNs in make_collect_set_aggregation. (#11621) Partially resolves #11329. This helps to align our default behaviors for null and NaN equality across APIs, specifically for `make_collect_set_aggregation` in this PR. 
All functions should default to treating null values as equal to one another and NaN values as equal to one another. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/11621 --- cpp/include/cudf/aggregation.hpp | 7 ++--- cpp/tests/groupby/collect_set_tests.cpp | 15 ++++++++--- cpp/tests/reductions/collect_ops_tests.cpp | 12 ++++++--- cpp/tests/rolling/collect_ops_test.cpp | 30 ++++++++++++++-------- 4 files changed, 43 insertions(+), 21 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index a26a0c7947b..a92da0b0347 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -515,9 +515,10 @@ std::unique_ptr make_collect_list_aggregation( * @return A COLLECT_SET aggregation object */ template -std::unique_ptr make_collect_set_aggregation(null_policy null_handling = null_policy::INCLUDE, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::UNEQUAL); +std::unique_ptr make_collect_set_aggregation( + null_policy null_handling = null_policy::INCLUDE, + null_equality nulls_equal = null_equality::EQUAL, + nan_equality nans_equal = nan_equality::ALL_EQUAL); /** * @brief Factory to create a LAG aggregation diff --git a/cpp/tests/groupby/collect_set_tests.cpp b/cpp/tests/groupby/collect_set_tests.cpp index cf324cf3a8e..818a4c63a1f 100644 --- a/cpp/tests/groupby/collect_set_tests.cpp +++ b/cpp/tests/groupby/collect_set_tests.cpp @@ -248,7 +248,10 @@ TEST_F(CollectSetTest, FloatsWithNaN) vals_expected = {{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f}, validity_col{true, true, true, true, true, true, true, false}}}; auto const [out_keys, out_lists] = - groupby_collect_set(keys, vals, CollectSetTest::collect_set()); + groupby_collect_set(keys, + vals, + cudf::make_collect_set_aggregation( + null_policy::INCLUDE, 
null_equality::EQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(keys_expected, *out_keys, verbosity); CUDF_TEST_EXPECT_COLUMNS_EQUAL(vals_expected, *out_lists, verbosity); } @@ -258,7 +261,10 @@ TEST_F(CollectSetTest, FloatsWithNaN) vals_expected = {{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f, 0.0f}, validity_col{true, true, true, true, true, true, true, false, false}}}; auto const [out_keys, out_lists] = - groupby_collect_set(keys, vals, CollectSetTest::collect_set_null_unequal()); + groupby_collect_set(keys, + vals, + cudf::make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::UNEQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(keys_expected, *out_keys, verbosity); CUDF_TEST_EXPECT_COLUMNS_EQUAL(vals_expected, *out_lists, verbosity); } @@ -267,7 +273,10 @@ TEST_F(CollectSetTest, FloatsWithNaN) { vals_expected = {{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN}}; auto const [out_keys, out_lists] = - groupby_collect_set(keys, vals, CollectSetTest::collect_set_null_exclude()); + groupby_collect_set(keys, + vals, + cudf::make_collect_set_aggregation( + null_policy::EXCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(keys_expected, *out_keys, verbosity); CUDF_TEST_EXPECT_COLUMNS_EQUAL(vals_expected, *out_lists, verbosity); } diff --git a/cpp/tests/reductions/collect_ops_tests.cpp b/cpp/tests/reductions/collect_ops_tests.cpp index a0fdab5e994..842aaa3ab07 100644 --- a/cpp/tests/reductions/collect_ops_tests.cpp +++ b/cpp/tests/reductions/collect_ops_tests.cpp @@ -196,15 +196,19 @@ TEST_F(CollectTest, CollectSetWithNaN) // nan unequal with null equal fp_wrapper expected1{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f}, {1, 1, 1, 1, 1, 1, 1, 0}}; - auto const ret1 = collect_set(col, make_collect_set_aggregation()); + auto const ret1 = + collect_set(col, + make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, dynamic_cast(ret1.get())->view()); // nan unequal with null unequal fp_wrapper expected2{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f, 0.0f}, {1, 1, 1, 1, 1, 1, 1, 0, 0}}; - auto const ret2 = collect_set( - col, - make_collect_set_aggregation(null_policy::INCLUDE, null_equality::UNEQUAL)); + auto const ret2 = + collect_set(col, + make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::UNEQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast(ret2.get())->view()); // nan equal with null equal diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp index 9dc13b2f9f7..8a396d793a3 100644 --- a/cpp/tests/rolling/collect_ops_test.cpp +++ b/cpp/tests/rolling/collect_ops_test.cpp @@ -2118,13 +2118,14 @@ TEST_F(CollectSetTest, FloatGroupedRollingWindowWithNaNs) auto const following = 1; auto const min_periods = 1; // test on nan_equality::UNEQUAL - auto const result = - grouped_rolling_collect_set(table_view{std::vector{group_column}}, - input_column, - preceding, - following, - min_periods, - *make_collect_set_aggregation()); + auto const result = grouped_rolling_collect_set( + table_view{std::vector{group_column}}, + input_column, + preceding, + following, + min_periods, + *make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); auto const expected_result = lists_column_wrapper{ {{0.2341, 1.23}, std::initializer_list{true, true}}, @@ -2186,7 +2187,8 @@ TEST_F(CollectSetTest, BasicRollingWindowWithNaNs) prev_column, foll_column, 1, - *make_collect_set_aggregation()); + *make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); auto const expected_result = lists_column_wrapper{ @@ -2200,8 +2202,13 @@ TEST_F(CollectSetTest, BasicRollingWindowWithNaNs) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result_column_based_window->view()); - auto const 
result_fixed_window = rolling_collect_set( - input_column, 2, 1, 1, *make_collect_set_aggregation()); + auto const result_fixed_window = + rolling_collect_set(input_column, + 2, + 1, + 1, + *make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result_fixed_window->view()); auto const result_with_nulls_excluded = @@ -2209,7 +2216,8 @@ TEST_F(CollectSetTest, BasicRollingWindowWithNaNs) 2, 1, 1, - *make_collect_set_aggregation(null_policy::EXCLUDE)); + *make_collect_set_aggregation( + null_policy::EXCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result_with_nulls_excluded->view()); From 5803015be119d7a52b11500489477592fbfb7177 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 20 Oct 2022 17:25:07 -0500 Subject: [PATCH 055/202] Rename libcudf++ to libcudf. (#11953) For consistency across our documentation, this PR renames `libcudf++` to `libcudf`. 
Authors: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - David Wendt (https://github.com/davidwendt) - GALI PREM SAGAR (https://github.com/galipremsagar) - Vyas Ramasubramani (https://github.com/vyasr) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/11953 --- cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md | 4 ++-- cpp/doxygen/developer_guide/DOCUMENTATION.md | 2 +- python/cudf/cudf/_lib/avro.pyx | 4 ++-- python/cudf/cudf/_lib/io/utils.pyx | 6 +++--- python/cudf/cudf/_lib/json.pyx | 2 +- python/cudf/cudf/_lib/reduce.pyx | 6 +++--- python/cudf/cudf/core/frame.py | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index 52c443cd764..606dabcb937 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -176,7 +176,7 @@ Resource ownership is an essential concept in libcudf. In short, an "owning" obj resource (such as device memory). It acquires that resource during construction and releases the resource in destruction ([RAII](https://en.cppreference.com/w/cpp/language/raii)). A "non-owning" object does not own resources. Any class in libcudf with the `*_view` suffix is non-owning. For more -detail see the [`libcudf++` presentation.](https://docs.google.com/presentation/d/1zKzAtc1AWFKfMhiUlV5yRZxSiPLwsObxMlWRWz_f5hA/edit?usp=sharing) +detail see the [`libcudf` presentation.](https://docs.google.com/presentation/d/1zKzAtc1AWFKfMhiUlV5yRZxSiPLwsObxMlWRWz_f5hA/edit?usp=sharing) libcudf functions typically take views as input (`column_view` or `table_view`) and produce `unique_ptr`s to owning objects as output. For example, @@ -403,7 +403,7 @@ Functions like merge or groupby in libcudf make no guarantees about the order of Promising deterministic ordering is not, in general, conducive to fast parallel algorithms. 
Calling code is responsible for performing sorts after the fact if sorted outputs are needed. -# libcudf++ API and Implementation +# libcudf API and Implementation ## Streams diff --git a/cpp/doxygen/developer_guide/DOCUMENTATION.md b/cpp/doxygen/developer_guide/DOCUMENTATION.md index 8a7d89c8dbd..07ef1bdc530 100644 --- a/cpp/doxygen/developer_guide/DOCUMENTATION.md +++ b/cpp/doxygen/developer_guide/DOCUMENTATION.md @@ -1,4 +1,4 @@ -# libcudf++ C++ Documentation Guide +# libcudf C++ Documentation Guide These guidelines apply to documenting all libcudf C++ source files using doxygen style formatting although only public APIs and classes are actually [published](https://docs.rapids.ai/api/libcudf/stable/index.html). diff --git a/python/cudf/cudf/_lib/avro.pyx b/python/cudf/cudf/_lib/avro.pyx index b6e23e7c3a0..0c8886ca356 100644 --- a/python/cudf/cudf/_lib/avro.pyx +++ b/python/cudf/cudf/_lib/avro.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libcpp.string cimport string from libcpp.utility cimport move @@ -16,7 +16,7 @@ from cudf._lib.utils cimport data_from_unique_ptr cpdef read_avro(datasource, columns=None, skip_rows=-1, num_rows=-1): """ - Cython function to call libcudf++ read_avro, see `read_avro`. + Cython function to call libcudf read_avro, see `read_avro`. 
See Also -------- diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx index 18b26bb5aa6..21939ff39b6 100644 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ b/python/cudf/cudf/_lib/io/utils.pyx @@ -30,7 +30,7 @@ import cudf from cudf.api.types import is_struct_dtype -# Converts the Python source input to libcudf++ IO source_info +# Converts the Python source input to libcudf IO source_info # with the appropriate type and source values cdef source_info make_source_info(list src) except*: if not src: @@ -80,7 +80,7 @@ cdef source_info make_source_info(list src) except*: return source_info(c_host_buffers) -# Converts the Python sink input to libcudf++ IO sink_info. +# Converts the Python sink input to libcudf IO sink_info. cdef sink_info make_sinks_info( list src, vector[unique_ptr[data_sink]] & sink ) except*: @@ -129,7 +129,7 @@ cdef sink_info make_sink_info(src, unique_ptr[data_sink] & sink) except*: return info -# Adapts a python io.IOBase object as a libcudf++ IO data_sink. This lets you +# Adapts a python io.IOBase object as a libcudf IO data_sink. 
This lets you # write from cudf to any python file-like object (File/BytesIO/SocketIO etc) cdef cppclass iobase_data_sink(data_sink): object buf diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index b0aafc275d6..5efe40ed2e9 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -46,7 +46,7 @@ cpdef read_json(object filepaths_or_buffers, # If input data is a JSON string (or StringIO), hold a reference to # the encoded memoryview externally to ensure the encoded buffer - # isn't destroyed before calling libcudf++ `read_json()` + # isn't destroyed before calling libcudf `read_json()` for idx in range(len(filepaths_or_buffers)): if isinstance(filepaths_or_buffers[idx], io.StringIO): filepaths_or_buffers[idx] = \ diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx index bdbe7e1c668..c1494df9cac 100644 --- a/python/cudf/cudf/_lib/reduce.pyx +++ b/python/cudf/cudf/_lib/reduce.pyx @@ -35,7 +35,7 @@ cimport cudf._lib.cpp.types as libcudf_types def reduce(reduction_op, Column incol, dtype=None, **kwargs): """ - Top level Cython reduce function wrapping libcudf++ reductions. + Top level Cython reduce function wrapping libcudf reductions. Parameters ---------- @@ -91,7 +91,7 @@ def reduce(reduction_op, Column incol, dtype=None, **kwargs): def scan(scan_op, Column incol, inclusive, **kwargs): """ - Top level Cython scan function wrapping libcudf++ scans. + Top level Cython scan function wrapping libcudf scans. Parameters ---------- @@ -122,7 +122,7 @@ def scan(scan_op, Column incol, inclusive, **kwargs): def minmax(Column incol): """ - Top level Cython minmax function wrapping libcudf++ minmax. + Top level Cython minmax function wrapping libcudf minmax. 
Parameters ---------- diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 4fb914a6409..9e539ee157b 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1412,7 +1412,7 @@ def searchsorted( >>> df.searchsorted(values_df, ascending=False) array([4, 4, 4, 0], dtype=int32) """ - # Call libcudf++ search_sorted primitive + # Call libcudf search_sorted primitive if na_position not in {"first", "last"}: raise ValueError(f"invalid na_position: {na_position}") From b9ba9e3e47e66bdc716c01c10606580ca92fa587 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 21 Oct 2022 10:12:01 -0400 Subject: [PATCH 056/202] Update Unit Testing in libcudf guidelines to code tests outside the cudf::test namespace (#11959) Update text to include coding tests outside the `cudf` or the `cudf::test` namespace. Realized our test guidelines needed to be updated while working on #11734. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/11959 --- cpp/doxygen/developer_guide/TESTING.md | 150 +++++++++++++------------ 1 file changed, 78 insertions(+), 72 deletions(-) diff --git a/cpp/doxygen/developer_guide/TESTING.md b/cpp/doxygen/developer_guide/TESTING.md index 31747e31ccb..198590bb35c 100644 --- a/cpp/doxygen/developer_guide/TESTING.md +++ b/cpp/doxygen/developer_guide/TESTING.md @@ -6,6 +6,13 @@ Unit tests in libcudf are written using **Important:** Instead of including `gtest/gtest.h` directly, use `#include <cudf_test/cudf_gtest.hpp>`. +Also, write test code in the global namespace. That is, +do not write test code in the `cudf` or the `cudf::test` namespace or their +sub-namespaces. +Likewise, do not use `using namespace cudf;` or `using namespace cudf::test;` +in the global namespace.
+ + ## Best Practices: What Should We Test? In general we should test to make sure all code paths are covered. This is not always easy or @@ -38,8 +45,8 @@ groupby). Here are some other guidelines. does happen); columns with zero size but that somehow have non-null data pointers; and struct columns with no children. - * Decimal types are not included in the `NumericTypes` type list, but are included in - `FixedWidthTypes`, so be careful that tests either include or exclude decimal types as + * Decimal types are not included in the `cudf::test::NumericTypes` type list, but are included in + `cudf::test::FixedWidthTypes`, so be careful that tests either include or exclude decimal types as appropriate. @@ -99,8 +106,8 @@ list defined in `TestTypes` (`int, float, double`). The list of types that are used in tests should be consistent across all tests. To ensure consistency, several sets of common type lists are provided in -`include/cudf_test/type_lists.hpp`. For example, `NumericTypes` is a type list of all numeric types, -`FixedWidthTypes` is a list of all fixed-width element types, and `AllTypes` is a list of every +`include/cudf_test/type_lists.hpp`. For example, `cudf::test::NumericTypes` is a type list of all numeric types, +`cudf::test::FixedWidthTypes` is a list of all fixed-width element types, and `cudf::test::AllTypes` is a list of every element type that libcudf supports. ```c++ @@ -126,9 +133,8 @@ the `N`th type within the nested list, use `GetType`. Imagine testing all possible two-type combinations of `<int,float>`.
This could be done manually: ```c++ -using namespace cudf::test; template -TwoTypesFixture : BaseFixture{...}; +TwoTypesFixture : cudf::test::BaseFixture{...}; using TwoTypesList = Types< Types, Types, Types, Types >; TYPED_TEST_SUITE(TwoTypesFixture, TwoTypesList); @@ -178,9 +184,9 @@ transparently passed to any API expecting a `column_view` or `mutable_column_vie #### fixed_width_column_wrapper -The `fixed_width_column_wrapper` class should be used for constructing and initializing columns of +The `cudf::test::fixed_width_column_wrapper` class should be used for constructing and initializing columns of any fixed-width element type, e.g., numeric types, timestamp types, Boolean, etc. -`fixed_width_column_wrapper` provides constructors that accept an iterator range to generate each +`cudf::test::fixed_width_column_wrapper` provides constructors that accept an iterator range to generate each element in the column. For nullable columns, an additional iterator can be provided to indicate the validity of each element. There are also constructors that accept a `std::initializer_list` for the column elements and optionally for the validity of each element. 
@@ -189,25 +195,25 @@ Example: ```c++ // Creates a non-nullable column of INT32 elements with 5 elements: {0, 1, 2, 3, 4} -auto elements = make_counting_transform_iterator(0, [](auto i){return i;}); -fixed_width_column_wrapper w(elements, elements + 5); +auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i;}); +cudf::test::fixed_width_column_wrapper w(elements, elements + 5); // Creates a nullable column of INT32 elements with 5 elements: {null, 1, null, 3, null} -auto elements = make_counting_transform_iterator(0, [](auto i){return i;}); -auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;}) -fixed_width_column_wrapper w(elements, elements + 5, validity); +auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i;}); +auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;}); +cudf::test::fixed_width_column_wrapper w(elements, elements + 5, validity); // Creates a non-nullable INT32 column with 4 elements: {1, 2, 3, 4} -fixed_width_column_wrapper w{{1, 2, 3, 4}}; +cudf::test::fixed_width_column_wrapper w{{1, 2, 3, 4}}; // Creates a nullable INT32 column with 4 elements: {1, NULL, 3, NULL} -fixed_width_column_wrapper w{ {1,2,3,4}, {1, 0, 1, 0}}; +cudf::test::fixed_width_column_wrapper w{ {1,2,3,4}, {1, 0, 1, 0}}; ``` #### fixed_point_column_wrapper -The `fixed_point_column_wrapper` class should be used for constructing and initializing columns of -any fixed-point element type (DECIMAL32 or DECIMAL64). `fixed_point_column_wrapper` provides +The `cudf::test::fixed_point_column_wrapper` class should be used for constructing and initializing columns of +any fixed-point element type (DECIMAL32 or DECIMAL64). `cudf::test::fixed_point_column_wrapper` provides constructors that accept an iterator range to generate each element in the column. For nullable columns, an additional iterator can be provided to indicate the validity of each element. 
Constructors also take the scale of the fixed-point values to create. @@ -215,20 +221,20 @@ Constructors also take the scale of the fixed-point values to create. Example: ```c++ - // Creates a non-nullable column of 4 DECIMAL32 elements of scale 3: {1000, 2000, 3000, 4000} - auto elements = make_counting_transform_iterator(0, [](auto i){ return i; }); - fixed_point_column_wrapper w(elements, elements + 4, 3); - - // Creates a nullable column of 5 DECIMAL32 elements of scale 2: {null, 100, null, 300, null} - auto elements = make_counting_transform_iterator(0, [](auto i){ return i; }); - auto validity = make_counting_transform_iterator(0, [](auto i){ return i % 2; }); - fixed_point_column_wrapper w(elements, elements + 5, validity, 2); +// Creates a non-nullable column of 4 DECIMAL32 elements of scale 3: {1000, 2000, 3000, 4000} +auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){ return i; }); +cudf::test::fixed_point_column_wrapper w(elements, elements + 4, 3); + +// Creates a nullable column of 5 DECIMAL32 elements of scale 2: {null, 100, null, 300, null} +auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){ return i; }); +auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){ return i % 2; }); +cudf::test::fixed_point_column_wrapper w(elements, elements + 5, validity, 2); ``` #### dictionary_column_wrapper -The `dictionary_column_wrapper` class should be used to create dictionary columns. -`dictionary_column_wrapper` provides constructors that accept an iterator range to generate each +The `cudf::test::dictionary_column_wrapper` class should be used to create dictionary columns. +`cudf::test::dictionary_column_wrapper` provides constructors that accept an iterator range to generate each element in the column. For nullable columns, an additional iterator can be provided to indicate the validity of each element. 
There are also constructors that accept a `std::initializer_list` for the column elements and optionally for the validity of each element. @@ -239,43 +245,43 @@ Example: // Creates a non-nullable dictionary column of INT32 elements with 5 elements // keys = {0, 2, 6}, indices = {0, 1, 1, 2, 2} std::vector elements{0, 2, 2, 6, 6}; -dictionary_column_wrapper w(element.begin(), elements.end()); +cudf::test::dictionary_column_wrapper w(elements.begin(), elements.end()); // Creates a nullable dictionary column with 5 elements and a validity iterator. std::vector elements{0, 2, 0, 6, 0}; // Validity iterator here sets even rows to null. -auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;}) +auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;}); // keys = {2, 6}, indices = {NULL, 0, NULL, 1, NULL} -dictionary_column_wrapper w(elements, elements + 5, validity); +cudf::test::dictionary_column_wrapper w(elements, elements + 5, validity); // Creates a non-nullable dictionary column with 4 elements. // keys = {1, 2, 3}, indices = {0, 1, 2, 0} -dictionary_column_wrapper w{{1, 2, 3, 1}}; +cudf::test::dictionary_column_wrapper w{{1, 2, 3, 1}}; // Creates a nullable dictionary column with 4 elements and validity initializer. // keys = {1, 3}, indices = {0, NULL, 1, NULL} -dictionary_column_wrapper w{ {1, 0, 3, 0}, {1, 0, 1, 0}}; +cudf::test::dictionary_column_wrapper w{ {1, 0, 3, 0}, {1, 0, 1, 0}}; // Creates a nullable column of dictionary elements with 5 elements and validity initializer. 
std::vector elements{0, 2, 2, 6, 6}; // keys = {2, 6}, indices = {NULL, 0, NULL, 1, NULL} -dictionary_width_column_wrapper w(elements, elements + 5, {0, 1, 0, 1, 0}); +cudf::test::dictionary_column_wrapper w(elements, elements + 5, {0, 1, 0, 1, 0}); // Creates a non-nullable dictionary column with 7 string elements std::vector strings{"", "aaa", "bbb", "aaa", "bbb", "ccc", "bbb"}; // keys = {"","aaa","bbb","ccc"}, indices = {0, 1, 2, 1, 2, 3, 2} -dictionary_column_wrapper d(strings.begin(), strings.end()); +cudf::test::dictionary_column_wrapper d(strings.begin(), strings.end()); // Creates a nullable dictionary column with 7 string elements and a validity iterator. // Validity iterator here sets even rows to null. // keys = {"a", "bb"}, indices = {NULL, 1, NULL, 1, NULL, 0, NULL} -auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;}); -dictionary_column_wrapper d({"", "bb", "", "bb", "", "a", ""}, validity); +auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;}); +cudf::test::dictionary_column_wrapper d({"", "bb", "", "bb", "", "a", ""}, validity); ``` #### strings_column_wrapper -The `strings_column_wrapper` class should be used to create columns of strings. It provides +The `cudf::test::strings_column_wrapper` class should be used to create columns of strings. It provides constructors that accept an iterator range to generate each string in the column. For nullable columns, an additional iterator can be provided to indicate the validity of each string. 
There are also constructors that accept a `std::initializer_list` for the column's strings and @@ -287,27 +293,27 @@ Example: // Creates a non-nullable STRING column with 7 string elements: // {"", "this", "is", "a", "column", "of", "strings"} std::vector strings{"", "this", "is", "a", "column", "of", "strings"}; -strings_column_wrapper s(strings.begin(), strings.end()); +cudf::test::strings_column_wrapper s(strings.begin(), strings.end()); // Creates a nullable STRING column with 7 string elements: // {NULL, "this", NULL, "a", NULL, "of", NULL} std::vector strings{"", "this", "is", "a", "column", "of", "strings"}; -auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;}); -strings_column_wrapper s(strings.begin(), strings.end(), validity); +auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;}); +cudf::test::strings_column_wrapper s(strings.begin(), strings.end(), validity); // Creates a non-nullable STRING column with 7 string elements: // {"", "this", "is", "a", "column", "of", "strings"} -strings_column_wrapper s({"", "this", "is", "a", "column", "of", "strings"}); +cudf::test::strings_column_wrapper s({"", "this", "is", "a", "column", "of", "strings"}); // Creates a nullable STRING column with 7 string elements: // {NULL, "this", NULL, "a", NULL, "of", NULL} -auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;}); -strings_column_wrapper s({"", "this", "is", "a", "column", "of", "strings"}, validity); +auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;}); +cudf::test::strings_column_wrapper s({"", "this", "is", "a", "column", "of", "strings"}, validity); ``` #### lists_column_wrapper -The `lists_column_wrapper` class should be used to create columns of lists. It provides +The `cudf::test::lists_column_wrapper` class should be used to create columns of lists. 
It provides constructors that accept an iterator range to generate each list in the column. For nullable columns, an additional iterator can be provided to indicate the validity of each list. There are also constructors that accept a `std::initializer_list` for the column's lists and @@ -318,50 +324,50 @@ Example: ```c++ // Creates an empty LIST column // [] -lists_column_wrapper l{}; +cudf::test::lists_column_wrapper l{}; // Creates a LIST column with 1 list composed of 2 total integers // [{0, 1}] -lists_column_wrapper l{0, 1}; +cudf::test::lists_column_wrapper l{0, 1}; // Creates a LIST column with 3 lists // [{0, 1}, {2, 3}, {4, 5}] -lists_column_wrapper l{ {0, 1}, {2, 3}, {4, 5} }; +cudf::test::lists_column_wrapper l{ {0, 1}, {2, 3}, {4, 5} }; // Creates a LIST of LIST columns with 2 lists on the top level and // 4 below // [ {{0, 1}, {2, 3}}, {{4, 5}, {6, 7}} ] -lists_column_wrapper l{ {{0, 1}, {2, 3}}, {{4, 5}, {6, 7}} }; +cudf::test::lists_column_wrapper l{ {{0, 1}, {2, 3}}, {{4, 5}, {6, 7}} }; // Creates a LIST column with 1 list composed of 5 total integers // [{0, 1, 2, 3, 4}] -auto elements = make_counting_transform_iterator(0, [](auto i){return i*2;}); -lists_column_wrapper l(elements, elements+5); +auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i*2;}); +cudf::test::lists_column_wrapper l(elements, elements+5); // Creates a LIST column with 1 lists composed of 2 total integers // [{0, NULL}] -auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;}); -lists_column_wrapper l{{0, 1}, validity}; +auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;}); +cudf::test::lists_column_wrapper l{{0, 1}, validity}; // Creates a LIST column with 1 lists composed of 5 total integers // [{0, NULL, 2, NULL, 4}] -auto elements = make_counting_transform_iterator(0, [](auto i){return i*2;}); -auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;}); 
-lists_column_wrapper l(elements, elements+5, validity); +auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i*2;}); +auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;}); +cudf::test::lists_column_wrapper l(elements, elements+5, validity); // Creates a LIST column with 1 list composed of 2 total strings // [{"abc", "def"}] -lists_column_wrapper l{"abc", "def"}; +cudf::test::lists_column_wrapper l{"abc", "def"}; // Creates a LIST of LIST columns with 2 lists on the top level and 4 below // [ {{0, 1}, NULL}, {{4, 5}, NULL} ] -auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;}); -lists_column_wrapper l{ {{{0, 1}, {2, 3}}, validity}, {{{4, 5}, {6, 7}}, validity} }; +auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;}); +cudf::test::lists_column_wrapper l{ {{{0, 1}, {2, 3}}, validity}, {{{4, 5}, {6, 7}}, validity} }; ``` #### structs_column_wrapper -The `structs_column_wrapper` class should be used to create columns of structs. It provides +The `cudf::test::structs_column_wrapper` class should be used to create columns of structs. It provides constructors that accept a vector or initializer list of pre-constructed columns or column wrappers for child columns. For nullable columns, an additional iterator can be provided to indicate the validity of each struct. @@ -370,41 +376,41 @@ Examples: ```c++ // The following constructs a column for struct< int, string >. 
-auto child_int_col = fixed_width_column_wrapper{ 1, 2, 3, 4, 5 }.release(); -auto child_string_col = string_column_wrapper {"All", "the", "leaves", "are", "brown"}.release(); +auto child_int_col = cudf::test::fixed_width_column_wrapper{ 1, 2, 3, 4, 5 }.release(); +auto child_string_col = cudf::test::string_column_wrapper {"All", "the", "leaves", "are", "brown"}.release(); -std::vector> child_columns; +std::vector> child_columns; child_columns.push_back(std::move(child_int_col)); child_columns.push_back(std::move(child_string_col)); -struct_column_wrapper struct_column_wrapper{ +cudf::test::struct_column_wrapper wrapper{ child_cols, {1,0,1,0,1} // Validity }; -auto struct_col {struct_column_wrapper.release()}; +auto struct_col {wrapper.release()}; // The following constructs a column for struct< int, string >. -fixed_width_column_wrapper child_int_col_wrapper{ 1, 2, 3, 4, 5 }; -string_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"}; +cudf::test::fixed_width_column_wrapper child_int_col_wrapper{ 1, 2, 3, 4, 5 }; +cudf::test::string_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"}; -struct_column_wrapper struct_column_wrapper{ +cudf::test::struct_column_wrapper wrapper{ {child_int_col_wrapper, child_string_col_wrapper} {1,0,1,0,1} // Validity }; -auto struct_col {struct_column_wrapper.release()}; +auto struct_col {wrapper.release()}; // The following constructs a column for struct< int, string >. 
-fixed_width_column_wrapper child_int_col_wrapper{ 1, 2, 3, 4, 5 }; -string_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"}; +cudf::test::fixed_width_column_wrapper child_int_col_wrapper{ 1, 2, 3, 4, 5 }; +cudf::test::string_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"}; -struct_column_wrapper struct_column_wrapper{ +cudf::test::struct_column_wrapper wrapper{ {child_int_col_wrapper, child_string_col_wrapper} cudf::detail::make_counting_transform_iterator(0, [](auto i){ return i % 2; }) // Validity }; -auto struct_col {struct_column_wrapper.release()}; +auto struct_col {wrapper.release()}; ``` ### Column Comparison Utilities From dec8bde1d5cc7462e52535a4e26f2c1be507a237 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Oct 2022 07:45:01 -0700 Subject: [PATCH 057/202] Add tests ensuring that cudf's default stream is always used (#11875) This PR ensures that cudf's default stream is properly passed to all kernel launches so that nothing implicitly runs on the CUDA default stream. It adds a small library that is built during the tests and overloads CUDA functions to throw an exception when usage of the default stream is detected. It also fixes all remaining usage of anything other than cudf's default stream (I fixed most of the issues in previous PRs, but I found a few others when finalizing this one). Resolves #11929 Resolves #11942 ### Important notes for reviewers: - **The changeset is deceptively large.** The vast majority of the changes are just a global find-and-replace of `cudf::get_default_stream()` for `cudf::default_stream_value`, as well as a few smaller fixes such as missing `CUDF_TEST_PROGRAM_MAIN` in a couple of tests and usage of `rmm::cuda_stream_default`. 
The meaningful changes are: - The new default stream getter/setter in `default_stream.[hpp|cpp]` - The addition of `cpp/tests/utilities/identify_stream_usage` - The changes to the base testing fixture in `cpp/include/cudf_test/base_fixture.hpp` to inject the custom stream. - The changes to CI in `ci/gpu/build.sh` to build and use the new library. - This PR is a breaking change because it moves the default stream into the detail namespace. Going forward the default stream may only be accessed using the public accessor `cudf::get_default_stream()`. I have added a corresponding setter, but it is also in the detail namespace since I do not want to publicly support changing the default stream yet, only for the purpose of testing. Reviewers, please leave comments if you disagree with those choices. - I have made getting and setting the default stream thread-safe, but there is still only a single stream. In multi-threaded applications we may want to support a stream per thread so that users could manually achieve PTDS with more fine-tuned control. Is this worthwhile? Even if it is, I'm inclined to wait for a subsequent PR to implement this unless someone feels strongly otherwise. - I'm currently only overloading `cudaLaunchKernel`. I can add overloads for other functions as well, but I didn't want to go through the effort of overloading every possible API. If reviewers have a minimal set that they'd like to see overloaded, let me know. [I've included links to all the relevant pages of the CUDA runtime API in the identify_stream_usage.cu file](https://github.com/rapidsai/cudf/pull/11875/files#diff-0b2762207c27c080acd2114475c7a1c06377a7c18c4e9c3de60ecbdc82a4dc61R99) if someone wants to look through them. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Jason Lowe (https://github.com/jlowe) - Bradley Dice (https://github.com/bdice) - Sevag H (https://github.com/sevagh) - https://github.com/brandon-b-miller - Jake Hemstad (https://github.com/jrhemstad) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/11875 --- ci/gpu/build.sh | 17 +- ci/release/update-version.sh | 3 + conda/recipes/libcudf/meta.yaml | 2 + cpp/benchmarks/column/concatenate.cpp | 6 +- cpp/benchmarks/common/generate_input.cu | 20 +- .../common/random_distribution_factory.cuh | 6 +- cpp/benchmarks/copying/copy_if_else.cpp | 2 +- cpp/benchmarks/copying/shift.cu | 2 +- cpp/benchmarks/groupby/group_max.cpp | 2 +- cpp/benchmarks/groupby/group_nunique.cpp | 2 +- cpp/benchmarks/groupby/group_struct_keys.cpp | 2 +- cpp/benchmarks/hashing/hash.cpp | 2 +- cpp/benchmarks/io/csv/csv_reader_input.cpp | 2 +- cpp/benchmarks/io/csv/csv_reader_options.cpp | 2 +- cpp/benchmarks/io/json/nested_json.cpp | 6 +- cpp/benchmarks/io/orc/orc_reader_input.cpp | 2 +- cpp/benchmarks/io/orc/orc_reader_options.cpp | 2 +- cpp/benchmarks/io/orc/orc_writer.cpp | 6 +- cpp/benchmarks/io/orc/orc_writer_chunks.cpp | 4 +- .../io/parquet/parquet_reader_input.cpp | 2 +- .../io/parquet/parquet_reader_options.cpp | 2 +- cpp/benchmarks/io/parquet/parquet_writer.cpp | 6 +- .../io/parquet/parquet_writer_chunks.cpp | 4 +- cpp/benchmarks/io/text/multibyte_split.cpp | 4 +- cpp/benchmarks/iterator/iterator.cu | 2 +- cpp/benchmarks/join/generate_input_tables.cuh | 2 +- cpp/benchmarks/join/join_common.hpp | 4 +- cpp/benchmarks/lists/copying/scatter_lists.cu | 2 +- cpp/benchmarks/quantiles/quantiles.cpp | 2 +- cpp/benchmarks/reduction/segment_reduce.cu | 2 +- cpp/benchmarks/sort/rank.cpp | 2 +- cpp/benchmarks/sort/sort.cpp | 2 +- cpp/benchmarks/sort/sort_strings.cpp | 2 +- cpp/benchmarks/stream_compaction/distinct.cpp | 4 +- cpp/benchmarks/stream_compaction/unique.cpp | 2 +- 
cpp/benchmarks/string/case.cpp | 2 +- cpp/benchmarks/string/combine.cpp | 2 +- cpp/benchmarks/string/contains.cpp | 2 +- cpp/benchmarks/string/copy.cu | 2 +- cpp/benchmarks/string/factory.cu | 4 +- cpp/benchmarks/string/filter.cpp | 2 +- cpp/benchmarks/string/find.cpp | 2 +- cpp/benchmarks/string/like.cpp | 2 +- cpp/benchmarks/string/repeat_strings.cpp | 8 +- cpp/benchmarks/string/replace.cpp | 2 +- cpp/benchmarks/string/replace_re.cpp | 2 +- cpp/benchmarks/string/split.cpp | 2 +- cpp/benchmarks/string/substring.cpp | 2 +- cpp/benchmarks/string/translate.cpp | 2 +- cpp/benchmarks/string/url_decode.cu | 2 +- .../synchronization/synchronization.hpp | 4 +- cpp/benchmarks/text/normalize.cpp | 2 +- cpp/benchmarks/text/normalize_spaces.cpp | 2 +- cpp/benchmarks/text/tokenize.cpp | 2 +- .../type_dispatcher/type_dispatcher.cu | 4 +- cpp/include/cudf/binaryop.hpp | 4 +- cpp/include/cudf/column/column.hpp | 6 +- .../cudf/column/column_device_view.cuh | 4 +- cpp/include/cudf/column/column_factories.hpp | 34 +- cpp/include/cudf/detail/binaryop.hpp | 8 +- cpp/include/cudf/detail/concatenate.hpp | 4 +- cpp/include/cudf/detail/copy.hpp | 42 +-- cpp/include/cudf/detail/copy_if.cuh | 2 +- cpp/include/cudf/detail/copy_range.cuh | 6 +- cpp/include/cudf/detail/datetime.hpp | 32 +- cpp/include/cudf/detail/fill.hpp | 4 +- cpp/include/cudf/detail/gather.cuh | 2 +- cpp/include/cudf/detail/gather.hpp | 4 +- .../detail/groupby/group_replace_nulls.hpp | 2 +- cpp/include/cudf/detail/hashing.hpp | 8 +- cpp/include/cudf/detail/interop.hpp | 8 +- cpp/include/cudf/detail/is_element_valid.hpp | 2 +- cpp/include/cudf/detail/join.hpp | 2 +- cpp/include/cudf/detail/label_bins.hpp | 2 +- cpp/include/cudf/detail/null_mask.hpp | 4 +- cpp/include/cudf/detail/quantiles.hpp | 6 +- .../cudf/detail/reduction_functions.hpp | 42 +-- cpp/include/cudf/detail/repeat.hpp | 4 +- cpp/include/cudf/detail/replace.hpp | 14 +- cpp/include/cudf/detail/reshape.hpp | 4 +- cpp/include/cudf/detail/rolling.hpp | 2 +- 
cpp/include/cudf/detail/round.hpp | 2 +- cpp/include/cudf/detail/scatter.cuh | 2 +- cpp/include/cudf/detail/scatter.hpp | 10 +- cpp/include/cudf/detail/sequence.hpp | 6 +- cpp/include/cudf/detail/sorting.hpp | 18 +- cpp/include/cudf/detail/stream_compaction.hpp | 22 +- cpp/include/cudf/detail/structs/utilities.hpp | 4 +- cpp/include/cudf/detail/tdigest/tdigest.hpp | 6 +- cpp/include/cudf/detail/transform.hpp | 16 +- cpp/include/cudf/detail/transpose.hpp | 2 +- cpp/include/cudf/detail/unary.hpp | 10 +- cpp/include/cudf/detail/utilities/cuda.cuh | 2 +- .../cudf/detail/utilities/default_stream.hpp | 36 ++ .../detail/utilities/vector_factories.hpp | 18 +- cpp/include/cudf/detail/valid_if.cuh | 2 +- .../cudf/dictionary/detail/concatenate.hpp | 2 +- cpp/include/cudf/dictionary/detail/encode.hpp | 4 +- .../cudf/dictionary/detail/replace.hpp | 4 +- cpp/include/cudf/dictionary/detail/search.hpp | 4 +- .../cudf/dictionary/detail/update_keys.hpp | 12 +- .../cudf/dictionary/dictionary_factories.hpp | 4 +- cpp/include/cudf/io/detail/avro.hpp | 2 +- cpp/include/cudf/io/detail/csv.hpp | 2 +- cpp/include/cudf/io/detail/json.hpp | 2 +- cpp/include/cudf/io/detail/orc.hpp | 2 +- cpp/include/cudf/join.hpp | 14 +- cpp/include/cudf/lists/detail/concatenate.hpp | 2 +- cpp/include/cudf/lists/detail/gather.cuh | 2 +- cpp/include/cudf/lists/detail/scatter.cuh | 6 +- .../cudf/lists/lists_column_factories.hpp | 2 +- cpp/include/cudf/partitioning.hpp | 2 +- cpp/include/cudf/scalar/scalar.hpp | 80 ++--- cpp/include/cudf/scalar/scalar_factories.hpp | 24 +- cpp/include/cudf/strings/detail/combine.hpp | 4 +- .../cudf/strings/detail/concatenate.hpp | 2 +- cpp/include/cudf/strings/detail/copying.hpp | 2 +- cpp/include/cudf/strings/detail/fill.hpp | 2 +- cpp/include/cudf/strings/detail/json.hpp | 2 +- cpp/include/cudf/strings/detail/replace.hpp | 8 +- cpp/include/cudf/strings/detail/scatter.cuh | 2 +- cpp/include/cudf/strings/detail/utilities.cuh | 6 +- cpp/include/cudf/strings/detail/utilities.hpp | 
4 +- .../cudf/table/experimental/row_operators.cuh | 4 +- cpp/include/cudf/table/table.hpp | 2 +- cpp/include/cudf/table/table_device_view.cuh | 4 +- cpp/include/cudf/utilities/default_stream.hpp | 14 +- cpp/include/cudf_test/base_fixture.hpp | 36 +- cpp/include/cudf_test/column_utilities.hpp | 4 +- cpp/include/cudf_test/column_wrapper.hpp | 12 +- .../stream_checking_resource_adapter.hpp | 166 +++++++++ cpp/include/cudf_test/tdigest_utilities.cuh | 6 +- cpp/include/nvtext/bpe_tokenize.hpp | 4 +- cpp/include/nvtext/detail/tokenize.hpp | 8 +- cpp/src/binaryop/binaryop.cpp | 8 +- cpp/src/binaryop/compiled/binary_ops.hpp | 12 +- .../binaryop/compiled/struct_binary_ops.cuh | 4 +- cpp/src/bitmask/null_mask.cu | 10 +- cpp/src/column/column.cu | 4 +- cpp/src/column/column_view.cpp | 4 +- cpp/src/copying/concatenate.cu | 6 +- cpp/src/copying/contiguous_split.cu | 2 +- cpp/src/copying/copy.cpp | 4 +- cpp/src/copying/copy.cu | 8 +- cpp/src/copying/copy_range.cu | 4 +- cpp/src/copying/gather.cu | 2 +- cpp/src/copying/get_element.cu | 2 +- cpp/src/copying/pack.cpp | 2 +- cpp/src/copying/purge_nonempty_nulls.cu | 6 +- cpp/src/copying/reverse.cu | 4 +- cpp/src/copying/sample.cu | 2 +- cpp/src/copying/scatter.cu | 8 +- cpp/src/copying/shift.cu | 2 +- cpp/src/copying/slice.cu | 8 +- cpp/src/copying/split.cpp | 8 +- cpp/src/datetime/datetime_ops.cu | 40 +-- cpp/src/dictionary/add_keys.cu | 2 +- cpp/src/dictionary/decode.cu | 2 +- cpp/src/dictionary/encode.cu | 2 +- cpp/src/dictionary/remove_keys.cu | 10 +- cpp/src/dictionary/search.cu | 4 +- cpp/src/dictionary/set_keys.cu | 4 +- cpp/src/filling/calendrical_month_sequence.cu | 2 +- cpp/src/filling/fill.cu | 4 +- cpp/src/filling/repeat.cu | 4 +- cpp/src/filling/sequence.cu | 4 +- cpp/src/groupby/groupby.cu | 10 +- cpp/src/hash/concurrent_unordered_map.cuh | 12 +- cpp/src/hash/hash_allocator.cuh | 8 +- cpp/src/hash/hashing.cu | 2 +- cpp/src/interop/dlpack.cpp | 4 +- cpp/src/interop/from_arrow.cu | 2 +- cpp/src/interop/to_arrow.cu | 4 
+- cpp/src/io/fst/logical_stack.cuh | 2 +- cpp/src/io/functions.cpp | 22 +- cpp/src/io/json/json_column.cu | 6 +- cpp/src/io/json/json_tree.cu | 10 +- cpp/src/io/json/nested_json_gpu.cu | 2 +- cpp/src/io/orc/timezone.cuh | 2 +- cpp/src/io/text/bgzip_data_chunk_source.cu | 6 +- cpp/src/io/text/multibyte_split.cu | 20 +- cpp/src/io/utilities/hostdevice_vector.hpp | 2 +- cpp/src/join/conditional_join.cu | 18 +- cpp/src/join/conditional_join.hpp | 4 +- cpp/src/join/cross_join.cu | 2 +- cpp/src/join/join.cu | 6 +- cpp/src/join/mixed_join.cu | 10 +- cpp/src/join/mixed_join_semi.cu | 8 +- cpp/src/join/semi_join.cu | 4 +- cpp/src/labeling/label_bins.cu | 2 +- .../combine/concatenate_list_elements.cu | 2 +- cpp/src/lists/combine/concatenate_rows.cu | 2 +- cpp/src/lists/contains.cu | 10 +- cpp/src/lists/copying/segmented_gather.cu | 2 +- cpp/src/lists/count_elements.cu | 2 +- cpp/src/lists/explode.cu | 8 +- cpp/src/lists/extract.cu | 4 +- cpp/src/lists/segmented_sort.cu | 4 +- cpp/src/lists/sequences.cu | 4 +- cpp/src/lists/set_operations.cu | 8 +- .../stream_compaction/apply_boolean_mask.cu | 2 +- cpp/src/lists/stream_compaction/distinct.cu | 2 +- cpp/src/merge/merge.cu | 4 +- cpp/src/partitioning/partitioning.cu | 2 +- cpp/src/partitioning/round_robin.cu | 4 +- cpp/src/quantiles/quantile.cu | 2 +- cpp/src/quantiles/quantiles.cu | 6 +- cpp/src/quantiles/tdigest/tdigest.cu | 2 +- cpp/src/reductions/minmax.cu | 2 +- cpp/src/reductions/reductions.cpp | 6 +- cpp/src/reductions/scan/scan.cpp | 2 +- cpp/src/reductions/segmented_reductions.cpp | 4 +- cpp/src/replace/clamp.cu | 4 +- cpp/src/replace/nans.cu | 8 +- cpp/src/replace/nulls.cu | 6 +- cpp/src/replace/replace.cu | 2 +- cpp/src/reshape/byte_cast.cu | 2 +- cpp/src/reshape/interleave_columns.cu | 2 +- cpp/src/reshape/tile.cu | 2 +- .../rolling/detail/range_window_bounds.hpp | 2 +- cpp/src/rolling/grouped_rolling.cu | 8 +- cpp/src/rolling/rolling.cu | 6 +- cpp/src/round/round.cu | 2 +- cpp/src/scalar/scalar.cpp | 6 +- 
cpp/src/search/contains_column.cu | 2 +- cpp/src/search/contains_scalar.cu | 2 +- cpp/src/search/search_ordered.cu | 4 +- cpp/src/sort/is_sorted.cu | 2 +- cpp/src/sort/rank.cu | 2 +- cpp/src/sort/segmented_sort.cu | 8 +- cpp/src/sort/sort.cu | 8 +- cpp/src/sort/stable_sort.cu | 4 +- .../stream_compaction/apply_boolean_mask.cu | 2 +- cpp/src/stream_compaction/distinct.cu | 2 +- cpp/src/stream_compaction/drop_nans.cu | 4 +- cpp/src/stream_compaction/drop_nulls.cu | 4 +- cpp/src/stream_compaction/unique.cu | 2 +- cpp/src/strings/attributes.cu | 6 +- cpp/src/strings/capitalize.cu | 6 +- cpp/src/strings/case.cu | 6 +- cpp/src/strings/char_types/char_types.cu | 4 +- cpp/src/strings/combine/concatenate.cu | 4 +- cpp/src/strings/combine/join.cu | 2 +- cpp/src/strings/combine/join_list_elements.cu | 4 +- cpp/src/strings/contains.cu | 6 +- cpp/src/strings/convert/convert_booleans.cu | 4 +- cpp/src/strings/convert/convert_datetime.cu | 6 +- cpp/src/strings/convert/convert_durations.cu | 4 +- .../strings/convert/convert_fixed_point.cu | 6 +- cpp/src/strings/convert/convert_floats.cu | 6 +- cpp/src/strings/convert/convert_hex.cu | 6 +- cpp/src/strings/convert/convert_integers.cu | 8 +- cpp/src/strings/convert/convert_ipv4.cu | 6 +- cpp/src/strings/convert/convert_lists.cu | 2 +- cpp/src/strings/convert/convert_urls.cu | 4 +- cpp/src/strings/extract/extract.cu | 2 +- cpp/src/strings/extract/extract_all.cu | 2 +- cpp/src/strings/filter_chars.cu | 2 +- cpp/src/strings/json/json_path.cu | 2 +- cpp/src/strings/like.cu | 2 +- cpp/src/strings/padding.cu | 6 +- cpp/src/strings/repeat_strings.cu | 8 +- cpp/src/strings/replace/backref_re.cu | 2 +- cpp/src/strings/replace/multi_re.cu | 2 +- cpp/src/strings/replace/replace.cu | 6 +- cpp/src/strings/replace/replace_re.cu | 4 +- cpp/src/strings/search/find.cu | 20 +- cpp/src/strings/search/find_multiple.cu | 2 +- cpp/src/strings/search/findall.cu | 2 +- cpp/src/strings/split/partition.cu | 8 +- cpp/src/strings/split/split.cu | 8 +- 
cpp/src/strings/split/split_re.cu | 8 +- cpp/src/strings/split/split_record.cu | 6 +- cpp/src/strings/strings_column_factories.cu | 2 +- cpp/src/strings/strip.cu | 4 +- cpp/src/strings/substring.cu | 10 +- cpp/src/strings/translate.cu | 2 +- cpp/src/strings/wrap.cu | 2 +- cpp/src/text/detokenize.cu | 2 +- cpp/src/text/edit_distance.cu | 4 +- cpp/src/text/generate_ngrams.cu | 6 +- cpp/src/text/ngrams_tokenize.cu | 4 +- cpp/src/text/normalize.cu | 4 +- cpp/src/text/replace.cu | 4 +- cpp/src/text/stemmer.cu | 6 +- cpp/src/text/subword/bpe_tokenizer.cu | 2 +- cpp/src/text/subword/load_hash_file.cu | 2 +- cpp/src/text/subword/load_merges_file.cu | 2 +- cpp/src/text/subword/subword_tokenize.cu | 2 +- cpp/src/text/tokenize.cu | 10 +- cpp/src/transform/bools_to_mask.cu | 2 +- cpp/src/transform/compute_column.cu | 2 +- cpp/src/transform/encode.cu | 2 +- cpp/src/transform/mask_to_bools.cu | 2 +- cpp/src/transform/nans_to_nulls.cu | 2 +- cpp/src/transform/one_hot_encode.cu | 2 +- cpp/src/transform/row_bit_count.cu | 2 +- cpp/src/transform/transform.cpp | 2 +- cpp/src/transpose/transpose.cu | 2 +- cpp/src/unary/cast_ops.cu | 2 +- cpp/src/unary/math_ops.cu | 2 +- cpp/src/unary/nan_ops.cu | 4 +- cpp/src/unary/null_ops.cu | 4 +- cpp/src/utilities/default_stream.cpp | 13 +- cpp/tests/bitmask/bitmask_tests.cpp | 242 ++++++------- cpp/tests/bitmask/set_nullmask_tests.cu | 2 +- cpp/tests/column/column_device_view_test.cu | 4 +- cpp/tests/column/column_test.cu | 44 +-- cpp/tests/column/compound_test.cu | 34 +- cpp/tests/column/factories_test.cpp | 2 +- cpp/tests/copying/concatenate_tests.cu | 24 +- cpp/tests/copying/detail_gather_tests.cu | 4 +- cpp/tests/copying/scatter_list_tests.cpp | 4 +- cpp/tests/copying/shift_tests.cpp | 4 +- .../device_atomics/device_atomics_test.cu | 10 +- cpp/tests/error/error_handling_test.cu | 9 +- cpp/tests/fixed_point/fixed_point_tests.cu | 8 +- cpp/tests/groupby/lists_tests.cu | 4 +- cpp/tests/groupby/tdigest_tests.cu | 6 +- 
cpp/tests/hash_map/map_test.cu | 30 +- cpp/tests/io/comp/decomp_test.cpp | 10 +- cpp/tests/io/json_tree.cpp | 14 +- cpp/tests/io/json_type_cast_test.cu | 14 +- cpp/tests/io/nested_json_test.cpp | 20 +- cpp/tests/io/text/data_chunk_source_test.cpp | 18 +- cpp/tests/io/type_inference_test.cu | 73 ++-- cpp/tests/iterator/iterator_tests.cuh | 14 +- .../optional_iterator_test_numeric.cu | 6 +- .../iterator/pair_iterator_test_numeric.cu | 2 +- cpp/tests/join/conditional_join_tests.cu | 26 +- cpp/tests/join/join_tests.cpp | 4 +- cpp/tests/join/mixed_join_tests.cu | 10 +- cpp/tests/quantiles/percentile_approx_test.cu | 8 +- cpp/tests/quantiles/tdigest_utilities.cu | 12 +- .../reductions/segmented_reduction_tests.cpp | 134 +++++--- cpp/tests/replace/replace_nulls_tests.cpp | 4 +- cpp/tests/scalar/factories_test.cpp | 2 +- cpp/tests/scalar/scalar_device_view_test.cu | 22 +- .../apply_boolean_mask_tests.cpp | 2 +- cpp/tests/strings/datetime_tests.cpp | 23 +- cpp/tests/strings/factories_test.cu | 20 +- .../table/experimental_row_operator_tests.cu | 8 +- cpp/tests/table/table_view_tests.cu | 2 +- cpp/tests/transform/row_bit_count_test.cu | 12 +- cpp/tests/types/type_dispatcher_test.cu | 8 +- cpp/tests/unary/cast_tests.cpp | 20 +- cpp/tests/utilities/column_utilities.cu | 62 ++-- .../identify_stream_usage/CMakeLists.txt | 60 ++++ .../identify_stream_usage.cpp | 322 ++++++++++++++++++ .../test_default_stream_identification.cu | 39 +++ cpp/tests/utilities_tests/span_tests.cu | 31 +- cpp/tests/wrappers/timestamps_test.cu | 20 +- .../main/native/include/maps_column_view.hpp | 10 +- java/src/main/native/src/ColumnViewJni.cpp | 2 +- java/src/main/native/src/ColumnViewJni.hpp | 4 +- java/src/main/native/src/TableJni.cpp | 2 +- .../main/native/src/aggregation128_utils.hpp | 4 +- java/src/main/native/src/row_conversion.cu | 2 +- java/src/main/native/src/row_conversion.hpp | 8 +- .../cpp/src/strings/udf/udf_apis.cu | 2 +- 360 files changed, 2053 insertions(+), 1319 deletions(-) create 
mode 100644 cpp/include/cudf/detail/utilities/default_stream.hpp create mode 100644 cpp/include/cudf_test/stream_checking_resource_adapter.hpp create mode 100644 cpp/tests/utilities/identify_stream_usage/CMakeLists.txt create mode 100644 cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp create mode 100644 cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 41dac0e5e0f..fc020c4ca1e 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -202,11 +202,26 @@ else conda list --show-channel-urls gpuci_logger "GoogleTests" + + # Set up library for finding incorrect default stream usage. + cd "$WORKSPACE/cpp/tests/utilities/identify_stream_usage/" + mkdir build && cd build && cmake .. -GNinja && ninja && ninja test + STREAM_IDENTIFY_LIB="$WORKSPACE/cpp/tests/utilities/identify_stream_usage/build/libidentify_stream_usage.so" + # Run libcudf and libcudf_kafka gtests from libcudf-tests package for gt in "$CONDA_PREFIX/bin/gtests/libcudf"*/* ; do test_name=$(basename ${gt}) + echo "Running GoogleTest $test_name" - ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" + if [[ ${test_name} == "SPAN_TEST" ]]; then + # This one test is specifically designed to test using a thrust device + # vector, so we expect and allow it to include default stream usage. 
+ gtest_filter="SpanTest.CanConstructFromDeviceContainers" + GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" --gtest_filter="-${gtest_filter}" + ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" --gtest_filter="${gtest_filter}" + else + GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" + fi done # Test libcudf (csv, orc, and parquet) with `LIBCUDF_CUFILE_POLICY=KVIKIO` diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index c23f558f071..52dc22b6c49 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -34,6 +34,9 @@ function sed_runner() { # cpp update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/CMakeLists.txt +# cpp stream testing update +sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/tests/utilities/identify_stream_usage/CMakeLists.txt + # Python update sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index ccb0d685062..739c5409ca4 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -113,6 +113,7 @@ outputs: - test -f $PREFIX/include/cudf/detail/transpose.hpp - test -f $PREFIX/include/cudf/detail/unary.hpp - test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp + - test -f $PREFIX/include/cudf/detail/utilities/default_stream.hpp - test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp - test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h - test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp @@ -275,6 +276,7 @@ outputs: - test -f $PREFIX/include/cudf_test/file_utilities.hpp - test -f $PREFIX/include/cudf_test/io_metadata_utilities.hpp - test -f $PREFIX/include/cudf_test/iterator_utilities.hpp + - test -f 
$PREFIX/include/cudf_test/stream_checking_resource_adapter.hpp - test -f $PREFIX/include/cudf_test/table_utilities.hpp - test -f $PREFIX/include/cudf_test/timestamp_utilities.cuh - test -f $PREFIX/include/cudf_test/type_list_utilities.hpp diff --git a/cpp/benchmarks/column/concatenate.cpp b/cpp/benchmarks/column/concatenate.cpp index 99aa414fae3..3260159b409 100644 --- a/cpp/benchmarks/column/concatenate.cpp +++ b/cpp/benchmarks/column/concatenate.cpp @@ -49,7 +49,7 @@ static void BM_concatenate(benchmark::State& state) CUDF_CHECK_CUDA(0); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); auto result = cudf::concatenate(column_views); } @@ -91,7 +91,7 @@ static void BM_concatenate_tables(benchmark::State& state) CUDF_CHECK_CUDA(0); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); auto result = cudf::concatenate(table_views); } @@ -150,7 +150,7 @@ static void BM_concatenate_strings(benchmark::State& state) CUDF_CHECK_CUDA(0); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); auto result = cudf::concatenate(column_views); } diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index 2bcdaa6760c..50adab71200 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -207,7 +207,7 @@ struct random_value_fn()>> { } else { // Don't need a random seconds generator for sub-second intervals seconds_gen = [range_s](thrust::minstd_rand&, size_t size) { - rmm::device_uvector result(size, cudf::default_stream_value); + rmm::device_uvector result(size, cudf::get_default_stream()); thrust::fill(thrust::device, result.begin(), result.end(), range_s.second.count()); return result; }; @@ 
-225,7 +225,7 @@ struct random_value_fn()>> { { auto const sec = seconds_gen(engine, size); auto const ns = nanoseconds_gen(engine, size); - rmm::device_uvector result(size, cudf::default_stream_value); + rmm::device_uvector result(size, cudf::get_default_stream()); thrust::transform( thrust::device, sec.begin(), @@ -307,7 +307,7 @@ struct random_value_fn>> { random_value_fn(distribution_params const& desc) : dist{[valid_prob = desc.probability_true](thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector { - rmm::device_uvector result(size, cudf::default_stream_value); + rmm::device_uvector result(size, cudf::get_default_stream()); thrust::tabulate( thrust::device, result.begin(), result.end(), bool_generator(engine, valid_prob)); return result; @@ -359,7 +359,7 @@ rmm::device_uvector sample_indices_with_run_length(cudf::size_t return samples_indices[sample_idx]; }); rmm::device_uvector repeated_sample_indices(num_rows, - cudf::default_stream_value); + cudf::get_default_stream()); thrust::copy(thrust::device, avg_repeated_sample_indices_iterator, avg_repeated_sample_indices_iterator + num_rows, @@ -401,8 +401,8 @@ std::unique_ptr create_random_column(data_profile const& profile, // Distribution for picking elements from the array of samples auto const avg_run_len = profile.get_avg_run_length(); - rmm::device_uvector data(0, cudf::default_stream_value); - rmm::device_uvector null_mask(0, cudf::default_stream_value); + rmm::device_uvector data(0, cudf::get_default_stream()); + rmm::device_uvector null_mask(0, cudf::get_default_stream()); if (profile.get_cardinality() == 0 and avg_run_len == 1) { data = value_dist(engine, num_rows); @@ -418,8 +418,8 @@ std::unique_ptr create_random_column(data_profile const& profile, // generate n samples and gather. 
auto const sample_indices = sample_indices_with_run_length(avg_run_len, cardinality, num_rows, engine); - data = rmm::device_uvector(num_rows, cudf::default_stream_value); - null_mask = rmm::device_uvector(num_rows, cudf::default_stream_value); + data = rmm::device_uvector(num_rows, cudf::get_default_stream()); + null_mask = rmm::device_uvector(num_rows, cudf::get_default_stream()); thrust::gather( thrust::device, sample_indices.begin(), sample_indices.end(), samples.begin(), data.begin()); thrust::gather(thrust::device, @@ -498,12 +498,12 @@ std::unique_ptr create_random_utf8_string_column(data_profile cons auto valid_lengths = thrust::make_transform_iterator( thrust::make_zip_iterator(thrust::make_tuple(lengths.begin(), null_mask.begin())), valid_or_zero{}); - rmm::device_uvector offsets(num_rows + 1, cudf::default_stream_value); + rmm::device_uvector offsets(num_rows + 1, cudf::get_default_stream()); thrust::exclusive_scan( thrust::device, valid_lengths, valid_lengths + lengths.size(), offsets.begin()); // offfsets are ready. 
auto chars_length = *thrust::device_pointer_cast(offsets.end() - 1); - rmm::device_uvector chars(chars_length, cudf::default_stream_value); + rmm::device_uvector chars(chars_length, cudf::get_default_stream()); thrust::for_each_n(thrust::device, thrust::make_zip_iterator(offsets.begin(), offsets.begin() + 1), num_rows, diff --git a/cpp/benchmarks/common/random_distribution_factory.cuh b/cpp/benchmarks/common/random_distribution_factory.cuh index 3cfab858793..36b968c6010 100644 --- a/cpp/benchmarks/common/random_distribution_factory.cuh +++ b/cpp/benchmarks/common/random_distribution_factory.cuh @@ -148,7 +148,7 @@ distribution_fn make_distribution(distribution_id dist_id, T lower_bound, T u case distribution_id::NORMAL: return [lower_bound, upper_bound, dist = make_normal_dist(lower_bound, upper_bound)]( thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector { - rmm::device_uvector result(size, cudf::default_stream_value); + rmm::device_uvector result(size, cudf::get_default_stream()); thrust::tabulate(thrust::device, result.begin(), result.end(), @@ -158,7 +158,7 @@ distribution_fn make_distribution(distribution_id dist_id, T lower_bound, T u case distribution_id::UNIFORM: return [lower_bound, upper_bound, dist = make_uniform_dist(lower_bound, upper_bound)]( thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector { - rmm::device_uvector result(size, cudf::default_stream_value); + rmm::device_uvector result(size, cudf::get_default_stream()); thrust::tabulate(thrust::device, result.begin(), result.end(), @@ -169,7 +169,7 @@ distribution_fn make_distribution(distribution_id dist_id, T lower_bound, T u // kind of exponential distribution from lower_bound to upper_bound. 
return [lower_bound, upper_bound, dist = geometric_distribution(lower_bound, upper_bound)]( thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector { - rmm::device_uvector result(size, cudf::default_stream_value); + rmm::device_uvector result(size, cudf::get_default_stream()); thrust::tabulate(thrust::device, result.begin(), result.end(), diff --git a/cpp/benchmarks/copying/copy_if_else.cpp b/cpp/benchmarks/copying/copy_if_else.cpp index 82f4e15ecb0..9a153a7094c 100644 --- a/cpp/benchmarks/copying/copy_if_else.cpp +++ b/cpp/benchmarks/copying/copy_if_else.cpp @@ -45,7 +45,7 @@ static void BM_copy_if_else(benchmark::State& state, bool nulls) cudf::column_view lhs(input->view().column(0)); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); cudf::copy_if_else(lhs, rhs, decision); } } diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index a849b7da58b..957313134b3 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -24,7 +24,7 @@ template > std::unique_ptr make_scalar( T value = 0, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto s = new ScalarType(value, true, stream, mr); diff --git a/cpp/benchmarks/groupby/group_max.cpp b/cpp/benchmarks/groupby/group_max.cpp index 8454d1afee6..4956cce0daf 100644 --- a/cpp/benchmarks/groupby/group_max.cpp +++ b/cpp/benchmarks/groupby/group_max.cpp @@ -52,7 +52,7 @@ void bench_groupby_max(nvbench::state& state, nvbench::type_list) requests[0].values = vals->view(); requests[0].aggregations.push_back(cudf::make_max_aggregation()); - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + 
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); }); } diff --git a/cpp/benchmarks/groupby/group_nunique.cpp b/cpp/benchmarks/groupby/group_nunique.cpp index 1f95b5d5899..05698c04058 100644 --- a/cpp/benchmarks/groupby/group_nunique.cpp +++ b/cpp/benchmarks/groupby/group_nunique.cpp @@ -65,7 +65,7 @@ void bench_groupby_nunique(nvbench::state& state, nvbench::type_list) auto const requests = make_aggregation_request_vector( *vals, cudf::make_nunique_aggregation()); - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); }); } diff --git a/cpp/benchmarks/groupby/group_struct_keys.cpp b/cpp/benchmarks/groupby/group_struct_keys.cpp index 227a4d5259a..cc6f0faaf41 100644 --- a/cpp/benchmarks/groupby/group_struct_keys.cpp +++ b/cpp/benchmarks/groupby/group_struct_keys.cpp @@ -83,7 +83,7 @@ void bench_groupby_struct_keys(nvbench::state& state) requests[0].aggregations.push_back(cudf::make_min_aggregation()); // Set up nvbench default stream - auto stream = cudf::default_stream_value; + auto stream = cudf::get_default_stream(); state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); state.exec(nvbench::exec_tag::sync, diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp index e997bf296c5..1053c2e4694 100644 --- a/cpp/benchmarks/hashing/hash.cpp +++ b/cpp/benchmarks/hashing/hash.cpp @@ -35,7 +35,7 @@ static void BM_hash(benchmark::State& state, cudf::hash_id hid, contains_nulls h data->get_column(0).set_null_mask(rmm::device_buffer{}, 0); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + 
cuda_event_timer raii(state, true, cudf::get_default_stream()); cudf::hash(data->view(), hid); } } diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp index 4f895e13f1b..27fea856332 100644 --- a/cpp/benchmarks/io/csv/csv_reader_input.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp @@ -47,7 +47,7 @@ void csv_read_common(DataType const& data_types, cudf::io::csv_reader_options::builder(source_sink.make_source_info()); auto const mem_stats_logger = cudf::memory_stats_logger(); // init stats logger - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { try_drop_l3_cache(); // Drop L3 cache for accurate measurement diff --git a/cpp/benchmarks/io/csv/csv_reader_options.cpp b/cpp/benchmarks/io/csv/csv_reader_options.cpp index b569dc65f3d..04522c16d5c 100644 --- a/cpp/benchmarks/io/csv/csv_reader_options.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_options.cpp @@ -66,7 +66,7 @@ void BM_csv_read_varying_options( size_t const chunk_size = source_sink.size() / num_chunks; cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; auto const mem_stats_logger = cudf::memory_stats_logger(); - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { try_drop_l3_cache(); // Drop L3 cache for accurate measurement diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp index bb3e13a3a01..1fe0218bb0f 100644 --- a/cpp/benchmarks/io/json/nested_json.cpp +++ b/cpp/benchmarks/io/json/nested_json.cpp @@ -68,16 +68,16 @@ void 
BM_NESTED_JSON(nvbench::state& state) auto const string_size{size_type(state.get_int64("string_size"))}; auto const default_options = cudf::io::json_reader_options{}; - auto input = make_test_json_data(string_size, cudf::default_stream_value); + auto input = make_test_json_data(string_size, cudf::get_default_stream()); state.add_element_count(input.size()); // Run algorithm auto const mem_stats_logger = cudf::memory_stats_logger(); - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { // Allocate device-side temporary storage & run algorithm cudf::io::json::detail::device_parse_nested_json( - input, default_options, cudf::default_stream_value); + input, default_options, cudf::get_default_stream()); }); auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index 46f14cc4874..8c6f9f32f61 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -38,7 +38,7 @@ void orc_read_common(cudf::io::orc_writer_options const& opts, cudf::io::orc_reader_options::builder(source_sink.make_source_info()); auto mem_stats_logger = cudf::memory_stats_logger(); // init stats logger - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { try_drop_l3_cache(); diff --git a/cpp/benchmarks/io/orc/orc_reader_options.cpp b/cpp/benchmarks/io/orc/orc_reader_options.cpp index da64fdcac3a..6ca7a494642 100644 --- a/cpp/benchmarks/io/orc/orc_reader_options.cpp +++ 
b/cpp/benchmarks/io/orc/orc_reader_options.cpp @@ -83,7 +83,7 @@ void BM_orc_read_varying_options(nvbench::state& state, cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; auto mem_stats_logger = cudf::memory_stats_logger(); - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec( nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { try_drop_l3_cache(); diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index ddf699b0eaa..21d903d42ae 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -61,7 +61,7 @@ void BM_orc_write_encode(nvbench::state& state, nvbench::type_list( {device_input.data(), static_cast(device_input.size())}, - cudf::default_stream_value); + cudf::get_default_stream()); } if (source_type == data_chunk_source_type::host_pinned) { host_pinned_input.resize(static_cast(device_input.size())); @@ -184,7 +184,7 @@ static void bench_multibyte_split(nvbench::state& state, cudf::io::text::byte_range_info range{range_offset, range_size}; std::unique_ptr output; - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { try_drop_l3_cache(); output = cudf::io::text::multibyte_split(*source, delim, range); diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu index c121d070ca0..381cbe4824b 100644 --- a/cpp/benchmarks/iterator/iterator.cu +++ b/cpp/benchmarks/iterator/iterator.cu @@ -56,7 +56,7 @@ inline auto reduce_by_cub(OutputIterator result, InputIterator d_in, int num_ite nullptr, temp_storage_bytes, d_in, result, num_items, cudf::DeviceSum{}, init); // Allocate 
temporary storage - rmm::device_buffer d_temp_storage(temp_storage_bytes, cudf::default_stream_value); + rmm::device_buffer d_temp_storage(temp_storage_bytes, cudf::get_default_stream()); // Run reduction cub::DeviceReduce::Reduce( diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh index 31cef581f22..c606cd8b4c0 100644 --- a/cpp/benchmarks/join/generate_input_tables.cuh +++ b/cpp/benchmarks/join/generate_input_tables.cuh @@ -154,7 +154,7 @@ void generate_input_tables(key_type* const build_tbl, const int num_states = num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size; - rmm::device_uvector devStates(num_states, cudf::default_stream_value); + rmm::device_uvector devStates(num_states, cudf::get_default_stream()); init_curand<<<(num_states - 1) / block_size + 1, block_size>>>(devStates.data(), num_states); diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index 1a87c2d1158..d4fb0862506 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -142,7 +142,7 @@ static void BM_join(state_type& state, Join JoinFunc) // Benchmark the inner join operation if constexpr (std::is_same_v and (not is_conditional)) { for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); auto result = JoinFunc(probe_table.select(columns_to_join), build_table.select(columns_to_join), @@ -168,7 +168,7 @@ static void BM_join(state_type& state, Join JoinFunc) cudf::ast::operation(cudf::ast::ast_operator::EQUAL, col_ref_left_0, col_ref_right_0); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); auto result = JoinFunc(probe_table, build_table, left_zero_eq_right_zero, cudf::null_equality::UNEQUAL); diff --git 
a/cpp/benchmarks/lists/copying/scatter_lists.cu b/cpp/benchmarks/lists/copying/scatter_lists.cu index d86fb0578e5..02ad97fee11 100644 --- a/cpp/benchmarks/lists/copying/scatter_lists.cu +++ b/cpp/benchmarks/lists/copying/scatter_lists.cu @@ -40,7 +40,7 @@ class ScatterLists : public cudf::benchmark { template void BM_lists_scatter(::benchmark::State& state) { - auto stream = cudf::default_stream_value; + auto stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); const size_type base_size{(size_type)state.range(0)}; diff --git a/cpp/benchmarks/quantiles/quantiles.cpp b/cpp/benchmarks/quantiles/quantiles.cpp index 7c0a88584f8..599cff2bcda 100644 --- a/cpp/benchmarks/quantiles/quantiles.cpp +++ b/cpp/benchmarks/quantiles/quantiles.cpp @@ -50,7 +50,7 @@ static void BM_quantiles(benchmark::State& state, bool nulls) thrust::seq, q.begin(), q.end(), [n_quantiles](auto i) { return i * (1.0f / n_quantiles); }); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); auto result = cudf::quantiles(input, q); // auto result = (stable) ? 
cudf::stable_sorted_order(input) : cudf::sorted_order(input); diff --git a/cpp/benchmarks/reduction/segment_reduce.cu b/cpp/benchmarks/reduction/segment_reduce.cu index d2c15c87c2b..e063adb25f9 100644 --- a/cpp/benchmarks/reduction/segment_reduce.cu +++ b/cpp/benchmarks/reduction/segment_reduce.cu @@ -109,7 +109,7 @@ void BM_Simple_Segmented_Reduction(nvbench::state& state, auto const input_view = input->view(); auto const offset_span = cudf::device_span{offsets}; - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec( nvbench::exec_tag::sync, [input_view, output_type, offset_span, &agg](nvbench::launch& launch) { segmented_reduce(input_view, offset_span, *agg, output_type, cudf::null_policy::INCLUDE); diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp index 66277443800..2c26f4fa15d 100644 --- a/cpp/benchmarks/sort/rank.cpp +++ b/cpp/benchmarks/sort/rank.cpp @@ -37,7 +37,7 @@ static void BM_rank(benchmark::State& state, bool nulls) auto keys = create_random_column(cudf::type_to_id(), row_count{n_rows}, profile); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); auto result = cudf::rank(keys->view(), cudf::rank_method::FIRST, diff --git a/cpp/benchmarks/sort/sort.cpp b/cpp/benchmarks/sort/sort.cpp index 13502ce0959..304bac06632 100644 --- a/cpp/benchmarks/sort/sort.cpp +++ b/cpp/benchmarks/sort/sort.cpp @@ -42,7 +42,7 @@ static void BM_sort(benchmark::State& state, bool nulls) cudf::table_view input{*input_table}; for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); auto result = (stable) ? 
cudf::stable_sorted_order(input) : cudf::sorted_order(input); } diff --git a/cpp/benchmarks/sort/sort_strings.cpp b/cpp/benchmarks/sort/sort_strings.cpp index 701b392f80b..572c05d69cb 100644 --- a/cpp/benchmarks/sort/sort_strings.cpp +++ b/cpp/benchmarks/sort/sort_strings.cpp @@ -32,7 +32,7 @@ static void BM_sort(benchmark::State& state) auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); cudf::sort(table->view()); } } diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp index 23960b24b89..512554ff1bc 100644 --- a/cpp/benchmarks/stream_compaction/distinct.cpp +++ b/cpp/benchmarks/stream_compaction/distinct.cpp @@ -41,7 +41,7 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list) auto input_column = source_column->view(); auto input_table = cudf::table_view({input_column, input_column, input_column, input_column}); - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::distinct(input_table, {0}, @@ -83,7 +83,7 @@ void nvbench_distinct_list(nvbench::state& state, nvbench::type_list) auto const table = create_random_table( {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 0); - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::distinct(*table, {0}, diff --git a/cpp/benchmarks/stream_compaction/unique.cpp b/cpp/benchmarks/stream_compaction/unique.cpp index 
bcf9628b19f..652d55fb8ce 100644 --- a/cpp/benchmarks/stream_compaction/unique.cpp +++ b/cpp/benchmarks/stream_compaction/unique.cpp @@ -62,7 +62,7 @@ void nvbench_unique(nvbench::state& state, nvbench::type_listview(); auto input_table = cudf::table_view({input_column, input_column, input_column, input_column}); - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::unique(input_table, {0}, Keep, cudf::null_equality::EQUAL); }); diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp index 1c43fa0f077..72b6fcaff0e 100644 --- a/cpp/benchmarks/string/case.cpp +++ b/cpp/benchmarks/string/case.cpp @@ -32,7 +32,7 @@ static void BM_case(benchmark::State& state) cudf::strings_column_view input(column->view()); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); cudf::strings::to_lower(input); } diff --git a/cpp/benchmarks/string/combine.cpp b/cpp/benchmarks/string/combine.cpp index a8d0224916b..46bcda9ae92 100644 --- a/cpp/benchmarks/string/combine.cpp +++ b/cpp/benchmarks/string/combine.cpp @@ -41,7 +41,7 @@ static void BM_combine(benchmark::State& state) cudf::string_scalar separator("+"); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); cudf::strings::concatenate(table->view(), separator); } diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index fd04d599e5e..f7f394ea048 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -85,7 +85,7 @@ static void BM_contains(benchmark::State& state, contains_type ct) auto pattern = patterns[pattern_index]; for (auto _ : 
state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); switch (ct) { case contains_type::contains: // contains_re and matches_re use the same main logic cudf::strings::contains_re(input, pattern); diff --git a/cpp/benchmarks/string/copy.cu b/cpp/benchmarks/string/copy.cu index 318d2d524a3..669b12aa56b 100644 --- a/cpp/benchmarks/string/copy.cu +++ b/cpp/benchmarks/string/copy.cu @@ -58,7 +58,7 @@ static void BM_copy(benchmark::State& state, copy_type ct) thrust::default_random_engine()); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); switch (ct) { case gather: cudf::gather(source->view(), index_map); break; case scatter: cudf::scatter(source->view(), index_map, target->view()); break; diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu index 0e937b91e98..b75de16e901 100644 --- a/cpp/benchmarks/string/factory.cu +++ b/cpp/benchmarks/string/factory.cu @@ -55,7 +55,7 @@ static void BM_factory(benchmark::State& state) cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); auto d_column = cudf::column_device_view::create(column->view()); - rmm::device_uvector pairs(d_column->size(), cudf::default_stream_value); + rmm::device_uvector pairs(d_column->size(), cudf::get_default_stream()); thrust::transform(thrust::device, d_column->pair_begin(), d_column->pair_end(), @@ -63,7 +63,7 @@ static void BM_factory(benchmark::State& state) string_view_to_pair{}); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); cudf::make_strings_column(pairs); } diff --git a/cpp/benchmarks/string/filter.cpp b/cpp/benchmarks/string/filter.cpp index 
4001fef5da6..f07c11ee6ca 100644 --- a/cpp/benchmarks/string/filter.cpp +++ b/cpp/benchmarks/string/filter.cpp @@ -49,7 +49,7 @@ static void BM_filter_chars(benchmark::State& state, FilterAPI api) {cudf::char_utf8{'a'}, cudf::char_utf8{'c'}}}; for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); switch (api) { case filter: cudf::strings::filter_characters_of_type(input, types); break; case filter_chars: cudf::strings::filter_characters(input, filter_table); break; diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index 62c76d18e1a..4ff3b59a491 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -45,7 +45,7 @@ static void BM_find_scalar(benchmark::State& state, FindAPI find_api) cudf::test::strings_column_wrapper targets({"+", "-"}); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); switch (find_api) { case find: cudf::strings::find(input, target); break; case find_multi: diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp index f6649b186a4..de7382f5a75 100644 --- a/cpp/benchmarks/string/like.cpp +++ b/cpp/benchmarks/string/like.cpp @@ -81,7 +81,7 @@ static void bench_like(nvbench::state& state) // This pattern forces reading the entire target string (when matched expected) auto pattern = std::string("% 5W4_"); // regex equivalent: ".* 5W4." 
- state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value())); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well auto chars_size = input.chars_size(); state.add_element_count(chars_size, "chars_size"); // number of bytes; diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp index db02fec13c2..1844e93bc53 100644 --- a/cpp/benchmarks/string/repeat_strings.cpp +++ b/cpp/benchmarks/string/repeat_strings.cpp @@ -55,7 +55,7 @@ static void BM_repeat_strings_scalar_times(benchmark::State& state) auto const strings_col = cudf::strings_column_view(table->view().column(0)); for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::default_stream_value); + [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); cudf::strings::repeat_strings(strings_col, default_repeat_times); } @@ -71,7 +71,7 @@ static void BM_repeat_strings_column_times(benchmark::State& state) auto const repeat_times_col = table->view().column(1); for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::default_stream_value); + [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); cudf::strings::repeat_strings(strings_col, repeat_times_col); } @@ -88,7 +88,7 @@ static void BM_compute_output_strings_sizes(benchmark::State& state) auto const repeat_times_col = table->view().column(1); for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::default_stream_value); + [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); cudf::strings::repeat_strings_output_sizes(strings_col, repeat_times_col); } @@ -107,7 +107,7 @@ static void BM_repeat_strings_column_times_precomputed_sizes(benchmark::State& s cudf::strings::repeat_strings_output_sizes(strings_col, 
repeat_times_col); for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::default_stream_value); + [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); cudf::strings::repeat_strings(strings_col, repeat_times_col, *sizes); } diff --git a/cpp/benchmarks/string/replace.cpp b/cpp/benchmarks/string/replace.cpp index e25bf679dbc..b25af14ec2a 100644 --- a/cpp/benchmarks/string/replace.cpp +++ b/cpp/benchmarks/string/replace.cpp @@ -48,7 +48,7 @@ static void BM_replace(benchmark::State& state, replace_type rt) cudf::test::strings_column_wrapper repls({"", ""}); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); switch (rt) { case scalar: cudf::strings::replace(input, target, repl); break; case slice: cudf::strings::replace_slice(input, repl, 1, 10); break; diff --git a/cpp/benchmarks/string/replace_re.cpp b/cpp/benchmarks/string/replace_re.cpp index f8b03daa338..7e9d6036750 100644 --- a/cpp/benchmarks/string/replace_re.cpp +++ b/cpp/benchmarks/string/replace_re.cpp @@ -42,7 +42,7 @@ static void BM_replace(benchmark::State& state, replace_type rt) cudf::test::strings_column_wrapper repls({"#", ""}); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); switch (rt) { case replace_type::replace_re: // contains_re and matches_re use the same main logic cudf::strings::replace_re(input, "\\d+"); diff --git a/cpp/benchmarks/string/split.cpp b/cpp/benchmarks/string/split.cpp index 3a7a96b025d..0f005c462cc 100644 --- a/cpp/benchmarks/string/split.cpp +++ b/cpp/benchmarks/string/split.cpp @@ -43,7 +43,7 @@ static void BM_split(benchmark::State& state, split_type rt) cudf::string_scalar target("+"); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer 
raii(state, true, cudf::get_default_stream()); switch (rt) { case split: cudf::strings::split(input, target); break; case split_ws: cudf::strings::split(input); break; diff --git a/cpp/benchmarks/string/substring.cpp b/cpp/benchmarks/string/substring.cpp index 7ae5ad6f581..1201b240013 100644 --- a/cpp/benchmarks/string/substring.cpp +++ b/cpp/benchmarks/string/substring.cpp @@ -52,7 +52,7 @@ static void BM_substring(benchmark::State& state, substring_type rt) cudf::test::strings_column_wrapper delimiters(delim_itr, delim_itr + n_rows); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); switch (rt) { case position: cudf::strings::slice_strings(input, 1, max_str_length / 2); break; case multi_position: cudf::strings::slice_strings(input, starts, stops); break; diff --git a/cpp/benchmarks/string/translate.cpp b/cpp/benchmarks/string/translate.cpp index 359a3756ef2..efc2fa3154b 100644 --- a/cpp/benchmarks/string/translate.cpp +++ b/cpp/benchmarks/string/translate.cpp @@ -53,7 +53,7 @@ static void BM_translate(benchmark::State& state, int entry_count) }); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); cudf::strings::translate(input, entries); } diff --git a/cpp/benchmarks/string/url_decode.cu b/cpp/benchmarks/string/url_decode.cu index a884bc8b587..44681c924d0 100644 --- a/cpp/benchmarks/string/url_decode.cu +++ b/cpp/benchmarks/string/url_decode.cu @@ -91,7 +91,7 @@ void BM_url_decode(benchmark::State& state, int esc_seq_pct) auto strings_view = cudf::strings_column_view(column->view()); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); auto result = cudf::strings::url_decode(strings_view); } diff --git a/cpp/benchmarks/synchronization/synchronization.hpp 
b/cpp/benchmarks/synchronization/synchronization.hpp index e5882ff1c16..ebff1ff888d 100644 --- a/cpp/benchmarks/synchronization/synchronization.hpp +++ b/cpp/benchmarks/synchronization/synchronization.hpp @@ -35,7 +35,7 @@ for (auto _ : state){ // default stream, could be another stream - rmm::cuda_stream_view stream{cudf::default_stream_value}; + rmm::cuda_stream_view stream{cudf::get_default_stream()}; // Create (Construct) an object of this class. You HAVE to pass in the // benchmark::State object you are using. It measures the time from its @@ -85,7 +85,7 @@ class cuda_event_timer { */ cuda_event_timer(benchmark::State& state, bool flush_l2_cache, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); // The user must provide a benchmark::State object to set // the timer so we disable the default c'tor. diff --git a/cpp/benchmarks/text/normalize.cpp b/cpp/benchmarks/text/normalize.cpp index e5a0a1a95f4..91d873224d3 100644 --- a/cpp/benchmarks/text/normalize.cpp +++ b/cpp/benchmarks/text/normalize.cpp @@ -37,7 +37,7 @@ static void BM_normalize(benchmark::State& state, bool to_lower) cudf::strings_column_view input(column->view()); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); nvtext::normalize_characters(input, to_lower); } diff --git a/cpp/benchmarks/text/normalize_spaces.cpp b/cpp/benchmarks/text/normalize_spaces.cpp index 414cd119575..85eaf54d4ea 100644 --- a/cpp/benchmarks/text/normalize_spaces.cpp +++ b/cpp/benchmarks/text/normalize_spaces.cpp @@ -38,7 +38,7 @@ static void BM_normalize(benchmark::State& state) cudf::strings_column_view input(column->view()); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); nvtext::normalize_spaces(input); } diff --git 
a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp index 4d8df6ae37c..4695a62f1c0 100644 --- a/cpp/benchmarks/text/tokenize.cpp +++ b/cpp/benchmarks/text/tokenize.cpp @@ -44,7 +44,7 @@ static void BM_tokenize(benchmark::State& state, tokenize_type tt) cudf::test::strings_column_wrapper delimiters({" ", "+", "-"}); for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::default_stream_value); + cuda_event_timer raii(state, true, cudf::get_default_stream()); switch (tt) { case tokenize_type::single: // single whitespace delimiter diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu index b1d2498f0e6..34b1e0254dd 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu @@ -188,10 +188,10 @@ void type_dispatcher_benchmark(::benchmark::State& state) std::vector h_vec(n_cols); std::vector h_vec_p(n_cols); std::transform(h_vec.begin(), h_vec.end(), h_vec_p.begin(), [source_size](auto& col) { - col.resize(source_size * sizeof(TypeParam), cudf::default_stream_value); + col.resize(source_size * sizeof(TypeParam), cudf::get_default_stream()); return static_cast(col.data()); }); - rmm::device_uvector d_vec(n_cols, cudf::default_stream_value); + rmm::device_uvector d_vec(n_cols, cudf::get_default_stream()); if (dispatching_type == NO_DISPATCHING) { CUDF_CUDA_TRY(cudaMemcpy( diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index c82fd1b52a1..554a38e03e5 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -232,7 +232,7 @@ namespace binops { std::pair scalar_col_valid_mask_and( column_view const& col, scalar const& s, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); namespace compiled { @@ -255,7 +255,7 @@ void 
apply_sorting_struct_binary_op(mutable_column_view& out, bool is_lhs_scalar, bool is_rhs_scalar, binary_operator op, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); } // namespace detail } // namespace compiled } // namespace binops diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index c5f6d339ae9..4f42910856f 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -64,7 +64,7 @@ class column { * @param mr Device memory resource to use for all device memory allocations */ column(column const& other, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -146,7 +146,7 @@ class column { * @param mr Device memory resource to use for all device memory allocations */ explicit column(column_view view, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -208,7 +208,7 @@ class column { */ void set_null_mask(rmm::device_buffer const& new_null_mask, size_type new_null_count = UNKNOWN_NULL_COUNT, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Updates the count of null elements. diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 4f9a09fb621..1361866d0aa 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -821,7 +821,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { *`source_view` available in device memory. 
*/ static std::unique_ptr> create( - column_view source_view, rmm::cuda_stream_view stream = cudf::default_stream_value); + column_view source_view, rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Destroy the `column_device_view` object. @@ -974,7 +974,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view static std::unique_ptr> create(mutable_column_view source_view, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Returns pointer to the base device memory allocation casted to diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 5c691d866bd..85f4deecb1d 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -75,7 +75,7 @@ std::unique_ptr make_numeric_column( data_type type, size_type size, mask_state state = mask_state::UNALLOCATED, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -101,7 +101,7 @@ std::unique_ptr make_numeric_column( size_type size, B&& null_mask, size_type null_count = cudf::UNKNOWN_NULL_COUNT, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type."); @@ -132,7 +132,7 @@ std::unique_ptr make_fixed_point_column( data_type type, size_type size, mask_state state = mask_state::UNALLOCATED, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -157,7 +157,7 @@ std::unique_ptr 
make_fixed_point_column( size_type size, B&& null_mask, size_type null_count = cudf::UNKNOWN_NULL_COUNT, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type."); @@ -189,7 +189,7 @@ std::unique_ptr make_timestamp_column( data_type type, size_type size, mask_state state = mask_state::UNALLOCATED, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -215,7 +215,7 @@ std::unique_ptr make_timestamp_column( size_type size, B&& null_mask, size_type null_count = cudf::UNKNOWN_NULL_COUNT, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type."); @@ -247,7 +247,7 @@ std::unique_ptr make_duration_column( data_type type, size_type size, mask_state state = mask_state::UNALLOCATED, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -273,7 +273,7 @@ std::unique_ptr make_duration_column( size_type size, B&& null_mask, size_type null_count = cudf::UNKNOWN_NULL_COUNT, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type."); @@ -305,7 +305,7 @@ std::unique_ptr make_fixed_width_column( data_type type, size_type size, mask_state state = 
mask_state::UNALLOCATED, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -331,7 +331,7 @@ std::unique_ptr make_fixed_width_column( size_type size, B&& null_mask, size_type null_count = cudf::UNKNOWN_NULL_COUNT, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(is_fixed_width(type), "Invalid, non-fixed-width type."); @@ -370,7 +370,7 @@ std::unique_ptr make_fixed_width_column( */ std::unique_ptr make_strings_column( cudf::device_span const> strings, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -402,7 +402,7 @@ std::unique_ptr make_strings_column( std::unique_ptr make_strings_column( cudf::device_span string_views, const string_view null_placeholder, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -438,7 +438,7 @@ std::unique_ptr make_strings_column( cudf::device_span offsets, cudf::device_span null_mask = {}, size_type null_count = cudf::UNKNOWN_NULL_COUNT, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -547,7 +547,7 @@ std::unique_ptr make_lists_column( std::unique_ptr child_column, size_type null_count, rmm::device_buffer&& null_mask, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), 
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -578,7 +578,7 @@ std::unique_ptr make_structs_column( std::vector>&& child_columns, size_type null_count, rmm::device_buffer&& null_mask, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -598,7 +598,7 @@ std::unique_ptr make_structs_column( std::unique_ptr make_column_from_scalar( scalar const& s, size_type size, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -618,7 +618,7 @@ std::unique_ptr make_column_from_scalar( std::unique_ptr make_dictionary_from_scalar( scalar const& s, size_type size, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp index 8deac88a645..944f2eef743 100644 --- a/cpp/include/cudf/detail/binaryop.hpp +++ b/cpp/include/cudf/detail/binaryop.hpp @@ -35,7 +35,7 @@ std::unique_ptr binary_operation( column_view const& rhs, std::string const& ptx, data_type output_type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -49,7 +49,7 @@ std::unique_ptr binary_operation( column_view const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ 
-63,7 +63,7 @@ std::unique_ptr binary_operation( scalar const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -77,7 +77,7 @@ std::unique_ptr binary_operation( column_view const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/concatenate.hpp b/cpp/include/cudf/detail/concatenate.hpp index 08a37acead2..ae5c95c4645 100644 --- a/cpp/include/cudf/detail/concatenate.hpp +++ b/cpp/include/cudf/detail/concatenate.hpp @@ -35,7 +35,7 @@ namespace detail { */ std::unique_ptr concatenate( host_span columns_to_concat, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -45,7 +45,7 @@ std::unique_ptr concatenate( */ std::unique_ptr
concatenate( host_span tables_to_concat, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp index a2cbe8c5238..23ed9090f05 100644 --- a/cpp/include/cudf/detail/copy.hpp +++ b/cpp/include/cudf/detail/copy.hpp @@ -77,7 +77,7 @@ ColumnView slice(ColumnView const& input, cudf::size_type begin, cudf::size_type */ std::vector slice(column_view const& input, host_span indices, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::slice(column_view const&, std::initializer_list) * @@ -85,7 +85,7 @@ std::vector slice(column_view const& input, */ std::vector slice(column_view const& input, std::initializer_list indices, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::slice(table_view const&, host_span) @@ -94,7 +94,7 @@ std::vector slice(column_view const& input, */ std::vector slice(table_view const& input, host_span indices, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::slice(table_view const&, std::initializer_list) * @@ -102,7 +102,7 @@ std::vector slice(table_view const& input, */ std::vector slice(table_view const& input, std::initializer_list indices, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::split(column_view const&, host_span) @@ -111,7 +111,7 @@ std::vector slice(table_view const& input, */ std::vector split(column_view const& input, host_span splits, - rmm::cuda_stream_view stream = cudf::default_stream_value); + 
rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::split(column_view const&, std::initializer_list) * @@ -119,7 +119,7 @@ std::vector split(column_view const& input, */ std::vector split(column_view const& input, std::initializer_list splits, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::split(table_view const&, host_span) @@ -128,7 +128,7 @@ std::vector split(column_view const& input, */ std::vector split(table_view const& input, host_span splits, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::split(table_view const&, std::initializer_list) * @@ -136,7 +136,7 @@ std::vector split(table_view const& input, */ std::vector split(table_view const& input, std::initializer_list splits, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::shift(column_view const&,size_type,scalar const&, @@ -148,7 +148,7 @@ std::unique_ptr shift( column_view const& input, size_type offset, scalar const& fill_value, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -189,7 +189,7 @@ std::unique_ptr segmented_shift( device_span segment_offsets, size_type offset, scalar const& fill_value, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -200,7 +200,7 @@ std::unique_ptr segmented_shift( std::vector contiguous_split( cudf::table_view const& input, std::vector const& splits, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = 
cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -209,7 +209,7 @@ std::vector contiguous_split( * @param stream Optional CUDA stream on which to execute kernels **/ packed_columns pack(cudf::table_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -222,7 +222,7 @@ std::unique_ptr allocate_like( column_view const& input, size_type size, mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -235,7 +235,7 @@ std::unique_ptr copy_if_else( column_view const& lhs, column_view const& rhs, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -248,7 +248,7 @@ std::unique_ptr copy_if_else( scalar const& lhs, column_view const& rhs, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -261,7 +261,7 @@ std::unique_ptr copy_if_else( column_view const& lhs, scalar const& rhs, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -274,7 +274,7 @@ std::unique_ptr copy_if_else( scalar const& lhs, scalar const& rhs, column_view const& boolean_mask, - rmm::cuda_stream_view 
stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -287,7 +287,7 @@ std::unique_ptr
sample( size_type const n, sample_with_replacement replacement = sample_with_replacement::FALSE, int64_t const seed = 0, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -298,7 +298,7 @@ std::unique_ptr
sample( std::unique_ptr get_element( column_view const& input, size_type index, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -307,7 +307,7 @@ std::unique_ptr get_element( * @param stream CUDA stream used for device memory operations and kernel launches. */ bool has_nonempty_nulls(column_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::may_have_nonempty_nulls @@ -315,7 +315,7 @@ bool has_nonempty_nulls(column_view const& input, * @param stream CUDA stream used for device memory operations and kernel launches. */ bool may_have_nonempty_nulls(column_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index 99d9f5181c7..229d96659df 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -323,7 +323,7 @@ template std::unique_ptr
copy_if( table_view const& input, Filter filter, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_FUNC_RANGE(); diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh index aaba729f2f2..09cbf706d5c 100644 --- a/cpp/include/cudf/detail/copy_range.cuh +++ b/cpp/include/cudf/detail/copy_range.cuh @@ -135,7 +135,7 @@ void copy_range(SourceValueIterator source_value_begin, mutable_column_view& target, size_type target_begin, size_type target_end, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) { CUDF_EXPECTS((target_begin <= target_end) && (target_begin >= 0) && (target_begin < target.size()) && (target_end <= target.size()), @@ -196,7 +196,7 @@ void copy_range_in_place(column_view const& source, size_type source_begin, size_type source_end, size_type target_begin, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::copy_range @@ -209,7 +209,7 @@ std::unique_ptr copy_range( size_type source_begin, size_type source_end, size_type target_begin, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index d17e641533e..d2bca74ee9b 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -31,7 +31,7 @@ namespace detail { */ std::unique_ptr extract_year( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), 
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -41,7 +41,7 @@ std::unique_ptr extract_year( */ std::unique_ptr extract_month( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -51,7 +51,7 @@ std::unique_ptr extract_month( */ std::unique_ptr extract_day( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -61,7 +61,7 @@ std::unique_ptr extract_day( */ std::unique_ptr extract_weekday( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -71,7 +71,7 @@ std::unique_ptr extract_weekday( */ std::unique_ptr extract_hour( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -81,7 +81,7 @@ std::unique_ptr extract_hour( */ std::unique_ptr extract_minute( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -91,7 +91,7 @@ std::unique_ptr extract_minute( */ std::unique_ptr extract_second( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource()); /** @@ -102,7 +102,7 @@ std::unique_ptr extract_second( */ std::unique_ptr extract_millisecond_fraction( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -113,7 +113,7 @@ std::unique_ptr extract_millisecond_fraction( */ std::unique_ptr extract_microsecond_fraction( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -124,7 +124,7 @@ std::unique_ptr extract_microsecond_fraction( */ std::unique_ptr extract_nanosecond_fraction( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -134,7 +134,7 @@ std::unique_ptr extract_nanosecond_fraction( */ std::unique_ptr last_day_of_month( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -144,7 +144,7 @@ std::unique_ptr last_day_of_month( */ std::unique_ptr day_of_year( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -156,7 +156,7 @@ std::unique_ptr day_of_year( std::unique_ptr add_calendrical_months( cudf::column_view const& timestamps, cudf::column_view const& months, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view 
stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -168,7 +168,7 @@ std::unique_ptr add_calendrical_months( std::unique_ptr add_calendrical_months( cudf::column_view const& timestamps, cudf::scalar const& months, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -178,12 +178,12 @@ std::unique_ptr add_calendrical_months( */ std::unique_ptr is_leap_year( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr extract_quarter( cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/fill.hpp b/cpp/include/cudf/detail/fill.hpp index f236fa7fd43..3ac62c984fb 100644 --- a/cpp/include/cudf/detail/fill.hpp +++ b/cpp/include/cudf/detail/fill.hpp @@ -36,7 +36,7 @@ void fill_in_place(mutable_column_view& destination, size_type begin, size_type end, scalar const& value, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::fill @@ -48,7 +48,7 @@ std::unique_ptr fill( size_type begin, size_type end, scalar const& value, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 
8bb117c3dd0..2acdc007afa 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -652,7 +652,7 @@ std::unique_ptr
gather( MapIterator gather_map_begin, MapIterator gather_map_end, out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { std::vector> destination_columns; diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp index fccad73591e..2f6a9525b4e 100644 --- a/cpp/include/cudf/detail/gather.hpp +++ b/cpp/include/cudf/detail/gather.hpp @@ -66,7 +66,7 @@ std::unique_ptr
gather( column_view const& gather_map, out_of_bounds_policy bounds_policy, negative_index_policy neg_indices, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -81,7 +81,7 @@ std::unique_ptr
gather( device_span const gather_map, out_of_bounds_policy bounds_policy, negative_index_policy neg_indices, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp index faf92c996d1..6742e7d9159 100644 --- a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp +++ b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp @@ -40,7 +40,7 @@ std::unique_ptr group_replace_nulls( cudf::column_view const& grouped_value, device_span group_labels, cudf::replace_policy replace_policy, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index 66cbf24e607..98d3713c5c5 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -35,24 +35,24 @@ std::unique_ptr hash( table_view const& input, hash_id hash_function = hash_id::HASH_MURMUR3, uint32_t seed = cudf::DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr murmur_hash3_32( table_view const& input, uint32_t seed = cudf::DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr spark_murmur_hash3_32( table_view const& input, uint32_t seed = cudf::DEFAULT_HASH_SEED, - rmm::cuda_stream_view 
stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr md5_hash( table_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /* Copyright 2005-2014 Daniel James. diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 1417be358de..3d22530f5b3 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -34,7 +34,7 @@ namespace detail { */ std::unique_ptr
from_dlpack( DLManagedTensor const* managed_tensor, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -44,7 +44,7 @@ std::unique_ptr
from_dlpack( */ DLManagedTensor* to_dlpack( table_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); // Creating arrow as per given type_id and buffer arguments @@ -104,7 +104,7 @@ data_type arrow_to_cudf_type(arrow::DataType const& arrow_type); */ std::shared_ptr to_arrow(table_view input, std::vector const& metadata = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); /** @@ -114,7 +114,7 @@ std::shared_ptr to_arrow(table_view input, */ std::unique_ptr
from_arrow( arrow::Table const& input_table, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/is_element_valid.hpp b/cpp/include/cudf/detail/is_element_valid.hpp index f9f42bdae1d..e70fa8cfe5f 100644 --- a/cpp/include/cudf/detail/is_element_valid.hpp +++ b/cpp/include/cudf/detail/is_element_valid.hpp @@ -41,7 +41,7 @@ namespace detail { bool is_element_valid_sync(column_view const& col_view, size_type element_index, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index a0385674f36..51cda214f7b 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -91,7 +91,7 @@ struct hash_join { */ hash_join(cudf::table_view const& build, cudf::null_equality compare_nulls, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::hash_join::inner_join diff --git a/cpp/include/cudf/detail/label_bins.hpp b/cpp/include/cudf/detail/label_bins.hpp index 846893b70f6..af9f5fb82f5 100644 --- a/cpp/include/cudf/detail/label_bins.hpp +++ b/cpp/include/cudf/detail/label_bins.hpp @@ -51,7 +51,7 @@ std::unique_ptr label_bins( inclusive left_inclusive, column_view const& right_edges, inclusive right_inclusive, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 5d4f62e0feb..f75e3b06ccf 100644 --- 
a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -34,7 +34,7 @@ namespace detail { rmm::device_buffer create_null_mask( size_type size, mask_state state, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -46,7 +46,7 @@ void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Given a bitmask, counts the number of set (1) bits in the range diff --git a/cpp/include/cudf/detail/quantiles.hpp b/cpp/include/cudf/detail/quantiles.hpp index 82b8ff35bfc..c75b2d135d8 100644 --- a/cpp/include/cudf/detail/quantiles.hpp +++ b/cpp/include/cudf/detail/quantiles.hpp @@ -35,7 +35,7 @@ std::unique_ptr quantile( interpolation interp = interpolation::LINEAR, column_view const& ordered_indices = {}, bool exact = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -50,7 +50,7 @@ std::unique_ptr
quantiles( cudf::sorted is_input_sorted = sorted::NO, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -62,7 +62,7 @@ std::unique_ptr
quantiles( std::unique_ptr percentile_approx( tdigest::tdigest_column_view const& input, column_view const& percentiles, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp index 7877fe13951..fa6652b0db3 100644 --- a/cpp/include/cudf/detail/reduction_functions.hpp +++ b/cpp/include/cudf/detail/reduction_functions.hpp @@ -46,7 +46,7 @@ std::unique_ptr sum( column_view const& col, data_type const output_dtype, std::optional> init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -67,7 +67,7 @@ std::unique_ptr min( column_view const& col, data_type const output_dtype, std::optional> init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -88,7 +88,7 @@ std::unique_ptr max( column_view const& col, data_type const output_dtype, std::optional> init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -110,7 +110,7 @@ std::unique_ptr any( column_view const& col, data_type const output_dtype, std::optional> init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -132,7 +132,7 @@ std::unique_ptr all( column_view const& col, data_type const output_dtype, std::optional> init, - 
rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -154,7 +154,7 @@ std::unique_ptr product( column_view const& col, data_type const output_dtype, std::optional> init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -174,7 +174,7 @@ std::unique_ptr product( std::unique_ptr sum_of_squares( column_view const& col, data_type const output_dtype, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -194,7 +194,7 @@ std::unique_ptr sum_of_squares( std::unique_ptr mean( column_view const& col, data_type const output_dtype, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -217,7 +217,7 @@ std::unique_ptr variance( column_view const& col, data_type const output_dtype, cudf::size_type ddof, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -240,7 +240,7 @@ std::unique_ptr standard_deviation( column_view const& col, data_type const output_dtype, cudf::size_type ddof, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -271,7 +271,7 @@ std::unique_ptr nth_element( column_view const& col, size_type n, null_policy null_handling, - rmm::cuda_stream_view 
stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -286,7 +286,7 @@ std::unique_ptr nth_element( std::unique_ptr collect_list( column_view const& col, null_policy null_handling, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -299,7 +299,7 @@ std::unique_ptr collect_list( */ std::unique_ptr merge_lists( lists_column_view const& col, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -318,7 +318,7 @@ std::unique_ptr collect_set( null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -335,7 +335,7 @@ std::unique_ptr merge_sets( lists_column_view const& col, null_equality nulls_equal, nan_equality nans_equal, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -363,7 +363,7 @@ std::unique_ptr segmented_sum( data_type const output_dtype, null_policy null_handling, std::optional> init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -391,7 +391,7 @@ std::unique_ptr segmented_product( data_type const output_dtype, null_policy null_handling, std::optional> init, - rmm::cuda_stream_view stream 
= cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -418,7 +418,7 @@ std::unique_ptr segmented_min( data_type const output_dtype, null_policy null_handling, std::optional> init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -445,7 +445,7 @@ std::unique_ptr segmented_max( data_type const output_dtype, null_policy null_handling, std::optional> init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -473,7 +473,7 @@ std::unique_ptr segmented_any( data_type const output_dtype, null_policy null_handling, std::optional> init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -501,7 +501,7 @@ std::unique_ptr segmented_all( data_type const output_dtype, null_policy null_handling, std::optional> init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace reduction diff --git a/cpp/include/cudf/detail/repeat.hpp b/cpp/include/cudf/detail/repeat.hpp index 9bd03878579..39a0de1bd31 100644 --- a/cpp/include/cudf/detail/repeat.hpp +++ b/cpp/include/cudf/detail/repeat.hpp @@ -36,7 +36,7 @@ std::unique_ptr
repeat( table_view const& input_table, column_view const& count, bool check_count, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -48,7 +48,7 @@ std::unique_ptr
repeat( std::unique_ptr
repeat( table_view const& input_table, size_type count, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/replace.hpp b/cpp/include/cudf/detail/replace.hpp index 4c2c6e3b171..8e6e0729d07 100644 --- a/cpp/include/cudf/detail/replace.hpp +++ b/cpp/include/cudf/detail/replace.hpp @@ -34,7 +34,7 @@ namespace detail { std::unique_ptr replace_nulls( column_view const& input, cudf::column_view const& replacement, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -46,7 +46,7 @@ std::unique_ptr replace_nulls( std::unique_ptr replace_nulls( column_view const& input, scalar const& replacement, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -58,7 +58,7 @@ std::unique_ptr replace_nulls( std::unique_ptr replace_nulls( column_view const& input, replace_policy const& replace_policy, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -70,7 +70,7 @@ std::unique_ptr replace_nulls( std::unique_ptr replace_nans( column_view const& input, column_view const& replacement, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -82,7 +82,7 @@ std::unique_ptr replace_nans( std::unique_ptr replace_nans( column_view const& input, scalar const& replacement, - 
rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -94,7 +94,7 @@ std::unique_ptr find_and_replace_all( column_view const& input_col, column_view const& values_to_replace, column_view const& replacement_values, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -104,7 +104,7 @@ std::unique_ptr find_and_replace_all( */ std::unique_ptr normalize_nans_and_zeros( column_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp index be10b2c582d..205761d6888 100644 --- a/cpp/include/cudf/detail/reshape.hpp +++ b/cpp/include/cudf/detail/reshape.hpp @@ -33,7 +33,7 @@ namespace detail { std::unique_ptr
tile( table_view const& input, size_type count, - rmm::cuda_stream_view = cudf::default_stream_value, + rmm::cuda_stream_view = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -43,7 +43,7 @@ std::unique_ptr
tile( */ std::unique_ptr interleave_columns( table_view const& input, - rmm::cuda_stream_view = cudf::default_stream_value, + rmm::cuda_stream_view = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/rolling.hpp b/cpp/include/cudf/detail/rolling.hpp index e0bdde98c0a..40bedf4046d 100644 --- a/cpp/include/cudf/detail/rolling.hpp +++ b/cpp/include/cudf/detail/rolling.hpp @@ -45,7 +45,7 @@ std::unique_ptr rolling_window( column_view const& following_window, size_type min_periods, rolling_aggregation const& agg, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/round.hpp b/cpp/include/cudf/detail/round.hpp index 49e6c528eb3..89c9ce6d0e7 100644 --- a/cpp/include/cudf/detail/round.hpp +++ b/cpp/include/cudf/detail/round.hpp @@ -35,7 +35,7 @@ std::unique_ptr round( column_view const& input, int32_t decimal_places, rounding_method method, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 413f4c4dae4..af4854965ee 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -390,7 +390,7 @@ std::unique_ptr
scatter( MapIterator scatter_map_begin, MapIterator scatter_map_end, table_view const& target, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_FUNC_RANGE(); diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp index 801088b803c..515df255f4a 100644 --- a/cpp/include/cudf/detail/scatter.hpp +++ b/cpp/include/cudf/detail/scatter.hpp @@ -63,7 +63,7 @@ std::unique_ptr
scatter( table_view const& source, column_view const& scatter_map, table_view const& target, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -76,7 +76,7 @@ std::unique_ptr
scatter( table_view const& source, device_span const scatter_map, table_view const& target, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -112,7 +112,7 @@ std::unique_ptr
scatter( std::vector> const& source, column_view const& indices, table_view const& target, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -127,7 +127,7 @@ std::unique_ptr
boolean_mask_scatter( table_view const& source, table_view const& target, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -143,7 +143,7 @@ std::unique_ptr
boolean_mask_scatter( std::vector> const& source, table_view const& target, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/sequence.hpp b/cpp/include/cudf/detail/sequence.hpp index 8b3ef46d0ad..a4bebb1886c 100644 --- a/cpp/include/cudf/detail/sequence.hpp +++ b/cpp/include/cudf/detail/sequence.hpp @@ -36,7 +36,7 @@ std::unique_ptr sequence( size_type size, scalar const& init, scalar const& step, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -49,7 +49,7 @@ std::unique_ptr sequence( std::unique_ptr sequence( size_type size, scalar const& init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -64,7 +64,7 @@ std::unique_ptr calendrical_month_sequence( size_type size, scalar const& init, size_type months, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index a68407d9194..66b3f5071c6 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ b/cpp/include/cudf/detail/sorting.hpp @@ -36,7 +36,7 @@ std::unique_ptr sorted_order( table_view const& input, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), 
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -48,7 +48,7 @@ std::unique_ptr stable_sorted_order( table_view const& input, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -61,7 +61,7 @@ std::unique_ptr
sort_by_key( table_view const& keys, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -74,7 +74,7 @@ std::unique_ptr
stable_sort_by_key( table_view const& keys, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -87,7 +87,7 @@ std::unique_ptr segmented_sorted_order( column_view const& segment_offsets, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -100,7 +100,7 @@ std::unique_ptr stable_segmented_sorted_order( column_view const& segment_offsets, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -114,7 +114,7 @@ std::unique_ptr
segmented_sort_by_key( column_view const& segment_offsets, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -128,7 +128,7 @@ std::unique_ptr
stable_segmented_sort_by_key( column_view const& segment_offsets, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -140,7 +140,7 @@ std::unique_ptr
sort( table_view const& values, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index 0db929c523c..1651e8b33b6 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -36,7 +36,7 @@ std::unique_ptr
drop_nulls( table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -49,7 +49,7 @@ std::unique_ptr
drop_nans( table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -60,7 +60,7 @@ std::unique_ptr
drop_nans( std::unique_ptr
apply_boolean_mask( table_view const& input, column_view const& boolean_mask, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -73,7 +73,7 @@ std::unique_ptr
unique( std::vector const& keys, duplicate_keep_option keep, null_equality nulls_equal = null_equality::EQUAL, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -87,7 +87,7 @@ std::unique_ptr
distinct( duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -116,7 +116,7 @@ std::unique_ptr
stable_distinct( duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -138,7 +138,7 @@ rmm::device_uvector get_distinct_indices( duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -149,7 +149,7 @@ rmm::device_uvector get_distinct_indices( cudf::size_type unique_count(column_view const& input, null_policy null_handling, nan_policy nan_handling, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::unique_count(table_view const&, null_equality) @@ -158,7 +158,7 @@ cudf::size_type unique_count(column_view const& input, */ cudf::size_type unique_count(table_view const& input, null_equality nulls_equal = null_equality::EQUAL, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy) @@ -168,7 +168,7 @@ cudf::size_type unique_count(table_view const& input, cudf::size_type distinct_count(column_view const& input, null_policy null_handling, nan_policy nan_handling, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @copydoc cudf::distinct_count(table_view const&, null_equality) @@ -177,7 +177,7 @@ cudf::size_type 
distinct_count(column_view const& input, */ cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal = null_equality::EQUAL, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index 1a4b8f02dd3..03e752c102d 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -189,7 +189,7 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask, */ std::tuple> superimpose_parent_nulls( column_view const& parent, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -215,7 +215,7 @@ std::tuple> superimpose_paren */ std::tuple> superimpose_parent_nulls( table_view const& table, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/detail/tdigest/tdigest.hpp b/cpp/include/cudf/detail/tdigest/tdigest.hpp index 41e734ffe83..f1b795e21a9 100644 --- a/cpp/include/cudf/detail/tdigest/tdigest.hpp +++ b/cpp/include/cudf/detail/tdigest/tdigest.hpp @@ -139,7 +139,7 @@ std::unique_ptr make_tdigest_column( std::unique_ptr&& tdigest_offsets, std::unique_ptr&& min_values, std::unique_ptr&& max_values, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -153,7 +153,7 @@ std::unique_ptr make_tdigest_column( * @returns An empty tdigest column. 
*/ std::unique_ptr make_empty_tdigest_column( - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -167,7 +167,7 @@ std::unique_ptr make_empty_tdigest_column( * @returns An empty tdigest scalar. */ std::unique_ptr make_empty_tdigest_scalar( - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp index 929c4700873..26cdf917cda 100644 --- a/cpp/include/cudf/detail/transform.hpp +++ b/cpp/include/cudf/detail/transform.hpp @@ -34,7 +34,7 @@ std::unique_ptr transform( std::string const& unary_udf, data_type output_type, bool is_ptx, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -45,7 +45,7 @@ std::unique_ptr transform( std::unique_ptr compute_column( table_view const table, ast::operation const& expr, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -55,7 +55,7 @@ std::unique_ptr compute_column( */ std::pair, size_type> nans_to_nulls( column_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -65,7 +65,7 @@ std::pair, size_type> nans_to_nulls( */ std::pair, cudf::size_type> bools_to_mask( column_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + 
rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -75,7 +75,7 @@ std::pair, cudf::size_type> bools_to_mask( */ std::pair, std::unique_ptr> encode( cudf::table_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -86,7 +86,7 @@ std::pair, std::unique_ptr> encode( std::pair, table_view> one_hot_encode( column_view const& input, column_view const& categories, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -98,7 +98,7 @@ std::unique_ptr mask_to_bools( bitmask_type const* null_mask, size_type begin_bit, size_type end_bit, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -108,7 +108,7 @@ std::unique_ptr mask_to_bools( */ std::unique_ptr row_bit_count( table_view const& t, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/transpose.hpp b/cpp/include/cudf/detail/transpose.hpp index 367421a5ee1..14f80a99de9 100644 --- a/cpp/include/cudf/detail/transpose.hpp +++ b/cpp/include/cudf/detail/transpose.hpp @@ -30,7 +30,7 @@ namespace detail { */ std::pair, table_view> transpose( table_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp index 5d1c29aba78..c92b4f7683b 100644 --- a/cpp/include/cudf/detail/unary.hpp +++ b/cpp/include/cudf/detail/unary.hpp @@ -50,7 +50,7 @@ std::unique_ptr true_if( InputIterator end, size_type size, Predicate p, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto output = @@ -71,7 +71,7 @@ std::unique_ptr true_if( std::unique_ptr unary_operation( cudf::column_view const& input, cudf::unary_operator op, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -82,7 +82,7 @@ std::unique_ptr unary_operation( std::unique_ptr cast( column_view const& input, data_type type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -92,7 +92,7 @@ std::unique_ptr cast( */ std::unique_ptr is_nan( cudf::column_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -102,7 +102,7 @@ std::unique_ptr is_nan( */ std::unique_ptr is_not_nan( cudf::column_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh index d57078f892f..02564ea1343 
100644 --- a/cpp/include/cudf/detail/utilities/cuda.cuh +++ b/cpp/include/cudf/detail/utilities/cuda.cuh @@ -171,7 +171,7 @@ __global__ void single_thread_kernel(F f) */ template void device_single_thread(Functor functor, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) { single_thread_kernel<<<1, 1, 0, stream.value()>>>(functor); } diff --git a/cpp/include/cudf/detail/utilities/default_stream.hpp b/cpp/include/cudf/detail/utilities/default_stream.hpp new file mode 100644 index 00000000000..fa438f142b7 --- /dev/null +++ b/cpp/include/cudf/detail/utilities/default_stream.hpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cudf { + +namespace detail { + +/** + * @brief Default stream for cudf + * + * Use this value to ensure the correct stream is used when compiled with per + * thread default stream. 
+ */ +extern rmm::cuda_stream_view const default_stream_value; + +} // namespace detail + +} // namespace cudf diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index d7fdb153c19..d59ecea8bb0 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -72,7 +72,7 @@ rmm::device_uvector make_zeroed_device_uvector_async( template rmm::device_uvector make_zeroed_device_uvector_sync( std::size_t size, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { rmm::device_uvector ret(size, stream, mr); @@ -148,7 +148,7 @@ rmm::device_uvector make_device_uvector_async( template rmm::device_uvector make_device_uvector_async( device_span source_data, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { rmm::device_uvector ret(source_data.size(), stream, mr); @@ -201,7 +201,7 @@ rmm::device_uvector make_device_uvector_async( template rmm::device_uvector make_device_uvector_sync( host_span source_data, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto ret = make_device_uvector_async(source_data, stream, mr); @@ -228,7 +228,7 @@ template < std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_sync( Container const& c, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { return make_device_uvector_sync(host_span{c}, stream, 
mr); @@ -249,7 +249,7 @@ rmm::device_uvector make_device_uvector_sync( template rmm::device_uvector make_device_uvector_sync( device_span source_data, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto ret = make_device_uvector_async(source_data, stream, mr); @@ -276,7 +276,7 @@ template < std::is_convertible_v>>* = nullptr> rmm::device_uvector make_device_uvector_sync( Container const& c, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { return make_device_uvector_sync(device_span{c}, stream, mr); @@ -367,7 +367,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> std::vector make_std_vector_sync( - Container const& c, rmm::cuda_stream_view stream = cudf::default_stream_value) + Container const& c, rmm::cuda_stream_view stream = cudf::get_default_stream()) { return make_std_vector_sync(device_span{c}, stream); } @@ -424,7 +424,7 @@ thrust::host_vector make_host_vector_async( */ template thrust::host_vector make_host_vector_sync( - device_span v, rmm::cuda_stream_view stream = cudf::default_stream_value) + device_span v, rmm::cuda_stream_view stream = cudf::get_default_stream()) { auto result = make_host_vector_async(v, stream); stream.synchronize(); @@ -448,7 +448,7 @@ template < std::enable_if_t< std::is_convertible_v>>* = nullptr> thrust::host_vector make_host_vector_sync( - Container const& c, rmm::cuda_stream_view stream = cudf::default_stream_value) + Container const& c, rmm::cuda_stream_view stream = cudf::get_default_stream()) { return make_host_vector_sync(device_span{c}, stream); } diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index 0fe7edad21d..56cc73e63e2 100644 --- 
a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -90,7 +90,7 @@ std::pair valid_if( InputIterator begin, InputIterator end, Predicate p, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(begin <= end, "Invalid range."); diff --git a/cpp/include/cudf/dictionary/detail/concatenate.hpp b/cpp/include/cudf/dictionary/detail/concatenate.hpp index 9f154a054f8..e893e9d6499 100644 --- a/cpp/include/cudf/dictionary/detail/concatenate.hpp +++ b/cpp/include/cudf/dictionary/detail/concatenate.hpp @@ -39,7 +39,7 @@ namespace detail { */ std::unique_ptr concatenate( host_span columns, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/dictionary/detail/encode.hpp b/cpp/include/cudf/dictionary/detail/encode.hpp index 17173564a9a..454b8400f87 100644 --- a/cpp/include/cudf/dictionary/detail/encode.hpp +++ b/cpp/include/cudf/dictionary/detail/encode.hpp @@ -54,7 +54,7 @@ namespace detail { std::unique_ptr encode( column_view const& column, data_type indices_type = data_type{type_id::UINT32}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -74,7 +74,7 @@ std::unique_ptr encode( */ std::unique_ptr decode( dictionary_column_view const& dictionary_column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/dictionary/detail/replace.hpp 
b/cpp/include/cudf/dictionary/detail/replace.hpp index 2b38a6c40ec..a13a5eee6cb 100644 --- a/cpp/include/cudf/dictionary/detail/replace.hpp +++ b/cpp/include/cudf/dictionary/detail/replace.hpp @@ -42,7 +42,7 @@ namespace detail { std::unique_ptr replace_nulls( dictionary_column_view const& input, dictionary_column_view const& replacement, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -60,7 +60,7 @@ std::unique_ptr replace_nulls( std::unique_ptr replace_nulls( dictionary_column_view const& input, scalar const& replacement, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/dictionary/detail/search.hpp b/cpp/include/cudf/dictionary/detail/search.hpp index 4f7939b32a7..9cf45eafc7d 100644 --- a/cpp/include/cudf/dictionary/detail/search.hpp +++ b/cpp/include/cudf/dictionary/detail/search.hpp @@ -34,7 +34,7 @@ namespace detail { std::unique_ptr get_index( dictionary_column_view const& dictionary, scalar const& key, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -59,7 +59,7 @@ std::unique_ptr get_index( std::unique_ptr get_insert_index( dictionary_column_view const& dictionary, scalar const& key, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp index 
53fd71e0375..23681d36ee1 100644 --- a/cpp/include/cudf/dictionary/detail/update_keys.hpp +++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp @@ -35,7 +35,7 @@ namespace detail { std::unique_ptr add_keys( dictionary_column_view const& dictionary_column, column_view const& new_keys, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -47,7 +47,7 @@ std::unique_ptr add_keys( std::unique_ptr remove_keys( dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -58,7 +58,7 @@ std::unique_ptr remove_keys( */ std::unique_ptr remove_unused_keys( dictionary_column_view const& dictionary_column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -70,7 +70,7 @@ std::unique_ptr remove_unused_keys( std::unique_ptr set_keys( dictionary_column_view const& dictionary_column, column_view const& keys, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -81,7 +81,7 @@ std::unique_ptr set_keys( */ std::vector> match_dictionaries( cudf::host_span input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -105,7 +105,7 @@ std::vector> match_dictionaries( */ std::pair>, std::vector> match_dictionaries( std::vector tables, - 
rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/dictionary/dictionary_factories.hpp b/cpp/include/cudf/dictionary/dictionary_factories.hpp index b27fa25a27a..821981ad148 100644 --- a/cpp/include/cudf/dictionary/dictionary_factories.hpp +++ b/cpp/include/cudf/dictionary/dictionary_factories.hpp @@ -65,7 +65,7 @@ namespace cudf { std::unique_ptr make_dictionary_column( column_view const& keys_column, column_view const& indices_column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -117,7 +117,7 @@ std::unique_ptr make_dictionary_column(std::unique_ptr keys_colu std::unique_ptr make_dictionary_column( std::unique_ptr keys_column, std::unique_ptr indices_column, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/io/detail/avro.hpp b/cpp/include/cudf/io/detail/avro.hpp index 9551b1f05df..00665873b67 100644 --- a/cpp/include/cudf/io/detail/avro.hpp +++ b/cpp/include/cudf/io/detail/avro.hpp @@ -39,7 +39,7 @@ namespace avro { table_with_metadata read_avro( std::unique_ptr&& source, avro_reader_options const& options, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace avro diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp index 0d79ecd0d77..920b815ce12 100644 --- a/cpp/include/cudf/io/detail/csv.hpp +++ 
b/cpp/include/cudf/io/detail/csv.hpp @@ -55,7 +55,7 @@ void write_csv(data_sink* sink, table_view const& table, const table_metadata* metadata, csv_writer_options const& options, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace csv diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp index 3e69ef8a3b8..6d0d23c3c78 100644 --- a/cpp/include/cudf/io/detail/json.hpp +++ b/cpp/include/cudf/io/detail/json.hpp @@ -39,7 +39,7 @@ namespace json { table_with_metadata read_json( std::vector>& sources, json_reader_options const& options, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace json diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 79fcf4bd916..10bdf6e3e71 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -75,7 +75,7 @@ class reader { * @return The set of columns along with table metadata */ table_with_metadata read(orc_reader_options const& options, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); }; /** diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index bc3bfef3a7d..b613a661d95 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -287,7 +287,7 @@ class hash_join { */ hash_join(cudf::table_view const& build, null_equality compare_nulls, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * Returns the row indices that can be used to construct the result of performing @@ -308,7 +308,7 @@ class hash_join { std::unique_ptr>> 
inner_join(cudf::table_view const& probe, std::optional output_size = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; /** @@ -330,7 +330,7 @@ class hash_join { std::unique_ptr>> left_join(cudf::table_view const& probe, std::optional output_size = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; /** @@ -352,7 +352,7 @@ class hash_join { std::unique_ptr>> full_join(cudf::table_view const& probe, std::optional output_size = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; /** @@ -366,7 +366,7 @@ class hash_join { * `build` and `probe` as the the join keys . */ [[nodiscard]] std::size_t inner_join_size( - cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::default_stream_value) const; + cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * Returns the exact number of matches (rows) when performing a left join with the specified probe @@ -379,7 +379,7 @@ class hash_join { * and `probe` as the the join keys . 
*/ [[nodiscard]] std::size_t left_join_size( - cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::default_stream_value) const; + cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * Returns the exact number of matches (rows) when performing a full join with the specified probe @@ -395,7 +395,7 @@ class hash_join { */ std::size_t full_join_size( cudf::table_view const& probe, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; private: diff --git a/cpp/include/cudf/lists/detail/concatenate.hpp b/cpp/include/cudf/lists/detail/concatenate.hpp index e2e17579c85..f2982a67389 100644 --- a/cpp/include/cudf/lists/detail/concatenate.hpp +++ b/cpp/include/cudf/lists/detail/concatenate.hpp @@ -45,7 +45,7 @@ namespace detail { */ std::unique_ptr concatenate( host_span columns, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh index 9cbe9582456..f53e8ca8033 100644 --- a/cpp/include/cudf/lists/detail/gather.cuh +++ b/cpp/include/cudf/lists/detail/gather.cuh @@ -321,7 +321,7 @@ std::unique_ptr segmented_gather( lists_column_view const& source_column, lists_column_view const& gather_map_list, out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index 
2e60df4a5ae..c343eea1014 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -96,7 +96,7 @@ std::unique_ptr scatter_impl( MapIterator scatter_map_end, column_view const& source, column_view const& target, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(column_types_equal(source, target), "Mismatched column types."); @@ -169,7 +169,7 @@ std::unique_ptr scatter( MapIterator scatter_map_begin, MapIterator scatter_map_end, column_view const& target, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto const num_rows = target.size(); @@ -226,7 +226,7 @@ std::unique_ptr scatter( MapIterator scatter_map_begin, MapIterator scatter_map_end, column_view const& target, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto const num_rows = target.size(); diff --git a/cpp/include/cudf/lists/lists_column_factories.hpp b/cpp/include/cudf/lists/lists_column_factories.hpp index 2b40a875cc9..e02fa3fde5f 100644 --- a/cpp/include/cudf/lists/lists_column_factories.hpp +++ b/cpp/include/cudf/lists/lists_column_factories.hpp @@ -38,7 +38,7 @@ namespace detail { std::unique_ptr make_lists_column_from_scalar( list_scalar const& value, size_type size, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp index 
6e9f571cc9d..2c91bdf64f5 100644 --- a/cpp/include/cudf/partitioning.hpp +++ b/cpp/include/cudf/partitioning.hpp @@ -98,7 +98,7 @@ std::pair, std::vector> hash_partition( int num_partitions, hash_id hash_function = hash_id::HASH_MURMUR3, uint32_t seed = DEFAULT_HASH_SEED, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 9b9c73071af..6161639a6fb 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -64,7 +64,7 @@ class scalar { * @param is_valid true: set the value to valid. false: set it to null. * @param stream CUDA stream used for device memory operations. */ - void set_valid_async(bool is_valid, rmm::cuda_stream_view stream = cudf::default_stream_value); + void set_valid_async(bool is_valid, rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Indicates whether the scalar contains a valid value. @@ -76,7 +76,7 @@ class scalar { * @return true Value is valid * @return false Value is invalid/null */ - [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream = cudf::default_stream_value) const; + [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * @brief Returns a raw pointer to the validity bool in device memory. @@ -112,7 +112,7 @@ class scalar { * @param mr Device memory resource to use for device memory allocation. 
*/ scalar(scalar const& other, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -128,7 +128,7 @@ class scalar { */ scalar(data_type type, bool is_valid = false, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); }; @@ -164,7 +164,7 @@ class fixed_width_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ fixed_width_scalar(fixed_width_scalar const& other, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -173,7 +173,7 @@ class fixed_width_scalar : public scalar { * @param value New value of scalar. * @param stream CUDA stream used for device memory operations. */ - void set_value(T value, rmm::cuda_stream_view stream = cudf::default_stream_value); + void set_value(T value, rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Explicit conversion operator to get the value of the scalar on the host. @@ -186,7 +186,7 @@ class fixed_width_scalar : public scalar { * @param stream CUDA stream used for device memory operations. * @return Value of the scalar */ - T value(rmm::cuda_stream_view stream = cudf::default_stream_value) const; + T value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * @brief Returns a raw pointer to the value in device memory. 
@@ -215,7 +215,7 @@ class fixed_width_scalar : public scalar { */ fixed_width_scalar(T value, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -228,7 +228,7 @@ class fixed_width_scalar : public scalar { */ fixed_width_scalar(rmm::device_scalar&& data, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); }; @@ -264,7 +264,7 @@ class numeric_scalar : public detail::fixed_width_scalar { * @param mr Device memory resource to use for device memory allocation. */ numeric_scalar(numeric_scalar const& other, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -277,7 +277,7 @@ class numeric_scalar : public detail::fixed_width_scalar { */ numeric_scalar(T value, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -290,7 +290,7 @@ class numeric_scalar : public detail::fixed_width_scalar { */ numeric_scalar(rmm::device_scalar&& data, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); }; @@ -327,7 +327,7 @@ class fixed_point_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. 
*/ fixed_point_scalar(fixed_point_scalar const& other, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -342,7 +342,7 @@ class fixed_point_scalar : public scalar { fixed_point_scalar(rep_type value, numeric::scale_type scale, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -355,7 +355,7 @@ class fixed_point_scalar : public scalar { */ fixed_point_scalar(rep_type value, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -368,7 +368,7 @@ class fixed_point_scalar : public scalar { */ fixed_point_scalar(T value, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -383,7 +383,7 @@ class fixed_point_scalar : public scalar { fixed_point_scalar(rmm::device_scalar&& data, numeric::scale_type scale, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -392,7 +392,7 @@ class fixed_point_scalar : public scalar { * @param stream CUDA stream used for device memory operations. 
* @return The value of the scalar */ - rep_type value(rmm::cuda_stream_view stream = cudf::default_stream_value) const; + rep_type value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * @brief Get the decimal32, decimal64 or decimal128. @@ -400,7 +400,7 @@ class fixed_point_scalar : public scalar { * @param stream CUDA stream used for device memory operations. * @return The decimal32, decimal64 or decimal128 value */ - T fixed_point_value(rmm::cuda_stream_view stream = cudf::default_stream_value) const; + T fixed_point_value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * @brief Explicit conversion operator to get the value of the scalar on the host. @@ -451,7 +451,7 @@ class string_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ string_scalar(string_scalar const& other, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -464,7 +464,7 @@ class string_scalar : public scalar { */ string_scalar(std::string const& string, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -479,7 +479,7 @@ class string_scalar : public scalar { */ string_scalar(value_type const& source, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -494,7 +494,7 @@ class string_scalar : public scalar { */ string_scalar(rmm::device_scalar& data, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), 
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -510,7 +510,7 @@ class string_scalar : public scalar { */ string_scalar(rmm::device_buffer&& data, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -525,7 +525,7 @@ class string_scalar : public scalar { * @return The value of the scalar in a host std::string */ [[nodiscard]] std::string to_string( - rmm::cuda_stream_view stream = cudf::default_stream_value) const; + rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * @brief Get the value of the scalar as a string_view. @@ -533,7 +533,7 @@ class string_scalar : public scalar { * @param stream CUDA stream used for device memory operations. * @return The value of the scalar as a string_view */ - [[nodiscard]] value_type value(rmm::cuda_stream_view stream = cudf::default_stream_value) const; + [[nodiscard]] value_type value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * @brief Returns the size of the string in bytes. @@ -582,7 +582,7 @@ class chrono_scalar : public detail::fixed_width_scalar { * @param mr Device memory resource to use for device memory allocation. 
*/ chrono_scalar(chrono_scalar const& other, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -595,7 +595,7 @@ class chrono_scalar : public detail::fixed_width_scalar { */ chrono_scalar(T value, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -608,7 +608,7 @@ class chrono_scalar : public detail::fixed_width_scalar { */ chrono_scalar(rmm::device_scalar&& data, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); }; @@ -641,7 +641,7 @@ class timestamp_scalar : public chrono_scalar { * @param mr Device memory resource to use for device memory allocation. */ timestamp_scalar(timestamp_scalar const& other, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -657,7 +657,7 @@ class timestamp_scalar : public chrono_scalar { template timestamp_scalar(Duration2 const& value, bool is_valid, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -696,7 +696,7 @@ class duration_scalar : public chrono_scalar { * @param mr Device memory resource to use for device memory allocation. 
*/ duration_scalar(duration_scalar const& other, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -709,7 +709,7 @@ class duration_scalar : public chrono_scalar { */ duration_scalar(rep_type value, bool is_valid, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -744,7 +744,7 @@ class list_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. */ list_scalar(list_scalar const& other, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -759,7 +759,7 @@ class list_scalar : public scalar { */ list_scalar(cudf::column_view const& data, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -772,7 +772,7 @@ class list_scalar : public scalar { */ list_scalar(cudf::column&& data, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -809,7 +809,7 @@ class struct_scalar : public scalar { * @param mr Device memory resource to use for device memory allocation. 
*/ struct_scalar(struct_scalar const& other, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -824,7 +824,7 @@ class struct_scalar : public scalar { */ struct_scalar(table_view const& data, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -839,7 +839,7 @@ class struct_scalar : public scalar { */ struct_scalar(host_span data, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -855,7 +855,7 @@ class struct_scalar : public scalar { */ struct_scalar(table&& data, bool is_valid = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index b2b52ddc488..78b6c4fd0e9 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -43,7 +43,7 @@ namespace cudf { */ std::unique_ptr make_numeric_scalar( data_type type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -60,7 +60,7 @@ std::unique_ptr make_numeric_scalar( */ std::unique_ptr make_timestamp_scalar( data_type type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), 
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -77,7 +77,7 @@ std::unique_ptr make_timestamp_scalar( */ std::unique_ptr make_duration_scalar( data_type type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -94,7 +94,7 @@ std::unique_ptr make_duration_scalar( */ std::unique_ptr make_fixed_width_scalar( data_type type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -111,7 +111,7 @@ std::unique_ptr make_fixed_width_scalar( */ std::unique_ptr make_string_scalar( std::string const& string, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -126,7 +126,7 @@ std::unique_ptr make_string_scalar( */ std::unique_ptr make_default_constructed_scalar( data_type type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -141,7 +141,7 @@ std::unique_ptr make_default_constructed_scalar( */ std::unique_ptr make_empty_scalar_like( column_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -156,7 +156,7 @@ std::unique_ptr make_empty_scalar_like( template std::unique_ptr make_fixed_width_scalar( T value, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), 
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { return std::make_unique>(value, true, stream, mr); @@ -176,7 +176,7 @@ template std::unique_ptr make_fixed_point_scalar( typename T::rep value, numeric::scale_type scale, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { return std::make_unique>(value, scale, true, stream, mr); @@ -192,7 +192,7 @@ std::unique_ptr make_fixed_point_scalar( */ std::unique_ptr make_list_scalar( column_view elements, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -207,7 +207,7 @@ std::unique_ptr make_list_scalar( */ std::unique_ptr make_struct_scalar( table_view const& data, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -222,7 +222,7 @@ std::unique_ptr make_struct_scalar( */ std::unique_ptr make_struct_scalar( host_span data, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/strings/detail/combine.hpp b/cpp/include/cudf/strings/detail/combine.hpp index 7df3a4ce324..3de97ed69f1 100644 --- a/cpp/include/cudf/strings/detail/combine.hpp +++ b/cpp/include/cudf/strings/detail/combine.hpp @@ -39,7 +39,7 @@ std::unique_ptr concatenate( string_scalar const& separator, string_scalar const& narep, separator_on_nulls separate_nulls = separator_on_nulls::YES, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = 
cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -52,7 +52,7 @@ std::unique_ptr join_strings( strings_column_view const& strings, string_scalar const& separator, string_scalar const& narep, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp index 0df86db60b6..76397c15dad 100644 --- a/cpp/include/cudf/strings/detail/concatenate.hpp +++ b/cpp/include/cudf/strings/detail/concatenate.hpp @@ -44,7 +44,7 @@ namespace detail { */ std::unique_ptr concatenate( host_span columns, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/strings/detail/copying.hpp b/cpp/include/cudf/strings/detail/copying.hpp index 56e9c35c889..e4ae9917f58 100644 --- a/cpp/include/cudf/strings/detail/copying.hpp +++ b/cpp/include/cudf/strings/detail/copying.hpp @@ -53,7 +53,7 @@ std::unique_ptr copy_slice( strings_column_view const& strings, size_type start, size_type end = -1, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/strings/detail/fill.hpp b/cpp/include/cudf/strings/detail/fill.hpp index e8f9c9ca438..e6a2fa8ba4e 100644 --- a/cpp/include/cudf/strings/detail/fill.hpp +++ b/cpp/include/cudf/strings/detail/fill.hpp @@ -47,7 +47,7 @@ std::unique_ptr fill( size_type begin, size_type end, string_scalar const& value, - rmm::cuda_stream_view stream = cudf::default_stream_value, + 
rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/strings/detail/json.hpp b/cpp/include/cudf/strings/detail/json.hpp index 90188910c7d..87a1040b67d 100644 --- a/cpp/include/cudf/strings/detail/json.hpp +++ b/cpp/include/cudf/strings/detail/json.hpp @@ -34,7 +34,7 @@ std::unique_ptr get_json_object( cudf::strings_column_view const& col, cudf::string_scalar const& json_path, get_json_object_options options, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/strings/detail/replace.hpp b/cpp/include/cudf/strings/detail/replace.hpp index ce1d5e8a925..814188d88c9 100644 --- a/cpp/include/cudf/strings/detail/replace.hpp +++ b/cpp/include/cudf/strings/detail/replace.hpp @@ -48,7 +48,7 @@ std::unique_ptr replace( string_scalar const& target, string_scalar const& repl, int32_t maxrepl = -1, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -62,7 +62,7 @@ std::unique_ptr replace_slice( string_scalar const& repl = string_scalar(""), size_type start = 0, size_type stop = -1, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -75,7 +75,7 @@ std::unique_ptr replace( strings_column_view const& strings, strings_column_view const& targets, strings_column_view const& repls, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource()); /** @@ -99,7 +99,7 @@ std::unique_ptr replace( std::unique_ptr replace_nulls( strings_column_view const& strings, string_scalar const& repl = string_scalar(""), - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh index d430f390f10..10641677ea2 100644 --- a/cpp/include/cudf/strings/detail/scatter.cuh +++ b/cpp/include/cudf/strings/detail/scatter.cuh @@ -62,7 +62,7 @@ std::unique_ptr scatter( SourceIterator end, MapIterator scatter_map, strings_column_view const& target, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { if (target.is_empty()) return make_empty_column(type_id::STRING); diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh index 592f2128d0e..4eca9a5a55e 100644 --- a/cpp/include/cudf/strings/detail/utilities.cuh +++ b/cpp/include/cudf/strings/detail/utilities.cuh @@ -53,7 +53,7 @@ template std::unique_ptr make_offsets_child_column( InputIterator begin, InputIterator end, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(begin < end, "Invalid iterator range"); @@ -121,7 +121,7 @@ auto make_strings_children( SizeAndExecuteFunction size_and_exec_fn, size_type exec_size, size_type strings_count, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource()) { auto offsets_column = make_numeric_column( @@ -178,7 +178,7 @@ template auto make_strings_children( SizeAndExecuteFunction size_and_exec_fn, size_type strings_count, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { return make_strings_children(size_and_exec_fn, strings_count, strings_count, stream, mr); diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp index ceae93dfe84..f87932b4608 100644 --- a/cpp/include/cudf/strings/detail/utilities.hpp +++ b/cpp/include/cudf/strings/detail/utilities.hpp @@ -38,7 +38,7 @@ namespace detail { */ std::unique_ptr create_chars_child_column( size_type bytes, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -51,7 +51,7 @@ std::unique_ptr create_chars_child_column( */ rmm::device_uvector create_string_vector_from_column( cudf::strings_column_view const strings, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index af7091fc00c..e7b0c6eb6b6 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -820,7 +820,7 @@ class self_comparator { self_comparator(table_view const& t, host_span column_order = {}, host_span null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) : 
d_t{preprocessed_table::create(t, column_order, null_precedence, stream)} { } @@ -962,7 +962,7 @@ class two_table_comparator { table_view const& right, host_span column_order = {}, host_span null_precedence = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Construct an owning object for performing a lexicographic comparison between two rows of diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp index 3b803c2b949..6d11ed0bfad 100644 --- a/cpp/include/cudf/table/table.hpp +++ b/cpp/include/cudf/table/table.hpp @@ -69,7 +69,7 @@ class table { * @param mr Device memory resource used for allocating the device memory for the new columns */ table(table_view view, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/table/table_device_view.cuh b/cpp/include/cudf/table/table_device_view.cuh index 9f6930b57f5..511013b585d 100644 --- a/cpp/include/cudf/table/table_device_view.cuh +++ b/cpp/include/cudf/table/table_device_view.cuh @@ -175,7 +175,7 @@ class table_device_view : public detail::table_device_view_basedestroy(); }; return std::unique_ptr{ @@ -212,7 +212,7 @@ class mutable_table_device_view * available in device memory */ static auto create(mutable_table_view source_view, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) { auto deleter = [](mutable_table_device_view* t) { t->destroy(); }; return std::unique_ptr{ diff --git a/cpp/include/cudf/utilities/default_stream.hpp b/cpp/include/cudf/utilities/default_stream.hpp index 94bc01787e3..1eec3b994d0 100644 --- a/cpp/include/cudf/utilities/default_stream.hpp +++ b/cpp/include/cudf/utilities/default_stream.hpp @@ -16,21 +16,19 @@ #pragma once +#include 
+ +#include #include namespace cudf { /** - * @brief Default stream for cudf + * @brief Get the current default stream * - * Use this value to ensure the correct stream is used when compiled with per - * thread default stream. + * @return The current default stream. */ -#if defined(CUDF_USE_PER_THREAD_DEFAULT_STREAM) -static const rmm::cuda_stream_view default_stream_value{rmm::cuda_stream_per_thread}; -#else -static constexpr rmm::cuda_stream_view default_stream_value{}; -#endif +rmm::cuda_stream_view const get_default_stream(); /** * @brief Check if per-thread default stream is enabled. diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index e529785a758..be4d5bccd7b 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -18,12 +18,15 @@ #include +#include #include #include #include #include #include +#include +#include #include #include #include @@ -303,11 +306,18 @@ inline auto parse_cudf_test_opts(int argc, char** argv) try { cxxopts::Options options(argv[0], " - cuDF tests command line options"); const char* env_rmm_mode = std::getenv("GTEST_CUDF_RMM_MODE"); // Overridden by CLI options + const char* env_stream_mode = + std::getenv("GTEST_CUDF_STREAM_MODE"); // Overridden by CLI options auto default_rmm_mode = env_rmm_mode ? env_rmm_mode : "pool"; + auto default_stream_mode = env_stream_mode ? 
env_stream_mode : "default"; options.allow_unrecognised_options().add_options()( "rmm_mode", "RMM allocation mode", cxxopts::value()->default_value(default_rmm_mode)); + options.allow_unrecognised_options().add_options()( + "stream_mode", + "Whether to use a non-default stream", + cxxopts::value()->default_value(default_stream_mode)); return options.parse(argc, argv); } catch (const cxxopts::OptionException& e) { CUDF_FAIL("Error parsing command line options"); @@ -324,13 +334,21 @@ inline auto parse_cudf_test_opts(int argc, char** argv) * function parses the command line to customize test behavior, like the * allocation mode used for creating the default memory resource. */ -#define CUDF_TEST_PROGRAM_MAIN() \ - int main(int argc, char** argv) \ - { \ - ::testing::InitGoogleTest(&argc, argv); \ - auto const cmd_opts = parse_cudf_test_opts(argc, argv); \ - auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ - auto resource = cudf::test::create_memory_resource(rmm_mode); \ - rmm::mr::set_current_device_resource(resource.get()); \ - return RUN_ALL_TESTS(); \ +#define CUDF_TEST_PROGRAM_MAIN() \ + int main(int argc, char** argv) \ + { \ + ::testing::InitGoogleTest(&argc, argv); \ + auto const cmd_opts = parse_cudf_test_opts(argc, argv); \ + auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ + auto resource = cudf::test::create_memory_resource(rmm_mode); \ + rmm::mr::set_current_device_resource(resource.get()); \ + \ + auto const stream_mode = cmd_opts["stream_mode"].as(); \ + rmm::cuda_stream const new_default_stream{}; \ + if (stream_mode == "custom") { \ + auto adapter = make_stream_checking_resource_adaptor(resource.get()); \ + rmm::mr::set_current_device_resource(&adapter); \ + } \ + \ + return RUN_ALL_TESTS(); \ } diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index b7d890fb315..2cc90743912 100644 --- a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -241,11 +241,11 
@@ inline std::pair, std::vector> to auto const scv = strings_column_view(c); auto const h_chars = cudf::detail::make_std_vector_sync( cudf::device_span(scv.chars().data(), scv.chars().size()), - cudf::default_stream_value); + cudf::get_default_stream()); auto const h_offsets = cudf::detail::make_std_vector_sync( cudf::device_span( scv.offsets().data() + scv.offset(), scv.size() + 1), - cudf::default_stream_value); + cudf::get_default_stream()); // build std::string vector from chars and offsets std::vector host_data; diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 8827372b3fd..f9f571c252a 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -170,7 +170,7 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end) auto transform_begin = thrust::make_transform_iterator(begin, transformer); auto const size = cudf::distance(begin, end); auto const elements = thrust::host_vector(transform_begin, transform_begin + size); - return rmm::device_buffer{elements.data(), size * sizeof(ElementTo), cudf::default_stream_value}; + return rmm::device_buffer{elements.data(), size * sizeof(ElementTo), cudf::get_default_stream()}; } /** @@ -196,7 +196,7 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end) auto transform_begin = thrust::make_transform_iterator(begin, transformer); auto const size = cudf::distance(begin, end); auto const elements = thrust::host_vector(transform_begin, transform_begin + size); - return rmm::device_buffer{elements.data(), size * sizeof(RepType), cudf::default_stream_value}; + return rmm::device_buffer{elements.data(), size * sizeof(RepType), cudf::get_default_stream()}; } /** @@ -223,7 +223,7 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end) auto transformer_begin = thrust::make_transform_iterator(begin, to_rep); auto const size = cudf::distance(begin, end); auto const elements = 
thrust::host_vector(transformer_begin, transformer_begin + size); - return rmm::device_buffer{elements.data(), size * sizeof(RepType), cudf::default_stream_value}; + return rmm::device_buffer{elements.data(), size * sizeof(RepType), cudf::get_default_stream()}; } /** @@ -271,7 +271,7 @@ rmm::device_buffer make_null_mask(ValidityIterator begin, ValidityIterator end) auto null_mask = make_null_mask_vector(begin, end); return rmm::device_buffer{null_mask.data(), null_mask.size() * sizeof(decltype(null_mask.front())), - cudf::default_stream_value}; + cudf::get_default_stream()}; } /** @@ -547,7 +547,7 @@ class fixed_point_column_wrapper : public detail::column_wrapper { wrapped.reset(new cudf::column{ data_type, size, - rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::default_stream_value}}); + rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::get_default_stream()}}); } /** @@ -611,7 +611,7 @@ class fixed_point_column_wrapper : public detail::column_wrapper { wrapped.reset(new cudf::column{ data_type, size, - rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::default_stream_value}, + rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::get_default_stream()}, detail::make_null_mask(v, v + size), cudf::UNKNOWN_NULL_COUNT}); } diff --git a/cpp/include/cudf_test/stream_checking_resource_adapter.hpp b/cpp/include/cudf_test/stream_checking_resource_adapter.hpp new file mode 100644 index 00000000000..4a22ff148ae --- /dev/null +++ b/cpp/include/cudf_test/stream_checking_resource_adapter.hpp @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +/** + * @brief Resource that verifies that the default stream is not used in any allocation. + * + * @tparam Upstream Type of the upstream resource used for + * allocation/deallocation. + */ +template +class stream_checking_resource_adaptor final : public rmm::mr::device_memory_resource { + public: + /** + * @brief Construct a new adaptor. + * + * @throws `cudf::logic_error` if `upstream == nullptr` + * + * @param upstream The resource used for allocating/deallocating device memory + */ + stream_checking_resource_adaptor(Upstream* upstream) : upstream_{upstream} + { + CUDF_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + } + + stream_checking_resource_adaptor() = delete; + ~stream_checking_resource_adaptor() override = default; + stream_checking_resource_adaptor(stream_checking_resource_adaptor const&) = delete; + stream_checking_resource_adaptor& operator=(stream_checking_resource_adaptor const&) = delete; + stream_checking_resource_adaptor(stream_checking_resource_adaptor&&) noexcept = default; + stream_checking_resource_adaptor& operator=(stream_checking_resource_adaptor&&) noexcept = + default; + + /** + * @brief Return pointer to the upstream resource. + * + * @return Pointer to the upstream resource. + */ + Upstream* get_upstream() const noexcept { return upstream_; } + + /** + * @brief Checks whether the upstream resource supports streams. 
+ * + * @return Whether or not the upstream resource supports streams + */ + bool supports_streams() const noexcept override { return upstream_->supports_streams(); } + + /** + * @brief Query whether the resource supports the get_mem_info API. + * + * @return Whether or not the upstream resource supports get_mem_info + */ + bool supports_get_mem_info() const noexcept override + { + return upstream_->supports_get_mem_info(); + } + + private: + /** + * @brief Allocates memory of size at least `bytes` using the upstream + * resource as long as it fits inside the allocation limit. + * + * The returned pointer has at least 256B alignment. + * + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled + * by the upstream resource. + * @throws `cudf::logic_error` if attempted on a default stream + * + * @param bytes The size, in bytes, of the allocation + * @param stream Stream on which to perform the allocation + * @return Pointer to the newly allocated memory + */ + void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override + { + verify_non_default_stream(stream); + return upstream_->allocate(bytes, stream); + } + + /** + * @brief Free allocation of size `bytes` pointed to by `ptr` + * + * @throws `cudf::logic_error` if attempted on a default stream + * + * @param ptr Pointer to be deallocated + * @param bytes Size of the allocation + * @param stream Stream on which to perform the deallocation + */ + void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) override + { + verify_non_default_stream(stream); + upstream_->deallocate(ptr, bytes, stream); + } + + /** + * @brief Compare the upstream resource to another. 
+ * + * @param other The other resource to compare to + * @return Whether or not the two resources are equivalent + */ + bool do_is_equal(device_memory_resource const& other) const noexcept override + { + if (this == &other) { return true; } + auto cast = dynamic_cast const*>(&other); + return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) + : upstream_->is_equal(other); + } + + /** + * @brief Get free and available memory from upstream resource. + * + * @throws `rmm::cuda_error` if unable to retrieve memory info. + * @throws `cudf::logic_error` if attempted on a default stream + * + * @param stream Stream on which to get the mem info. + * @return std::pair with available and free memory for resource + */ + std::pair do_get_mem_info(rmm::cuda_stream_view stream) const override + { + verify_non_default_stream(stream); + return upstream_->get_mem_info(stream); + } + + /** + * @brief Throw an error if given one of CUDA's default stream specifiers. + * + * @throws `std::runtime_error` if provided a default stream + */ + void verify_non_default_stream(rmm::cuda_stream_view const stream) const + { + auto cstream{stream.value()}; + if (cstream == cudaStreamDefault || (cstream == cudaStreamLegacy) || + (cstream == cudaStreamPerThread)) { + throw std::runtime_error("Attempted to perform an operation on a default stream!"); + } + } + + Upstream* upstream_; // the upstream resource used for satisfying allocation requests +}; + +/** + * @brief Convenience factory to return a `stream_checking_resource_adaptor` around the + * upstream resource `upstream`. + * + * @tparam Upstream Type of the upstream `device_memory_resource`. 
+ * @param upstream Pointer to the upstream resource + */ +template +stream_checking_resource_adaptor make_stream_checking_resource_adaptor(Upstream* upstream) +{ + return stream_checking_resource_adaptor{upstream}; +} diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh index 1a75016d78c..6e1982164e5 100644 --- a/cpp/include/cudf_test/tdigest_utilities.cuh +++ b/cpp/include/cudf_test/tdigest_utilities.cuh @@ -118,11 +118,11 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv, // verify min/max thrust::host_vector> h_spans; h_spans.push_back({input_values.begin(), static_cast(input_values.size())}); - auto spans = cudf::detail::make_device_uvector_async(h_spans, cudf::default_stream_value); + auto spans = cudf::detail::make_device_uvector_async(h_spans, cudf::get_default_stream()); auto expected_min = cudf::make_fixed_width_column( data_type{type_id::FLOAT64}, spans.size(), mask_state::UNALLOCATED); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), spans.begin(), spans.end(), expected_min->mutable_view().template begin(), @@ -132,7 +132,7 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv, auto expected_max = cudf::make_fixed_width_column( data_type{type_id::FLOAT64}, spans.size(), mask_state::UNALLOCATED); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), spans.begin(), spans.end(), expected_max->mutable_view().template begin(), diff --git a/cpp/include/nvtext/bpe_tokenize.hpp b/cpp/include/nvtext/bpe_tokenize.hpp index 97e354cb39b..b93d93b07c6 100644 --- a/cpp/include/nvtext/bpe_tokenize.hpp +++ b/cpp/include/nvtext/bpe_tokenize.hpp @@ -46,7 +46,7 @@ struct bpe_merge_pairs { * @param mr Device memory resource used to allocate the device memory */ bpe_merge_pairs(std::unique_ptr&& input, - 
rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -57,7 +57,7 @@ struct bpe_merge_pairs { * @param mr Device memory resource used to allocate the device memory */ bpe_merge_pairs(cudf::strings_column_view const& input, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); ~bpe_merge_pairs(); diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp index 2b5d0bb855e..c06e6211654 100644 --- a/cpp/include/nvtext/detail/tokenize.hpp +++ b/cpp/include/nvtext/detail/tokenize.hpp @@ -38,7 +38,7 @@ namespace detail { std::unique_ptr tokenize( cudf::strings_column_view const& strings, cudf::string_scalar const& delimiter = cudf::string_scalar{""}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -54,7 +54,7 @@ std::unique_ptr tokenize( std::unique_ptr tokenize( cudf::strings_column_view const& strings, cudf::strings_column_view const& delimiters, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -71,7 +71,7 @@ std::unique_ptr tokenize( std::unique_ptr count_tokens( cudf::strings_column_view const& strings, cudf::string_scalar const& delimiter = cudf::string_scalar{""}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -87,7 +87,7 @@ std::unique_ptr count_tokens( 
std::unique_ptr count_tokens( cudf::strings_column_view const& strings, cudf::strings_column_view const& delimiters, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 4b79cc0581a..83ad8aa4cee 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -406,7 +406,7 @@ std::unique_ptr binary_operation(scalar const& lhs, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, op, output_type, cudf::default_stream_value, mr); + return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr); } std::unique_ptr binary_operation(column_view const& lhs, scalar const& rhs, @@ -415,7 +415,7 @@ std::unique_ptr binary_operation(column_view const& lhs, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, op, output_type, cudf::default_stream_value, mr); + return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr); } std::unique_ptr binary_operation(column_view const& lhs, column_view const& rhs, @@ -424,7 +424,7 @@ std::unique_ptr binary_operation(column_view const& lhs, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, op, output_type, cudf::default_stream_value, mr); + return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr); } std::unique_ptr binary_operation(column_view const& lhs, @@ -434,7 +434,7 @@ std::unique_ptr binary_operation(column_view const& lhs, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, ptx, output_type, cudf::default_stream_value, mr); + return detail::binary_operation(lhs, rhs, ptx, 
output_type, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp index 1f711b7c899..c51993409ef 100644 --- a/cpp/src/binaryop/compiled/binary_ops.hpp +++ b/cpp/src/binaryop/compiled/binary_ops.hpp @@ -37,7 +37,7 @@ std::unique_ptr string_null_min_max( column_view const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr string_null_min_max( @@ -45,7 +45,7 @@ std::unique_ptr string_null_min_max( scalar const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr string_null_min_max( @@ -53,7 +53,7 @@ std::unique_ptr string_null_min_max( column_view const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -80,7 +80,7 @@ std::unique_ptr binary_operation( column_view const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -107,7 +107,7 @@ std::unique_ptr binary_operation( scalar const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -133,7 
+133,7 @@ std::unique_ptr binary_operation( column_view const& rhs, binary_operator op, data_type output_type, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); void binary_operation(mutable_column_view& out, diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index def9ebcef97..2fcf1ce4e32 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -71,7 +71,7 @@ void apply_struct_binary_op(mutable_column_view& out, bool is_lhs_scalar, bool is_rhs_scalar, PhysicalElementComparator comparator = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) { auto const compare_orders = std::vector( lhs.size(), @@ -115,7 +115,7 @@ void apply_struct_equality_op(mutable_column_view& out, bool is_rhs_scalar, binary_operator op, PhysicalEqualityComparator comparator = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) { CUDF_EXPECTS(op == binary_operator::EQUAL || op == binary_operator::NOT_EQUAL || op == binary_operator::NULL_EQUALS, diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 4c9151533c2..6fef15d58d7 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -158,7 +158,7 @@ rmm::device_buffer create_null_mask(size_type size, mask_state state, rmm::mr::device_memory_resource* mr) { - return detail::create_null_mask(size, state, cudf::default_stream_value, mr); + return detail::create_null_mask(size, state, cudf::get_default_stream(), mr); } // Set pre-allocated null mask of given bit range [begin_bit, end_bit) to valid, if valid==true, @@ -510,25 +510,25 @@ rmm::device_buffer copy_bitmask(bitmask_type const* mask, 
size_type end_bit, rmm::mr::device_memory_resource* mr) { - return detail::copy_bitmask(mask, begin_bit, end_bit, cudf::default_stream_value, mr); + return detail::copy_bitmask(mask, begin_bit, end_bit, cudf::get_default_stream(), mr); } // Create a bitmask from a column view rmm::device_buffer copy_bitmask(column_view const& view, rmm::mr::device_memory_resource* mr) { - return detail::copy_bitmask(view, cudf::default_stream_value, mr); + return detail::copy_bitmask(view, cudf::get_default_stream(), mr); } std::pair bitmask_and(table_view const& view, rmm::mr::device_memory_resource* mr) { - return detail::bitmask_and(view, cudf::default_stream_value, mr); + return detail::bitmask_and(view, cudf::get_default_stream(), mr); } std::pair bitmask_or(table_view const& view, rmm::mr::device_memory_resource* mr) { - return detail::bitmask_or(view, cudf::default_stream_value, mr); + return detail::bitmask_or(view, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index 61dfea6c26a..7b862373a5b 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -144,7 +144,7 @@ size_type column::null_count() const CUDF_FUNC_RANGE(); if (_null_count <= cudf::UNKNOWN_NULL_COUNT) { _null_count = cudf::detail::null_count( - static_cast(_null_mask.data()), 0, size(), cudf::default_stream_value); + static_cast(_null_mask.data()), 0, size(), cudf::get_default_stream()); } return _null_count; } @@ -182,7 +182,7 @@ void column::set_null_count(size_type new_null_count) namespace { struct create_column_from_view { cudf::column_view view; - rmm::cuda_stream_view stream{cudf::default_stream_value}; + rmm::cuda_stream_view stream{cudf::get_default_stream()}; rmm::mr::device_memory_resource* mr; template views, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate_masks(views, cudf::default_stream_value, mr); + return detail::concatenate_masks(views, cudf::get_default_stream(), mr); 
} // Concatenates the elements from a vector of column_views @@ -565,14 +565,14 @@ std::unique_ptr concatenate(host_span columns_to_conc rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate(columns_to_concat, cudf::default_stream_value, mr); + return detail::concatenate(columns_to_concat, cudf::get_default_stream(), mr); } std::unique_ptr
concatenate(host_span tables_to_concat, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate(tables_to_concat, cudf::default_stream_value, mr); + return detail::concatenate(tables_to_concat, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 0c90eb539fc..c52ca1f74df 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -1269,7 +1269,7 @@ std::vector contiguous_split(cudf::table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contiguous_split(input, splits, cudf::default_stream_value, mr); + return detail::contiguous_split(input, splits, cudf::get_default_stream(), mr); } }; // namespace cudf diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index d9a16315488..00147277231 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -183,7 +183,7 @@ std::unique_ptr allocate_like(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::allocate_like(input, input.size(), mask_alloc, cudf::default_stream_value, mr); + return detail::allocate_like(input, input.size(), mask_alloc, cudf::get_default_stream(), mr); } std::unique_ptr allocate_like(column_view const& input, @@ -192,7 +192,7 @@ std::unique_ptr allocate_like(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::allocate_like(input, size, mask_alloc, cudf::default_stream_value, mr); + return detail::allocate_like(input, size, mask_alloc, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 7e5b9288628..0978cf441d8 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -410,7 +410,7 @@ std::unique_ptr copy_if_else(column_view const& lhs, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - 
return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::default_stream_value, mr); + return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::get_default_stream(), mr); } std::unique_ptr copy_if_else(scalar const& lhs, @@ -419,7 +419,7 @@ std::unique_ptr copy_if_else(scalar const& lhs, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::default_stream_value, mr); + return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::get_default_stream(), mr); } std::unique_ptr copy_if_else(column_view const& lhs, @@ -428,7 +428,7 @@ std::unique_ptr copy_if_else(column_view const& lhs, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::default_stream_value, mr); + return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::get_default_stream(), mr); } std::unique_ptr copy_if_else(scalar const& lhs, @@ -437,7 +437,7 @@ std::unique_ptr copy_if_else(scalar const& lhs, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::default_stream_value, mr); + return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu index 080a8f645bd..c5fa3a73e1a 100644 --- a/cpp/src/copying/copy_range.cu +++ b/cpp/src/copying/copy_range.cu @@ -274,7 +274,7 @@ void copy_range_in_place(column_view const& source, { CUDF_FUNC_RANGE(); return detail::copy_range_in_place( - source, target, source_begin, source_end, target_begin, cudf::default_stream_value); + source, target, source_begin, source_end, target_begin, cudf::get_default_stream()); } std::unique_ptr copy_range(column_view const& source, @@ -286,7 +286,7 @@ std::unique_ptr copy_range(column_view const& source, { CUDF_FUNC_RANGE(); return detail::copy_range( - source, target, source_begin, source_end, target_begin, 
cudf::default_stream_value, mr); + source, target, source_begin, source_end, target_begin, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/copying/gather.cu b/cpp/src/copying/gather.cu index d00d3a2a43e..93d05757722 100644 --- a/cpp/src/copying/gather.cu +++ b/cpp/src/copying/gather.cu @@ -85,7 +85,7 @@ std::unique_ptr
gather(table_view const& source_table, : detail::negative_index_policy::ALLOWED; return detail::gather( - source_table, gather_map, bounds_policy, index_policy, cudf::default_stream_value, mr); + source_table, gather_map, bounds_policy, index_policy, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index f12b4639b25..5e76b4adbbe 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -210,7 +210,7 @@ std::unique_ptr get_element(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::get_element(input, index, cudf::default_stream_value, mr); + return detail::get_element(input, index, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp index 5bc425ab7f5..427f2dfdade 100644 --- a/cpp/src/copying/pack.cpp +++ b/cpp/src/copying/pack.cpp @@ -219,7 +219,7 @@ table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data) packed_columns pack(cudf::table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::pack(input, cudf::default_stream_value, mr); + return detail::pack(input, cudf::get_default_stream(), mr); } /** diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu index 30de538ec7a..35eb13119f7 100644 --- a/cpp/src/copying/purge_nonempty_nulls.cu +++ b/cpp/src/copying/purge_nonempty_nulls.cu @@ -112,7 +112,7 @@ bool has_nonempty_nulls(column_view const& input) { return detail::has_nonempty_ std::unique_ptr purge_nonempty_nulls(lists_column_view const& input, rmm::mr::device_memory_resource* mr) { - return detail::purge_nonempty_nulls(input, cudf::default_stream_value, mr); + return detail::purge_nonempty_nulls(input, cudf::get_default_stream(), mr); } /** @@ -121,7 +121,7 @@ std::unique_ptr purge_nonempty_nulls(lists_column_view const& inpu std::unique_ptr 
purge_nonempty_nulls(structs_column_view const& input, rmm::mr::device_memory_resource* mr) { - return detail::purge_nonempty_nulls(input, cudf::default_stream_value, mr); + return detail::purge_nonempty_nulls(input, cudf::get_default_stream(), mr); } /** @@ -130,7 +130,7 @@ std::unique_ptr purge_nonempty_nulls(structs_column_view const& in std::unique_ptr purge_nonempty_nulls(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { - return detail::purge_nonempty_nulls(input, cudf::default_stream_value, mr); + return detail::purge_nonempty_nulls(input, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/copying/reverse.cu b/cpp/src/copying/reverse.cu index a1ffa115ad1..cf8ca7d9a92 100644 --- a/cpp/src/copying/reverse.cu +++ b/cpp/src/copying/reverse.cu @@ -57,13 +57,13 @@ std::unique_ptr reverse(column_view const& source_column, std::unique_ptr
reverse(table_view const& source_table, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::reverse(source_table, cudf::default_stream_value, mr); + return detail::reverse(source_table, cudf::get_default_stream(), mr); } std::unique_ptr reverse(column_view const& source_column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::reverse(source_column, cudf::default_stream_value, mr); + return detail::reverse(source_column, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/copying/sample.cu b/cpp/src/copying/sample.cu index 9a164bd053a..27a3f145caa 100644 --- a/cpp/src/copying/sample.cu +++ b/cpp/src/copying/sample.cu @@ -93,6 +93,6 @@ std::unique_ptr
sample(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sample(input, n, replacement, seed, cudf::default_stream_value, mr); + return detail::sample(input, n, replacement, seed, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 63711a43c3b..7b6ff80e3e4 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -490,7 +490,7 @@ std::unique_ptr
scatter(table_view const& source, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::scatter(source, scatter_map, target, cudf::default_stream_value, mr); + return detail::scatter(source, scatter_map, target, cudf::get_default_stream(), mr); } std::unique_ptr
scatter(std::vector> const& source, @@ -499,7 +499,7 @@ std::unique_ptr
scatter(std::vector> rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::scatter(source, indices, target, cudf::default_stream_value, mr); + return detail::scatter(source, indices, target, cudf::get_default_stream(), mr); } std::unique_ptr
boolean_mask_scatter(table_view const& input, @@ -508,7 +508,7 @@ std::unique_ptr
boolean_mask_scatter(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::boolean_mask_scatter(input, target, boolean_mask, cudf::default_stream_value, mr); + return detail::boolean_mask_scatter(input, target, boolean_mask, cudf::get_default_stream(), mr); } std::unique_ptr
boolean_mask_scatter( @@ -518,7 +518,7 @@ std::unique_ptr
boolean_mask_scatter( rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::boolean_mask_scatter(input, target, boolean_mask, cudf::default_stream_value, mr); + return detail::boolean_mask_scatter(input, target, boolean_mask, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index 607388cff56..a6126374ed2 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -174,7 +174,7 @@ std::unique_ptr shift(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::shift(input, offset, fill_value, cudf::default_stream_value, mr); + return detail::shift(input, offset, fill_value, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/copying/slice.cu b/cpp/src/copying/slice.cu index e329756b0df..52410ada128 100644 --- a/cpp/src/copying/slice.cu +++ b/cpp/src/copying/slice.cu @@ -114,25 +114,25 @@ std::vector slice(table_view const& input, std::vector slice(column_view const& input, host_span indices) { CUDF_FUNC_RANGE(); - return detail::slice(input, indices, cudf::default_stream_value); + return detail::slice(input, indices, cudf::get_default_stream()); } std::vector slice(table_view const& input, host_span indices) { CUDF_FUNC_RANGE(); - return detail::slice(input, indices, cudf::default_stream_value); + return detail::slice(input, indices, cudf::get_default_stream()); }; std::vector slice(column_view const& input, std::initializer_list indices) { CUDF_FUNC_RANGE(); - return detail::slice(input, indices, cudf::default_stream_value); + return detail::slice(input, indices, cudf::get_default_stream()); } std::vector slice(table_view const& input, std::initializer_list indices) { CUDF_FUNC_RANGE(); - return detail::slice(input, indices, cudf::default_stream_value); + return detail::slice(input, indices, cudf::get_default_stream()); }; } // namespace cudf diff --git a/cpp/src/copying/split.cpp 
b/cpp/src/copying/split.cpp index 19ecd959172..b577886febf 100644 --- a/cpp/src/copying/split.cpp +++ b/cpp/src/copying/split.cpp @@ -86,26 +86,26 @@ std::vector split(cudf::column_view const& input, host_span splits) { CUDF_FUNC_RANGE(); - return detail::split(input, splits, cudf::default_stream_value); + return detail::split(input, splits, cudf::get_default_stream()); } std::vector split(cudf::table_view const& input, host_span splits) { CUDF_FUNC_RANGE(); - return detail::split(input, splits, cudf::default_stream_value); + return detail::split(input, splits, cudf::get_default_stream()); } std::vector split(column_view const& input, std::initializer_list splits) { CUDF_FUNC_RANGE(); - return detail::split(input, splits, cudf::default_stream_value); + return detail::split(input, splits, cudf::get_default_stream()); } std::vector split(table_view const& input, std::initializer_list splits) { CUDF_FUNC_RANGE(); - return detail::split(input, splits, cudf::default_stream_value); + return detail::split(input, splits, cudf::get_default_stream()); } } // namespace cudf diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index e89792525c9..db1d04259b5 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -583,7 +583,7 @@ std::unique_ptr ceil_datetimes(column_view const& column, { CUDF_FUNC_RANGE(); return detail::round_general( - detail::rounding_function::CEIL, freq, column, cudf::default_stream_value, mr); + detail::rounding_function::CEIL, freq, column, cudf::get_default_stream(), mr); } std::unique_ptr floor_datetimes(column_view const& column, @@ -592,7 +592,7 @@ std::unique_ptr floor_datetimes(column_view const& column, { CUDF_FUNC_RANGE(); return detail::round_general( - detail::rounding_function::FLOOR, freq, column, cudf::default_stream_value, mr); + detail::rounding_function::FLOOR, freq, column, cudf::get_default_stream(), mr); } std::unique_ptr round_datetimes(column_view const& column, @@ -601,87 
+601,87 @@ std::unique_ptr round_datetimes(column_view const& column, { CUDF_FUNC_RANGE(); return detail::round_general( - detail::rounding_function::ROUND, freq, column, cudf::default_stream_value, mr); + detail::rounding_function::ROUND, freq, column, cudf::get_default_stream(), mr); } std::unique_ptr extract_year(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_year(column, cudf::default_stream_value, mr); + return detail::extract_year(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_month(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_month(column, cudf::default_stream_value, mr); + return detail::extract_month(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_day(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_day(column, cudf::default_stream_value, mr); + return detail::extract_day(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_weekday(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_weekday(column, cudf::default_stream_value, mr); + return detail::extract_weekday(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_hour(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_hour(column, cudf::default_stream_value, mr); + return detail::extract_hour(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_minute(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_minute(column, cudf::default_stream_value, mr); + return detail::extract_minute(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_second(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return 
detail::extract_second(column, cudf::default_stream_value, mr); + return detail::extract_second(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_millisecond_fraction(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_millisecond_fraction(column, cudf::default_stream_value, mr); + return detail::extract_millisecond_fraction(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_microsecond_fraction(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_microsecond_fraction(column, cudf::default_stream_value, mr); + return detail::extract_microsecond_fraction(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_nanosecond_fraction(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_nanosecond_fraction(column, cudf::default_stream_value, mr); + return detail::extract_nanosecond_fraction(column, cudf::get_default_stream(), mr); } std::unique_ptr last_day_of_month(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::last_day_of_month(column, cudf::default_stream_value, mr); + return detail::last_day_of_month(column, cudf::get_default_stream(), mr); } std::unique_ptr day_of_year(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::day_of_year(column, cudf::default_stream_value, mr); + return detail::day_of_year(column, cudf::get_default_stream(), mr); } std::unique_ptr add_calendrical_months(cudf::column_view const& timestamp_column, @@ -690,7 +690,7 @@ std::unique_ptr add_calendrical_months(cudf::column_view const& ti { CUDF_FUNC_RANGE(); return detail::add_calendrical_months( - timestamp_column, months_column, cudf::default_stream_value, mr); + timestamp_column, months_column, cudf::get_default_stream(), mr); } std::unique_ptr 
add_calendrical_months(cudf::column_view const& timestamp_column, @@ -698,27 +698,27 @@ std::unique_ptr add_calendrical_months(cudf::column_view const& ti rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::add_calendrical_months(timestamp_column, months, cudf::default_stream_value, mr); + return detail::add_calendrical_months(timestamp_column, months, cudf::get_default_stream(), mr); } std::unique_ptr is_leap_year(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_leap_year(column, cudf::default_stream_value, mr); + return detail::is_leap_year(column, cudf::get_default_stream(), mr); } std::unique_ptr days_in_month(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::days_in_month(column, cudf::default_stream_value, mr); + return detail::days_in_month(column, cudf::get_default_stream(), mr); } std::unique_ptr extract_quarter(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_quarter(column, cudf::default_stream_value, mr); + return detail::extract_quarter(column, cudf::get_default_stream(), mr); } } // namespace datetime diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu index 3dea491b6e4..0c4e20aa97f 100644 --- a/cpp/src/dictionary/add_keys.cu +++ b/cpp/src/dictionary/add_keys.cu @@ -132,7 +132,7 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::add_keys(dictionary_column, keys, cudf::default_stream_value, mr); + return detail::add_keys(dictionary_column, keys, cudf::get_default_stream(), mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/decode.cu b/cpp/src/dictionary/decode.cu index 22e2ee578a0..01411d06b62 100644 --- a/cpp/src/dictionary/decode.cu +++ b/cpp/src/dictionary/decode.cu @@ -68,7 +68,7 @@ std::unique_ptr 
decode(dictionary_column_view const& source, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::decode(source, cudf::default_stream_value, mr); + return detail::decode(source, cudf::get_default_stream(), mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/encode.cu b/cpp/src/dictionary/encode.cu index 4e8f992b633..fe8e777b694 100644 --- a/cpp/src/dictionary/encode.cu +++ b/cpp/src/dictionary/encode.cu @@ -92,7 +92,7 @@ std::unique_ptr encode(column_view const& input_column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::encode(input_column, indices_type, cudf::default_stream_value, mr); + return detail::encode(input_column, indices_type, cudf::get_default_stream(), mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index 4506ea98ca4..4f17fac3129 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -59,7 +59,7 @@ template std::unique_ptr remove_keys_fn( dictionary_column_view const& dictionary_column, KeysKeeper keys_to_keep_fn, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto const keys_view = dictionary_column.keys(); @@ -151,7 +151,7 @@ std::unique_ptr remove_keys_fn( std::unique_ptr remove_keys( dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls"); @@ -168,7 +168,7 @@ std::unique_ptr remove_keys( std::unique_ptr remove_unused_keys( dictionary_column_view const& dictionary_column, - rmm::cuda_stream_view stream = 
cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { // locate the keys to remove @@ -200,14 +200,14 @@ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_col rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::remove_keys(dictionary_column, keys_to_remove, cudf::default_stream_value, mr); + return detail::remove_keys(dictionary_column, keys_to_remove, cudf::get_default_stream(), mr); } std::unique_ptr remove_unused_keys(dictionary_column_view const& dictionary_column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::remove_unused_keys(dictionary_column, cudf::default_stream_value, mr); + return detail::remove_unused_keys(dictionary_column, cudf::get_default_stream(), mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index 3936f7470e5..8e97a387780 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -79,7 +79,7 @@ struct find_index_fn { using ScalarType = cudf::scalar_type_t; auto find_key = static_cast(key).value(stream); auto keys_view = column_device_view::create(input.keys(), stream); - auto iter = thrust::equal_range(rmm::exec_policy(cudf::default_stream_value), + auto iter = thrust::equal_range(rmm::exec_policy(cudf::get_default_stream()), keys_view->begin(), keys_view->end(), find_key); @@ -179,7 +179,7 @@ std::unique_ptr get_index(dictionary_column_view const& dictionary, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::get_index(dictionary, key, cudf::default_stream_value, mr); + return detail::get_index(dictionary, key, cudf::get_default_stream(), mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 216f00c90e1..db0c4937582 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ 
-245,14 +245,14 @@ std::unique_ptr set_keys(dictionary_column_view const& dictionary_column rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::set_keys(dictionary_column, keys, cudf::default_stream_value, mr); + return detail::set_keys(dictionary_column, keys, cudf::get_default_stream(), mr); } std::vector> match_dictionaries( cudf::host_span input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::match_dictionaries(input, cudf::default_stream_value, mr); + return detail::match_dictionaries(input, cudf::get_default_stream(), mr); } } // namespace dictionary diff --git a/cpp/src/filling/calendrical_month_sequence.cu b/cpp/src/filling/calendrical_month_sequence.cu index d4b3e209c4a..f45634a615e 100644 --- a/cpp/src/filling/calendrical_month_sequence.cu +++ b/cpp/src/filling/calendrical_month_sequence.cu @@ -43,7 +43,7 @@ std::unique_ptr calendrical_month_sequence(size_type size, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::calendrical_month_sequence(size, init, months, cudf::default_stream_value, mr); + return detail::calendrical_month_sequence(size, init, months, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index 2abb0cf9795..290fff33cf6 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -248,7 +248,7 @@ void fill_in_place(mutable_column_view& destination, scalar const& value) { CUDF_FUNC_RANGE(); - return detail::fill_in_place(destination, begin, end, value, cudf::default_stream_value); + return detail::fill_in_place(destination, begin, end, value, cudf::get_default_stream()); } std::unique_ptr fill(column_view const& input, @@ -258,7 +258,7 @@ std::unique_ptr fill(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::fill(input, begin, end, value, cudf::default_stream_value, mr); + return detail::fill(input, begin, end, value, 
cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/filling/repeat.cu b/cpp/src/filling/repeat.cu index b2587e67350..8d86a9d9827 100644 --- a/cpp/src/filling/repeat.cu +++ b/cpp/src/filling/repeat.cu @@ -157,7 +157,7 @@ std::unique_ptr
repeat(table_view const& input_table, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat(input_table, count, cudf::default_stream_value, mr); + return detail::repeat(input_table, count, cudf::get_default_stream(), mr); } std::unique_ptr
repeat(table_view const& input_table, @@ -165,7 +165,7 @@ std::unique_ptr
repeat(table_view const& input_table, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat(input_table, count, cudf::default_stream_value, mr); + return detail::repeat(input_table, count, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index a2ae3b9e70c..284e7c46347 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -154,7 +154,7 @@ std::unique_ptr sequence(size_type size, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sequence(size, init, step, cudf::default_stream_value, mr); + return detail::sequence(size, init, step, cudf::get_default_stream(), mr); } std::unique_ptr sequence(size_type size, @@ -162,7 +162,7 @@ std::unique_ptr sequence(size_type size, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sequence(size, init, cudf::default_stream_value, mr); + return detail::sequence(size, init, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index cd54e921a4c..dde0037a8c3 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -196,7 +196,7 @@ std::pair, std::vector> groupby::aggr if (_keys.num_rows() == 0) { return std::pair(empty_like(_keys), empty_results(requests)); } - return dispatch_aggregation(requests, cudf::default_stream_value, mr); + return dispatch_aggregation(requests, cudf::get_default_stream(), mr); } // Compute scan requests @@ -214,13 +214,13 @@ std::pair, std::vector> groupby::scan if (_keys.num_rows() == 0) { return std::pair(empty_like(_keys), empty_results(requests)); } - return sort_scan(requests, cudf::default_stream_value, mr); + return sort_scan(requests, cudf::get_default_stream(), mr); } groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - auto const stream = cudf::default_stream_value; + auto const 
stream = cudf::get_default_stream(); auto grouped_keys = helper().sorted_keys(stream, mr); auto const& group_offsets = helper().group_offsets(stream); @@ -252,7 +252,7 @@ std::pair, std::unique_ptr
> groupby::replace_nulls "Size mismatch between num_columns and replace_policies."); if (values.is_empty()) { return std::pair(empty_like(_keys), empty_like(values)); } - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto const& group_labels = helper().group_labels(stream); std::vector> results; @@ -298,7 +298,7 @@ std::pair, std::unique_ptr
> groupby::shift( [&](auto i) { return values.column(i).type() == fill_values[i].get().type(); }), "values and fill_value should have the same type."); - auto stream = cudf::default_stream_value; + auto stream = cudf::get_default_stream(); std::vector> results; auto const& group_offsets = helper().group_offsets(stream); std::transform( diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh index c2081c596a1..a268e2ef778 100644 --- a/cpp/src/hash/concurrent_unordered_map.cuh +++ b/cpp/src/hash/concurrent_unordered_map.cuh @@ -159,7 +159,7 @@ class concurrent_unordered_map { * storage */ static auto create(size_type capacity, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), const mapped_type unused_element = std::numeric_limits::max(), const key_type unused_key = std::numeric_limits::max(), const Hasher& hash_function = hasher(), @@ -422,7 +422,7 @@ class concurrent_unordered_map { } void assign_async(const concurrent_unordered_map& other, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) { if (other.m_capacity <= m_capacity) { m_capacity = other.m_capacity; @@ -440,7 +440,7 @@ class concurrent_unordered_map { stream.value())); } - void clear_async(rmm::cuda_stream_view stream = cudf::default_stream_value) + void clear_async(rmm::cuda_stream_view stream = cudf::get_default_stream()) { constexpr int block_size = 128; init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>( @@ -455,7 +455,7 @@ class concurrent_unordered_map { } } - void prefetch(const int dev_id, rmm::cuda_stream_view stream = cudf::default_stream_value) + void prefetch(const int dev_id, rmm::cuda_stream_view stream = cudf::get_default_stream()) { cudaPointerAttributes hashtbl_values_ptr_attributes; cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, 
m_hashtbl_values); @@ -475,7 +475,7 @@ class concurrent_unordered_map { * * @param stream CUDA stream used for device memory operations and kernel launches. */ - void destroy(rmm::cuda_stream_view stream = cudf::default_stream_value) + void destroy(rmm::cuda_stream_view stream = cudf::get_default_stream()) { m_allocator.deallocate(m_hashtbl_values, m_capacity, stream); delete this; @@ -516,7 +516,7 @@ class concurrent_unordered_map { const Hasher& hash_function, const Equality& equal, const allocator_type& allocator, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) : m_hf(hash_function), m_equal(equal), m_allocator(allocator), diff --git a/cpp/src/hash/hash_allocator.cuh b/cpp/src/hash/hash_allocator.cuh index 2da0a4fb4bd..b3d2556d392 100644 --- a/cpp/src/hash/hash_allocator.cuh +++ b/cpp/src/hash/hash_allocator.cuh @@ -38,14 +38,14 @@ struct managed_allocator { { } - T* allocate(std::size_t n, rmm::cuda_stream_view stream = cudf::default_stream_value) const + T* allocate(std::size_t n, rmm::cuda_stream_view stream = cudf::get_default_stream()) const { return static_cast(mr->allocate(n * sizeof(T), stream)); } void deallocate(T* p, std::size_t n, - rmm::cuda_stream_view stream = cudf::default_stream_value) const + rmm::cuda_stream_view stream = cudf::get_default_stream()) const { mr->deallocate(p, n * sizeof(T), stream); } @@ -74,14 +74,14 @@ struct default_allocator { { } - T* allocate(std::size_t n, rmm::cuda_stream_view stream = cudf::default_stream_value) const + T* allocate(std::size_t n, rmm::cuda_stream_view stream = cudf::get_default_stream()) const { return static_cast(mr->allocate(n * sizeof(T), stream)); } void deallocate(T* p, std::size_t n, - rmm::cuda_stream_view stream = cudf::default_stream_value) const + rmm::cuda_stream_view stream = cudf::get_default_stream()) const { mr->deallocate(p, n * sizeof(T), stream); } diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu 
index e5fac1e7c2c..150017d9117 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -74,7 +74,7 @@ std::unique_ptr hash(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::hash(input, hash_function, seed, cudf::default_stream_value, mr); + return detail::hash(input, hash_function, seed, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp index 7b300924dd5..58afc8e9015 100644 --- a/cpp/src/interop/dlpack.cpp +++ b/cpp/src/interop/dlpack.cpp @@ -299,13 +299,13 @@ std::unique_ptr
from_dlpack(DLManagedTensor const* managed_tensor, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_dlpack(managed_tensor, cudf::default_stream_value, mr); + return detail::from_dlpack(managed_tensor, cudf::get_default_stream(), mr); } DLManagedTensor* to_dlpack(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_dlpack(input, cudf::default_stream_value, mr); + return detail::to_dlpack(input, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu index 86ea6f4427e..2d4501ec9f7 100644 --- a/cpp/src/interop/from_arrow.cu +++ b/cpp/src/interop/from_arrow.cu @@ -450,7 +450,7 @@ std::unique_ptr
from_arrow(arrow::Table const& input_table, { CUDF_FUNC_RANGE(); - return detail::from_arrow(input_table, cudf::default_stream_value, mr); + return detail::from_arrow(input_table, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index eeb27c2ac05..fb203e6c3c1 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -154,7 +154,7 @@ std::shared_ptr dispatch_to_arrow::operator()( auto count = thrust::make_counting_iterator(0); - thrust::for_each(rmm::exec_policy(cudf::default_stream_value), + thrust::for_each(rmm::exec_policy(cudf::get_default_stream()), count, count + input.size(), [in = input.begin(), out = buf.data()] __device__(auto in_idx) { @@ -416,7 +416,7 @@ std::shared_ptr to_arrow(table_view input, arrow::MemoryPool* ar_mr) { CUDF_FUNC_RANGE(); - return detail::to_arrow(input, metadata, cudf::default_stream_value, ar_mr); + return detail::to_arrow(input, metadata, cudf::get_default_stream(), ar_mr); } } // namespace cudf diff --git a/cpp/src/io/fst/logical_stack.cuh b/cpp/src/io/fst/logical_stack.cuh index 9502922a379..b23a3d756df 100644 --- a/cpp/src/io/fst/logical_stack.cuh +++ b/cpp/src/io/fst/logical_stack.cuh @@ -267,7 +267,7 @@ void sparse_stack_op_to_top_of_stack(StackSymbolItT d_symbols, StackSymbolT const empty_stack_symbol, StackSymbolT const read_symbol, std::size_t const num_symbols_out, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) { rmm::device_buffer temp_storage{}; diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index aabaa941daf..968d3827bfe 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -156,7 +156,7 @@ table_with_metadata read_avro(avro_reader_options const& options, CUDF_EXPECTS(datasources.size() == 1, "Only a single source is currently supported."); - return avro::read_avro(std::move(datasources[0]), options, 
cudf::default_stream_value, mr); + return avro::read_avro(std::move(datasources[0]), options, cudf::get_default_stream(), mr); } compression_type infer_compression_type(compression_type compression, source_info const& info) @@ -198,7 +198,7 @@ table_with_metadata read_json(json_reader_options options, rmm::mr::device_memor options.get_byte_range_offset(), options.get_byte_range_size_with_padding()); - return detail::json::read_json(datasources, options, cudf::default_stream_value, mr); + return detail::json::read_json(datasources, options, cudf::get_default_stream(), mr); } table_with_metadata read_csv(csv_reader_options options, rmm::mr::device_memory_resource* mr) @@ -216,7 +216,7 @@ table_with_metadata read_csv(csv_reader_options options, rmm::mr::device_memory_ return cudf::io::detail::csv::read_csv( // std::move(datasources[0]), options, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -233,7 +233,7 @@ void write_csv(csv_writer_options const& options, rmm::mr::device_memory_resourc options.get_table(), options.get_metadata(), options, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -241,7 +241,7 @@ namespace detail_orc = cudf::io::detail::orc; raw_orc_statistics read_raw_orc_statistics(source_info const& src_info) { - auto stream = cudf::default_stream_value; + auto stream = cudf::get_default_stream(); // Get source to read statistics from std::unique_ptr source; if (src_info.type() == io_type::FILEPATH) { @@ -347,7 +347,7 @@ table_with_metadata read_orc(orc_reader_options const& options, rmm::mr::device_ auto datasources = make_datasources(options.get_source()); auto reader = std::make_unique( - std::move(datasources), options, cudf::default_stream_value, mr); + std::move(datasources), options, cudf::get_default_stream(), mr); return reader->read(options); } @@ -365,7 +365,7 @@ void write_orc(orc_writer_options const& options, rmm::mr::device_memory_resourc CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not 
supported for ORC writing"); auto writer = std::make_unique( - std::move(sinks[0]), options, io_detail::SingleWriteMode::YES, cudf::default_stream_value, mr); + std::move(sinks[0]), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream(), mr); writer->write(options.get_table()); } @@ -382,7 +382,7 @@ orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing"); writer = std::make_unique( - std::move(sinks[0]), options, io_detail::SingleWriteMode::NO, cudf::default_stream_value, mr); + std::move(sinks[0]), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream(), mr); } /** @@ -417,7 +417,7 @@ table_with_metadata read_parquet(parquet_reader_options const& options, auto datasources = make_datasources(options.get_source()); auto reader = std::make_unique( - std::move(datasources), options, cudf::default_stream_value, mr); + std::move(datasources), options, cudf::get_default_stream(), mr); return reader->read(options); } @@ -458,7 +458,7 @@ std::unique_ptr> write_parquet(parquet_writer_options const auto sinks = make_datasinks(options.get_sink()); auto writer = std::make_unique( - std::move(sinks), options, io_detail::SingleWriteMode::YES, cudf::default_stream_value, mr); + std::move(sinks), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream(), mr); writer->write(options.get_table(), options.get_partitions()); @@ -476,7 +476,7 @@ parquet_chunked_writer::parquet_chunked_writer(chunked_parquet_writer_options co auto sinks = make_datasinks(options.get_sink()); writer = std::make_unique( - std::move(sinks), options, io_detail::SingleWriteMode::NO, cudf::default_stream_value, mr); + std::move(sinks), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream(), mr); } /** diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 872e742a5af..36329db3e88 100644 --- a/cpp/src/io/json/json_column.cu +++ 
b/cpp/src/io/json/json_column.cu @@ -73,7 +73,7 @@ auto print_vec = [](auto const& cpu, auto const name, auto converter) { void print_tree(host_span input, tree_meta_t const& d_gpu_tree, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) { print_vec(cudf::detail::make_std_vector_async(d_gpu_tree.node_categories, stream), "node_categories", @@ -278,11 +278,11 @@ std::vector copy_strings_to_host(device_span input, auto const scv = cudf::strings_column_view(col); auto const h_chars = cudf::detail::make_std_vector_sync( cudf::device_span(scv.chars().data(), scv.chars().size()), - cudf::default_stream_value); + cudf::get_default_stream()); auto const h_offsets = cudf::detail::make_std_vector_sync( cudf::device_span( scv.offsets().data() + scv.offset(), scv.size() + 1), - cudf::default_stream_value); + cudf::get_default_stream()); // build std::string vector from chars and offsets std::vector host_data; diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index cf041b02a20..8be298c6a8b 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -162,8 +162,14 @@ std::pair, rmm::device_uvector> stable_s thrust::copy(rmm::exec_policy(stream), keys.begin(), keys.end(), keys_buffer1.begin()); thrust::sequence(rmm::exec_policy(stream), order_buffer1.begin(), order_buffer1.end()); - cub::DeviceRadixSort::SortPairs( - d_temp_storage.data(), temp_storage_bytes, keys_buffer, order_buffer, keys.size()); + cub::DeviceRadixSort::SortPairs(d_temp_storage.data(), + temp_storage_bytes, + keys_buffer, + order_buffer, + keys.size(), + 0, + sizeof(KeyType) * 8, + stream.value()); return std::pair{keys_buffer.Current() == keys_buffer1.data() ? 
std::move(keys_buffer1) : std::move(keys_buffer2), diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 29a29a1f9d5..0c35930c2e4 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -1540,7 +1540,7 @@ auto parsing_options(cudf::io::json_reader_options const& options) { auto parse_opts = cudf::io::parse_options{',', '\n', '\"', '.'}; - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); parse_opts.dayfirst = options.is_enabled_dayfirst(); parse_opts.keepquotes = options.is_enabled_keep_quotes(); parse_opts.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); diff --git a/cpp/src/io/orc/timezone.cuh b/cpp/src/io/orc/timezone.cuh index 2eb20af7898..9b98aa13bac 100644 --- a/cpp/src/io/orc/timezone.cuh +++ b/cpp/src/io/orc/timezone.cuh @@ -115,7 +115,7 @@ class timezone_table { public: // Safe to use the default stream, device_uvectors will not change after they are created empty - timezone_table() : ttimes{0, cudf::default_stream_value}, offsets{0, cudf::default_stream_value} + timezone_table() : ttimes{0, cudf::get_default_stream()}, offsets{0, cudf::get_default_stream()} { } timezone_table(int32_t gmt_offset, diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index e4b6bad614d..7f1f6688bec 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -170,7 +170,7 @@ class bgzip_data_chunk_reader : public data_chunk_reader { h_compressed_offsets.resize(1); h_decompressed_offsets.resize(1); // shrinking doesn't allocate/free, so we don't need to worry about streams - auto stream = cudf::default_stream_value; + auto stream = cudf::get_default_stream(); d_compressed_blocks.resize(0, stream); d_decompressed_blocks.resize(0, stream); d_compressed_offsets.resize(0, stream); @@ -256,8 +256,8 @@ class bgzip_data_chunk_reader : public 
data_chunk_reader { uint64_t virtual_begin, uint64_t virtual_end) : _data_stream(std::move(input_stream)), - _prev_blocks{cudf::default_stream_value}, // here we can use the default stream because - _curr_blocks{cudf::default_stream_value}, // we only initialize empty device_uvectors + _prev_blocks{cudf::get_default_stream()}, // here we can use the default stream because + _curr_blocks{cudf::get_default_stream()}, // we only initialize empty device_uvectors _local_end{virtual_end & 0xFFFFu}, _compressed_pos{virtual_begin >> 16}, _compressed_end{virtual_end >> 16} diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index 136eb8d24c6..29cec0e8c3f 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -452,19 +452,19 @@ class output_builder { * @param actual_size The number of elements that were written to the result of the previous * `next_output` call. */ - void advance_output(size_type actual_size) + void advance_output(size_type actual_size, rmm::cuda_stream_view stream) { CUDF_EXPECTS(actual_size <= _max_write_size, "Internal error"); if (_chunks.size() < 2) { auto const new_size = _chunks.back().size() + actual_size; - inplace_resize(_chunks.back(), new_size); + inplace_resize(_chunks.back(), new_size, stream); } else { auto& tail = _chunks.back(); auto& prev = _chunks.rbegin()[1]; auto const prev_advance = std::min(actual_size, prev.capacity() - prev.size()); auto const tail_advance = actual_size - prev_advance; - inplace_resize(prev, prev.size() + prev_advance); - inplace_resize(tail, tail.size() + tail_advance); + inplace_resize(prev, prev.size() + prev_advance, stream); + inplace_resize(tail, tail.size() + tail_advance, stream); } _size += actual_size; } @@ -522,10 +522,12 @@ class output_builder { * @param vector The vector * @param new_size The new size. 
Must be smaller than the vector's capacity */ - static void inplace_resize(rmm::device_uvector& vector, size_type new_size) + static void inplace_resize(rmm::device_uvector& vector, + size_type new_size, + rmm::cuda_stream_view stream) { CUDF_EXPECTS(new_size <= vector.capacity(), "Internal error"); - vector.resize(new_size, rmm::cuda_stream_view{}); + vector.resize(new_size, stream); } /** @@ -712,7 +714,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source found_last_offset = true; return end_loc + 1; }(); - row_offset_storage.advance_output(new_offsets); + row_offset_storage.advance_output(new_offsets, scan_stream); // determine if we found the first or last field offset for the byte range if (new_offsets > 0 and not first_row_offset) { first_row_offset = row_offset_storage.front_element(scan_stream); @@ -729,7 +731,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source auto const split = begin + std::min(output_size, char_output.head().size()); thrust::copy(rmm::exec_policy_nosync(scan_stream), begin, split, char_output.head().begin()); thrust::copy(rmm::exec_policy_nosync(scan_stream), split, end, char_output.tail().begin()); - char_storage.advance_output(output_size); + char_storage.advance_output(output_size, scan_stream); } cudaEventRecord(last_launch_event, scan_stream.value()); @@ -782,7 +784,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source std::optional byte_range, rmm::mr::device_memory_resource* mr) { - auto stream = cudf::default_stream_value; + auto stream = cudf::get_default_stream(); auto stream_pool = rmm::cuda_stream_pool(2); auto result = detail::multibyte_split( diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index b5e59871119..6e34d862ed4 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -40,7 +40,7 @@ class hostdevice_vector { public: using value_type = T; - hostdevice_vector() : 
hostdevice_vector(0, cudf::default_stream_value) {} + hostdevice_vector() : hostdevice_vector(0, cudf::get_default_stream()) {} explicit hostdevice_vector(size_t size, rmm::cuda_stream_view stream) : hostdevice_vector(size, size, stream) diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index f0b66559799..cf1476d8bcc 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -298,7 +298,7 @@ conditional_inner_join(table_view const& left, binary_predicate, detail::join_kind::INNER_JOIN, output_size, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -316,7 +316,7 @@ conditional_left_join(table_view const& left, binary_predicate, detail::join_kind::LEFT_JOIN, output_size, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -333,7 +333,7 @@ conditional_full_join(table_view const& left, binary_predicate, detail::join_kind::FULL_JOIN, {}, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -350,7 +350,7 @@ std::unique_ptr> conditional_left_semi_join( binary_predicate, detail::join_kind::LEFT_SEMI_JOIN, output_size, - cudf::default_stream_value, + cudf::get_default_stream(), mr) .first); } @@ -368,7 +368,7 @@ std::unique_ptr> conditional_left_anti_join( binary_predicate, detail::join_kind::LEFT_ANTI_JOIN, output_size, - cudf::default_stream_value, + cudf::get_default_stream(), mr) .first); } @@ -380,7 +380,7 @@ std::size_t conditional_inner_join_size(table_view const& left, { CUDF_FUNC_RANGE(); return detail::compute_conditional_join_output_size( - left, right, binary_predicate, detail::join_kind::INNER_JOIN, cudf::default_stream_value, mr); + left, right, binary_predicate, detail::join_kind::INNER_JOIN, cudf::get_default_stream(), mr); } std::size_t conditional_left_join_size(table_view const& left, @@ -390,7 +390,7 @@ std::size_t conditional_left_join_size(table_view const& left, { CUDF_FUNC_RANGE(); return detail::compute_conditional_join_output_size( - 
left, right, binary_predicate, detail::join_kind::LEFT_JOIN, cudf::default_stream_value, mr); + left, right, binary_predicate, detail::join_kind::LEFT_JOIN, cudf::get_default_stream(), mr); } std::size_t conditional_left_semi_join_size(table_view const& left, @@ -403,7 +403,7 @@ std::size_t conditional_left_semi_join_size(table_view const& left, right, binary_predicate, detail::join_kind::LEFT_SEMI_JOIN, - cudf::default_stream_value, + cudf::get_default_stream(), mr)); } @@ -417,7 +417,7 @@ std::size_t conditional_left_anti_join_size(table_view const& left, right, binary_predicate, detail::join_kind::LEFT_ANTI_JOIN, - cudf::default_stream_value, + cudf::get_default_stream(), mr)); } diff --git a/cpp/src/join/conditional_join.hpp b/cpp/src/join/conditional_join.hpp index 6de2664b5f6..23ecfebc52a 100644 --- a/cpp/src/join/conditional_join.hpp +++ b/cpp/src/join/conditional_join.hpp @@ -48,7 +48,7 @@ conditional_join(table_view const& left, ast::expression const& binary_predicate, join_kind JoinKind, std::optional output_size = {}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -68,7 +68,7 @@ std::size_t compute_conditional_join_output_size( table_view const& right, ast::expression const& binary_predicate, join_kind JoinKind, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/src/join/cross_join.cu b/cpp/src/join/cross_join.cu index 3eb9f1b1198..7358726d69d 100644 --- a/cpp/src/join/cross_join.cu +++ b/cpp/src/join/cross_join.cu @@ -78,7 +78,7 @@ std::unique_ptr cross_join(cudf::table_view const& left, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::cross_join(left, right, 
cudf::default_stream_value, mr); + return detail::cross_join(left, right, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu index bb8fc07c2d7..dbc543f4dcd 100644 --- a/cpp/src/join/join.cu +++ b/cpp/src/join/join.cu @@ -113,7 +113,7 @@ inner_join(table_view const& left, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::inner_join(left, right, compare_nulls, cudf::default_stream_value, mr); + return detail::inner_join(left, right, compare_nulls, cudf::get_default_stream(), mr); } std::pair>, @@ -124,7 +124,7 @@ left_join(table_view const& left, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::left_join(left, right, compare_nulls, cudf::default_stream_value, mr); + return detail::left_join(left, right, compare_nulls, cudf::get_default_stream(), mr); } std::pair>, @@ -135,7 +135,7 @@ full_join(table_view const& left, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::full_join(left, right, compare_nulls, cudf::default_stream_value, mr); + return detail::full_join(left, right, compare_nulls, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index ec2dacaca5b..4cedfca218a 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -458,7 +458,7 @@ mixed_inner_join( compare_nulls, detail::join_kind::INNER_JOIN, output_size_data, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -479,7 +479,7 @@ std::pair>> mixed_in binary_predicate, compare_nulls, detail::join_kind::INNER_JOIN, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -504,7 +504,7 @@ mixed_left_join( compare_nulls, detail::join_kind::LEFT_JOIN, output_size_data, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -525,7 +525,7 @@ std::pair>> mixed_le binary_predicate, compare_nulls, detail::join_kind::LEFT_JOIN, - 
cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -550,7 +550,7 @@ mixed_full_join( compare_nulls, detail::join_kind::FULL_JOIN, output_size_data, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index a9897f0f40e..6ebf3702256 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -503,7 +503,7 @@ std::pair>> mixed_le binary_predicate, compare_nulls, detail::join_kind::LEFT_SEMI_JOIN, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -526,7 +526,7 @@ std::unique_ptr> mixed_left_semi_join( compare_nulls, detail::join_kind::LEFT_SEMI_JOIN, output_size_data, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -547,7 +547,7 @@ std::pair>> mixed_le binary_predicate, compare_nulls, detail::join_kind::LEFT_ANTI_JOIN, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -570,7 +570,7 @@ std::unique_ptr> mixed_left_anti_join( compare_nulls, detail::join_kind::LEFT_ANTI_JOIN, output_size_data, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index 87bac002f53..cc523b2ac7f 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -95,7 +95,7 @@ std::unique_ptr> left_semi_join( { CUDF_FUNC_RANGE(); return detail::left_semi_anti_join( - detail::join_kind::LEFT_SEMI_JOIN, left, right, compare_nulls, cudf::default_stream_value, mr); + detail::join_kind::LEFT_SEMI_JOIN, left, right, compare_nulls, cudf::get_default_stream(), mr); } std::unique_ptr> left_anti_join( @@ -106,7 +106,7 @@ std::unique_ptr> left_anti_join( { CUDF_FUNC_RANGE(); return detail::left_semi_anti_join( - detail::join_kind::LEFT_ANTI_JOIN, left, right, compare_nulls, cudf::default_stream_value, mr); + detail::join_kind::LEFT_ANTI_JOIN, left, right, compare_nulls, cudf::get_default_stream(), mr); } } // namespace 
cudf diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu index f5e35fc842f..4c3469c679e 100644 --- a/cpp/src/labeling/label_bins.cu +++ b/cpp/src/labeling/label_bins.cu @@ -244,7 +244,7 @@ std::unique_ptr label_bins(column_view const& input, left_inclusive, right_edges, right_inclusive, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu index c107bad018d..496d9ee670a 100644 --- a/cpp/src/lists/combine/concatenate_list_elements.cu +++ b/cpp/src/lists/combine/concatenate_list_elements.cu @@ -287,7 +287,7 @@ std::unique_ptr concatenate_list_elements(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate_list_elements(input, null_policy, cudf::default_stream_value, mr); + return detail::concatenate_list_elements(input, null_policy, cudf::get_default_stream(), mr); } } // namespace lists diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu index 4364470407f..0a3ff333d6c 100644 --- a/cpp/src/lists/combine/concatenate_rows.cu +++ b/cpp/src/lists/combine/concatenate_rows.cu @@ -307,7 +307,7 @@ std::unique_ptr concatenate_rows(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate_rows(input, null_policy, cudf::default_stream_value, mr); + return detail::concatenate_rows(input, null_policy, cudf::get_default_stream(), mr); } } // namespace lists diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 3a52426c16a..0142e736fd0 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -495,7 +495,7 @@ std::unique_ptr contains(lists_column_view const& lists, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contains(lists, search_key, cudf::default_stream_value, mr); + return 
detail::contains(lists, search_key, cudf::get_default_stream(), mr); } std::unique_ptr contains(lists_column_view const& lists, @@ -503,14 +503,14 @@ std::unique_ptr contains(lists_column_view const& lists, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contains(lists, search_keys, cudf::default_stream_value, mr); + return detail::contains(lists, search_keys, cudf::get_default_stream(), mr); } std::unique_ptr contains_nulls(lists_column_view const& lists, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contains_nulls(lists, cudf::default_stream_value, mr); + return detail::contains_nulls(lists, cudf::get_default_stream(), mr); } std::unique_ptr index_of(lists_column_view const& lists, @@ -519,7 +519,7 @@ std::unique_ptr index_of(lists_column_view const& lists, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::index_of(lists, search_key, find_option, cudf::default_stream_value, mr); + return detail::index_of(lists, search_key, find_option, cudf::get_default_stream(), mr); } std::unique_ptr index_of(lists_column_view const& lists, @@ -528,7 +528,7 @@ std::unique_ptr index_of(lists_column_view const& lists, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::index_of(lists, search_keys, find_option, cudf::default_stream_value, mr); + return detail::index_of(lists, search_keys, find_option, cudf::get_default_stream(), mr); } } // namespace cudf::lists diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index db37a82ba8e..2c12e09bcd9 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -120,7 +120,7 @@ std::unique_ptr segmented_gather(lists_column_view const& source_column, { CUDF_FUNC_RANGE(); return detail::segmented_gather( - source_column, gather_map_list, bounds_policy, cudf::default_stream_value, mr); + source_column, gather_map_list, bounds_policy, 
cudf::get_default_stream(), mr); } } // namespace lists diff --git a/cpp/src/lists/count_elements.cu b/cpp/src/lists/count_elements.cu index 68748dfde3f..f8e7b4c6126 100644 --- a/cpp/src/lists/count_elements.cu +++ b/cpp/src/lists/count_elements.cu @@ -76,7 +76,7 @@ std::unique_ptr count_elements(lists_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::count_elements(input, cudf::default_stream_value, mr); + return detail::count_elements(input, cudf::get_default_stream(), mr); } } // namespace lists diff --git a/cpp/src/lists/explode.cu b/cpp/src/lists/explode.cu index 873b0fe408d..4db3254f201 100644 --- a/cpp/src/lists/explode.cu +++ b/cpp/src/lists/explode.cu @@ -299,7 +299,7 @@ std::unique_ptr
explode(table_view const& input_table, CUDF_FUNC_RANGE(); CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, "Unsupported non-list column"); - return detail::explode(input_table, explode_column_idx, cudf::default_stream_value, mr); + return detail::explode(input_table, explode_column_idx, cudf::get_default_stream(), mr); } /** @@ -312,7 +312,7 @@ std::unique_ptr
explode_position(table_view const& input_table, CUDF_FUNC_RANGE(); CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, "Unsupported non-list column"); - return detail::explode_position(input_table, explode_column_idx, cudf::default_stream_value, mr); + return detail::explode_position(input_table, explode_column_idx, cudf::get_default_stream(), mr); } /** @@ -326,7 +326,7 @@ std::unique_ptr
explode_outer(table_view const& input_table, CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, "Unsupported non-list column"); return detail::explode_outer( - input_table, explode_column_idx, false, cudf::default_stream_value, mr); + input_table, explode_column_idx, false, cudf::get_default_stream(), mr); } /** @@ -341,7 +341,7 @@ std::unique_ptr
explode_outer_position(table_view const& input_table, CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, "Unsupported non-list column"); return detail::explode_outer( - input_table, explode_column_idx, true, cudf::default_stream_value, mr); + input_table, explode_column_idx, true, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/lists/extract.cu b/cpp/src/lists/extract.cu index bc04bad7c0c..d1807c2c5ac 100644 --- a/cpp/src/lists/extract.cu +++ b/cpp/src/lists/extract.cu @@ -171,7 +171,7 @@ std::unique_ptr extract_list_element(lists_column_view const& lists_colu rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_list_element(lists_column, index, cudf::default_stream_value, mr); + return detail::extract_list_element(lists_column, index, cudf::get_default_stream(), mr); } /** @@ -186,7 +186,7 @@ std::unique_ptr extract_list_element(lists_column_view const& lists_colu CUDF_FUNC_RANGE(); CUDF_EXPECTS(indices.size() == lists_column.size(), "Index column must have as many elements as lists column."); - return detail::extract_list_element(lists_column, indices, cudf::default_stream_value, mr); + return detail::extract_list_element(lists_column, indices, cudf::get_default_stream(), mr); } } // namespace lists diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index ea35977e8e4..1a4e3ea66ed 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -328,7 +328,7 @@ std::unique_ptr sort_lists(lists_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sort_lists(input, column_order, null_precedence, cudf::default_stream_value, mr); + return detail::sort_lists(input, column_order, null_precedence, cudf::get_default_stream(), mr); } std::unique_ptr stable_sort_lists(lists_column_view const& input, @@ -338,7 +338,7 @@ std::unique_ptr stable_sort_lists(lists_column_view const& input, { 
CUDF_FUNC_RANGE(); return detail::stable_sort_lists( - input, column_order, null_precedence, cudf::default_stream_value, mr); + input, column_order, null_precedence, cudf::get_default_stream(), mr); } } // namespace lists diff --git a/cpp/src/lists/sequences.cu b/cpp/src/lists/sequences.cu index 6c2b0b1a785..bb0e669339a 100644 --- a/cpp/src/lists/sequences.cu +++ b/cpp/src/lists/sequences.cu @@ -214,7 +214,7 @@ std::unique_ptr sequences(column_view const& starts, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sequences(starts, sizes, cudf::default_stream_value, mr); + return detail::sequences(starts, sizes, cudf::get_default_stream(), mr); } std::unique_ptr sequences(column_view const& starts, @@ -223,7 +223,7 @@ std::unique_ptr sequences(column_view const& starts, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sequences(starts, steps, sizes, cudf::default_stream_value, mr); + return detail::sequences(starts, steps, sizes, cudf::get_default_stream(), mr); } } // namespace cudf::lists diff --git a/cpp/src/lists/set_operations.cu b/cpp/src/lists/set_operations.cu index 00cdfcf7ff1..cc52478900a 100644 --- a/cpp/src/lists/set_operations.cu +++ b/cpp/src/lists/set_operations.cu @@ -267,7 +267,7 @@ std::unique_ptr have_overlap(lists_column_view const& lhs, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::have_overlap(lhs, rhs, nulls_equal, nans_equal, cudf::default_stream_value, mr); + return detail::have_overlap(lhs, rhs, nulls_equal, nans_equal, cudf::get_default_stream(), mr); } std::unique_ptr intersect_distinct(lists_column_view const& lhs, @@ -278,7 +278,7 @@ std::unique_ptr intersect_distinct(lists_column_view const& lhs, { CUDF_FUNC_RANGE(); return detail::intersect_distinct( - lhs, rhs, nulls_equal, nans_equal, cudf::default_stream_value, mr); + lhs, rhs, nulls_equal, nans_equal, cudf::get_default_stream(), mr); } std::unique_ptr union_distinct(lists_column_view const& lhs, @@ 
-288,7 +288,7 @@ std::unique_ptr union_distinct(lists_column_view const& lhs, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::union_distinct(lhs, rhs, nulls_equal, nans_equal, cudf::default_stream_value, mr); + return detail::union_distinct(lhs, rhs, nulls_equal, nans_equal, cudf::get_default_stream(), mr); } std::unique_ptr difference_distinct(lists_column_view const& lhs, @@ -299,7 +299,7 @@ std::unique_ptr difference_distinct(lists_column_view const& lhs, { CUDF_FUNC_RANGE(); return detail::difference_distinct( - lhs, rhs, nulls_equal, nans_equal, cudf::default_stream_value, mr); + lhs, rhs, nulls_equal, nans_equal, cudf::get_default_stream(), mr); } } // namespace cudf::lists diff --git a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu index c99486ca8b0..c1c17dc0688 100644 --- a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu +++ b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu @@ -104,7 +104,7 @@ std::unique_ptr apply_boolean_mask(lists_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::apply_boolean_mask(input, boolean_mask, cudf::default_stream_value, mr); + return detail::apply_boolean_mask(input, boolean_mask, cudf::get_default_stream(), mr); } } // namespace cudf::lists diff --git a/cpp/src/lists/stream_compaction/distinct.cu b/cpp/src/lists/stream_compaction/distinct.cu index c88209292de..d0e4557663e 100644 --- a/cpp/src/lists/stream_compaction/distinct.cu +++ b/cpp/src/lists/stream_compaction/distinct.cu @@ -78,7 +78,7 @@ std::unique_ptr distinct(lists_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::distinct(input, nulls_equal, nans_equal, cudf::default_stream_value, mr); + return detail::distinct(input, nulls_equal, nans_equal, cudf::get_default_stream(), mr); } } // namespace cudf::lists diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu 
index 91018d3f006..d9c573e8155 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -171,7 +171,7 @@ index_vector generate_merged_indices(table_view const& left_table, std::vector const& column_order, std::vector const& null_precedence, bool nullable = true, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) { const size_type left_size = left_table.num_rows(); const size_type right_size = right_table.num_rows(); @@ -540,7 +540,7 @@ std::unique_ptr merge(std::vector const& tables_to_merg { CUDF_FUNC_RANGE(); return detail::merge( - tables_to_merge, key_cols, column_order, null_precedence, cudf::default_stream_value, mr); + tables_to_merge, key_cols, column_order, null_precedence, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 296a9f40fbb..e4d366e7d01 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -797,7 +797,7 @@ std::pair, std::vector> partition( rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::partition(t, partition_map, num_partitions, cudf::default_stream_value, mr); + return detail::partition(t, partition_map, num_partitions, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu index 85bd31a20ea..990992cd8f2 100644 --- a/cpp/src/partitioning/round_robin.cu +++ b/cpp/src/partitioning/round_robin.cu @@ -152,7 +152,7 @@ std::pair, std::vector> round_robin_part table_view const& input, cudf::size_type num_partitions, cudf::size_type start_partition = 0, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto nrows = input.num_rows(); @@ -271,7 +271,7 @@ std::pair, 
std::vector> round_robi { CUDF_FUNC_RANGE(); return detail::round_robin_partition( - input, num_partitions, start_partition, cudf::default_stream_value, mr); + input, num_partitions, start_partition, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 1fe9809d922..1f1941529c9 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -189,7 +189,7 @@ std::unique_ptr quantile(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::quantile(input, q, interp, ordered_indices, exact, cudf::default_stream_value, mr); + return detail::quantile(input, q, interp, ordered_indices, exact, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/quantiles/quantiles.cu b/cpp/src/quantiles/quantiles.cu index c6957482f05..e3e19eaeec4 100644 --- a/cpp/src/quantiles/quantiles.cu +++ b/cpp/src/quantiles/quantiles.cu @@ -83,12 +83,12 @@ std::unique_ptr
quantiles(table_view const& input, thrust::make_counting_iterator(0), q, interp, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } else { auto sorted_idx = detail::sorted_order(input, column_order, null_precedence); return detail::quantiles( - input, sorted_idx->view().data(), q, interp, cudf::default_stream_value, mr); + input, sorted_idx->view().data(), q, interp, cudf::get_default_stream(), mr); } } @@ -109,7 +109,7 @@ std::unique_ptr
quantiles(table_view const& input, is_input_sorted, column_order, null_precedence, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } diff --git a/cpp/src/quantiles/tdigest/tdigest.cu b/cpp/src/quantiles/tdigest/tdigest.cu index a11d7ab6646..019809d5f68 100644 --- a/cpp/src/quantiles/tdigest/tdigest.cu +++ b/cpp/src/quantiles/tdigest/tdigest.cu @@ -407,7 +407,7 @@ std::unique_ptr percentile_approx(tdigest_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::percentile_approx(input, percentiles, cudf::default_stream_value, mr); + return detail::percentile_approx(input, percentiles, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu index e69942552ff..603e13c1894 100644 --- a/cpp/src/reductions/minmax.cu +++ b/cpp/src/reductions/minmax.cu @@ -277,7 +277,7 @@ std::pair, std::unique_ptr> minmax( const column_view& col, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::minmax(col, cudf::default_stream_value, mr); + return detail::minmax(col, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 523865e0df0..4166becbf4d 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -143,7 +143,7 @@ std::unique_ptr reduce( std::unique_ptr const& agg, data_type output_dtype, std::optional> init, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(!init.has_value() || col.type() == init.value().get().type(), @@ -186,7 +186,7 @@ std::unique_ptr reduce(column_view const& col, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::reduce(col, agg, output_dtype, std::nullopt, cudf::default_stream_value, mr); + return 
detail::reduce(col, agg, output_dtype, std::nullopt, cudf::get_default_stream(), mr); } std::unique_ptr reduce(column_view const& col, @@ -196,6 +196,6 @@ std::unique_ptr reduce(column_view const& col, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::reduce(col, agg, output_dtype, init, cudf::default_stream_value, mr); + return detail::reduce(col, agg, output_dtype, init, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/reductions/scan/scan.cpp b/cpp/src/reductions/scan/scan.cpp index 85c0f7ea13f..c0b787b3a1d 100644 --- a/cpp/src/reductions/scan/scan.cpp +++ b/cpp/src/reductions/scan/scan.cpp @@ -61,7 +61,7 @@ std::unique_ptr scan(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::scan(input, agg, inclusive, null_handling, cudf::default_stream_value, mr); + return detail::scan(input, agg, inclusive, null_handling, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/reductions/segmented_reductions.cpp b/cpp/src/reductions/segmented_reductions.cpp index d87644e7126..04a83217469 100644 --- a/cpp/src/reductions/segmented_reductions.cpp +++ b/cpp/src/reductions/segmented_reductions.cpp @@ -133,7 +133,7 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, output_dtype, null_handling, std::nullopt, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -152,7 +152,7 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, output_dtype, null_handling, init, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index f5e0ca3b3ef..24822cc6c65 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -391,7 +391,7 @@ std::unique_ptr clamp(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::clamp(input, lo, lo_replace, hi, hi_replace, cudf::default_stream_value, 
mr); + return detail::clamp(input, lo, lo_replace, hi, hi_replace, cudf::get_default_stream(), mr); } // clamp input at lo and hi @@ -401,6 +401,6 @@ std::unique_ptr clamp(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::clamp(input, lo, lo, hi, hi, cudf::default_stream_value, mr); + return detail::clamp(input, lo, lo, hi, hi, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/replace/nans.cu b/cpp/src/replace/nans.cu index 47776422adb..ce0d2d07b36 100644 --- a/cpp/src/replace/nans.cu +++ b/cpp/src/replace/nans.cu @@ -114,7 +114,7 @@ std::unique_ptr replace_nans(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace_nans(input, replacement, cudf::default_stream_value, mr); + return detail::replace_nans(input, replacement, cudf::get_default_stream(), mr); } std::unique_ptr replace_nans(column_view const& input, @@ -122,7 +122,7 @@ std::unique_ptr replace_nans(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace_nans(input, replacement, cudf::default_stream_value, mr); + return detail::replace_nans(input, replacement, cudf::get_default_stream(), mr); } } // namespace cudf @@ -224,7 +224,7 @@ std::unique_ptr normalize_nans_and_zeros(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::normalize_nans_and_zeros(input, cudf::default_stream_value, mr); + return detail::normalize_nans_and_zeros(input, cudf::get_default_stream(), mr); } /** @@ -240,7 +240,7 @@ std::unique_ptr normalize_nans_and_zeros(column_view const& input, void normalize_nans_and_zeros(mutable_column_view& in_out) { CUDF_FUNC_RANGE(); - detail::normalize_nans_and_zeros(in_out, cudf::default_stream_value); + detail::normalize_nans_and_zeros(in_out, cudf::get_default_stream()); } } // namespace cudf diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 
232392db0c6..d2d524ef9ba 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -453,7 +453,7 @@ std::unique_ptr replace_nulls(cudf::column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace_nulls(input, replacement, cudf::default_stream_value, mr); + return detail::replace_nulls(input, replacement, cudf::get_default_stream(), mr); } std::unique_ptr replace_nulls(cudf::column_view const& input, @@ -461,7 +461,7 @@ std::unique_ptr replace_nulls(cudf::column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace_nulls(input, replacement, cudf::default_stream_value, mr); + return detail::replace_nulls(input, replacement, cudf::get_default_stream(), mr); } std::unique_ptr replace_nulls(column_view const& input, @@ -469,7 +469,7 @@ std::unique_ptr replace_nulls(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace_nulls(input, replace_policy, cudf::default_stream_value, mr); + return detail::replace_nulls(input, replace_policy, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index b6048333bc9..2a675c00b48 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -531,6 +531,6 @@ std::unique_ptr find_and_replace_all(cudf::column_view const& inpu rmm::mr::device_memory_resource* mr) { return detail::find_and_replace_all( - input_col, values_to_replace, replacement_values, cudf::default_stream_value, mr); + input_col, values_to_replace, replacement_values, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/reshape/byte_cast.cu b/cpp/src/reshape/byte_cast.cu index 3d0510e1e6b..227ad2dad9c 100644 --- a/cpp/src/reshape/byte_cast.cu +++ b/cpp/src/reshape/byte_cast.cu @@ -147,7 +147,7 @@ std::unique_ptr byte_cast(column_view const& input_column, rmm::mr::device_memory_resource* mr) { 
CUDF_FUNC_RANGE(); - return detail::byte_cast(input_column, endian_configuration, cudf::default_stream_value, mr); + return detail::byte_cast(input_column, endian_configuration, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 3a3397dc1d5..bf316ea20bf 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -294,7 +294,7 @@ std::unique_ptr interleave_columns(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::interleave_columns(input, cudf::default_stream_value, mr); + return detail::interleave_columns(input, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/reshape/tile.cu b/cpp/src/reshape/tile.cu index 95358ddab01..18174ef1001 100644 --- a/cpp/src/reshape/tile.cu +++ b/cpp/src/reshape/tile.cu @@ -65,7 +65,7 @@ std::unique_ptr
tile(const table_view& in, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::tile(in, count, cudf::default_stream_value, mr); + return detail::tile(in, count, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/rolling/detail/range_window_bounds.hpp b/cpp/src/rolling/detail/range_window_bounds.hpp index 266f397b1e3..506bd54e5eb 100644 --- a/cpp/src/rolling/detail/range_window_bounds.hpp +++ b/cpp/src/rolling/detail/range_window_bounds.hpp @@ -149,7 +149,7 @@ template range_rep_type range_comparable_value( range_window_bounds const& range_bounds, data_type const& order_by_data_type = data_type{type_to_id()}, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) { auto const& range_scalar = range_bounds.range_scalar(); using range_type = cudf::detail::range_type; diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index c1be33a9cd5..960dbfb9dfe 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -211,7 +211,7 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, following_window_bounds, min_periods, aggr, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -1049,7 +1049,7 @@ std::unique_ptr grouped_time_range_rolling_window(table_view const& grou following, min_periods, aggr, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -1089,7 +1089,7 @@ std::unique_ptr grouped_time_range_rolling_window(table_view const& grou following, min_periods, aggr, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -1124,7 +1124,7 @@ std::unique_ptr grouped_range_rolling_window(table_view const& group_key following, min_periods, aggr, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } diff --git a/cpp/src/rolling/rolling.cu b/cpp/src/rolling/rolling.cu index f11eaad351d..d699d7bea85 100644 --- 
a/cpp/src/rolling/rolling.cu +++ b/cpp/src/rolling/rolling.cu @@ -41,7 +41,7 @@ std::unique_ptr rolling_window(column_view const& input, following_window, min_periods, agg, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -62,7 +62,7 @@ std::unique_ptr rolling_window(column_view const& input, following_window, min_periods, agg, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -76,7 +76,7 @@ std::unique_ptr rolling_window(column_view const& input, { CUDF_FUNC_RANGE(); return detail::rolling_window( - input, preceding_window, following_window, min_periods, agg, cudf::default_stream_value, mr); + input, preceding_window, following_window, min_periods, agg, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index c60ce7295fb..58e21fc97ab 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -348,7 +348,7 @@ std::unique_ptr round(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round(input, decimal_places, method, cudf::default_stream_value, mr); + return detail::round(input, decimal_places, method, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 2af3867cca7..0fe04ed1305 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -109,7 +109,7 @@ size_type string_scalar::size() const { return _data.size(); } const char* string_scalar::data() const { return static_cast(_data.data()); } -string_scalar::operator std::string() const { return this->to_string(cudf::default_stream_value); } +string_scalar::operator std::string() const { return this->to_string(cudf::get_default_stream()); } std::string string_scalar::to_string(rmm::cuda_stream_view stream) const { @@ -186,7 +186,7 @@ T fixed_point_scalar::fixed_point_value(rmm::cuda_stream_view stream) const template fixed_point_scalar::operator value_type() const { 
- return this->fixed_point_value(cudf::default_stream_value); + return this->fixed_point_value(cudf::get_default_stream()); } template @@ -269,7 +269,7 @@ T const* fixed_width_scalar::data() const template fixed_width_scalar::operator value_type() const { - return this->value(cudf::default_stream_value); + return this->value(cudf::get_default_stream()); } /** diff --git a/cpp/src/search/contains_column.cu b/cpp/src/search/contains_column.cu index c7631385270..31edf88a8cf 100644 --- a/cpp/src/search/contains_column.cu +++ b/cpp/src/search/contains_column.cu @@ -155,7 +155,7 @@ std::unique_ptr contains(column_view const& haystack, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contains(haystack, needles, cudf::default_stream_value, mr); + return detail::contains(haystack, needles, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index 11c47c769fb..59c7a86d29c 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -153,7 +153,7 @@ bool contains(column_view const& haystack, scalar const& needle, rmm::cuda_strea bool contains(column_view const& haystack, scalar const& needle) { CUDF_FUNC_RANGE(); - return detail::contains(haystack, needle, cudf::default_stream_value); + return detail::contains(haystack, needle, cudf::get_default_stream()); } } // namespace cudf diff --git a/cpp/src/search/search_ordered.cu b/cpp/src/search/search_ordered.cu index 754a17dc6d8..1da8d2313e6 100644 --- a/cpp/src/search/search_ordered.cu +++ b/cpp/src/search/search_ordered.cu @@ -147,7 +147,7 @@ std::unique_ptr lower_bound(table_view const& haystack, { CUDF_FUNC_RANGE(); return detail::lower_bound( - haystack, needles, column_order, null_precedence, cudf::default_stream_value, mr); + haystack, needles, column_order, null_precedence, cudf::get_default_stream(), mr); } std::unique_ptr upper_bound(table_view const& haystack, @@ -158,7 +158,7 @@ 
std::unique_ptr upper_bound(table_view const& haystack, { CUDF_FUNC_RANGE(); return detail::upper_bound( - haystack, needles, column_order, null_precedence, cudf::default_stream_value, mr); + haystack, needles, column_order, null_precedence, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/sort/is_sorted.cu b/cpp/src/sort/is_sorted.cu index 43f0a17ab27..459dcf5467f 100644 --- a/cpp/src/sort/is_sorted.cu +++ b/cpp/src/sort/is_sorted.cu @@ -84,7 +84,7 @@ bool is_sorted(cudf::table_view const& in, } return detail::is_sorted( - in, column_order, has_nulls(in), null_precedence, cudf::default_stream_value); + in, column_order, has_nulls(in), null_precedence, cudf::get_default_stream()); } } // namespace cudf diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index bcb9244231d..99e99704c10 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -352,7 +352,7 @@ std::unique_ptr rank(column_view const& input, null_handling, null_precedence, percentage, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index c5f13df5305..20017eda402 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -220,7 +220,7 @@ std::unique_ptr segmented_sorted_order(table_view const& keys, { CUDF_FUNC_RANGE(); return detail::segmented_sorted_order( - keys, segment_offsets, column_order, null_precedence, cudf::default_stream_value, mr); + keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr); } std::unique_ptr stable_segmented_sorted_order( @@ -232,7 +232,7 @@ std::unique_ptr stable_segmented_sorted_order( { CUDF_FUNC_RANGE(); return detail::stable_segmented_sorted_order( - keys, segment_offsets, column_order, null_precedence, cudf::default_stream_value, mr); + keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr); } std::unique_ptr
segmented_sort_by_key(table_view const& values, @@ -244,7 +244,7 @@ std::unique_ptr
segmented_sort_by_key(table_view const& values, { CUDF_FUNC_RANGE(); return detail::segmented_sort_by_key( - values, keys, segment_offsets, column_order, null_precedence, cudf::default_stream_value, mr); + values, keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr); } std::unique_ptr
stable_segmented_sort_by_key(table_view const& values, @@ -256,7 +256,7 @@ std::unique_ptr
stable_segmented_sort_by_key(table_view const& values, { CUDF_FUNC_RANGE(); return detail::stable_segmented_sort_by_key( - values, keys, segment_offsets, column_order, null_precedence, cudf::default_stream_value, mr); + values, keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/sort/sort.cu b/cpp/src/sort/sort.cu index 5089f233916..34041bddeb8 100644 --- a/cpp/src/sort/sort.cu +++ b/cpp/src/sort/sort.cu @@ -100,7 +100,7 @@ std::unique_ptr
sort(table_view const& input, return std::make_unique
(std::move(columns)); } return detail::sort_by_key( - input, input, column_order, null_precedence, cudf::default_stream_value, mr); + input, input, column_order, null_precedence, cudf::get_default_stream(), mr); } } // namespace detail @@ -111,7 +111,7 @@ std::unique_ptr sorted_order(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sorted_order(input, column_order, null_precedence, cudf::default_stream_value, mr); + return detail::sorted_order(input, column_order, null_precedence, cudf::get_default_stream(), mr); } std::unique_ptr
sort(table_view const& input, @@ -120,7 +120,7 @@ std::unique_ptr
sort(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sort(input, column_order, null_precedence, cudf::default_stream_value, mr); + return detail::sort(input, column_order, null_precedence, cudf::get_default_stream(), mr); } std::unique_ptr
sort_by_key(table_view const& values, @@ -131,7 +131,7 @@ std::unique_ptr
sort_by_key(table_view const& values, { CUDF_FUNC_RANGE(); return detail::sort_by_key( - values, keys, column_order, null_precedence, cudf::default_stream_value, mr); + values, keys, column_order, null_precedence, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/sort/stable_sort.cu b/cpp/src/sort/stable_sort.cu index a7e5d86ded0..ff2cb871162 100644 --- a/cpp/src/sort/stable_sort.cu +++ b/cpp/src/sort/stable_sort.cu @@ -65,7 +65,7 @@ std::unique_ptr stable_sorted_order(table_view const& input, { CUDF_FUNC_RANGE(); return detail::stable_sorted_order( - input, column_order, null_precedence, cudf::default_stream_value, mr); + input, column_order, null_precedence, cudf::get_default_stream(), mr); } std::unique_ptr
stable_sort_by_key(table_view const& values, @@ -76,7 +76,7 @@ std::unique_ptr
stable_sort_by_key(table_view const& values, { CUDF_FUNC_RANGE(); return detail::stable_sort_by_key( - values, keys, column_order, null_precedence, cudf::default_stream_value, mr); + values, keys, column_order, null_precedence, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/apply_boolean_mask.cu b/cpp/src/stream_compaction/apply_boolean_mask.cu index 54688672d20..8f707f6d15d 100644 --- a/cpp/src/stream_compaction/apply_boolean_mask.cu +++ b/cpp/src/stream_compaction/apply_boolean_mask.cu @@ -93,6 +93,6 @@ std::unique_ptr
apply_boolean_mask(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::apply_boolean_mask(input, boolean_mask, cudf::default_stream_value, mr); + return detail::apply_boolean_mask(input, boolean_mask, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index a03e4c4441a..02889d4f447 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -159,7 +159,7 @@ std::unique_ptr
distinct(table_view const& input, { CUDF_FUNC_RANGE(); return detail::distinct( - input, keys, keep, nulls_equal, nans_equal, cudf::default_stream_value, mr); + input, keys, keep, nulls_equal, nans_equal, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/drop_nans.cu b/cpp/src/stream_compaction/drop_nans.cu index 4429c952277..a645b46f7a7 100644 --- a/cpp/src/stream_compaction/drop_nans.cu +++ b/cpp/src/stream_compaction/drop_nans.cu @@ -119,7 +119,7 @@ std::unique_ptr
drop_nans(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::drop_nans(input, keys, keep_threshold, cudf::default_stream_value, mr); + return detail::drop_nans(input, keys, keep_threshold, cudf::get_default_stream(), mr); } /* * Filters a table to remove nan elements. @@ -129,7 +129,7 @@ std::unique_ptr
drop_nans(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::drop_nans(input, keys, keys.size(), cudf::default_stream_value, mr); + return detail::drop_nans(input, keys, keys.size(), cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/drop_nulls.cu b/cpp/src/stream_compaction/drop_nulls.cu index c5f3e0df1e2..6ea1fd4c31f 100644 --- a/cpp/src/stream_compaction/drop_nulls.cu +++ b/cpp/src/stream_compaction/drop_nulls.cu @@ -92,7 +92,7 @@ std::unique_ptr
drop_nulls(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::drop_nulls(input, keys, keep_threshold, cudf::default_stream_value, mr); + return detail::drop_nulls(input, keys, keep_threshold, cudf::get_default_stream(), mr); } /* * Filters a table to remove null elements. @@ -102,7 +102,7 @@ std::unique_ptr
drop_nulls(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::drop_nulls(input, keys, keys.size(), cudf::default_stream_value, mr); + return detail::drop_nulls(input, keys, keys.size(), cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index 83c51a92633..6b432176acb 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -99,7 +99,7 @@ std::unique_ptr
unique(table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::unique(input, keys, keep, nulls_equal, cudf::default_stream_value, mr); + return detail::unique(input, keys, keep, nulls_equal, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu index 0dd1a870b8a..ea01b570b91 100644 --- a/cpp/src/strings/attributes.cu +++ b/cpp/src/strings/attributes.cu @@ -185,21 +185,21 @@ std::unique_ptr count_characters(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::count_characters(strings, cudf::default_stream_value, mr); + return detail::count_characters(strings, cudf::get_default_stream(), mr); } std::unique_ptr count_bytes(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::count_bytes(strings, cudf::default_stream_value, mr); + return detail::count_bytes(strings, cudf::get_default_stream(), mr); } std::unique_ptr code_points(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::code_points(strings, cudf::default_stream_value, mr); + return detail::code_points(strings, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/capitalize.cu b/cpp/src/strings/capitalize.cu index 4328765773f..58134ab28d1 100644 --- a/cpp/src/strings/capitalize.cu +++ b/cpp/src/strings/capitalize.cu @@ -289,7 +289,7 @@ std::unique_ptr capitalize(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::capitalize(input, delimiter, cudf::default_stream_value, mr); + return detail::capitalize(input, delimiter, cudf::get_default_stream(), mr); } std::unique_ptr title(strings_column_view const& input, @@ -297,14 +297,14 @@ std::unique_ptr title(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { 
CUDF_FUNC_RANGE(); - return detail::title(input, sequence_type, cudf::default_stream_value, mr); + return detail::title(input, sequence_type, cudf::get_default_stream(), mr); } std::unique_ptr is_title(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_title(input, cudf::default_stream_value, mr); + return detail::is_title(input, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu index cabb1241f1b..05c2904ec9e 100644 --- a/cpp/src/strings/case.cu +++ b/cpp/src/strings/case.cu @@ -185,21 +185,21 @@ std::unique_ptr to_lower(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_lower(strings, cudf::default_stream_value, mr); + return detail::to_lower(strings, cudf::get_default_stream(), mr); } std::unique_ptr to_upper(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_upper(strings, cudf::default_stream_value, mr); + return detail::to_upper(strings, cudf::get_default_stream(), mr); } std::unique_ptr swapcase(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::swapcase(strings, cudf::default_stream_value, mr); + return detail::swapcase(strings, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu index 4010ec8861a..0426d82c6c6 100644 --- a/cpp/src/strings/char_types/char_types.cu +++ b/cpp/src/strings/char_types/char_types.cu @@ -197,7 +197,7 @@ std::unique_ptr all_characters_of_type(strings_column_view const& string { CUDF_FUNC_RANGE(); return detail::all_characters_of_type( - strings, types, verify_types, cudf::default_stream_value, mr); + strings, types, verify_types, cudf::get_default_stream(), mr); } std::unique_ptr 
filter_characters_of_type(strings_column_view const& strings, @@ -208,7 +208,7 @@ std::unique_ptr filter_characters_of_type(strings_column_view const& str { CUDF_FUNC_RANGE(); return detail::filter_characters_of_type( - strings, types_to_remove, replacement, types_to_keep, cudf::default_stream_value, mr); + strings, types_to_remove, replacement, types_to_keep, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/combine/concatenate.cu b/cpp/src/strings/combine/concatenate.cu index ae94348cbb4..e98ae537ddd 100644 --- a/cpp/src/strings/combine/concatenate.cu +++ b/cpp/src/strings/combine/concatenate.cu @@ -270,7 +270,7 @@ std::unique_ptr concatenate(table_view const& strings_columns, { CUDF_FUNC_RANGE(); return detail::concatenate( - strings_columns, separator, narep, separate_nulls, cudf::default_stream_value, mr); + strings_columns, separator, narep, separate_nulls, cudf::get_default_stream(), mr); } std::unique_ptr concatenate(table_view const& strings_columns, @@ -286,7 +286,7 @@ std::unique_ptr concatenate(table_view const& strings_columns, separator_narep, col_narep, separate_nulls, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } diff --git a/cpp/src/strings/combine/join.cu b/cpp/src/strings/combine/join.cu index f450ce4019e..6537ce168e5 100644 --- a/cpp/src/strings/combine/join.cu +++ b/cpp/src/strings/combine/join.cu @@ -135,7 +135,7 @@ std::unique_ptr join_strings(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::join_strings(strings, separator, narep, cudf::default_stream_value, mr); + return detail::join_strings(strings, separator, narep, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/combine/join_list_elements.cu b/cpp/src/strings/combine/join_list_elements.cu index 1d0ee94d306..ec2e65d7ad5 100644 --- a/cpp/src/strings/combine/join_list_elements.cu +++ b/cpp/src/strings/combine/join_list_elements.cu @@ 
-308,7 +308,7 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string narep, separate_nulls, empty_list_policy, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -327,7 +327,7 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string string_narep, separate_nulls, empty_list_policy, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu index b7d154c4808..c6e71b00809 100644 --- a/cpp/src/strings/contains.cu +++ b/cpp/src/strings/contains.cu @@ -136,7 +136,7 @@ std::unique_ptr contains_re(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contains_re(strings, pattern, flags, cudf::default_stream_value, mr); + return detail::contains_re(strings, pattern, flags, cudf::get_default_stream(), mr); } std::unique_ptr matches_re(strings_column_view const& strings, @@ -145,7 +145,7 @@ std::unique_ptr matches_re(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::matches_re(strings, pattern, flags, cudf::default_stream_value, mr); + return detail::matches_re(strings, pattern, flags, cudf::get_default_stream(), mr); } std::unique_ptr count_re(strings_column_view const& strings, @@ -154,7 +154,7 @@ std::unique_ptr count_re(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::count_re(strings, pattern, flags, cudf::default_stream_value, mr); + return detail::count_re(strings, pattern, flags, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_booleans.cu b/cpp/src/strings/convert/convert_booleans.cu index 196929a9377..da4728da331 100644 --- a/cpp/src/strings/convert/convert_booleans.cu +++ b/cpp/src/strings/convert/convert_booleans.cu @@ -86,7 +86,7 @@ std::unique_ptr to_booleans(strings_column_view const& strings, 
rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_booleans(strings, true_string, cudf::default_stream_value, mr); + return detail::to_booleans(strings, true_string, cudf::get_default_stream(), mr); } namespace detail { @@ -155,7 +155,7 @@ std::unique_ptr from_booleans(column_view const& booleans, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_booleans(booleans, true_string, false_string, cudf::default_stream_value, mr); + return detail::from_booleans(booleans, true_string, false_string, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index e70ae09de84..0cc2ef341d4 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -653,7 +653,7 @@ std::unique_ptr to_timestamps(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_timestamps(input, timestamp_type, format, cudf::default_stream_value, mr); + return detail::to_timestamps(input, timestamp_type, format, cudf::get_default_stream(), mr); } std::unique_ptr is_timestamp(strings_column_view const& input, @@ -661,7 +661,7 @@ std::unique_ptr is_timestamp(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_timestamp(input, format, cudf::default_stream_value, mr); + return detail::is_timestamp(input, format, cudf::get_default_stream(), mr); } namespace detail { @@ -1149,7 +1149,7 @@ std::unique_ptr from_timestamps(column_view const& timestamps, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_timestamps(timestamps, format, names, cudf::default_stream_value, mr); + return detail::from_timestamps(timestamps, format, names, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_durations.cu 
b/cpp/src/strings/convert/convert_durations.cu index ac64bceae54..0e2092fd31c 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -748,7 +748,7 @@ std::unique_ptr from_durations(column_view const& durations, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_durations(durations, format, cudf::default_stream_value, mr); + return detail::from_durations(durations, format, cudf::get_default_stream(), mr); } std::unique_ptr to_durations(strings_column_view const& strings, @@ -757,7 +757,7 @@ std::unique_ptr to_durations(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_durations(strings, duration_type, format, cudf::default_stream_value, mr); + return detail::to_durations(strings, duration_type, format, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 94bb235d1cd..402be192572 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -191,7 +191,7 @@ std::unique_ptr to_fixed_point(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_fixed_point(strings, output_type, cudf::default_stream_value, mr); + return detail::to_fixed_point(strings, output_type, cudf::get_default_stream(), mr); } namespace detail { @@ -334,7 +334,7 @@ std::unique_ptr from_fixed_point(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_fixed_point(input, cudf::default_stream_value, mr); + return detail::from_fixed_point(input, cudf::get_default_stream(), mr); } namespace detail { @@ -398,7 +398,7 @@ std::unique_ptr is_fixed_point(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_fixed_point(input, 
decimal_type, cudf::default_stream_value, mr); + return detail::is_fixed_point(input, decimal_type, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 4c11707f2c6..2de4bd2a2cc 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -125,7 +125,7 @@ std::unique_ptr to_floats(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_floats(strings, output_type, cudf::default_stream_value, mr); + return detail::to_floats(strings, output_type, cudf::get_default_stream(), mr); } namespace detail { @@ -450,7 +450,7 @@ std::unique_ptr from_floats(column_view const& floats, std::unique_ptr from_floats(column_view const& floats, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_floats(floats, cudf::default_stream_value, mr); + return detail::from_floats(floats, cudf::get_default_stream(), mr); } namespace detail { @@ -489,7 +489,7 @@ std::unique_ptr is_float(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_float(strings, cudf::default_stream_value, mr); + return detail::is_float(strings, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_hex.cu b/cpp/src/strings/convert/convert_hex.cu index c327f7da00e..dbbdffac2c2 100644 --- a/cpp/src/strings/convert/convert_hex.cu +++ b/cpp/src/strings/convert/convert_hex.cu @@ -284,21 +284,21 @@ std::unique_ptr hex_to_integers(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::hex_to_integers(strings, output_type, cudf::default_stream_value, mr); + return detail::hex_to_integers(strings, output_type, cudf::get_default_stream(), mr); } std::unique_ptr is_hex(strings_column_view const& strings, 
rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_hex(strings, cudf::default_stream_value, mr); + return detail::is_hex(strings, cudf::get_default_stream(), mr); } std::unique_ptr integers_to_hex(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::integers_to_hex(input, cudf::default_stream_value, mr); + return detail::integers_to_hex(input, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu index abce70ef4d5..343288af0c1 100644 --- a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -209,7 +209,7 @@ std::unique_ptr is_integer(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_integer(strings, cudf::default_stream_value, mr); + return detail::is_integer(strings, cudf::get_default_stream(), mr); } std::unique_ptr is_integer(strings_column_view const& strings, @@ -217,7 +217,7 @@ std::unique_ptr is_integer(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_integer(strings, int_type, cudf::default_stream_value, mr); + return detail::is_integer(strings, int_type, cudf::get_default_stream(), mr); } namespace detail { @@ -310,7 +310,7 @@ std::unique_ptr to_integers(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_integers(strings, output_type, cudf::default_stream_value, mr); + return detail::to_integers(strings, output_type, cudf::get_default_stream(), mr); } namespace detail { @@ -431,7 +431,7 @@ std::unique_ptr from_integers(column_view const& integers, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_integers(integers, cudf::default_stream_value, mr); + return detail::from_integers(integers, cudf::get_default_stream(), 
mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_ipv4.cu b/cpp/src/strings/convert/convert_ipv4.cu index 4dbdd3fc9d8..5229f0fdf1b 100644 --- a/cpp/src/strings/convert/convert_ipv4.cu +++ b/cpp/src/strings/convert/convert_ipv4.cu @@ -110,7 +110,7 @@ std::unique_ptr ipv4_to_integers(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ipv4_to_integers(strings, cudf::default_stream_value, mr); + return detail::ipv4_to_integers(strings, cudf::get_default_stream(), mr); } namespace detail { @@ -264,14 +264,14 @@ std::unique_ptr integers_to_ipv4(column_view const& integers, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::integers_to_ipv4(integers, cudf::default_stream_value, mr); + return detail::integers_to_ipv4(integers, cudf::get_default_stream(), mr); } std::unique_ptr is_ipv4(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_ipv4(strings, cudf::default_stream_value, mr); + return detail::is_ipv4(strings, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_lists.cu b/cpp/src/strings/convert/convert_lists.cu index 547052d5680..289fa9a1c05 100644 --- a/cpp/src/strings/convert/convert_lists.cu +++ b/cpp/src/strings/convert/convert_lists.cu @@ -235,7 +235,7 @@ std::unique_ptr format_list_column(lists_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::format_list_column(input, na_rep, separators, cudf::default_stream_value, mr); + return detail::format_list_column(input, na_rep, separators, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_urls.cu b/cpp/src/strings/convert/convert_urls.cu index ca32383e73f..0c6ecf46313 100644 --- a/cpp/src/strings/convert/convert_urls.cu +++ b/cpp/src/strings/convert/convert_urls.cu @@ -172,7 +172,7 @@ 
std::unique_ptr url_encode(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::url_encode(strings, cudf::default_stream_value, mr); + return detail::url_encode(strings, cudf::get_default_stream(), mr); } namespace detail { @@ -454,7 +454,7 @@ std::unique_ptr url_decode(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::url_decode(strings, cudf::default_stream_value, mr); + return detail::url_decode(strings, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/extract/extract.cu b/cpp/src/strings/extract/extract.cu index 76d2f84b1a0..882b85d1066 100644 --- a/cpp/src/strings/extract/extract.cu +++ b/cpp/src/strings/extract/extract.cu @@ -136,7 +136,7 @@ std::unique_ptr
extract(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract(strings, pattern, flags, cudf::default_stream_value, mr); + return detail::extract(strings, pattern, flags, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu index 76c2788c1be..1ba5a8a1470 100644 --- a/cpp/src/strings/extract/extract_all.cu +++ b/cpp/src/strings/extract/extract_all.cu @@ -171,7 +171,7 @@ std::unique_ptr extract_all_record(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_all_record(strings, pattern, flags, cudf::default_stream_value, mr); + return detail::extract_all_record(strings, pattern, flags, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/filter_chars.cu b/cpp/src/strings/filter_chars.cu index 7f0332ba9cc..b30b0e89c28 100644 --- a/cpp/src/strings/filter_chars.cu +++ b/cpp/src/strings/filter_chars.cu @@ -160,7 +160,7 @@ std::unique_ptr filter_characters( { CUDF_FUNC_RANGE(); return detail::filter_characters( - strings, characters_to_filter, keep_characters, replacement, cudf::default_stream_value, mr); + strings, characters_to_filter, keep_characters, replacement, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index 9ec1ec248e5..303c35ea7fb 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -1047,7 +1047,7 @@ std::unique_ptr get_json_object(cudf::strings_column_view const& c rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::get_json_object(col, json_path, options, cudf::default_stream_value, mr); + return detail::get_json_object(col, json_path, options, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/like.cu 
b/cpp/src/strings/like.cu index 2d3a3d3d52a..cb6fc844426 100644 --- a/cpp/src/strings/like.cu +++ b/cpp/src/strings/like.cu @@ -147,7 +147,7 @@ std::unique_ptr like(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::like(input, pattern, escape_character, cudf::default_stream_value, mr); + return detail::like(input, pattern, escape_character, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/padding.cu b/cpp/src/strings/padding.cu index e4002525af9..d84b4afc7cf 100644 --- a/cpp/src/strings/padding.cu +++ b/cpp/src/strings/padding.cu @@ -58,7 +58,7 @@ std::unique_ptr pad( size_type width, side_type side = side_type::RIGHT, std::string_view fill_char = " ", - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { size_type strings_count = strings.size(); @@ -180,7 +180,7 @@ std::unique_ptr pad(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::pad(input, width, side, fill_char, cudf::default_stream_value, mr); + return detail::pad(input, width, side, fill_char, cudf::get_default_stream(), mr); } std::unique_ptr zfill(strings_column_view const& input, @@ -188,7 +188,7 @@ std::unique_ptr zfill(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::zfill(input, width, cudf::default_stream_value, mr); + return detail::zfill(input, width, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu index 959229bbb87..5d02069d7f3 100644 --- a/cpp/src/strings/repeat_strings.cu +++ b/cpp/src/strings/repeat_strings.cu @@ -385,7 +385,7 @@ std::unique_ptr repeat_string(string_scalar const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - 
return detail::repeat_string(input, repeat_times, cudf::default_stream_value, mr); + return detail::repeat_string(input, repeat_times, cudf::get_default_stream(), mr); } std::unique_ptr repeat_strings(strings_column_view const& input, @@ -393,7 +393,7 @@ std::unique_ptr repeat_strings(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat_strings(input, repeat_times, cudf::default_stream_value, mr); + return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr); } std::unique_ptr repeat_strings(strings_column_view const& input, @@ -403,7 +403,7 @@ std::unique_ptr repeat_strings(strings_column_view const& input, { CUDF_FUNC_RANGE(); return detail::repeat_strings( - input, repeat_times, output_strings_sizes, cudf::default_stream_value, mr); + input, repeat_times, output_strings_sizes, cudf::get_default_stream(), mr); } std::pair, int64_t> repeat_strings_output_sizes( @@ -412,7 +412,7 @@ std::pair, int64_t> repeat_strings_output_sizes( rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat_strings_output_sizes(input, repeat_times, cudf::default_stream_value, mr); + return detail::repeat_strings_output_sizes(input, repeat_times, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu index e0a995c26b9..9658610da18 100644 --- a/cpp/src/strings/replace/backref_re.cu +++ b/cpp/src/strings/replace/backref_re.cu @@ -153,7 +153,7 @@ std::unique_ptr replace_with_backrefs(strings_column_view const& strings { CUDF_FUNC_RANGE(); return detail::replace_with_backrefs( - strings, pattern, replacement, flags, cudf::default_stream_value, mr); + strings, pattern, replacement, flags, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index a5b9ad37e65..cc5cf1384ec 100644 --- 
a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -198,7 +198,7 @@ std::unique_ptr replace_re(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace_re(strings, patterns, replacements, flags, cudf::default_stream_value, mr); + return detail::replace_re(strings, patterns, replacements, flags, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index de875014054..1cb7de5dc3b 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -843,7 +843,7 @@ std::unique_ptr replace(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace(strings, target, repl, maxrepl, cudf::default_stream_value, mr); + return detail::replace(strings, target, repl, maxrepl, cudf::get_default_stream(), mr); } std::unique_ptr replace_slice(strings_column_view const& strings, @@ -853,7 +853,7 @@ std::unique_ptr replace_slice(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace_slice(strings, repl, start, stop, cudf::default_stream_value, mr); + return detail::replace_slice(strings, repl, start, stop, cudf::get_default_stream(), mr); } std::unique_ptr replace(strings_column_view const& strings, @@ -862,7 +862,7 @@ std::unique_ptr replace(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace(strings, targets, repls, cudf::default_stream_value, mr); + return detail::replace(strings, targets, repls, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu index fd0049d7c89..34175f2ec6c 100644 --- a/cpp/src/strings/replace/replace_re.cu +++ b/cpp/src/strings/replace/replace_re.cu @@ -106,7 +106,7 @@ 
std::unique_ptr replace_re( string_scalar const& replacement, std::optional max_replace_count, regex_flags const flags, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { if (input.is_empty()) return make_empty_column(type_id::STRING); @@ -144,7 +144,7 @@ std::unique_ptr replace_re(strings_column_view const& strings, { CUDF_FUNC_RANGE(); return detail::replace_re( - strings, pattern, replacement, max_replace_count, flags, cudf::default_stream_value, mr); + strings, pattern, replacement, max_replace_count, flags, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index fa8581558a0..c48aedc5499 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -107,7 +107,7 @@ std::unique_ptr find( string_scalar const& target, size_type start = 0, size_type stop = -1, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto pfn = [] __device__( @@ -127,7 +127,7 @@ std::unique_ptr rfind( string_scalar const& target, size_type start = 0, size_type stop = -1, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto pfn = [] __device__( @@ -153,7 +153,7 @@ std::unique_ptr find(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::find(strings, target, start, stop, cudf::default_stream_value, mr); + return detail::find(strings, target, start, stop, cudf::get_default_stream(), mr); } std::unique_ptr rfind(strings_column_view const& strings, @@ -163,7 +163,7 @@ std::unique_ptr 
rfind(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::rfind(strings, target, start, stop, cudf::default_stream_value, mr); + return detail::rfind(strings, target, start, stop, cudf::get_default_stream(), mr); } namespace detail { @@ -463,7 +463,7 @@ std::unique_ptr contains(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contains(strings, target, cudf::default_stream_value, mr); + return detail::contains(strings, target, cudf::get_default_stream(), mr); } std::unique_ptr contains(strings_column_view const& strings, @@ -471,7 +471,7 @@ std::unique_ptr contains(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contains(strings, targets, cudf::default_stream_value, mr); + return detail::contains(strings, targets, cudf::get_default_stream(), mr); } std::unique_ptr starts_with(strings_column_view const& strings, @@ -479,7 +479,7 @@ std::unique_ptr starts_with(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::starts_with(strings, target, cudf::default_stream_value, mr); + return detail::starts_with(strings, target, cudf::get_default_stream(), mr); } std::unique_ptr starts_with(strings_column_view const& strings, @@ -487,7 +487,7 @@ std::unique_ptr starts_with(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::starts_with(strings, targets, cudf::default_stream_value, mr); + return detail::starts_with(strings, targets, cudf::get_default_stream(), mr); } std::unique_ptr ends_with(strings_column_view const& strings, @@ -495,7 +495,7 @@ std::unique_ptr ends_with(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ends_with(strings, target, cudf::default_stream_value, mr); + return detail::ends_with(strings, target, 
cudf::get_default_stream(), mr); } std::unique_ptr ends_with(strings_column_view const& strings, @@ -503,7 +503,7 @@ std::unique_ptr ends_with(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ends_with(strings, targets, cudf::default_stream_value, mr); + return detail::ends_with(strings, targets, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/search/find_multiple.cu b/cpp/src/strings/search/find_multiple.cu index 1e0f26b8650..389e6eccc43 100644 --- a/cpp/src/strings/search/find_multiple.cu +++ b/cpp/src/strings/search/find_multiple.cu @@ -92,7 +92,7 @@ std::unique_ptr find_multiple(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::find_multiple(input, targets, cudf::default_stream_value, mr); + return detail::find_multiple(input, targets, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu index 73470bde867..07829581aa6 100644 --- a/cpp/src/strings/search/findall.cu +++ b/cpp/src/strings/search/findall.cu @@ -139,7 +139,7 @@ std::unique_ptr findall(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::findall(input, pattern, flags, cudf::default_stream_value, mr); + return detail::findall(input, pattern, flags, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/split/partition.cu b/cpp/src/strings/split/partition.cu index 161c48383ff..acdd9efbb45 100644 --- a/cpp/src/strings/split/partition.cu +++ b/cpp/src/strings/split/partition.cu @@ -184,7 +184,7 @@ struct rpartition_fn : public partition_fn { std::unique_ptr
partition( strings_column_view const& strings, string_scalar const& delimiter = string_scalar(""), - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid"); @@ -212,7 +212,7 @@ std::unique_ptr
partition( std::unique_ptr
rpartition( strings_column_view const& strings, string_scalar const& delimiter = string_scalar(""), - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid"); @@ -246,7 +246,7 @@ std::unique_ptr
partition(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::partition(strings, delimiter, cudf::default_stream_value, mr); + return detail::partition(strings, delimiter, cudf::get_default_stream(), mr); } std::unique_ptr
rpartition(strings_column_view const& strings, @@ -254,7 +254,7 @@ std::unique_ptr
rpartition(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::rpartition(strings, delimiter, cudf::default_stream_value, mr); + return detail::rpartition(strings, delimiter, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/split/split.cu b/cpp/src/strings/split/split.cu index 000029063e0..89b4c1d75c2 100644 --- a/cpp/src/strings/split/split.cu +++ b/cpp/src/strings/split/split.cu @@ -795,7 +795,7 @@ std::unique_ptr
split( strings_column_view const& strings_column, string_scalar const& delimiter = string_scalar(""), size_type maxsplit = -1, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid"); @@ -820,7 +820,7 @@ std::unique_ptr
rsplit( strings_column_view const& strings_column, string_scalar const& delimiter = string_scalar(""), size_type maxsplit = -1, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid"); @@ -851,7 +851,7 @@ std::unique_ptr
split(strings_column_view const& strings_column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::split(strings_column, delimiter, maxsplit, cudf::default_stream_value, mr); + return detail::split(strings_column, delimiter, maxsplit, cudf::get_default_stream(), mr); } std::unique_ptr
rsplit(strings_column_view const& strings_column, @@ -860,7 +860,7 @@ std::unique_ptr
rsplit(strings_column_view const& strings_column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::rsplit(strings_column, delimiter, maxsplit, cudf::default_stream_value, mr); + return detail::rsplit(strings_column, delimiter, maxsplit, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu index e8de1da0d83..2538bab6229 100644 --- a/cpp/src/strings/split/split_re.cu +++ b/cpp/src/strings/split/split_re.cu @@ -334,7 +334,7 @@ std::unique_ptr
split_re(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::split_re(input, pattern, maxsplit, cudf::default_stream_value, mr); + return detail::split_re(input, pattern, maxsplit, cudf::get_default_stream(), mr); } std::unique_ptr split_record_re(strings_column_view const& input, @@ -343,7 +343,7 @@ std::unique_ptr split_record_re(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::split_record_re(input, pattern, maxsplit, cudf::default_stream_value, mr); + return detail::split_record_re(input, pattern, maxsplit, cudf::get_default_stream(), mr); } std::unique_ptr
rsplit_re(strings_column_view const& input, @@ -352,7 +352,7 @@ std::unique_ptr
rsplit_re(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::rsplit_re(input, pattern, maxsplit, cudf::default_stream_value, mr); + return detail::rsplit_re(input, pattern, maxsplit, cudf::get_default_stream(), mr); } std::unique_ptr rsplit_record_re(strings_column_view const& input, @@ -361,7 +361,7 @@ std::unique_ptr rsplit_record_re(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::rsplit_record_re(input, pattern, maxsplit, cudf::default_stream_value, mr); + return detail::rsplit_record_re(input, pattern, maxsplit, cudf::get_default_stream(), mr); } } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/split/split_record.cu b/cpp/src/strings/split/split_record.cu index 60c09ffd93a..83d8d7f9203 100644 --- a/cpp/src/strings/split/split_record.cu +++ b/cpp/src/strings/split/split_record.cu @@ -268,7 +268,7 @@ std::unique_ptr split_record( strings_column_view const& strings, string_scalar const& delimiter = string_scalar(""), size_type maxsplit = -1, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid"); @@ -304,7 +304,7 @@ std::unique_ptr split_record(strings_column_view const& strings, { CUDF_FUNC_RANGE(); return detail::split_record( - strings, delimiter, maxsplit, cudf::default_stream_value, mr); + strings, delimiter, maxsplit, cudf::get_default_stream(), mr); } std::unique_ptr rsplit_record(strings_column_view const& strings, @@ -314,7 +314,7 @@ std::unique_ptr rsplit_record(strings_column_view const& strings, { CUDF_FUNC_RANGE(); return detail::split_record( - strings, delimiter, maxsplit, cudf::default_stream_value, mr); + strings, delimiter, maxsplit, cudf::get_default_stream(), mr); } } // namespace 
strings diff --git a/cpp/src/strings/strings_column_factories.cu b/cpp/src/strings/strings_column_factories.cu index f5188ce1354..8c5916283be 100644 --- a/cpp/src/strings/strings_column_factories.cu +++ b/cpp/src/strings/strings_column_factories.cu @@ -61,7 +61,7 @@ std::unique_ptr make_strings_column( device_span offsets, size_type null_count, rmm::device_buffer&& null_mask, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_FUNC_RANGE(); diff --git a/cpp/src/strings/strip.cu b/cpp/src/strings/strip.cu index 5d51a5a7bed..baa6a27b4ba 100644 --- a/cpp/src/strings/strip.cu +++ b/cpp/src/strings/strip.cu @@ -73,7 +73,7 @@ std::unique_ptr strip( strings_column_view const& input, side_type side = side_type::BOTH, string_scalar const& to_strip = string_scalar(""), - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { if (input.is_empty()) return make_empty_column(type_id::STRING); @@ -104,7 +104,7 @@ std::unique_ptr strip(strings_column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::strip(input, side, to_strip, cudf::default_stream_value, mr); + return detail::strip(input, side, to_strip, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/substring.cu b/cpp/src/strings/substring.cu index 271a5375915..e0d1bc8cf31 100644 --- a/cpp/src/strings/substring.cu +++ b/cpp/src/strings/substring.cu @@ -110,7 +110,7 @@ std::unique_ptr slice_strings( numeric_scalar const& start = numeric_scalar(0, false), numeric_scalar const& stop = numeric_scalar(0, false), numeric_scalar const& step = numeric_scalar(1), - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = 
cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { if (strings.is_empty()) return make_empty_column(type_id::STRING); @@ -143,7 +143,7 @@ std::unique_ptr slice_strings(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::slice_strings(strings, start, stop, step, cudf::default_stream_value, mr); + return detail::slice_strings(strings, start, stop, step, cudf::get_default_stream(), mr); } namespace detail { @@ -398,7 +398,7 @@ std::unique_ptr slice_strings(strings_column_view const& strings, { CUDF_FUNC_RANGE(); return detail::slice_strings( - strings, starts_column, stops_column, cudf::default_stream_value, mr); + strings, starts_column, stops_column, cudf::get_default_stream(), mr); } std::unique_ptr slice_strings(strings_column_view const& strings, @@ -410,7 +410,7 @@ std::unique_ptr slice_strings(strings_column_view const& strings, return detail::slice_strings(strings, cudf::detail::make_pair_iterator(delimiter), count, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -420,7 +420,7 @@ std::unique_ptr slice_strings(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::slice_strings(strings, delimiters, count, cudf::default_stream_value, mr); + return detail::slice_strings(strings, delimiters, count, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/strings/translate.cu b/cpp/src/strings/translate.cu index 94e4d313109..01ecc49f10a 100644 --- a/cpp/src/strings/translate.cu +++ b/cpp/src/strings/translate.cu @@ -130,7 +130,7 @@ std::unique_ptr translate(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::translate(strings, chars_table, cudf::default_stream_value, mr); + return detail::translate(strings, chars_table, cudf::get_default_stream(), mr); } } // namespace strings diff --git 
a/cpp/src/strings/wrap.cu b/cpp/src/strings/wrap.cu index 232e61c1965..cd0aafc3545 100644 --- a/cpp/src/strings/wrap.cu +++ b/cpp/src/strings/wrap.cu @@ -137,7 +137,7 @@ std::unique_ptr wrap(strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::wrap(strings, width, cudf::default_stream_value, mr); + return detail::wrap(strings, width, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/src/text/detokenize.cu b/cpp/src/text/detokenize.cu index 16cc8f4922d..5e86a7ca1f3 100644 --- a/cpp/src/text/detokenize.cu +++ b/cpp/src/text/detokenize.cu @@ -201,7 +201,7 @@ std::unique_ptr detokenize(cudf::strings_column_view const& string rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::detokenize(strings, row_indices, separator, cudf::default_stream_value, mr); + return detail::detokenize(strings, row_indices, separator, cudf::get_default_stream(), mr); } } // namespace nvtext diff --git a/cpp/src/text/edit_distance.cu b/cpp/src/text/edit_distance.cu index 18658433d6c..fb0ecdb7677 100644 --- a/cpp/src/text/edit_distance.cu +++ b/cpp/src/text/edit_distance.cu @@ -309,7 +309,7 @@ std::unique_ptr edit_distance(cudf::strings_column_view const& str rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::edit_distance(strings, targets, cudf::default_stream_value, mr); + return detail::edit_distance(strings, targets, cudf::get_default_stream(), mr); } /** @@ -319,7 +319,7 @@ std::unique_ptr edit_distance_matrix(cudf::strings_column_view con rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::edit_distance_matrix(strings, cudf::default_stream_value, mr); + return detail::edit_distance_matrix(strings, cudf::get_default_stream(), mr); } } // namespace nvtext diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 7ffd2bd80a7..d5ff7b99344 100644 --- a/cpp/src/text/generate_ngrams.cu +++ 
b/cpp/src/text/generate_ngrams.cu @@ -88,7 +88,7 @@ std::unique_ptr generate_ngrams( cudf::strings_column_view const& strings, cudf::size_type ngrams = 2, cudf::string_scalar const& separator = cudf::string_scalar{"_"}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(separator.is_valid(stream), "Parameter separator must be valid"); @@ -151,7 +151,7 @@ std::unique_ptr generate_ngrams(cudf::strings_column_view const& s rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::generate_ngrams(strings, ngrams, separator, cudf::default_stream_value, mr); + return detail::generate_ngrams(strings, ngrams, separator, cudf::get_default_stream(), mr); } namespace detail { @@ -261,7 +261,7 @@ std::unique_ptr generate_character_ngrams(cudf::strings_column_vie rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::generate_character_ngrams(strings, ngrams, cudf::default_stream_value, mr); + return detail::generate_character_ngrams(strings, ngrams, cudf::get_default_stream(), mr); } } // namespace nvtext diff --git a/cpp/src/text/ngrams_tokenize.cu b/cpp/src/text/ngrams_tokenize.cu index f353b79f720..b0071ed9e88 100644 --- a/cpp/src/text/ngrams_tokenize.cu +++ b/cpp/src/text/ngrams_tokenize.cu @@ -139,7 +139,7 @@ std::unique_ptr ngrams_tokenize( cudf::size_type ngrams = 2, cudf::string_scalar const& delimiter = cudf::string_scalar(""), cudf::string_scalar const& separator = cudf::string_scalar{"_"}, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid"); @@ -263,7 +263,7 @@ std::unique_ptr ngrams_tokenize(cudf::strings_column_view const& s { 
CUDF_FUNC_RANGE(); return detail::ngrams_tokenize( - strings, ngrams, delimiter, separator, cudf::default_stream_value, mr); + strings, ngrams, delimiter, separator, cudf::get_default_stream(), mr); } } // namespace nvtext diff --git a/cpp/src/text/normalize.cu b/cpp/src/text/normalize.cu index 48921ac6520..2d5dd0ebbf8 100644 --- a/cpp/src/text/normalize.cu +++ b/cpp/src/text/normalize.cu @@ -244,7 +244,7 @@ std::unique_ptr normalize_spaces(cudf::strings_column_view const& rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::normalize_spaces(strings, cudf::default_stream_value, mr); + return detail::normalize_spaces(strings, cudf::get_default_stream(), mr); } /** @@ -255,7 +255,7 @@ std::unique_ptr normalize_characters(cudf::strings_column_view con rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::normalize_characters(strings, do_lower_case, cudf::default_stream_value, mr); + return detail::normalize_characters(strings, do_lower_case, cudf::get_default_stream(), mr); } } // namespace nvtext diff --git a/cpp/src/text/replace.cu b/cpp/src/text/replace.cu index 9171df97800..87c1d345ff5 100644 --- a/cpp/src/text/replace.cu +++ b/cpp/src/text/replace.cu @@ -282,7 +282,7 @@ std::unique_ptr replace_tokens(cudf::strings_column_view const& st { CUDF_FUNC_RANGE(); return detail::replace_tokens( - strings, targets, replacements, delimiter, cudf::default_stream_value, mr); + strings, targets, replacements, delimiter, cudf::get_default_stream(), mr); } std::unique_ptr filter_tokens(cudf::strings_column_view const& strings, @@ -293,7 +293,7 @@ std::unique_ptr filter_tokens(cudf::strings_column_view const& str { CUDF_FUNC_RANGE(); return detail::filter_tokens( - strings, min_token_length, replacement, delimiter, cudf::default_stream_value, mr); + strings, min_token_length, replacement, delimiter, cudf::get_default_stream(), mr); } } // namespace nvtext diff --git a/cpp/src/text/stemmer.cu b/cpp/src/text/stemmer.cu index 
cdf87967a0d..780ca5b4e5d 100644 --- a/cpp/src/text/stemmer.cu +++ b/cpp/src/text/stemmer.cu @@ -254,7 +254,7 @@ std::unique_ptr is_letter(cudf::strings_column_view const& strings return detail::is_letter(strings, ltype, thrust::make_constant_iterator(character_index), - cudf::default_stream_value, + cudf::get_default_stream(), mr); } @@ -264,7 +264,7 @@ std::unique_ptr is_letter(cudf::strings_column_view const& strings rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_letter(strings, ltype, indices, cudf::default_stream_value, mr); + return detail::is_letter(strings, ltype, indices, cudf::get_default_stream(), mr); } /** @@ -274,7 +274,7 @@ std::unique_ptr porter_stemmer_measure(cudf::strings_column_view c rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::porter_stemmer_measure(strings, cudf::default_stream_value, mr); + return detail::porter_stemmer_measure(strings, cudf::get_default_stream(), mr); } } // namespace nvtext diff --git a/cpp/src/text/subword/bpe_tokenizer.cu b/cpp/src/text/subword/bpe_tokenizer.cu index 549704bcbe4..ba07d70fea3 100644 --- a/cpp/src/text/subword/bpe_tokenizer.cu +++ b/cpp/src/text/subword/bpe_tokenizer.cu @@ -565,7 +565,7 @@ std::unique_ptr byte_pair_encoding(cudf::strings_column_view const rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::byte_pair_encoding(input, merges_table, separator, cudf::default_stream_value, mr); + return detail::byte_pair_encoding(input, merges_table, separator, cudf::get_default_stream(), mr); } } // namespace nvtext diff --git a/cpp/src/text/subword/load_hash_file.cu b/cpp/src/text/subword/load_hash_file.cu index 1b84cfd49fa..b52597fff47 100644 --- a/cpp/src/text/subword/load_hash_file.cu +++ b/cpp/src/text/subword/load_hash_file.cu @@ -284,7 +284,7 @@ std::unique_ptr load_vocabulary_file( std::string const& filename_hashed_vocabulary, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return 
detail::load_vocabulary_file(filename_hashed_vocabulary, cudf::default_stream_value, mr); + return detail::load_vocabulary_file(filename_hashed_vocabulary, cudf::get_default_stream(), mr); } } // namespace nvtext diff --git a/cpp/src/text/subword/load_merges_file.cu b/cpp/src/text/subword/load_merges_file.cu index da0598ddfac..518a860e39a 100644 --- a/cpp/src/text/subword/load_merges_file.cu +++ b/cpp/src/text/subword/load_merges_file.cu @@ -159,7 +159,7 @@ std::unique_ptr load_merge_pairs_file(std::string const& filena rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::load_merge_pairs_file(filename_merges, cudf::default_stream_value, mr); + return detail::load_merge_pairs_file(filename_merges, cudf::get_default_stream(), mr); } bpe_merge_pairs::bpe_merge_pairs_impl::bpe_merge_pairs_impl( diff --git a/cpp/src/text/subword/subword_tokenize.cu b/cpp/src/text/subword/subword_tokenize.cu index 7bd941f5823..844f2a625e0 100644 --- a/cpp/src/text/subword/subword_tokenize.cu +++ b/cpp/src/text/subword/subword_tokenize.cu @@ -270,7 +270,7 @@ tokenizer_result subword_tokenize(cudf::strings_column_view const& strings, do_lower_case, do_truncate, max_rows_tensor, - cudf::default_stream_value, + cudf::get_default_stream(), mr); } diff --git a/cpp/src/text/tokenize.cu b/cpp/src/text/tokenize.cu index 9da28af13c2..4ffd1b08998 100644 --- a/cpp/src/text/tokenize.cu +++ b/cpp/src/text/tokenize.cu @@ -232,7 +232,7 @@ std::unique_ptr tokenize(cudf::strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::tokenize(strings, delimiter, cudf::default_stream_value, mr); + return detail::tokenize(strings, delimiter, cudf::get_default_stream(), mr); } std::unique_ptr tokenize(cudf::strings_column_view const& strings, @@ -240,7 +240,7 @@ std::unique_ptr tokenize(cudf::strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::tokenize(strings, delimiters, 
cudf::default_stream_value, mr); + return detail::tokenize(strings, delimiters, cudf::get_default_stream(), mr); } std::unique_ptr count_tokens(cudf::strings_column_view const& strings, @@ -248,7 +248,7 @@ std::unique_ptr count_tokens(cudf::strings_column_view const& stri rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::count_tokens(strings, delimiter, cudf::default_stream_value, mr); + return detail::count_tokens(strings, delimiter, cudf::get_default_stream(), mr); } std::unique_ptr count_tokens(cudf::strings_column_view const& strings, @@ -256,14 +256,14 @@ std::unique_ptr count_tokens(cudf::strings_column_view const& stri rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::count_tokens(strings, delimiters, cudf::default_stream_value, mr); + return detail::count_tokens(strings, delimiters, cudf::get_default_stream(), mr); } std::unique_ptr character_tokenize(cudf::strings_column_view const& strings, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::character_tokenize(strings, cudf::default_stream_value, mr); + return detail::character_tokenize(strings, cudf::get_default_stream(), mr); } } // namespace nvtext diff --git a/cpp/src/transform/bools_to_mask.cu b/cpp/src/transform/bools_to_mask.cu index 70ead43e15b..e558b51fbb0 100644 --- a/cpp/src/transform/bools_to_mask.cu +++ b/cpp/src/transform/bools_to_mask.cu @@ -61,7 +61,7 @@ std::pair, cudf::size_type> bools_to_mask( column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::bools_to_mask(input, cudf::default_stream_value, mr); + return detail::bools_to_mask(input, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/transform/compute_column.cu b/cpp/src/transform/compute_column.cu index 9d9f1d3d8d2..e11ff437c14 100644 --- a/cpp/src/transform/compute_column.cu +++ b/cpp/src/transform/compute_column.cu @@ -137,7 +137,7 @@ std::unique_ptr compute_column(table_view const& 
table, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::compute_column(table, expr, cudf::default_stream_value, mr); + return detail::compute_column(table, expr, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/transform/encode.cu b/cpp/src/transform/encode.cu index 75e3fae6e78..c0e0c83c416 100644 --- a/cpp/src/transform/encode.cu +++ b/cpp/src/transform/encode.cu @@ -73,7 +73,7 @@ std::pair, std::unique_ptr> encode( cudf::table_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::encode(input, cudf::default_stream_value, mr); + return detail::encode(input, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/transform/mask_to_bools.cu b/cpp/src/transform/mask_to_bools.cu index 23bfe1f24f1..1b9a58c4724 100644 --- a/cpp/src/transform/mask_to_bools.cu +++ b/cpp/src/transform/mask_to_bools.cu @@ -64,6 +64,6 @@ std::unique_ptr mask_to_bools(bitmask_type const* bitmask, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::mask_to_bools(bitmask, begin_bit, end_bit, cudf::default_stream_value, mr); + return detail::mask_to_bools(bitmask, begin_bit, end_bit, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/transform/nans_to_nulls.cu b/cpp/src/transform/nans_to_nulls.cu index d840832af88..3c02409f778 100644 --- a/cpp/src/transform/nans_to_nulls.cu +++ b/cpp/src/transform/nans_to_nulls.cu @@ -95,7 +95,7 @@ std::pair, cudf::size_type> nans_to_nulls( column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::nans_to_nulls(input, cudf::default_stream_value, mr); + return detail::nans_to_nulls(input, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 94cf86f6829..9ccd21f5898 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -127,6 
+127,6 @@ std::pair, table_view> one_hot_encode(column_view const& rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::one_hot_encode(input, categories, cudf::default_stream_value, mr); + return detail::one_hot_encode(input, categories, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index 9545b5289f9..634fdd70831 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -539,7 +539,7 @@ std::unique_ptr row_bit_count(table_view const& t, std::unique_ptr row_bit_count(table_view const& t, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::row_bit_count(t, cudf::default_stream_value, mr); + return detail::row_bit_count(t, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 9de17f22b50..5e6646aa48f 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -99,7 +99,7 @@ std::unique_ptr transform(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::transform(input, unary_udf, output_type, is_ptx, cudf::default_stream_value, mr); + return detail::transform(input, unary_udf, output_type, is_ptx, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/transpose/transpose.cu b/cpp/src/transpose/transpose.cu index 5592e298fa3..94ede5d3c65 100644 --- a/cpp/src/transpose/transpose.cu +++ b/cpp/src/transpose/transpose.cu @@ -63,7 +63,7 @@ std::pair, table_view> transpose(table_view const& input rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::transpose(input, cudf::default_stream_value, mr); + return detail::transpose(input, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index 17c47d8fc90..b569ce04c31 100644 --- 
a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -412,7 +412,7 @@ std::unique_ptr cast(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::cast(input, type, cudf::default_stream_value, mr); + return detail::cast(input, type, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu index 448ac01babb..961f3a9e720 100644 --- a/cpp/src/unary/math_ops.cu +++ b/cpp/src/unary/math_ops.cu @@ -641,7 +641,7 @@ std::unique_ptr unary_operation(cudf::column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::unary_operation(input, op, cudf::default_stream_value, mr); + return detail::unary_operation(input, op, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/unary/nan_ops.cu b/cpp/src/unary/nan_ops.cu index 5cac9c51e4e..2cf83466b03 100644 --- a/cpp/src/unary/nan_ops.cu +++ b/cpp/src/unary/nan_ops.cu @@ -94,14 +94,14 @@ std::unique_ptr is_not_nan(cudf::column_view const& input, std::unique_ptr is_nan(cudf::column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_nan(input, cudf::default_stream_value, mr); + return detail::is_nan(input, cudf::get_default_stream(), mr); } std::unique_ptr is_not_nan(cudf::column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_not_nan(input, cudf::default_stream_value, mr); + return detail::is_not_nan(input, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/unary/null_ops.cu b/cpp/src/unary/null_ops.cu index 04bb1fe63e3..e64c68fdae6 100644 --- a/cpp/src/unary/null_ops.cu +++ b/cpp/src/unary/null_ops.cu @@ -58,14 +58,14 @@ std::unique_ptr is_valid(cudf::column_view const& input, std::unique_ptr is_null(cudf::column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_null(input, 
cudf::default_stream_value, mr); + return detail::is_null(input, cudf::get_default_stream(), mr); } std::unique_ptr is_valid(cudf::column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_valid(input, cudf::default_stream_value, mr); + return detail::is_valid(input, cudf::get_default_stream(), mr); } } // namespace cudf diff --git a/cpp/src/utilities/default_stream.cpp b/cpp/src/utilities/default_stream.cpp index d580972bc97..c21436abdb9 100644 --- a/cpp/src/utilities/default_stream.cpp +++ b/cpp/src/utilities/default_stream.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,16 @@ namespace cudf { +namespace detail { + +#if defined(CUDF_USE_PER_THREAD_DEFAULT_STREAM) +rmm::cuda_stream_view const default_stream_value{rmm::cuda_stream_per_thread}; +#else +rmm::cuda_stream_view const default_stream_value{}; +#endif + +} // namespace detail + /** * @brief Check if per-thread default stream is enabled. 
* @@ -32,4 +42,5 @@ bool is_ptds_enabled() #endif } +rmm::cuda_stream_view const get_default_stream() { return detail::default_stream_value; } } // namespace cudf diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index 048c6f9dfa2..e4fdf2ddabb 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -69,15 +69,15 @@ struct CountBitmaskTest : public cudf::test::BaseFixture { TEST_F(CountBitmaskTest, NullMask) { - EXPECT_THROW(cudf::detail::count_set_bits(nullptr, 0, 32, cudf::default_stream_value), + EXPECT_THROW(cudf::detail::count_set_bits(nullptr, 0, 32, cudf::get_default_stream()), cudf::logic_error); - EXPECT_EQ(32, cudf::detail::valid_count(nullptr, 0, 32, cudf::default_stream_value)); + EXPECT_EQ(32, cudf::detail::valid_count(nullptr, 0, 32, cudf::get_default_stream())); std::vector indices = {0, 32, 7, 25}; - EXPECT_THROW(cudf::detail::segmented_count_set_bits(nullptr, indices, cudf::default_stream_value), + EXPECT_THROW(cudf::detail::segmented_count_set_bits(nullptr, indices, cudf::get_default_stream()), cudf::logic_error); auto valid_counts = - cudf::detail::segmented_valid_count(nullptr, indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(nullptr, indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{32, 18})); } @@ -88,11 +88,11 @@ rmm::device_uvector make_mask(cudf::size_type size, bool fil if (!fill_valid) { return cudf::detail::make_zeroed_device_uvector_sync(size); } else { - auto ret = rmm::device_uvector(size, cudf::default_stream_value); + auto ret = rmm::device_uvector(size, cudf::get_default_stream()); CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), ~cudf::bitmask_type{0}, size * sizeof(cudf::bitmask_type), - cudf::default_stream_value.value())); + cudf::get_default_stream().value())); return ret; } } @@ -100,244 +100,244 @@ rmm::device_uvector make_mask(cudf::size_type size, bool fil 
TEST_F(CountBitmaskTest, NegativeStart) { auto mask = make_mask(1); - EXPECT_THROW(cudf::detail::count_set_bits(mask.data(), -1, 32, cudf::default_stream_value), + EXPECT_THROW(cudf::detail::count_set_bits(mask.data(), -1, 32, cudf::get_default_stream()), cudf::logic_error); - EXPECT_THROW(cudf::detail::valid_count(mask.data(), -1, 32, cudf::default_stream_value), + EXPECT_THROW(cudf::detail::valid_count(mask.data(), -1, 32, cudf::get_default_stream()), cudf::logic_error); std::vector indices = {0, 16, -1, 32}; EXPECT_THROW( - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value), + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()), cudf::logic_error); EXPECT_THROW( - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value), + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()), cudf::logic_error); } TEST_F(CountBitmaskTest, StartLargerThanStop) { auto mask = make_mask(1); - EXPECT_THROW(cudf::detail::count_set_bits(mask.data(), 32, 31, cudf::default_stream_value), + EXPECT_THROW(cudf::detail::count_set_bits(mask.data(), 32, 31, cudf::get_default_stream()), cudf::logic_error); - EXPECT_THROW(cudf::detail::valid_count(mask.data(), 32, 31, cudf::default_stream_value), + EXPECT_THROW(cudf::detail::valid_count(mask.data(), 32, 31, cudf::get_default_stream()), cudf::logic_error); std::vector indices = {0, 16, 31, 30}; EXPECT_THROW( - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value), + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()), cudf::logic_error); EXPECT_THROW( - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value), + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()), cudf::logic_error); } TEST_F(CountBitmaskTest, EmptyRange) { auto mask = make_mask(1); - EXPECT_EQ(0, 
cudf::detail::count_set_bits(mask.data(), 17, 17, cudf::default_stream_value)); - EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 17, 17, cudf::default_stream_value)); + EXPECT_EQ(0, cudf::detail::count_set_bits(mask.data(), 17, 17, cudf::get_default_stream())); + EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 17, 17, cudf::get_default_stream())); std::vector indices = {0, 0, 17, 17}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{0, 0})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{0, 0})); } TEST_F(CountBitmaskTest, SingleWordAllZero) { auto mask = make_mask(1); - EXPECT_EQ(0, cudf::detail::count_set_bits(mask.data(), 0, 32, cudf::default_stream_value)); - EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 0, 32, cudf::default_stream_value)); + EXPECT_EQ(0, cudf::detail::count_set_bits(mask.data(), 0, 32, cudf::get_default_stream())); + EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 0, 32, cudf::get_default_stream())); std::vector indices = {0, 32, 0, 32}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{0, 0})); auto valid_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{0, 0})); } 
TEST_F(CountBitmaskTest, SingleBitAllZero) { auto mask = make_mask(1); - EXPECT_EQ(0, cudf::detail::count_set_bits(mask.data(), 17, 18, cudf::default_stream_value)); - EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 17, 18, cudf::default_stream_value)); + EXPECT_EQ(0, cudf::detail::count_set_bits(mask.data(), 17, 18, cudf::get_default_stream())); + EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 17, 18, cudf::get_default_stream())); std::vector indices = {17, 18, 7, 8}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{0, 0})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{0, 0})); } TEST_F(CountBitmaskTest, SingleBitAllSet) { auto mask = make_mask(1, true); - EXPECT_EQ(1, cudf::detail::count_set_bits(mask.data(), 13, 14, cudf::default_stream_value)); - EXPECT_EQ(1, cudf::detail::valid_count(mask.data(), 13, 14, cudf::default_stream_value)); + EXPECT_EQ(1, cudf::detail::count_set_bits(mask.data(), 13, 14, cudf::get_default_stream())); + EXPECT_EQ(1, cudf::detail::valid_count(mask.data(), 13, 14, cudf::get_default_stream())); std::vector indices = {13, 14, 0, 1}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{1, 1})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); 
EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{1, 1})); } TEST_F(CountBitmaskTest, SingleWordAllBitsSet) { auto mask = make_mask(1, true); - EXPECT_EQ(32, cudf::detail::count_set_bits(mask.data(), 0, 32, cudf::default_stream_value)); - EXPECT_EQ(32, cudf::detail::valid_count(mask.data(), 0, 32, cudf::default_stream_value)); + EXPECT_EQ(32, cudf::detail::count_set_bits(mask.data(), 0, 32, cudf::get_default_stream())); + EXPECT_EQ(32, cudf::detail::valid_count(mask.data(), 0, 32, cudf::get_default_stream())); std::vector indices = {0, 32, 0, 32}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{32, 32})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{32, 32})); } TEST_F(CountBitmaskTest, SingleWordPreSlack) { auto mask = make_mask(1, true); - EXPECT_EQ(25, cudf::detail::count_set_bits(mask.data(), 7, 32, cudf::default_stream_value)); - EXPECT_EQ(25, cudf::detail::valid_count(mask.data(), 7, 32, cudf::default_stream_value)); + EXPECT_EQ(25, cudf::detail::count_set_bits(mask.data(), 7, 32, cudf::get_default_stream())); + EXPECT_EQ(25, cudf::detail::valid_count(mask.data(), 7, 32, cudf::get_default_stream())); std::vector indices = {7, 32, 8, 32}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{25, 24})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, 
cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{25, 24})); } TEST_F(CountBitmaskTest, SingleWordPostSlack) { auto mask = make_mask(1, true); - EXPECT_EQ(17, cudf::detail::count_set_bits(mask.data(), 0, 17, cudf::default_stream_value)); - EXPECT_EQ(17, cudf::detail::valid_count(mask.data(), 0, 17, cudf::default_stream_value)); + EXPECT_EQ(17, cudf::detail::count_set_bits(mask.data(), 0, 17, cudf::get_default_stream())); + EXPECT_EQ(17, cudf::detail::valid_count(mask.data(), 0, 17, cudf::get_default_stream())); std::vector indices = {0, 17, 0, 18}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{17, 18})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{17, 18})); } TEST_F(CountBitmaskTest, SingleWordSubset) { auto mask = make_mask(1, true); - EXPECT_EQ(30, cudf::detail::count_set_bits(mask.data(), 1, 31, cudf::default_stream_value)); - EXPECT_EQ(30, cudf::detail::valid_count(mask.data(), 1, 31, cudf::default_stream_value)); + EXPECT_EQ(30, cudf::detail::count_set_bits(mask.data(), 1, 31, cudf::get_default_stream())); + EXPECT_EQ(30, cudf::detail::valid_count(mask.data(), 1, 31, cudf::get_default_stream())); std::vector indices = {1, 31, 7, 17}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, 
::testing::ElementsAreArray(std::vector{30, 10})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{30, 10})); } TEST_F(CountBitmaskTest, SingleWordSubset2) { auto mask = make_mask(1, true); - EXPECT_EQ(28, cudf::detail::count_set_bits(mask.data(), 2, 30, cudf::default_stream_value)); - EXPECT_EQ(28, cudf::detail::valid_count(mask.data(), 2, 30, cudf::default_stream_value)); + EXPECT_EQ(28, cudf::detail::count_set_bits(mask.data(), 2, 30, cudf::get_default_stream())); + EXPECT_EQ(28, cudf::detail::valid_count(mask.data(), 2, 30, cudf::get_default_stream())); std::vector indices = {4, 16, 2, 30}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{12, 28})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{12, 28})); } TEST_F(CountBitmaskTest, MultipleWordsAllBits) { auto mask = make_mask(10, true); - EXPECT_EQ(320, cudf::detail::count_set_bits(mask.data(), 0, 320, cudf::default_stream_value)); - EXPECT_EQ(320, cudf::detail::valid_count(mask.data(), 0, 320, cudf::default_stream_value)); + EXPECT_EQ(320, cudf::detail::count_set_bits(mask.data(), 0, 320, cudf::get_default_stream())); + EXPECT_EQ(320, cudf::detail::valid_count(mask.data(), 0, 320, cudf::get_default_stream())); std::vector indices = {0, 320, 0, 320}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + 
cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{320, 320})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{320, 320})); } TEST_F(CountBitmaskTest, MultipleWordsSubsetWordBoundary) { auto mask = make_mask(10, true); - EXPECT_EQ(256, cudf::detail::count_set_bits(mask.data(), 32, 288, cudf::default_stream_value)); - EXPECT_EQ(256, cudf::detail::valid_count(mask.data(), 32, 288, cudf::default_stream_value)); + EXPECT_EQ(256, cudf::detail::count_set_bits(mask.data(), 32, 288, cudf::get_default_stream())); + EXPECT_EQ(256, cudf::detail::valid_count(mask.data(), 32, 288, cudf::get_default_stream())); std::vector indices = {32, 192, 32, 288}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{160, 256})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{160, 256})); } TEST_F(CountBitmaskTest, MultipleWordsSplitWordBoundary) { auto mask = make_mask(10, true); - EXPECT_EQ(2, cudf::detail::count_set_bits(mask.data(), 31, 33, cudf::default_stream_value)); - EXPECT_EQ(2, cudf::detail::valid_count(mask.data(), 31, 33, cudf::default_stream_value)); + EXPECT_EQ(2, cudf::detail::count_set_bits(mask.data(), 31, 33, cudf::get_default_stream())); + EXPECT_EQ(2, cudf::detail::valid_count(mask.data(), 31, 33, cudf::get_default_stream())); 
std::vector indices = {31, 33, 60, 67}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{2, 7})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{2, 7})); } TEST_F(CountBitmaskTest, MultipleWordsSubset) { auto mask = make_mask(10, true); - EXPECT_EQ(226, cudf::detail::count_set_bits(mask.data(), 67, 293, cudf::default_stream_value)); - EXPECT_EQ(226, cudf::detail::valid_count(mask.data(), 67, 293, cudf::default_stream_value)); + EXPECT_EQ(226, cudf::detail::count_set_bits(mask.data(), 67, 293, cudf::get_default_stream())); + EXPECT_EQ(226, cudf::detail::valid_count(mask.data(), 67, 293, cudf::get_default_stream())); std::vector indices = {67, 293, 37, 319}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{226, 282})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{226, 282})); } TEST_F(CountBitmaskTest, MultipleWordsSingleBit) { auto mask = make_mask(10, true); - EXPECT_EQ(1, cudf::detail::count_set_bits(mask.data(), 67, 68, cudf::default_stream_value)); - EXPECT_EQ(1, cudf::detail::valid_count(mask.data(), 67, 68, cudf::default_stream_value)); + EXPECT_EQ(1, cudf::detail::count_set_bits(mask.data(), 67, 68, 
cudf::get_default_stream())); + EXPECT_EQ(1, cudf::detail::valid_count(mask.data(), 67, 68, cudf::get_default_stream())); std::vector indices = {67, 68, 31, 32, 192, 193}; auto set_counts = - cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector{1, 1, 1})); auto valid_counts = - cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector{1, 1, 1})); } @@ -346,180 +346,180 @@ using CountUnsetBitsTest = CountBitmaskTest; TEST_F(CountUnsetBitsTest, SingleBitAllSet) { auto mask = make_mask(1, true); - EXPECT_EQ(0, cudf::detail::count_unset_bits(mask.data(), 13, 14, cudf::default_stream_value)); - EXPECT_EQ(0, cudf::detail::null_count(mask.data(), 13, 14, cudf::default_stream_value)); + EXPECT_EQ(0, cudf::detail::count_unset_bits(mask.data(), 13, 14, cudf::get_default_stream())); + EXPECT_EQ(0, cudf::detail::null_count(mask.data(), 13, 14, cudf::get_default_stream())); std::vector indices = {13, 14, 31, 32}; auto unset_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector{0, 0})); auto null_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector{0, 0})); } TEST_F(CountUnsetBitsTest, NullMask) { - EXPECT_THROW(cudf::detail::count_unset_bits(nullptr, 0, 32, cudf::default_stream_value), + 
EXPECT_THROW(cudf::detail::count_unset_bits(nullptr, 0, 32, cudf::get_default_stream()), cudf::logic_error); - EXPECT_EQ(0, cudf::detail::null_count(nullptr, 0, 32, cudf::default_stream_value)); + EXPECT_EQ(0, cudf::detail::null_count(nullptr, 0, 32, cudf::get_default_stream())); std::vector indices = {0, 32, 7, 25}; EXPECT_THROW( - cudf::detail::segmented_count_unset_bits(nullptr, indices, cudf::default_stream_value), + cudf::detail::segmented_count_unset_bits(nullptr, indices, cudf::get_default_stream()), cudf::logic_error); auto null_counts = - cudf::detail::segmented_null_count(nullptr, indices, cudf::default_stream_value); + cudf::detail::segmented_null_count(nullptr, indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector{0, 0})); } TEST_F(CountUnsetBitsTest, SingleWordAllBits) { auto mask = make_mask(1); - EXPECT_EQ(32, cudf::detail::count_unset_bits(mask.data(), 0, 32, cudf::default_stream_value)); - EXPECT_EQ(32, cudf::detail::null_count(mask.data(), 0, 32, cudf::default_stream_value)); + EXPECT_EQ(32, cudf::detail::count_unset_bits(mask.data(), 0, 32, cudf::get_default_stream())); + EXPECT_EQ(32, cudf::detail::null_count(mask.data(), 0, 32, cudf::get_default_stream())); std::vector indices = {0, 32, 0, 32}; auto unset_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector{32, 32})); auto null_counts = - cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector{32, 32})); } TEST_F(CountUnsetBitsTest, SingleWordPreSlack) { auto mask = make_mask(1); - EXPECT_EQ(25, cudf::detail::count_unset_bits(mask.data(), 7, 32, 
cudf::default_stream_value)); - EXPECT_EQ(25, cudf::detail::null_count(mask.data(), 7, 32, cudf::default_stream_value)); + EXPECT_EQ(25, cudf::detail::count_unset_bits(mask.data(), 7, 32, cudf::get_default_stream())); + EXPECT_EQ(25, cudf::detail::null_count(mask.data(), 7, 32, cudf::get_default_stream())); std::vector indices = {7, 32, 8, 32}; auto unset_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector{25, 24})); auto null_counts = - cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector{25, 24})); } TEST_F(CountUnsetBitsTest, SingleWordPostSlack) { auto mask = make_mask(1); - EXPECT_EQ(17, cudf::detail::count_unset_bits(mask.data(), 0, 17, cudf::default_stream_value)); - EXPECT_EQ(17, cudf::detail::null_count(mask.data(), 0, 17, cudf::default_stream_value)); + EXPECT_EQ(17, cudf::detail::count_unset_bits(mask.data(), 0, 17, cudf::get_default_stream())); + EXPECT_EQ(17, cudf::detail::null_count(mask.data(), 0, 17, cudf::get_default_stream())); std::vector indices = {0, 17, 0, 18}; auto unset_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector{17, 18})); auto null_counts = - cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector{17, 18})); } TEST_F(CountUnsetBitsTest, SingleWordSubset) { 
auto mask = make_mask(1); - EXPECT_EQ(30, cudf::detail::count_unset_bits(mask.data(), 1, 31, cudf::default_stream_value)); - EXPECT_EQ(30, cudf::detail::null_count(mask.data(), 1, 31, cudf::default_stream_value)); + EXPECT_EQ(30, cudf::detail::count_unset_bits(mask.data(), 1, 31, cudf::get_default_stream())); + EXPECT_EQ(30, cudf::detail::null_count(mask.data(), 1, 31, cudf::get_default_stream())); std::vector indices = {1, 31, 7, 17}; auto unset_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector{30, 10})); auto null_counts = - cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector{30, 10})); } TEST_F(CountUnsetBitsTest, SingleWordSubset2) { auto mask = make_mask(1); - EXPECT_EQ(28, cudf::detail::count_unset_bits(mask.data(), 2, 30, cudf::default_stream_value)); - EXPECT_EQ(28, cudf::detail::null_count(mask.data(), 2, 30, cudf::default_stream_value)); + EXPECT_EQ(28, cudf::detail::count_unset_bits(mask.data(), 2, 30, cudf::get_default_stream())); + EXPECT_EQ(28, cudf::detail::null_count(mask.data(), 2, 30, cudf::get_default_stream())); std::vector indices = {4, 16, 2, 30}; auto unset_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector{12, 28})); auto null_counts = - cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, 
::testing::ElementsAreArray(std::vector{12, 28})); } TEST_F(CountUnsetBitsTest, MultipleWordsAllBits) { auto mask = make_mask(10); - EXPECT_EQ(320, cudf::detail::count_unset_bits(mask.data(), 0, 320, cudf::default_stream_value)); - EXPECT_EQ(320, cudf::detail::null_count(mask.data(), 0, 320, cudf::default_stream_value)); + EXPECT_EQ(320, cudf::detail::count_unset_bits(mask.data(), 0, 320, cudf::get_default_stream())); + EXPECT_EQ(320, cudf::detail::null_count(mask.data(), 0, 320, cudf::get_default_stream())); std::vector indices = {0, 320, 0, 320}; auto unset_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector{320, 320})); auto null_counts = - cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector{320, 320})); } TEST_F(CountUnsetBitsTest, MultipleWordsSubsetWordBoundary) { auto mask = make_mask(10); - EXPECT_EQ(256, cudf::detail::count_unset_bits(mask.data(), 32, 288, cudf::default_stream_value)); - EXPECT_EQ(256, cudf::detail::null_count(mask.data(), 32, 288, cudf::default_stream_value)); + EXPECT_EQ(256, cudf::detail::count_unset_bits(mask.data(), 32, 288, cudf::get_default_stream())); + EXPECT_EQ(256, cudf::detail::null_count(mask.data(), 32, 288, cudf::get_default_stream())); std::vector indices = {32, 192, 32, 288}; auto unset_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector{160, 256})); auto null_counts = - cudf::detail::segmented_null_count(mask.data(), 
indices, cudf::default_stream_value); + cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector{160, 256})); } TEST_F(CountUnsetBitsTest, MultipleWordsSplitWordBoundary) { auto mask = make_mask(10); - EXPECT_EQ(2, cudf::detail::count_unset_bits(mask.data(), 31, 33, cudf::default_stream_value)); - EXPECT_EQ(2, cudf::detail::null_count(mask.data(), 31, 33, cudf::default_stream_value)); + EXPECT_EQ(2, cudf::detail::count_unset_bits(mask.data(), 31, 33, cudf::get_default_stream())); + EXPECT_EQ(2, cudf::detail::null_count(mask.data(), 31, 33, cudf::get_default_stream())); std::vector indices = {31, 33, 60, 67}; auto unset_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector{2, 7})); auto null_counts = - cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector{2, 7})); } TEST_F(CountUnsetBitsTest, MultipleWordsSubset) { auto mask = make_mask(10); - EXPECT_EQ(226, cudf::detail::count_unset_bits(mask.data(), 67, 293, cudf::default_stream_value)); - EXPECT_EQ(226, cudf::detail::null_count(mask.data(), 67, 293, cudf::default_stream_value)); + EXPECT_EQ(226, cudf::detail::count_unset_bits(mask.data(), 67, 293, cudf::get_default_stream())); + EXPECT_EQ(226, cudf::detail::null_count(mask.data(), 67, 293, cudf::get_default_stream())); std::vector indices = {67, 293, 37, 319}; auto unset_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); 
EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector{226, 282})); auto null_counts = - cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector{226, 282})); } TEST_F(CountUnsetBitsTest, MultipleWordsSingleBit) { auto mask = make_mask(10); - EXPECT_EQ(1, cudf::detail::count_unset_bits(mask.data(), 67, 68, cudf::default_stream_value)); - EXPECT_EQ(1, cudf::detail::null_count(mask.data(), 67, 68, cudf::default_stream_value)); + EXPECT_EQ(1, cudf::detail::count_unset_bits(mask.data(), 67, 68, cudf::get_default_stream())); + EXPECT_EQ(1, cudf::detail::null_count(mask.data(), 67, 68, cudf::get_default_stream())); std::vector indices = {67, 68, 31, 32, 192, 193}; auto unset_counts = - cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector{1, 1, 1})); auto null_counts = - cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value); + cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream()); EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector{1, 1, 1})); } @@ -622,10 +622,10 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorContiguous) } auto gold_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end()); - rmm::device_buffer copy_mask{gold_mask, cudf::default_stream_value}; + rmm::device_buffer copy_mask{gold_mask, cudf::get_default_stream()}; cudf::column original{t, num_elements, - rmm::device_buffer{num_elements * sizeof(int), cudf::default_stream_value}, + rmm::device_buffer{num_elements * sizeof(int), cudf::get_default_stream()}, std::move(copy_mask)}; std::vector indices{0, 104, @@ -669,7 
+669,7 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous) cols.emplace_back( t, split[i + 1] - split[i], - rmm::device_buffer{sizeof(int) * (split[i + 1] - split[i]), cudf::default_stream_value}, + rmm::device_buffer{sizeof(int) * (split[i + 1] - split[i]), cudf::get_default_stream()}, cudf::test::detail::make_null_mask(validity_bit.begin() + split[i], validity_bit.begin() + split[i + 1])); views.push_back(cols.back()); diff --git a/cpp/tests/bitmask/set_nullmask_tests.cu b/cpp/tests/bitmask/set_nullmask_tests.cu index 1b7ffe2ff72..19e3202a6d7 100644 --- a/cpp/tests/bitmask/set_nullmask_tests.cu +++ b/cpp/tests/bitmask/set_nullmask_tests.cu @@ -50,7 +50,7 @@ struct SetBitmaskTest : public cudf::test::BaseFixture { void expect_bitmask_equal(cudf::bitmask_type const* bitmask, // Device Ptr cudf::size_type start_bit, thrust::host_vector const& expect, - rmm::cuda_stream_view stream = cudf::default_stream_value) + rmm::cuda_stream_view stream = cudf::get_default_stream()) { rmm::device_uvector result(expect.size(), stream); auto counting_iter = thrust::counting_iterator{0}; diff --git a/cpp/tests/column/column_device_view_test.cu b/cpp/tests/column/column_device_view_test.cu index c317ddec8b7..0868ad885cf 100644 --- a/cpp/tests/column/column_device_view_test.cu +++ b/cpp/tests/column/column_device_view_test.cu @@ -37,7 +37,7 @@ struct ColumnDeviceViewTest : public cudf::test::BaseFixture { TEST_F(ColumnDeviceViewTest, Sample) { using T = int32_t; - rmm::cuda_stream_view stream{cudf::default_stream_value}; + rmm::cuda_stream_view stream{cudf::get_default_stream()}; cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6}); auto output = cudf::allocate_like(input); auto input_device_view = cudf::column_device_view::create(input, stream); @@ -55,7 +55,7 @@ TEST_F(ColumnDeviceViewTest, Sample) TEST_F(ColumnDeviceViewTest, MismatchingType) { using T = int32_t; - rmm::cuda_stream_view stream{cudf::default_stream_value}; + rmm::cuda_stream_view 
stream{cudf::get_default_stream()}; cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6}); auto output = cudf::allocate_like(input); auto input_device_view = cudf::column_device_view::create(input, stream); diff --git a/cpp/tests/column/column_test.cu b/cpp/tests/column/column_test.cu index 51f37ecac6c..b31c38e4187 100644 --- a/cpp/tests/column/column_test.cu +++ b/cpp/tests/column/column_test.cu @@ -43,15 +43,15 @@ struct TypedColumnTest : public cudf::test::BaseFixture { cudf::data_type type() { return cudf::data_type{cudf::type_to_id()}; } TypedColumnTest() - : data{_num_elements * cudf::size_of(type()), cudf::default_stream_value}, - mask{cudf::bitmask_allocation_size_bytes(_num_elements), cudf::default_stream_value} + : data{_num_elements * cudf::size_of(type()), cudf::get_default_stream()}, + mask{cudf::bitmask_allocation_size_bytes(_num_elements), cudf::get_default_stream()} { auto typed_data = static_cast(data.data()); auto typed_mask = static_cast(mask.data()); thrust::sequence( - rmm::exec_policy(cudf::default_stream_value), typed_data, typed_data + data.size()); + rmm::exec_policy(cudf::get_default_stream()), typed_data, typed_data + data.size()); thrust::sequence( - rmm::exec_policy(cudf::default_stream_value), typed_mask, typed_mask + mask.size()); + rmm::exec_policy(cudf::get_default_stream()), typed_mask, typed_mask + mask.size()); } cudf::size_type num_elements() { return _num_elements; } @@ -247,8 +247,8 @@ TYPED_TEST(TypedColumnTest, CopyDataAndMask) { cudf::column col{this->type(), this->num_elements(), - rmm::device_buffer{this->data, cudf::default_stream_value}, - rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value}}; + rmm::device_buffer{this->data, cudf::get_default_stream()}, + rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()}}; EXPECT_EQ(this->type(), col.type()); EXPECT_TRUE(col.nullable()); EXPECT_EQ(0, col.null_count()); @@ -352,8 +352,8 @@ TYPED_TEST(TypedColumnTest, MoveConstructorWithMask) 
TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorNoMask) { rmm::device_uvector original{static_cast(this->num_elements()), - cudf::default_stream_value}; - thrust::copy(rmm::exec_policy(cudf::default_stream_value), + cudf::get_default_stream()}; + thrust::copy(rmm::exec_policy(cudf::get_default_stream()), static_cast(this->data.data()), static_cast(this->data.data()) + this->num_elements(), original.begin()); @@ -369,8 +369,8 @@ TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorNoMask) TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorWithMask) { rmm::device_uvector original{static_cast(this->num_elements()), - cudf::default_stream_value}; - thrust::copy(rmm::exec_policy(cudf::default_stream_value), + cudf::get_default_stream()}; + thrust::copy(rmm::exec_policy(cudf::get_default_stream()), static_cast(this->data.data()), static_cast(this->data.data()) + this->num_elements(), original.begin()); @@ -392,17 +392,17 @@ TYPED_TEST(TypedColumnTest, ConstructWithChildren) children.emplace_back(std::make_unique( cudf::data_type{cudf::type_id::INT8}, 42, - rmm::device_buffer{this->data, cudf::default_stream_value}, - rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value})); + rmm::device_buffer{this->data, cudf::get_default_stream()}, + rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()})); children.emplace_back(std::make_unique( cudf::data_type{cudf::type_id::FLOAT64}, 314, - rmm::device_buffer{this->data, cudf::default_stream_value}, - rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value})); + rmm::device_buffer{this->data, cudf::get_default_stream()}, + rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()})); cudf::column col{this->type(), this->num_elements(), - rmm::device_buffer{this->data, cudf::default_stream_value}, - rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value}, + rmm::device_buffer{this->data, cudf::get_default_stream()}, + rmm::device_buffer{this->all_valid_mask, 
cudf::get_default_stream()}, cudf::UNKNOWN_NULL_COUNT, std::move(children)}; @@ -437,17 +437,17 @@ TYPED_TEST(TypedColumnTest, ReleaseWithChildren) children.emplace_back(std::make_unique( this->type(), this->num_elements(), - rmm::device_buffer{this->data, cudf::default_stream_value}, - rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value})); + rmm::device_buffer{this->data, cudf::get_default_stream()}, + rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()})); children.emplace_back(std::make_unique( this->type(), this->num_elements(), - rmm::device_buffer{this->data, cudf::default_stream_value}, - rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value})); + rmm::device_buffer{this->data, cudf::get_default_stream()}, + rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()})); cudf::column col{this->type(), this->num_elements(), - rmm::device_buffer{this->data, cudf::default_stream_value}, - rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value}, + rmm::device_buffer{this->data, cudf::get_default_stream()}, + rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()}, cudf::UNKNOWN_NULL_COUNT, std::move(children)}; diff --git a/cpp/tests/column/compound_test.cu b/cpp/tests/column/compound_test.cu index 58be2b2f316..79f746369f2 100644 --- a/cpp/tests/column/compound_test.cu +++ b/cpp/tests/column/compound_test.cu @@ -65,13 +65,13 @@ struct checker_for_level2 { TEST_F(CompoundColumnTest, ChildrenLevel1) { - rmm::device_uvector data(1000, cudf::default_stream_value); - thrust::sequence(rmm::exec_policy(cudf::default_stream_value), data.begin(), data.end(), 1); + rmm::device_uvector data(1000, cudf::get_default_stream()); + thrust::sequence(rmm::exec_policy(cudf::get_default_stream()), data.begin(), data.end(), 1); auto null_mask = cudf::create_null_mask(100, cudf::mask_state::UNALLOCATED); - rmm::device_buffer data1{data.data() + 100, 100 * sizeof(int32_t), cudf::default_stream_value}; - 
rmm::device_buffer data2{data.data() + 200, 100 * sizeof(int32_t), cudf::default_stream_value}; - rmm::device_buffer data3{data.data() + 300, 100 * sizeof(int32_t), cudf::default_stream_value}; + rmm::device_buffer data1{data.data() + 100, 100 * sizeof(int32_t), cudf::get_default_stream()}; + rmm::device_buffer data2{data.data() + 200, 100 * sizeof(int32_t), cudf::get_default_stream()}; + rmm::device_buffer data3{data.data() + 300, 100 * sizeof(int32_t), cudf::get_default_stream()}; auto child1 = std::make_unique(cudf::data_type{cudf::type_id::INT32}, 100, @@ -105,14 +105,14 @@ TEST_F(CompoundColumnTest, ChildrenLevel1) { auto column = cudf::column_device_view::create(parent->view()); - EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::get_default_stream()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(100), checker_for_level1{*column})); } { auto column = cudf::mutable_column_device_view::create(parent->mutable_view()); - EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::get_default_stream()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(100), checker_for_level1{*column})); @@ -121,16 +121,16 @@ TEST_F(CompoundColumnTest, ChildrenLevel1) TEST_F(CompoundColumnTest, ChildrenLevel2) { - rmm::device_uvector data(1000, cudf::default_stream_value); - thrust::sequence(rmm::exec_policy(cudf::default_stream_value), data.begin(), data.end(), 1); + rmm::device_uvector data(1000, cudf::get_default_stream()); + thrust::sequence(rmm::exec_policy(cudf::get_default_stream()), data.begin(), data.end(), 1); auto null_mask = cudf::create_null_mask(100, cudf::mask_state::UNALLOCATED); - rmm::device_buffer data11{data.data() + 100, 100 * sizeof(int32_t), cudf::default_stream_value}; - rmm::device_buffer data12{data.data() + 200, 100 * sizeof(int32_t), cudf::default_stream_value}; - 
rmm::device_buffer data13{data.data() + 300, 100 * sizeof(int32_t), cudf::default_stream_value}; - rmm::device_buffer data21{data.data() + 400, 100 * sizeof(int32_t), cudf::default_stream_value}; - rmm::device_buffer data22{data.data() + 500, 100 * sizeof(int32_t), cudf::default_stream_value}; - rmm::device_buffer data23{data.data() + 600, 100 * sizeof(int32_t), cudf::default_stream_value}; + rmm::device_buffer data11{data.data() + 100, 100 * sizeof(int32_t), cudf::get_default_stream()}; + rmm::device_buffer data12{data.data() + 200, 100 * sizeof(int32_t), cudf::get_default_stream()}; + rmm::device_buffer data13{data.data() + 300, 100 * sizeof(int32_t), cudf::get_default_stream()}; + rmm::device_buffer data21{data.data() + 400, 100 * sizeof(int32_t), cudf::get_default_stream()}; + rmm::device_buffer data22{data.data() + 500, 100 * sizeof(int32_t), cudf::get_default_stream()}; + rmm::device_buffer data23{data.data() + 600, 100 * sizeof(int32_t), cudf::get_default_stream()}; auto gchild11 = std::make_unique(cudf::data_type{cudf::type_id::INT32}, 100, @@ -202,14 +202,14 @@ TEST_F(CompoundColumnTest, ChildrenLevel2) { auto column = cudf::column_device_view::create(parent->view()); - EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::get_default_stream()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(100), checker_for_level2{*column})); } { auto column = cudf::mutable_column_device_view::create(parent->mutable_view()); - EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::get_default_stream()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(100), checker_for_level2{*column})); diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index bd37da91f69..9386bb9a840 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ 
-36,7 +36,7 @@ class ColumnFactoryTest : public cudf::test::BaseFixture { public: cudf::size_type size() { return _size; } - rmm::cuda_stream_view stream() { return cudf::default_stream_value; } + rmm::cuda_stream_view stream() { return cudf::get_default_stream(); } }; template diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index c06afe85cff..f01a17a0005 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -53,7 +53,7 @@ template struct TypedColumnTest : public cudf::test::BaseFixture { cudf::data_type type() { return cudf::data_type{cudf::type_to_id()}; } - TypedColumnTest(rmm::cuda_stream_view stream = cudf::default_stream_value) + TypedColumnTest(rmm::cuda_stream_view stream = cudf::get_default_stream()) : data{_num_elements * cudf::size_of(type()), stream}, mask{cudf::bitmask_allocation_size_bytes(_num_elements), stream} { @@ -356,7 +356,7 @@ TEST_F(OverflowTest, OverflowTest) table_view tbl_last({*many_chars_last}); std::vector table_views_to_concat({tbl, tbl, tbl, tbl, tbl, tbl_last}); std::unique_ptr concatenated_tables = cudf::concatenate(table_views_to_concat); - EXPECT_NO_THROW(cudf::default_stream_value.synchronize()); + EXPECT_NO_THROW(cudf::get_default_stream().synchronize()); ASSERT_EQ(concatenated_tables->num_rows(), std::numeric_limits::max()); } @@ -522,11 +522,11 @@ TEST_F(OverflowTest, Presliced) // try and concatenate 4 string columns of with ~1/2 billion chars in each auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), offsets->mutable_view().begin(), offsets->mutable_view().end(), string_size); - thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), + thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()), offsets->view().begin(), offsets->view().end(), 
offsets->mutable_view().begin()); @@ -596,11 +596,11 @@ TEST_F(OverflowTest, Presliced) // try and concatenate 4 struct columns of with ~1/2 billion elements in each auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), offsets->mutable_view().begin(), offsets->mutable_view().end(), list_size); - thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), + thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()), offsets->view().begin(), offsets->view().end(), offsets->mutable_view().begin()); @@ -688,11 +688,11 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) constexpr size_type string_size = inner_size / num_rows; auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), offsets->mutable_view().begin(), offsets->mutable_view().end(), string_size); - thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), + thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()), offsets->view().begin(), offsets->view().end(), offsets->mutable_view().begin()); @@ -715,11 +715,11 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) constexpr size_type list_size = inner_size / num_rows; auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), offsets->mutable_view().begin(), offsets->mutable_view().end(), list_size); - thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), + thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()), offsets->view().begin(), offsets->view().end(), offsets->mutable_view().begin()); @@ -742,11 +742,11 @@ TEST_F(OverflowTest, 
BigColumnsSmallSlices) constexpr size_type list_size = inner_size / num_rows; auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), offsets->mutable_view().begin(), offsets->mutable_view().end(), list_size); - thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), + thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()), offsets->view().begin(), offsets->view().end(), offsets->mutable_view().begin()); diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu index 9cd74abce1c..08037b78a70 100644 --- a/cpp/tests/copying/detail_gather_tests.cu +++ b/cpp/tests/copying/detail_gather_tests.cu @@ -46,9 +46,9 @@ TYPED_TEST_SUITE(GatherTest, cudf::test::NumericTypes); TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) { constexpr cudf::size_type source_size{1000}; - rmm::device_uvector gather_map(source_size, cudf::default_stream_value); + rmm::device_uvector gather_map(source_size, cudf::get_default_stream()); thrust::sequence( - rmm::exec_policy(cudf::default_stream_value), gather_map.begin(), gather_map.end()); + rmm::exec_policy(cudf::get_default_stream()), gather_map.begin(), gather_map.end()); auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); cudf::test::fixed_width_column_wrapper source_column(data, data + source_size); diff --git a/cpp/tests/copying/scatter_list_tests.cpp b/cpp/tests/copying/scatter_list_tests.cpp index 179ab56fc40..d262cbccd61 100644 --- a/cpp/tests/copying/scatter_list_tests.cpp +++ b/cpp/tests/copying/scatter_list_tests.cpp @@ -67,7 +67,7 @@ TYPED_TEST(TypedScatterListsTest, SlicedInputLists) auto src_list_column = lists_column_wrapper{{0, 0, 0, 0}, {9, 9, 9, 9}, {8, 8, 8}, {7, 7, 7}}.release(); auto src_sliced = - cudf::detail::slice(src_list_column->view(), {1, 3}, 
cudf::default_stream_value).front(); + cudf::detail::slice(src_list_column->view(), {1, 3}, cudf::get_default_stream()).front(); auto target_list_column = lists_column_wrapper{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}} @@ -84,7 +84,7 @@ TYPED_TEST(TypedScatterListsTest, SlicedInputLists) {8, 8, 8}, {1, 1}, {9, 9, 9, 9}, {3, 3}, {4, 4}, {5, 5}, {6, 6}}); auto target_sliced = - cudf::detail::slice(target_list_column->view(), {1, 6}, cudf::default_stream_value); + cudf::detail::slice(target_list_column->view(), {1, 6}, cudf::get_default_stream()); auto ret_2 = cudf::scatter(cudf::table_view({src_sliced}), scatter_map, cudf::table_view({target_sliced})); diff --git a/cpp/tests/copying/shift_tests.cpp b/cpp/tests/copying/shift_tests.cpp index 3907afd10c0..288e1d3fec6 100644 --- a/cpp/tests/copying/shift_tests.cpp +++ b/cpp/tests/copying/shift_tests.cpp @@ -35,7 +35,7 @@ using TestTypes = cudf::test::Types; template > std::unique_ptr make_scalar( - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto s = new ScalarType(cudf::test::make_type_param_scalar(0), false, stream, mr); @@ -45,7 +45,7 @@ std::unique_ptr make_scalar( template > std::unique_ptr make_scalar( T value, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto s = new ScalarType(value, true, stream, mr); diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu index 17e67da6227..fd43690dcff 100644 --- a/cpp/tests/device_atomics/device_atomics_test.cu +++ b/cpp/tests/device_atomics/device_atomics_test.cu @@ -147,16 +147,16 @@ struct AtomicsTest : public cudf::test::BaseFixture { if (block_size == 0) { block_size = vec_size; } if (is_cas_test) 
{ - gpu_atomicCAS_test<<>>( + gpu_atomicCAS_test<<>>( dev_result.data(), dev_data.data(), vec_size); } else { - gpu_atomic_test<<>>( + gpu_atomic_test<<>>( dev_result.data(), dev_data.data(), vec_size); } auto host_result = cudf::detail::make_host_vector_sync(dev_result); - CUDF_CHECK_CUDA(cudf::default_stream_value.value()); + CUDF_CHECK_CUDA(cudf::get_default_stream().value()); if (!is_timestamp_sum()) { EXPECT_EQ(host_result[0], exact[0]) << "atomicAdd test failed"; @@ -298,12 +298,12 @@ struct AtomicsBitwiseOpTest : public cudf::test::BaseFixture { if (block_size == 0) { block_size = vec_size; } - gpu_atomic_bitwiseOp_test<<>>( + gpu_atomic_bitwiseOp_test<<>>( reinterpret_cast(dev_result.data()), reinterpret_cast(dev_data.data()), vec_size); auto host_result = cudf::detail::make_host_vector_sync(dev_result); - CUDF_CHECK_CUDA(cudf::default_stream_value.value()); + CUDF_CHECK_CUDA(cudf::get_default_stream().value()); // print_exact(exact, "exact"); // print_exact(host_result.data(), "result"); diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu index e34cf23eee4..e83d961cd9b 100644 --- a/cpp/tests/error/error_handling_test.cu +++ b/cpp/tests/error/error_handling_test.cu @@ -90,7 +90,7 @@ TEST(DeathTest, CudaFatalError) { testing::FLAGS_gtest_death_test_style = "threadsafe"; auto call_kernel = []() { - kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(); + kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(); try { CUDF_CUDA_TRY(cudaDeviceSynchronize()); } catch (const cudf::fatal_cuda_error& fe) { @@ -140,5 +140,12 @@ TEST(DebugAssert, cudf_assert_true) int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); + auto const cmd_opts = parse_cudf_test_opts(argc, argv); + auto const stream_mode = cmd_opts["stream_mode"].as(); + if (stream_mode == "custom") { + auto resource = rmm::mr::get_current_device_resource(); + auto adapter = make_stream_checking_resource_adaptor(resource); + 
rmm::mr::set_current_device_resource(&adapter); + } return RUN_ALL_TESTS(); } diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu index a4e0736e22f..394229b460e 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cu +++ b/cpp/tests/fixed_point/fixed_point_tests.cu @@ -85,7 +85,7 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) std::vector vec1(1000, decimal32{1, scale_type{-2}}); auto d_vec1 = cudf::detail::make_device_uvector_sync(vec1); - auto const sum = thrust::reduce(rmm::exec_policy(cudf::default_stream_value), + auto const sum = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()), std::cbegin(d_vec1), std::cend(d_vec1), decimal32{0, scale_type{-2}}); @@ -101,7 +101,7 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) std::vector vec2(1000); std::iota(std::begin(vec2), std::end(vec2), 1); - auto const res1 = thrust::reduce(rmm::exec_policy(cudf::default_stream_value), + auto const res1 = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()), std::cbegin(d_vec1), std::cend(d_vec1), decimal32{0, scale_type{-2}}); @@ -110,9 +110,9 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) EXPECT_EQ(static_cast(res1), res2); - rmm::device_uvector d_vec3(1000, cudf::default_stream_value); + rmm::device_uvector d_vec3(1000, cudf::get_default_stream()); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), std::cbegin(d_vec1), std::cend(d_vec1), std::begin(d_vec3), diff --git a/cpp/tests/groupby/lists_tests.cu b/cpp/tests/groupby/lists_tests.cu index 45c6b8fe2e6..e4118318792 100644 --- a/cpp/tests/groupby/lists_tests.cu +++ b/cpp/tests/groupby/lists_tests.cu @@ -114,7 +114,7 @@ inline void test_hash_based_sum_agg(column_view const& keys, // resulting table: `t [num_rows, 2 * num_rows - 1]` auto combined_table = cudf::concatenate(std::vector{expected_kv, result_kv}); auto preprocessed_t = 
cudf::experimental::row::hash::preprocessed_table::create( - combined_table->view(), cudf::default_stream_value); + combined_table->view(), cudf::get_default_stream()); cudf::experimental::row::equality::self_comparator comparator(preprocessed_t); auto const null_keys_are_equal = @@ -124,7 +124,7 @@ inline void test_hash_based_sum_agg(column_view const& keys, // For each row in expected table `t[0, num_rows)`, there must be a match // in the resulting table `t[num_rows, 2 * num_rows)` - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_rows), func)); diff --git a/cpp/tests/groupby/tdigest_tests.cu b/cpp/tests/groupby/tdigest_tests.cu index 736a1096fd1..f9c907767f5 100644 --- a/cpp/tests/groupby/tdigest_tests.cu +++ b/cpp/tests/groupby/tdigest_tests.cu @@ -74,7 +74,7 @@ struct tdigest_groupby_simple_op { // make a simple set of matching keys. auto keys = cudf::make_fixed_width_column( data_type{type_id::INT32}, values.size(), mask_state::UNALLOCATED); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), keys->mutable_view().template begin(), keys->mutable_view().template end(), 0); @@ -100,7 +100,7 @@ struct tdigest_groupby_simple_merge_op { // make a simple set of matching keys. auto merge_keys = cudf::make_fixed_width_column( data_type{type_id::INT32}, merge_values.size(), mask_state::UNALLOCATED); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), merge_keys->mutable_view().template begin(), merge_keys->mutable_view().template end(), 0); @@ -272,7 +272,7 @@ TEST_F(TDigestMergeTest, Grouped) data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED); // 3 groups. 0-250000 in group 0. 
250000-500000 in group 1 and 500000-750000 in group 1 auto key_iter = cudf::detail::make_counting_transform_iterator(0, key_groups{}); - thrust::copy(rmm::exec_policy(cudf::default_stream_value), + thrust::copy(rmm::exec_policy(cudf::get_default_stream()), key_iter, key_iter + keys->size(), keys->mutable_view().template begin()); diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu index 84e64027c5d..2895d3323b8 100644 --- a/cpp/tests/hash_map/map_test.cu +++ b/cpp/tests/hash_map/map_test.cu @@ -56,13 +56,13 @@ struct InsertTest : public cudf::test::BaseFixture { // prevent overflow of small types const size_t input_size = std::min(static_cast(size), std::numeric_limits::max()); - pairs.resize(input_size, cudf::default_stream_value); + pairs.resize(input_size, cudf::get_default_stream()); map = std::move(map_type::create(compute_hash_table_size(size))); - cudf::default_stream_value.synchronize(); + cudf::get_default_stream().synchronize(); } const cudf::size_type size{10000}; - rmm::device_uvector pairs{static_cast(size), cudf::default_stream_value}; + rmm::device_uvector pairs{static_cast(size), cudf::get_default_stream()}; std::unique_ptr> map; }; @@ -140,18 +140,18 @@ TYPED_TEST(InsertTest, UniqueKeysUniqueValues) { using map_type = typename TypeParam::map_type; using pair_type = typename TypeParam::pair_type; - thrust::tabulate(rmm::exec_policy(cudf::default_stream_value), + thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()), this->pairs.begin(), this->pairs.end(), unique_pair_generator{}); // All pairs should be new inserts - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), this->pairs.begin(), this->pairs.end(), insert_pair{*this->map})); // All pairs should be present in the map - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), 
this->pairs.begin(), this->pairs.end(), find_pair{*this->map})); @@ -161,23 +161,23 @@ TYPED_TEST(InsertTest, IdenticalKeysIdenticalValues) { using map_type = typename TypeParam::map_type; using pair_type = typename TypeParam::pair_type; - thrust::tabulate(rmm::exec_policy(cudf::default_stream_value), + thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()), this->pairs.begin(), this->pairs.end(), identical_pair_generator{}); // Insert a single pair - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), this->pairs.begin(), this->pairs.begin() + 1, insert_pair{*this->map})); // Identical inserts should all return false (no new insert) - EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), this->pairs.begin(), this->pairs.end(), insert_pair{*this->map})); // All pairs should be present in the map - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), this->pairs.begin(), this->pairs.end(), find_pair{*this->map})); @@ -187,30 +187,30 @@ TYPED_TEST(InsertTest, IdenticalKeysUniqueValues) { using map_type = typename TypeParam::map_type; using pair_type = typename TypeParam::pair_type; - thrust::tabulate(rmm::exec_policy(cudf::default_stream_value), + thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()), this->pairs.begin(), this->pairs.end(), identical_key_generator{}); // Insert a single pair - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), this->pairs.begin(), this->pairs.begin() + 1, insert_pair{*this->map})); // Identical key inserts should all return false (no new insert) - EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + 
EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), this->pairs.begin() + 1, this->pairs.end(), insert_pair{*this->map})); // Only first pair is present in map - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), this->pairs.begin(), this->pairs.begin() + 1, find_pair{*this->map})); - EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), this->pairs.begin() + 1, this->pairs.end(), find_pair{*this->map})); diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp index c51a7854e25..51dfc467e00 100644 --- a/cpp/tests/io/comp/decomp_test.cpp +++ b/cpp/tests/io/comp/decomp_test.cpp @@ -46,7 +46,7 @@ struct DecompressTest : public cudf::test::BaseFixture { const uint8_t* compressed, size_t compressed_size) { - auto stream = cudf::default_stream_value; + auto stream = cudf::get_default_stream(); rmm::device_buffer src{compressed, compressed_size, stream}; rmm::device_uvector dst{decompressed->size(), stream}; @@ -82,7 +82,7 @@ struct GzipDecompressTest : public DecompressTest { d_inf_out, d_inf_stat, cudf::io::gzip_header_included::YES, - cudf::default_stream_value); + cudf::get_default_stream()); } }; @@ -94,7 +94,7 @@ struct SnappyDecompressTest : public DecompressTest { device_span> d_inf_out, device_span d_inf_stat) { - cudf::io::gpu_unsnap(d_inf_in, d_inf_out, d_inf_stat, cudf::default_stream_value); + cudf::io::gpu_unsnap(d_inf_in, d_inf_out, d_inf_stat, cudf::get_default_stream()); } }; @@ -107,14 +107,14 @@ struct BrotliDecompressTest : public DecompressTest { device_span d_inf_stat) { rmm::device_buffer d_scratch{cudf::io::get_gpu_debrotli_scratch_size(1), - cudf::default_stream_value}; + cudf::get_default_stream()}; cudf::io::gpu_debrotli(d_inf_in, d_inf_out, d_inf_stat, d_scratch.data(), d_scratch.size(), - 
cudf::default_stream_value); + cudf::get_default_stream()); } }; diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp index 6f7e28a2ca3..89156c821c3 100644 --- a/cpp/tests/io/json_tree.cpp +++ b/cpp/tests/io/json_tree.cpp @@ -132,7 +132,7 @@ void print_tree(tree_meta_t2 const& cpu_tree) } void print_tree(tree_meta_t const& d_gpu_tree) { - auto const cpu_tree = to_cpu_tree(d_gpu_tree, rmm::cuda_stream_default); + auto const cpu_tree = to_cpu_tree(d_gpu_tree, cudf::get_default_stream()); print_tree(cpu_tree); } @@ -161,7 +161,7 @@ bool compare_vector(std::vector const& cpu_vec, rmm::device_uvector const& d_vec, std::string const& name) { - auto gpu_vec = cudf::detail::make_std_vector_async(d_vec, cudf::default_stream_value); + auto gpu_vec = cudf::detail::make_std_vector_async(d_vec, cudf::get_default_stream()); return compare_vector(cpu_vec, gpu_vec, name); } @@ -173,7 +173,7 @@ void compare_trees(tree_meta_t2 const& cpu_tree, tree_meta_t const& d_gpu_tree, EXPECT_EQ(cpu_num_nodes, d_gpu_tree.node_levels.size()); EXPECT_EQ(cpu_num_nodes, d_gpu_tree.node_range_begin.size()); EXPECT_EQ(cpu_num_nodes, d_gpu_tree.node_range_end.size()); - auto gpu_tree = to_cpu_tree(d_gpu_tree, cudf::default_stream_value); + auto gpu_tree = to_cpu_tree(d_gpu_tree, cudf::get_default_stream()); bool mismatch = false; #define COMPARE_MEMBER(member) \ @@ -535,7 +535,7 @@ struct JsonTest : public cudf::test::BaseFixture { TEST_F(JsonTest, TreeRepresentation) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); // Test input std::string const input = R"( [{)" @@ -632,7 +632,7 @@ TEST_F(JsonTest, TreeRepresentation) TEST_F(JsonTest, TreeRepresentation2) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); // Test input: value end with comma, space, close-brace ", }" std::string const input = // 0 1 2 3 4 5 6 7 8 9 @@ -707,7 +707,7 @@ TEST_F(JsonTest, TreeRepresentation2) 
TEST_F(JsonTest, TreeRepresentation3) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); // Test input: Json lines with same TreeRepresentation2 input std::string const input = R"( {} @@ -790,7 +790,7 @@ INSTANTIATE_TEST_SUITE_P(JsonLines, TEST_P(JsonTreeTraversalTest, CPUvsGPUTraversal) { auto [json_lines, input] = GetParam(); - auto stream = cudf::default_stream_value; + auto stream = cudf::get_default_stream(); cudf::io::json_reader_options options{}; options.enable_lines(json_lines); diff --git a/cpp/tests/io/json_type_cast_test.cu b/cpp/tests/io/json_type_cast_test.cu index 43702f1f7e7..2170ce4a3e2 100644 --- a/cpp/tests/io/json_type_cast_test.cu +++ b/cpp/tests/io/json_type_cast_test.cu @@ -51,7 +51,7 @@ auto default_json_options() { auto parse_opts = cudf::io::parse_options{',', '\n', '\"', '.'}; - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); parse_opts.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); parse_opts.trie_false = cudf::detail::create_serialized_trie({"false"}, stream); parse_opts.trie_na = cudf::detail::create_serialized_trie({"", "null"}, stream); @@ -60,7 +60,7 @@ auto default_json_options() TEST_F(JSONTypeCastTest, String) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); auto const type = cudf::data_type{cudf::type_id::STRING}; @@ -70,7 +70,7 @@ TEST_F(JSONTypeCastTest, String) auto d_column = cudf::column_device_view::create(input); rmm::device_uvector> svs(d_column->size(), stream); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), d_column->pair_begin(), d_column->pair_end(), svs.begin(), @@ -93,14 +93,14 @@ TEST_F(JSONTypeCastTest, String) TEST_F(JSONTypeCastTest, Int) { - auto const stream = cudf::default_stream_value; + auto const 
stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); auto const type = cudf::data_type{cudf::type_id::INT64}; cudf::test::strings_column_wrapper data({"1", "null", "3", "true", "5", "false"}); auto d_column = cudf::column_device_view::create(data); rmm::device_uvector> svs(d_column->size(), stream); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), d_column->pair_begin(), d_column->pair_end(), svs.begin(), @@ -120,7 +120,7 @@ TEST_F(JSONTypeCastTest, Int) TEST_F(JSONTypeCastTest, StringEscapes) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); auto const type = cudf::data_type{cudf::type_id::STRING}; @@ -137,7 +137,7 @@ TEST_F(JSONTypeCastTest, StringEscapes) }); auto d_column = cudf::column_device_view::create(data); rmm::device_uvector> svs(d_column->size(), stream); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), d_column->pair_begin(), d_column->pair_end(), svs.begin(), diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp index 65926be495f..01a1f0647cc 100644 --- a/cpp/tests/io/nested_json_test.cpp +++ b/cpp/tests/io/nested_json_test.cpp @@ -139,7 +139,7 @@ TEST_F(JsonTest, StackContext) using StackSymbolT = char; // Prepare cuda stream for data transfers & kernels - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); // Test input std::string const input = R"( [{)" @@ -200,7 +200,7 @@ TEST_F(JsonTest, StackContextUtf8) using StackSymbolT = char; // Prepare cuda stream for data transfers & kernels - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); // Test input std::string const input = R"([{"a":{"year":1882,"author": "Bharathi"}, {"a":"filip 
ʒakotɛ"}}])"; @@ -251,7 +251,7 @@ TEST_F(JsonTest, TokenStream) R"("price": 8.95)" R"(}] )"; - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); // Default parsing options cudf::io::json_reader_options default_options{}; @@ -387,7 +387,7 @@ TEST_F(JsonTest, TokenStream2) R"([ {}, { "a": { "y" : 6, "z": [] }}, { "a" : { "x" : 8, "y": 9}, "b" : {"x": 10 , "z": 11)" "\n}}]"; - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); // Default parsing options cudf::io::json_reader_options default_options{}; @@ -462,7 +462,7 @@ TEST_P(JsonParserTest, ExtractColumn) : cuio_json::detail::host_parse_nested_json; // Prepare cuda stream for data transfers & kernels - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); // Default parsing options @@ -489,7 +489,7 @@ TEST_P(JsonParserTest, ExtractColumn) TEST_P(JsonParserTest, UTF_JSON) { // Prepare cuda stream for data transfers & kernels - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); bool const is_full_gpu = GetParam(); auto json_parser = is_full_gpu ? 
cuio_json::detail::device_parse_nested_json @@ -539,7 +539,7 @@ TEST_P(JsonParserTest, ExtractColumnWithQuotes) : cuio_json::detail::host_parse_nested_json; // Prepare cuda stream for data transfers & kernels - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); // Default parsing options @@ -572,7 +572,7 @@ TEST_P(JsonParserTest, ExpectFailMixStructAndList) : cuio_json::detail::host_parse_nested_json; // Prepare cuda stream for data transfers & kernels - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); // Default parsing options @@ -610,7 +610,7 @@ TEST_P(JsonParserTest, EmptyString) : cuio_json::detail::host_parse_nested_json; // Prepare cuda stream for data transfers & kernels - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); // Default parsing options @@ -624,3 +624,5 @@ TEST_P(JsonParserTest, EmptyString) auto const expected_col_count = 0; EXPECT_EQ(cudf_table.tbl->num_columns(), expected_col_count); } + +CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/text/data_chunk_source_test.cpp b/cpp/tests/io/text/data_chunk_source_test.cpp index 2111d66a066..a3314c440a4 100644 --- a/cpp/tests/io/text/data_chunk_source_test.cpp +++ b/cpp/tests/io/text/data_chunk_source_test.cpp @@ -45,7 +45,7 @@ void test_source(const std::string& content, const cudf::io::text::data_chunk_so { // full contents auto reader = source.create_reader(); - auto const chunk = reader->get_next_chunk(content.size(), rmm::cuda_stream_default); + auto const chunk = reader->get_next_chunk(content.size(), cudf::get_default_stream()); ASSERT_EQ(chunk->size(), content.size()); ASSERT_EQ(chunk_to_host(*chunk), content); } @@ -53,15 +53,15 @@ void test_source(const std::string& content, const 
cudf::io::text::data_chunk_so // skipping contents auto reader = source.create_reader(); reader->skip_bytes(4); - auto const chunk = reader->get_next_chunk(content.size(), rmm::cuda_stream_default); + auto const chunk = reader->get_next_chunk(content.size(), cudf::get_default_stream()); ASSERT_EQ(chunk->size(), content.size() - 4); ASSERT_EQ(chunk_to_host(*chunk), content.substr(4)); } { // reading multiple chunks, starting with a small one auto reader = source.create_reader(); - auto const chunk1 = reader->get_next_chunk(5, rmm::cuda_stream_default); - auto const chunk2 = reader->get_next_chunk(content.size() - 5, rmm::cuda_stream_default); + auto const chunk1 = reader->get_next_chunk(5, cudf::get_default_stream()); + auto const chunk2 = reader->get_next_chunk(content.size() - 5, cudf::get_default_stream()); ASSERT_EQ(chunk1->size(), 5); ASSERT_EQ(chunk2->size(), content.size() - 5); ASSERT_EQ(chunk_to_host(*chunk1), content.substr(0, 5)); @@ -70,9 +70,9 @@ void test_source(const std::string& content, const cudf::io::text::data_chunk_so { // reading multiple chunks auto reader = source.create_reader(); - auto const chunk1 = reader->get_next_chunk(content.size() / 2, rmm::cuda_stream_default); + auto const chunk1 = reader->get_next_chunk(content.size() / 2, cudf::get_default_stream()); auto const chunk2 = - reader->get_next_chunk(content.size() - content.size() / 2, rmm::cuda_stream_default); + reader->get_next_chunk(content.size() - content.size() / 2, cudf::get_default_stream()); ASSERT_EQ(chunk1->size(), content.size() / 2); ASSERT_EQ(chunk2->size(), content.size() - content.size() / 2); ASSERT_EQ(chunk_to_host(*chunk1), content.substr(0, content.size() / 2)); @@ -81,17 +81,17 @@ void test_source(const std::string& content, const cudf::io::text::data_chunk_so { // reading too many bytes auto reader = source.create_reader(); - auto const chunk = reader->get_next_chunk(content.size() + 10, rmm::cuda_stream_default); + auto const chunk = 
reader->get_next_chunk(content.size() + 10, cudf::get_default_stream()); ASSERT_EQ(chunk->size(), content.size()); ASSERT_EQ(chunk_to_host(*chunk), content); - auto next_chunk = reader->get_next_chunk(1, rmm::cuda_stream_default); + auto next_chunk = reader->get_next_chunk(1, cudf::get_default_stream()); ASSERT_EQ(next_chunk->size(), 0); } { // skipping past the end auto reader = source.create_reader(); reader->skip_bytes(content.size() + 10); - auto const next_chunk = reader->get_next_chunk(1, rmm::cuda_stream_default); + auto const next_chunk = reader->get_next_chunk(1, cudf::get_default_stream()); ASSERT_EQ(next_chunk->size(), 0); } } diff --git a/cpp/tests/io/type_inference_test.cu b/cpp/tests/io/type_inference_test.cu index 4d01ef95b85..cbaa06589cf 100644 --- a/cpp/tests/io/type_inference_test.cu +++ b/cpp/tests/io/type_inference_test.cu @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -39,7 +40,7 @@ struct TypeInference : public cudf::test::BaseFixture { TEST_F(TypeInference, Basic) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto options = parse_options{',', '\n', '\"'}; options.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); @@ -52,11 +53,13 @@ TEST_F(TypeInference, Basic) auto const string_offset = std::vector{1, 4, 7}; auto const string_length = std::vector{2, 2, 1}; - rmm::device_vector d_string_offset{string_offset}; - rmm::device_vector d_string_length{string_length}; + auto const d_string_offset = + cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); + auto const d_string_length = + cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); auto d_col_strings = - thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin())); + thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); auto res_type = infer_data_type(options.json_view(), 
@@ -70,7 +73,7 @@ TEST_F(TypeInference, Basic) TEST_F(TypeInference, Null) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto options = parse_options{',', '\n', '\"'}; options.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); @@ -83,11 +86,13 @@ TEST_F(TypeInference, Null) auto const string_offset = std::vector{1, 1, 4}; auto const string_length = std::vector{0, 2, 1}; - rmm::device_vector d_string_offset{string_offset}; - rmm::device_vector d_string_length{string_length}; + auto const d_string_offset = + cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); + auto const d_string_length = + cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); auto d_col_strings = - thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin())); + thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); auto res_type = infer_data_type(options.json_view(), @@ -102,7 +107,7 @@ TEST_F(TypeInference, Null) TEST_F(TypeInference, AllNull) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto options = parse_options{',', '\n', '\"'}; options.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); @@ -115,11 +120,13 @@ TEST_F(TypeInference, AllNull) auto const string_offset = std::vector{1, 1, 1}; auto const string_length = std::vector{0, 0, 4}; - rmm::device_vector d_string_offset{string_offset}; - rmm::device_vector d_string_length{string_length}; + auto const d_string_offset = + cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); + auto const d_string_length = + cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); auto d_col_strings = - thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin())); + 
thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); auto res_type = infer_data_type(options.json_view(), @@ -133,7 +140,7 @@ TEST_F(TypeInference, AllNull) TEST_F(TypeInference, String) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto options = parse_options{',', '\n', '\"'}; options.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); @@ -146,11 +153,13 @@ TEST_F(TypeInference, String) auto const string_offset = std::vector{1, 8, 12}; auto const string_length = std::vector{6, 3, 4}; - rmm::device_vector d_string_offset{string_offset}; - rmm::device_vector d_string_length{string_length}; + auto const d_string_offset = + cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); + auto const d_string_length = + cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); auto d_col_strings = - thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin())); + thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); auto res_type = infer_data_type(options.json_view(), @@ -164,7 +173,7 @@ TEST_F(TypeInference, String) TEST_F(TypeInference, Bool) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto options = parse_options{',', '\n', '\"'}; options.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); @@ -177,11 +186,13 @@ TEST_F(TypeInference, Bool) auto const string_offset = std::vector{1, 6, 12}; auto const string_length = std::vector{4, 5, 5}; - rmm::device_vector d_string_offset{string_offset}; - rmm::device_vector d_string_length{string_length}; + auto const d_string_offset = + cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); + auto const d_string_length = + cudf::detail::make_device_uvector_async(string_length, 
cudf::get_default_stream()); auto d_col_strings = - thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin())); + thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); auto res_type = infer_data_type(options.json_view(), @@ -195,7 +206,7 @@ TEST_F(TypeInference, Bool) TEST_F(TypeInference, Timestamp) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto options = parse_options{',', '\n', '\"'}; options.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); @@ -208,11 +219,13 @@ TEST_F(TypeInference, Timestamp) auto const string_offset = std::vector{1, 10}; auto const string_length = std::vector{8, 9}; - rmm::device_vector d_string_offset{string_offset}; - rmm::device_vector d_string_length{string_length}; + auto const d_string_offset = + cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); + auto const d_string_length = + cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); auto d_col_strings = - thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin())); + thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); auto res_type = infer_data_type(options.json_view(), @@ -227,7 +240,7 @@ TEST_F(TypeInference, Timestamp) TEST_F(TypeInference, InvalidInput) { - auto const stream = cudf::default_stream_value; + auto const stream = cudf::get_default_stream(); auto options = parse_options{',', '\n', '\"'}; options.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); @@ -240,11 +253,13 @@ TEST_F(TypeInference, InvalidInput) auto const string_offset = std::vector{1, 3, 5, 7, 9}; auto const string_length = std::vector{1, 1, 1, 1, 1}; - rmm::device_vector d_string_offset{string_offset}; - rmm::device_vector d_string_length{string_length}; + auto const d_string_offset = + 
cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream()); + auto const d_string_length = + cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream()); auto d_col_strings = - thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin())); + thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); auto res_type = infer_data_type(options.json_view(), @@ -256,3 +271,5 @@ TEST_F(TypeInference, InvalidInput) // Invalid input is inferred as string for now EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::STRING}); } + +CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/iterator/iterator_tests.cuh b/cpp/tests/iterator/iterator_tests.cuh index 26902b43662..7eb2c3d70bb 100644 --- a/cpp/tests/iterator/iterator_tests.cuh +++ b/cpp/tests/iterator/iterator_tests.cuh @@ -50,7 +50,7 @@ struct IteratorTest : public cudf::test::BaseFixture { void iterator_test_cub(T_output expected, InputIterator d_in, int num_items) { T_output init = cudf::test::make_type_param_scalar(0); - rmm::device_uvector dev_result(1, cudf::default_stream_value); + rmm::device_uvector dev_result(1, cudf::get_default_stream()); // Get temporary storage size size_t temp_storage_bytes = 0; @@ -61,10 +61,10 @@ struct IteratorTest : public cudf::test::BaseFixture { num_items, thrust::minimum{}, init, - cudf::default_stream_value.value()); + cudf::get_default_stream().value()); // Allocate temporary storage - rmm::device_buffer d_temp_storage(temp_storage_bytes, cudf::default_stream_value); + rmm::device_buffer d_temp_storage(temp_storage_bytes, cudf::get_default_stream()); // Run reduction cub::DeviceReduce::Reduce(d_temp_storage.data(), @@ -74,7 +74,7 @@ struct IteratorTest : public cudf::test::BaseFixture { num_items, thrust::minimum{}, init, - cudf::default_stream_value.value()); + cudf::get_default_stream().value()); evaluate(expected, dev_result, "cub test"); } @@ -91,14 +91,14 @@ struct 
IteratorTest : public cudf::test::BaseFixture { // using a temporary vector and calling transform and all_of separately is // equivalent to thrust::equal but compiles ~3x faster - auto dev_results = rmm::device_uvector(num_items, cudf::default_stream_value); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + auto dev_results = rmm::device_uvector(num_items, cudf::get_default_stream()); + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), d_in, d_in_last, dev_expected.begin(), dev_results.begin(), thrust::equal_to{}); - auto result = thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + auto result = thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), dev_results.begin(), dev_results.end(), thrust::identity{}); diff --git a/cpp/tests/iterator/optional_iterator_test_numeric.cu b/cpp/tests/iterator/optional_iterator_test_numeric.cu index c5b7393550a..586c9472185 100644 --- a/cpp/tests/iterator/optional_iterator_test_numeric.cu +++ b/cpp/tests/iterator/optional_iterator_test_numeric.cu @@ -111,14 +111,14 @@ TYPED_TEST(NumericOptionalIteratorTest, mean_var_output) // this can be computed with a single reduce and without a temporary output vector // but the approach increases the compile time by ~2x - auto results = rmm::device_uvector(d_col->size(), cudf::default_stream_value); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + auto results = rmm::device_uvector(d_col->size(), cudf::get_default_stream()); + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), it_dev_squared, it_dev_squared + d_col->size(), results.begin(), optional_to_meanvar{}); auto result = thrust::reduce( - rmm::exec_policy(cudf::default_stream_value), results.begin(), results.end(), T_output{}); + rmm::exec_policy(cudf::get_default_stream()), results.begin(), results.end(), T_output{}); if (not std::is_floating_point()) { EXPECT_EQ(expected_value, result) << "optional iterator reduction sum"; diff --git 
a/cpp/tests/iterator/pair_iterator_test_numeric.cu b/cpp/tests/iterator/pair_iterator_test_numeric.cu index f570df44286..99ec3118b4b 100644 --- a/cpp/tests/iterator/pair_iterator_test_numeric.cu +++ b/cpp/tests/iterator/pair_iterator_test_numeric.cu @@ -113,7 +113,7 @@ TYPED_TEST(NumericPairIteratorTest, mean_var_output) // GPU test auto it_dev = d_col->pair_begin(); auto it_dev_squared = thrust::make_transform_iterator(it_dev, transformer); - auto result = thrust::reduce(rmm::exec_policy(cudf::default_stream_value), + auto result = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()), it_dev_squared, it_dev_squared + d_col->size(), thrust::make_pair(T_output{}, true), diff --git a/cpp/tests/join/conditional_join_tests.cu b/cpp/tests/join/conditional_join_tests.cu index f8dfc972191..920c497f850 100644 --- a/cpp/tests/join/conditional_join_tests.cu +++ b/cpp/tests/join/conditional_join_tests.cu @@ -230,8 +230,8 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest { // Note: Not trying to be terribly efficient here since these tests are // small, otherwise a batch copy to host before constructing the tuples // would be important. 
- result_pairs.push_back({result.first->element(i, cudf::default_stream_value), - result.second->element(i, cudf::default_stream_value)}); + result_pairs.push_back({result.first->element(i, cudf::get_default_stream()), + result.second->element(i, cudf::get_default_stream())}); } std::sort(result_pairs.begin(), result_pairs.end()); std::sort(expected_outputs.begin(), expected_outputs.end()); @@ -276,11 +276,11 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest { void _compare_to_hash_join(PairJoinReturn const& result, PairJoinReturn const& reference) { auto result_pairs = - rmm::device_uvector(result.first->size(), cudf::default_stream_value); + rmm::device_uvector(result.first->size(), cudf::get_default_stream()); auto reference_pairs = - rmm::device_uvector(reference.first->size(), cudf::default_stream_value); + rmm::device_uvector(reference.first->size(), cudf::get_default_stream()); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), result.first->begin(), result.first->end(), result.second->begin(), @@ -288,7 +288,7 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest { [] __device__(cudf::size_type first, cudf::size_type second) { return index_pair{first, second}; }); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), reference.first->begin(), reference.first->end(), reference.second->begin(), @@ -298,11 +298,11 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest { }); thrust::sort( - rmm::exec_policy(cudf::default_stream_value), result_pairs.begin(), result_pairs.end()); + rmm::exec_policy(cudf::get_default_stream()), result_pairs.begin(), result_pairs.end()); thrust::sort( - rmm::exec_policy(cudf::default_stream_value), reference_pairs.begin(), reference_pairs.end()); + rmm::exec_policy(cudf::get_default_stream()), reference_pairs.begin(), 
reference_pairs.end()); - EXPECT_TRUE(thrust::equal(rmm::exec_policy(cudf::default_stream_value), + EXPECT_TRUE(thrust::equal(rmm::exec_policy(cudf::get_default_stream()), reference_pairs.begin(), reference_pairs.end(), result_pairs.begin())); @@ -713,7 +713,7 @@ struct ConditionalJoinSingleReturnTest : public ConditionalJoinTest { // Note: Not trying to be terribly efficient here since these tests are // small, otherwise a batch copy to host before constructing the tuples // would be important. - resulting_indices.push_back(result->element(i, cudf::default_stream_value)); + resulting_indices.push_back(result->element(i, cudf::get_default_stream())); } std::sort(resulting_indices.begin(), resulting_indices.end()); std::sort(expected_outputs.begin(), expected_outputs.end()); @@ -724,10 +724,10 @@ struct ConditionalJoinSingleReturnTest : public ConditionalJoinTest { void _compare_to_hash_join(std::unique_ptr> const& result, std::unique_ptr> const& reference) { - thrust::sort(rmm::exec_policy(cudf::default_stream_value), result->begin(), result->end()); + thrust::sort(rmm::exec_policy(cudf::get_default_stream()), result->begin(), result->end()); thrust::sort( - rmm::exec_policy(cudf::default_stream_value), reference->begin(), reference->end()); - EXPECT_TRUE(thrust::equal(rmm::exec_policy(cudf::default_stream_value), + rmm::exec_policy(cudf::get_default_stream()), reference->begin(), reference->end()); + EXPECT_TRUE(thrust::equal(rmm::exec_policy(cudf::get_default_stream()), result->begin(), result->end(), reference->begin())); diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 44e1d586389..fb2eb77512c 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -1499,9 +1499,9 @@ TEST_F(JoinTest, HashJoinLargeOutputSize) { // self-join a table of zeroes to generate an output row count that would overflow int32_t std::size_t col_size = 65567; - rmm::device_buffer zeroes(col_size * sizeof(int32_t), 
cudf::default_stream_value); + rmm::device_buffer zeroes(col_size * sizeof(int32_t), cudf::get_default_stream()); CUDF_CUDA_TRY( - cudaMemsetAsync(zeroes.data(), 0, zeroes.size(), cudf::default_stream_value.value())); + cudaMemsetAsync(zeroes.data(), 0, zeroes.size(), cudf::get_default_stream().value())); cudf::column_view col_zeros(cudf::data_type{cudf::type_id::INT32}, col_size, zeroes.data()); cudf::table_view tview{{col_zeros}}; cudf::hash_join hash_join(tview, cudf::null_equality::UNEQUAL); diff --git a/cpp/tests/join/mixed_join_tests.cu b/cpp/tests/join/mixed_join_tests.cu index dbff5a1d8fc..d252ded6627 100644 --- a/cpp/tests/join/mixed_join_tests.cu +++ b/cpp/tests/join/mixed_join_tests.cu @@ -228,8 +228,8 @@ struct MixedJoinPairReturnTest : public MixedJoinTest { // Note: Not trying to be terribly efficient here since these tests are // small, otherwise a batch copy to host before constructing the tuples // would be important. - result_pairs.push_back({result.first->element(i, cudf::default_stream_value), - result.second->element(i, cudf::default_stream_value)}); + result_pairs.push_back({result.first->element(i, cudf::get_default_stream()), + result.second->element(i, cudf::get_default_stream())}); } std::sort(result_pairs.begin(), result_pairs.end()); std::sort(expected_outputs.begin(), expected_outputs.end()); @@ -586,8 +586,8 @@ struct MixedFullJoinTest : public MixedJoinPairReturnTest { left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); std::vector> result_pairs; for (size_t i = 0; i < result.first->size(); ++i) { - result_pairs.push_back({result.first->element(i, cudf::default_stream_value), - result.second->element(i, cudf::default_stream_value)}); + result_pairs.push_back({result.first->element(i, cudf::get_default_stream()), + result.second->element(i, cudf::get_default_stream())}); } std::sort(result_pairs.begin(), result_pairs.end()); std::sort(expected_outputs.begin(), expected_outputs.end()); @@ 
-666,7 +666,7 @@ struct MixedJoinSingleReturnTest : public MixedJoinTest { // Note: Not trying to be terribly efficient here since these tests are // small, otherwise a batch copy to host before constructing the tuples // would be important. - resulting_indices.push_back(result->element(i, cudf::default_stream_value)); + resulting_indices.push_back(result->element(i, cudf::get_default_stream())); } std::sort(resulting_indices.begin(), resulting_indices.end()); std::sort(expected_outputs.begin(), expected_outputs.end()); diff --git a/cpp/tests/quantiles/percentile_approx_test.cu b/cpp/tests/quantiles/percentile_approx_test.cu index 0ca63526c51..82151caea53 100644 --- a/cpp/tests/quantiles/percentile_approx_test.cu +++ b/cpp/tests/quantiles/percentile_approx_test.cu @@ -234,7 +234,7 @@ void simple_test(data_type input_type, std::vector> params) // all in the same group auto keys = cudf::make_fixed_width_column( data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), keys->mutable_view().template begin(), keys->mutable_view().template end(), 0); @@ -257,7 +257,7 @@ void grouped_test(data_type input_type, std::vector> params) auto keys = cudf::make_fixed_width_column( data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED); auto i = thrust::make_counting_iterator(0); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), i, i + values->size(), keys->mutable_view().template begin(), @@ -282,7 +282,7 @@ void simple_with_nulls_test(data_type input_type, std::vectorsize(), mask_state::UNALLOCATED); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), keys->mutable_view().template begin(), keys->mutable_view().template end(), 0); @@ -304,7 +304,7 @@ void 
grouped_with_nulls_test(data_type input_type, std::vectorsize(), mask_state::UNALLOCATED); auto i = thrust::make_counting_iterator(0); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), i, i + values->size(), keys->mutable_view().template begin(), diff --git a/cpp/tests/quantiles/tdigest_utilities.cu b/cpp/tests/quantiles/tdigest_utilities.cu index 3cf2f2eb4ef..68147dc29eb 100644 --- a/cpp/tests/quantiles/tdigest_utilities.cu +++ b/cpp/tests/quantiles/tdigest_utilities.cu @@ -65,15 +65,15 @@ void tdigest_sample_compare(cudf::tdigest::tdigest_column_view const& tdv, } auto d_expected_src = - cudf::detail::make_device_uvector_async(h_expected_src, cudf::default_stream_value); + cudf::detail::make_device_uvector_async(h_expected_src, cudf::get_default_stream()); auto d_expected_mean = - cudf::detail::make_device_uvector_async(h_expected_mean, cudf::default_stream_value); + cudf::detail::make_device_uvector_async(h_expected_mean, cudf::get_default_stream()); auto d_expected_weight = - cudf::detail::make_device_uvector_async(h_expected_weight, cudf::default_stream_value); + cudf::detail::make_device_uvector_async(h_expected_weight, cudf::get_default_stream()); auto iter = thrust::make_counting_iterator(0); thrust::for_each( - rmm::exec_policy(cudf::default_stream_value), + rmm::exec_policy(cudf::get_default_stream()), iter, iter + h_expected.size(), [expected_src_in = d_expected_src.data(), @@ -122,13 +122,13 @@ std::unique_ptr make_expected_tdigest_column(std::vectormutable_view().begin(), min_col->mutable_view().end(), tdigest.min); auto max_col = cudf::make_fixed_width_column(data_type{type_id::FLOAT64}, 1, mask_state::UNALLOCATED); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), max_col->mutable_view().begin(), max_col->mutable_view().end(), tdigest.max); diff --git 
a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index a8547ea982d..c0c4f580393 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -52,7 +53,8 @@ TYPED_TEST(SegmentedReductionTest, SumExcludeNulls) auto const input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto const expect = fixed_width_column_wrapper{{6, 4, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -98,7 +100,8 @@ TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls) auto const input = fixed_width_column_wrapper{{1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto const expect = fixed_width_column_wrapper{{15, 15, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -144,7 +147,8 @@ TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls) auto const input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto const expect = fixed_width_column_wrapper{{3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -190,7 +194,8 @@ TYPED_TEST(SegmentedReductionTest, MinExcludeNulls) auto const input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, 
XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto const expect = fixed_width_column_wrapper{{1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -236,9 +241,10 @@ TYPED_TEST(SegmentedReductionTest, AnyExcludeNulls) auto const input = fixed_width_column_wrapper{ {0, 0, 0, 0, XXX, 0, 0, 1, 0, 1, XXX, 0, 0, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; - auto const d_offsets = thrust::device_vector(offsets); - auto const expect = fixed_width_column_wrapper{ + auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const expect = fixed_width_column_wrapper{ {false, false, true, true, bool{XXX}, false, true, bool{XXX}, bool{XXX}}, {true, true, true, true, false, true, true, false, false}}; @@ -285,9 +291,10 @@ TYPED_TEST(SegmentedReductionTest, AllExcludeNulls) auto const input = fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX, 1, 0, 3, 1, XXX, 0, 0}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}}; - auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; - auto const d_offsets = thrust::device_vector(offsets); - auto const expect = fixed_width_column_wrapper{ + auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const expect = fixed_width_column_wrapper{ {true, true, bool{XXX}, true, bool{XXX}, bool{XXX}, false, false, false}, {true, true, false, true, false, false, true, true, true}}; @@ -335,7 +342,8 @@ TYPED_TEST(SegmentedReductionTest, SumIncludeNulls) auto const input = 
fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto const expect = fixed_width_column_wrapper{{6, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -384,7 +392,8 @@ TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls) auto const input = fixed_width_column_wrapper{{1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto const expect = fixed_width_column_wrapper{{15, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -433,7 +442,8 @@ TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls) auto const input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto const expect = fixed_width_column_wrapper{{3, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -482,7 +492,8 @@ TYPED_TEST(SegmentedReductionTest, MinIncludeNulls) auto const input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto const expect = fixed_width_column_wrapper{{1, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -531,9 +542,10 @@ TYPED_TEST(SegmentedReductionTest, 
AnyIncludeNulls) auto const input = fixed_width_column_wrapper{ {0, 0, 0, 0, XXX, 0, 0, 1, 0, 1, XXX, 0, 0, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; - auto const d_offsets = thrust::device_vector(offsets); - auto const expect = fixed_width_column_wrapper{ + auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const expect = fixed_width_column_wrapper{ {false, bool{XXX}, true, bool{XXX}, bool{XXX}, false, true, bool{XXX}, bool{XXX}}, {true, false, true, false, false, true, true, false, false}}; @@ -592,9 +604,10 @@ TYPED_TEST(SegmentedReductionTest, AllIncludeNulls) auto const input = fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX, 1, 0, 3, 1, XXX, 0, 0}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}}; - auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; - auto const d_offsets = thrust::device_vector(offsets); - auto const expect = fixed_width_column_wrapper{ + auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const expect = fixed_width_column_wrapper{ {true, bool{XXX}, bool{XXX}, true, bool{XXX}, bool{XXX}, false, bool{XXX}, false}, {true, false, false, true, false, false, true, false, true}}; @@ -655,9 +668,10 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction) auto const input = fixed_width_column_wrapper{ {1, 2, 3, 4, 5, 6, 7}, {true, true, true, true, true, true, true}}; - auto const offsets = std::vector{1, 3, 4}; - auto const d_offsets = thrust::device_vector(offsets); - auto const expect = fixed_width_column_wrapper{{5, 4}, {true, true}}; + auto const offsets = std::vector{1, 3, 4}; + auto const d_offsets = + 
cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const expect = fixed_width_column_wrapper{{5, 4}, {true, true}}; auto res = segmented_reduce(input, d_offsets, @@ -702,9 +716,10 @@ TEST_F(SegmentedReductionTestUntyped, NonNullableInput) // outputs: {1, 5, 4} // output nullmask: {1, 1, 1} - auto const input = fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7}; - auto const offsets = std::vector{0, 1, 1, 3, 7}; - auto const d_offsets = thrust::device_vector(offsets); + auto const input = fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7}; + auto const offsets = std::vector{0, 1, 1, 3, 7}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto const expect = fixed_width_column_wrapper{{1, XXX, 5, 22}, {true, false, true, true}}; @@ -745,10 +760,11 @@ TEST_F(SegmentedReductionTestUntyped, NonNullableInput) TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) { - auto const input = fixed_width_column_wrapper{}; - auto const offsets = std::vector{0}; - auto const d_offsets = thrust::device_vector(offsets); - auto const expect = fixed_width_column_wrapper{}; + auto const input = fixed_width_column_wrapper{}; + auto const offsets = std::vector{0}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const expect = fixed_width_column_wrapper{}; auto res = segmented_reduce(input, d_offsets, @@ -780,9 +796,10 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets) { - auto const input = fixed_width_column_wrapper{}; - auto const offsets = std::vector{0, 0, 0, 0, 0, 0}; - auto const d_offsets = thrust::device_vector(offsets); + auto const input = fixed_width_column_wrapper{}; + auto const offsets = std::vector{0, 0, 0, 0, 0, 0}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); auto const expect = 
fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}}; @@ -840,9 +857,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxIncludeNulls) {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); - auto const expect = fixed_point_column_wrapper( + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto out_type = column_view(input).type(); + auto const expect = fixed_point_column_wrapper( {3, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}, numeric::scale_type{scale}); auto res = segmented_reduce(input, @@ -872,9 +890,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxExcludeNulls) {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); - auto const expect = fixed_point_column_wrapper( + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto out_type = column_view(input).type(); + auto const expect = fixed_point_column_wrapper( {3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); auto res = segmented_reduce(input, @@ -904,9 +923,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinIncludeNulls) {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); - auto const expect = fixed_point_column_wrapper( + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto out_type = column_view(input).type(); + auto const expect = fixed_point_column_wrapper( {1, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 
0}, numeric::scale_type{scale}); auto res = segmented_reduce(input, @@ -936,9 +956,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinExcludeNulls) {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); - auto const expect = fixed_point_column_wrapper( + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto out_type = column_view(input).type(); + auto const expect = fixed_point_column_wrapper( {1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); auto res = segmented_reduce(input, @@ -965,9 +986,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxNonNullableInput) for (auto scale : {-2, 0, 5}) { auto const input = fixed_point_column_wrapper({1, 2, 3, 1}, numeric::scale_type{scale}); - auto const offsets = std::vector{0, 3, 4, 4}; - auto const d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); + auto const offsets = std::vector{0, 3, 4, 4}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto out_type = column_view(input).type(); auto const expect = fixed_point_column_wrapper({3, 1, XXX}, {1, 1, 0}, numeric::scale_type{scale}); @@ -1002,9 +1024,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinNonNullableInput) for (auto scale : {-2, 0, 5}) { auto const input = fixed_point_column_wrapper({1, 2, 3, 1}, numeric::scale_type{scale}); - auto const offsets = std::vector{0, 3, 4, 4}; - auto const d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); + auto const offsets = std::vector{0, 3, 4, 4}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto out_type = column_view(input).type(); auto const expect = fixed_point_column_wrapper({1, 1, 
XXX}, {1, 1, 0}, numeric::scale_type{scale}); @@ -1148,10 +1171,11 @@ TEST_F(SegmentedReductionStringTest, MinExcludeNulls) TEST_F(SegmentedReductionStringTest, EmptyInputWithOffsets) { - auto const input = strings_column_wrapper{}; - auto const offsets = std::vector{0, 0, 0, 0}; - auto const d_offsets = thrust::device_vector(offsets); - auto const expect = strings_column_wrapper({XXX, XXX, XXX}, {0, 0, 0}); + auto const input = strings_column_wrapper{}; + auto const offsets = std::vector{0, 0, 0, 0}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const expect = strings_column_wrapper({XXX, XXX, XXX}, {0, 0, 0}); auto result = segmented_reduce(input, d_offsets, diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp index 9624ab52865..ef4a9dea48c 100644 --- a/cpp/tests/replace/replace_nulls_tests.cpp +++ b/cpp/tests/replace/replace_nulls_tests.cpp @@ -176,8 +176,8 @@ TEST_F(ReplaceNullsStringsTest, SimpleReplaceScalar) std::vector input{"", "", "", "", "", "", "", ""}; std::vector input_v{0, 0, 0, 0, 0, 0, 0, 0}; std::unique_ptr repl = - cudf::make_string_scalar("rep", cudf::default_stream_value, mr()); - repl->set_valid_async(true, cudf::default_stream_value); + cudf::make_string_scalar("rep", cudf::get_default_stream(), mr()); + repl->set_valid_async(true, cudf::get_default_stream()); std::vector expected{"rep", "rep", "rep", "rep", "rep", "rep", "rep", "rep"}; cudf::test::strings_column_wrapper input_w{input.begin(), input.end(), input_v.begin()}; diff --git a/cpp/tests/scalar/factories_test.cpp b/cpp/tests/scalar/factories_test.cpp index b531623d548..73cf3479ac2 100644 --- a/cpp/tests/scalar/factories_test.cpp +++ b/cpp/tests/scalar/factories_test.cpp @@ -28,7 +28,7 @@ class ScalarFactoryTest : public cudf::test::BaseFixture { public: - rmm::cuda_stream_view stream() { return cudf::default_stream_value; } + rmm::cuda_stream_view stream() { return 
cudf::get_default_stream(); } }; template diff --git a/cpp/tests/scalar/scalar_device_view_test.cu b/cpp/tests/scalar/scalar_device_view_test.cu index f4a1c94c3e6..1a0fea7219e 100644 --- a/cpp/tests/scalar/scalar_device_view_test.cu +++ b/cpp/tests/scalar/scalar_device_view_test.cu @@ -57,20 +57,20 @@ TYPED_TEST(TypedScalarDeviceViewTest, Value) auto scalar_device_view = cudf::get_scalar_device_view(s); auto scalar_device_view1 = cudf::get_scalar_device_view(s1); - rmm::device_scalar result{cudf::default_stream_value}; + rmm::device_scalar result{cudf::get_default_stream()}; - test_set_value<<<1, 1, 0, cudf::default_stream_value.value()>>>(scalar_device_view, + test_set_value<<<1, 1, 0, cudf::get_default_stream().value()>>>(scalar_device_view, scalar_device_view1); CUDF_CHECK_CUDA(0); EXPECT_EQ(s1.value(), value); EXPECT_TRUE(s1.is_valid()); - test_value<<<1, 1, 0, cudf::default_stream_value.value()>>>( + test_value<<<1, 1, 0, cudf::get_default_stream().value()>>>( scalar_device_view, scalar_device_view1, result.data()); CUDF_CHECK_CUDA(0); - EXPECT_TRUE(result.value(cudf::default_stream_value)); + EXPECT_TRUE(result.value(cudf::get_default_stream())); } template @@ -84,12 +84,12 @@ TYPED_TEST(TypedScalarDeviceViewTest, ConstructNull) TypeParam value = cudf::test::make_type_param_scalar(5); cudf::scalar_type_t s(value, false); auto scalar_device_view = cudf::get_scalar_device_view(s); - rmm::device_scalar result{cudf::default_stream_value}; + rmm::device_scalar result{cudf::get_default_stream()}; - test_null<<<1, 1, 0, cudf::default_stream_value.value()>>>(scalar_device_view, result.data()); + test_null<<<1, 1, 0, cudf::get_default_stream().value()>>>(scalar_device_view, result.data()); CUDF_CHECK_CUDA(0); - EXPECT_FALSE(result.value(cudf::default_stream_value)); + EXPECT_FALSE(result.value(cudf::get_default_stream())); } template @@ -106,7 +106,7 @@ TYPED_TEST(TypedScalarDeviceViewTest, SetNull) s.set_valid_async(true); EXPECT_TRUE(s.is_valid()); - 
test_setnull<<<1, 1, 0, cudf::default_stream_value.value()>>>(scalar_device_view); + test_setnull<<<1, 1, 0, cudf::get_default_stream().value()>>>(scalar_device_view); CUDF_CHECK_CUDA(0); EXPECT_FALSE(s.is_valid()); @@ -129,12 +129,12 @@ TEST_F(StringScalarDeviceViewTest, Value) cudf::string_scalar s(value); auto scalar_device_view = cudf::get_scalar_device_view(s); - rmm::device_scalar result{cudf::default_stream_value}; + rmm::device_scalar result{cudf::get_default_stream()}; auto value_v = cudf::detail::make_device_uvector_sync(value); - test_string_value<<<1, 1, 0, cudf::default_stream_value.value()>>>( + test_string_value<<<1, 1, 0, cudf::get_default_stream().value()>>>( scalar_device_view, value_v.data(), value.size(), result.data()); CUDF_CHECK_CUDA(0); - EXPECT_TRUE(result.value(cudf::default_stream_value)); + EXPECT_TRUE(result.value(cudf::get_default_stream())); } diff --git a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp b/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp index 99d5c90d1a4..2f8bfa847fa 100644 --- a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp +++ b/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp @@ -273,7 +273,7 @@ TEST_F(ApplyBooleanMask, CorrectNullCount) auto got = cudf::apply_boolean_mask(input, boolean_mask); auto out_col = got->get_column(0).view(); auto expected_null_count = - cudf::detail::null_count(out_col.null_mask(), 0, out_col.size(), cudf::default_stream_value); + cudf::detail::null_count(out_col.null_mask(), 0, out_col.size(), cudf::get_default_stream()); ASSERT_EQ(out_col.null_count(), expected_null_count); } diff --git a/cpp/tests/strings/datetime_tests.cpp b/cpp/tests/strings/datetime_tests.cpp index 26beaf9756a..dc42fb283dd 100644 --- a/cpp/tests/strings/datetime_tests.cpp +++ b/cpp/tests/strings/datetime_tests.cpp @@ -430,12 +430,14 @@ TEST_F(StringsDatetimeTest, FromTimestampDayOfYear) // Format names used for some specifiers in from_timestamps // clang-format off 
-cudf::test::strings_column_wrapper format_names({"AM", "PM", - "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", - "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", - "January", "February", "March", "April", "May", "June", "July", - "August", "September", "October", "November", "December", - "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}); +cudf::test::strings_column_wrapper format_names() { + return cudf::test::strings_column_wrapper({"AM", "PM", + "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", + "January", "February", "March", "April", "May", "June", "July", + "August", "September", "October", "November", "December", + "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}); +} // clang-format on TEST_F(StringsDatetimeTest, FromTimestampDayOfWeekOfYear) @@ -492,8 +494,9 @@ TEST_F(StringsDatetimeTest, FromTimestampDayOfWeekOfYear) "[Fri 01, Jan 1982 5 00 5 00 1981 53]", "[Sat 02, Jan 1982 6 00 6 00 1981 53]", "[Sun 03, Jan 1982 0 00 7 01 1981 53]"}); - auto results = cudf::strings::from_timestamps( - timestamps, "[%a %d, %b %Y %w %W %u %U %G %V]", cudf::strings_column_view(format_names)); + auto results = cudf::strings::from_timestamps(timestamps, + "[%a %d, %b %Y %w %W %u %U %G %V]", + cudf::strings_column_view(format_names())); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } @@ -528,7 +531,7 @@ TEST_F(StringsDatetimeTest, FromTimestampWeekdayMonthYear) "[Monday December 06, 2021: 02 AM]"}); auto results = cudf::strings::from_timestamps( - timestamps, "[%A %B %d, %Y: %I %p]", cudf::strings_column_view(format_names)); + timestamps, "[%A %B %d, %Y: %I %p]", cudf::strings_column_view(format_names())); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } @@ -549,7 +552,7 @@ TEST_F(StringsDatetimeTest, FromTimestampAllSpecifiers) auto results = cudf::strings::from_timestamps( input, 
"[%d/%m/%y/%Y %H:%I:%M:%S.%f %z:%Z %j %u %U %W %V %G %p %a %A %b %B]", - cudf::strings_column_view(format_names)); + cudf::strings_column_view(format_names())); // clang-format off cudf::test::strings_column_wrapper expected({ diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index a381c1cff89..c27f48a9069 100644 --- a/cpp/tests/strings/factories_test.cu +++ b/cpp/tests/strings/factories_test.cu @@ -59,7 +59,7 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair) memsize += *itr ? (cudf::size_type)strlen(*itr) : 0; cudf::size_type count = (cudf::size_type)h_test_strings.size(); thrust::host_vector h_buffer(memsize); - rmm::device_uvector d_buffer(memsize, cudf::default_stream_value); + rmm::device_uvector d_buffer(memsize, cudf::get_default_stream()); thrust::host_vector> strings(count); thrust::host_vector h_offsets(count + 1); cudf::size_type offset = 0; @@ -97,12 +97,12 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair) // check string data auto h_chars_data = cudf::detail::make_std_vector_sync( cudf::device_span(strings_view.chars().data(), strings_view.chars().size()), - cudf::default_stream_value); + cudf::get_default_stream()); auto h_offsets_data = cudf::detail::make_std_vector_sync( cudf::device_span( strings_view.offsets().data() + strings_view.offset(), strings_view.size() + 1), - cudf::default_stream_value); + cudf::get_default_stream()); EXPECT_EQ(memcmp(h_buffer.data(), h_chars_data.data(), h_buffer.size()), 0); EXPECT_EQ( memcmp(h_offsets.data(), h_offsets_data.data(), h_offsets.size() * sizeof(cudf::size_type)), 0); @@ -159,12 +159,12 @@ TEST_F(StringsFactoriesTest, CreateColumnFromOffsets) // check string data auto h_chars_data = cudf::detail::make_std_vector_sync( cudf::device_span(strings_view.chars().data(), strings_view.chars().size()), - cudf::default_stream_value); + cudf::get_default_stream()); auto h_offsets_data = cudf::detail::make_std_vector_sync( cudf::device_span( strings_view.offsets().data() + 
strings_view.offset(), strings_view.size() + 1), - cudf::default_stream_value); + cudf::get_default_stream()); EXPECT_EQ(memcmp(h_buffer.data(), h_chars_data.data(), h_buffer.size()), 0); EXPECT_EQ( memcmp(h_offsets.data(), h_offsets_data.data(), h_offsets.size() * sizeof(cudf::size_type)), 0); @@ -183,15 +183,15 @@ TEST_F(StringsFactoriesTest, CreateScalar) TEST_F(StringsFactoriesTest, EmptyStringsColumn) { - rmm::device_uvector d_chars{0, cudf::default_stream_value}; + rmm::device_uvector d_chars{0, cudf::get_default_stream()}; auto d_offsets = cudf::detail::make_zeroed_device_uvector_sync(1); - rmm::device_uvector d_nulls{0, cudf::default_stream_value}; + rmm::device_uvector d_nulls{0, cudf::get_default_stream()}; auto results = cudf::make_strings_column(d_chars, d_offsets, d_nulls, 0); cudf::test::expect_column_empty(results->view()); rmm::device_uvector> d_strings{ - 0, cudf::default_stream_value}; + 0, cudf::get_default_stream()}; results = cudf::make_strings_column(d_strings); cudf::test::expect_column_empty(results->view()); } @@ -213,8 +213,8 @@ TEST_F(StringsFactoriesTest, StringPairWithNullsAndEmpty) {0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1}); auto d_column = cudf::column_device_view::create(data); - rmm::device_uvector pairs(d_column->size(), cudf::default_stream_value); - thrust::transform(rmm::exec_policy(cudf::default_stream_value), + rmm::device_uvector pairs(d_column->size(), cudf::get_default_stream()); + thrust::transform(rmm::exec_policy(cudf::get_default_stream()), d_column->pair_begin(), d_column->pair_end(), pairs.data(), diff --git a/cpp/tests/table/experimental_row_operator_tests.cu b/cpp/tests/table/experimental_row_operator_tests.cu index 0566f55e46d..427d819ace3 100644 --- a/cpp/tests/table/experimental_row_operator_tests.cu +++ b/cpp/tests/table/experimental_row_operator_tests.cu @@ -51,7 +51,7 @@ auto self_comparison(cudf::table_view input, std::vector const& column_order, PhysicalElementComparator comparator) { - rmm::cuda_stream_view 
stream{cudf::default_stream_value}; + rmm::cuda_stream_view stream{cudf::get_default_stream()}; auto const table_comparator = lexicographic::self_comparator{input, column_order, {}, stream}; @@ -82,7 +82,7 @@ auto two_table_comparison(cudf::table_view lhs, std::vector const& column_order, PhysicalElementComparator comparator) { - rmm::cuda_stream_view stream{cudf::default_stream_value}; + rmm::cuda_stream_view stream{cudf::get_default_stream()}; auto const table_comparator = lexicographic::two_table_comparator{lhs, rhs, column_order, {}, stream}; @@ -115,7 +115,7 @@ auto self_equality(cudf::table_view input, std::vector const& column_order, PhysicalElementComparator comparator) { - rmm::cuda_stream_view stream{cudf::default_stream_value}; + rmm::cuda_stream_view stream{cudf::get_default_stream()}; auto const table_comparator = equality::self_comparator{input, stream}; auto const equal_comparator = @@ -139,7 +139,7 @@ auto two_table_equality(cudf::table_view lhs, std::vector const& column_order, PhysicalElementComparator comparator) { - rmm::cuda_stream_view stream{cudf::default_stream_value}; + rmm::cuda_stream_view stream{cudf::get_default_stream()}; auto const table_comparator = equality::two_table_comparator{lhs, rhs, stream}; auto const equal_comparator = diff --git a/cpp/tests/table/table_view_tests.cu b/cpp/tests/table/table_view_tests.cu index d678e659f79..a092006bda6 100644 --- a/cpp/tests/table/table_view_tests.cu +++ b/cpp/tests/table/table_view_tests.cu @@ -43,7 +43,7 @@ void row_comparison(cudf::table_view input1, cudf::mutable_column_view output, std::vector const& column_order) { - rmm::cuda_stream_view stream{cudf::default_stream_value}; + rmm::cuda_stream_view stream{cudf::get_default_stream()}; auto device_table_1 = cudf::table_device_view::create(input1, stream); auto device_table_2 = cudf::table_device_view::create(input2, stream); diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu index 
8151e0d6d8d..b1cfc7a39d1 100644 --- a/cpp/tests/transform/row_bit_count_test.cu +++ b/cpp/tests/transform/row_bit_count_test.cu @@ -53,7 +53,7 @@ TYPED_TEST(RowBitCountTyped, SimpleTypes) // expect size of the type per row auto expected = make_fixed_width_column(data_type{type_id::INT32}, 16); cudf::mutable_column_view mcv(*expected); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), mcv.begin(), mcv.end(), sizeof(device_storage_type_t) * CHAR_BIT); @@ -76,7 +76,7 @@ TYPED_TEST(RowBitCountTyped, SimpleTypesWithNulls) // expect size of the type + 1 bit per row auto expected = make_fixed_width_column(data_type{type_id::INT32}, 16); cudf::mutable_column_view mcv(*expected); - thrust::fill(rmm::exec_policy(cudf::default_stream_value), + thrust::fill(rmm::exec_policy(cudf::get_default_stream()), mcv.begin(), mcv.end(), (sizeof(device_storage_type_t) * CHAR_BIT) + 1); @@ -240,7 +240,7 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock) // List child column = {0, 1, 2, 3, 4, ..., 2*num_rows}; auto ints = make_numeric_column(data_type{type_id::INT32}, num_rows * 2); auto ints_view = ints->mutable_view(); - thrust::tabulate(rmm::exec_policy(cudf::default_stream_value), + thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()), ints_view.begin(), ints_view.end(), thrust::identity{}); @@ -248,7 +248,7 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock) // List offsets = {0, 2, 4, 6, 8, ..., num_rows*2}; auto list_offsets = make_numeric_column(data_type{type_id::INT32}, num_rows + 1); auto list_offsets_view = list_offsets->mutable_view(); - thrust::tabulate(rmm::exec_policy(cudf::default_stream_value), + thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()), list_offsets_view.begin(), list_offsets_view.end(), times_2{}); @@ -264,7 +264,7 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock) // Compute row_bit_count, and compare. 
auto row_bit_counts = row_bit_count(table_view{{structs_column->view()}}); auto expected_row_bit_counts = make_numeric_column(data_type{type_id::INT32}, num_rows); - thrust::fill_n(rmm::exec_policy(cudf::default_stream_value), + thrust::fill_n(rmm::exec_policy(cudf::get_default_stream()), expected_row_bit_counts->mutable_view().begin(), num_rows, CHAR_BIT * (2 * sizeof(int32_t) + sizeof(offset_type))); @@ -613,7 +613,7 @@ TEST_F(RowBitCount, Table) auto expected = cudf::make_fixed_width_column(data_type{type_id::INT32}, t.num_rows()); cudf::mutable_column_view mcv(*expected); thrust::transform( - rmm::exec_policy(cudf::default_stream_value), + rmm::exec_policy(cudf::get_default_stream()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + t.num_rows(), mcv.begin(), diff --git a/cpp/tests/types/type_dispatcher_test.cu b/cpp/tests/types/type_dispatcher_test.cu index 3280339ea85..e3856759cfc 100644 --- a/cpp/tests/types/type_dispatcher_test.cu +++ b/cpp/tests/types/type_dispatcher_test.cu @@ -70,10 +70,10 @@ __global__ void dispatch_test_kernel(cudf::type_id id, bool* d_result) TYPED_TEST(TypedDispatcherTest, DeviceDispatch) { auto result = cudf::detail::make_zeroed_device_uvector_sync(1); - dispatch_test_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>( + dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>( cudf::type_to_id(), result.data()); CUDF_CUDA_TRY(cudaDeviceSynchronize()); - EXPECT_EQ(true, result.front_element(cudf::default_stream_value)); + EXPECT_EQ(true, result.front_element(cudf::get_default_stream())); } struct IdDispatcherTest : public DispatcherTest, public testing::WithParamInterface { @@ -131,10 +131,10 @@ __global__ void double_dispatch_test_kernel(cudf::type_id id1, cudf::type_id id2 TYPED_TEST(TypedDoubleDispatcherTest, DeviceDoubleDispatch) { auto result = cudf::detail::make_zeroed_device_uvector_sync(1); - double_dispatch_test_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>( + 
double_dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>( cudf::type_to_id(), cudf::type_to_id(), result.data()); CUDF_CUDA_TRY(cudaDeviceSynchronize()); - EXPECT_EQ(true, result.front_element(cudf::default_stream_value)); + EXPECT_EQ(true, result.front_element(cudf::get_default_stream())); } struct IdDoubleDispatcherTest : public DispatcherTest, diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index fd9211a56e5..ac68a277622 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -90,70 +90,70 @@ inline cudf::column make_exp_chrono_column(cudf::type_id type_id) test_timestamps_D.size(), rmm::device_buffer{test_timestamps_D.data(), test_timestamps_D.size() * sizeof(test_timestamps_D.front()), - cudf::default_stream_value}); + cudf::get_default_stream()}); case cudf::type_id::TIMESTAMP_SECONDS: return cudf::column( cudf::data_type{type_id}, test_timestamps_s.size(), rmm::device_buffer{test_timestamps_s.data(), test_timestamps_s.size() * sizeof(test_timestamps_s.front()), - cudf::default_stream_value}); + cudf::get_default_stream()}); case cudf::type_id::TIMESTAMP_MILLISECONDS: return cudf::column( cudf::data_type{type_id}, test_timestamps_ms.size(), rmm::device_buffer{test_timestamps_ms.data(), test_timestamps_ms.size() * sizeof(test_timestamps_ms.front()), - cudf::default_stream_value}); + cudf::get_default_stream()}); case cudf::type_id::TIMESTAMP_MICROSECONDS: return cudf::column( cudf::data_type{type_id}, test_timestamps_us.size(), rmm::device_buffer{test_timestamps_us.data(), test_timestamps_us.size() * sizeof(test_timestamps_us.front()), - cudf::default_stream_value}); + cudf::get_default_stream()}); case cudf::type_id::TIMESTAMP_NANOSECONDS: return cudf::column( cudf::data_type{type_id}, test_timestamps_ns.size(), rmm::device_buffer{test_timestamps_ns.data(), test_timestamps_ns.size() * sizeof(test_timestamps_ns.front()), - cudf::default_stream_value}); + cudf::get_default_stream()}); 
case cudf::type_id::DURATION_DAYS: return cudf::column( cudf::data_type{type_id}, test_durations_D.size(), rmm::device_buffer{test_durations_D.data(), test_durations_D.size() * sizeof(test_durations_D.front()), - cudf::default_stream_value}); + cudf::get_default_stream()}); case cudf::type_id::DURATION_SECONDS: return cudf::column( cudf::data_type{type_id}, test_durations_s.size(), rmm::device_buffer{test_durations_s.data(), test_durations_s.size() * sizeof(test_durations_s.front()), - cudf::default_stream_value}); + cudf::get_default_stream()}); case cudf::type_id::DURATION_MILLISECONDS: return cudf::column( cudf::data_type{type_id}, test_durations_ms.size(), rmm::device_buffer{test_durations_ms.data(), test_durations_ms.size() * sizeof(test_durations_ms.front()), - cudf::default_stream_value}); + cudf::get_default_stream()}); case cudf::type_id::DURATION_MICROSECONDS: return cudf::column( cudf::data_type{type_id}, test_durations_us.size(), rmm::device_buffer{test_durations_us.data(), test_durations_us.size() * sizeof(test_durations_us.front()), - cudf::default_stream_value}); + cudf::get_default_stream()}); case cudf::type_id::DURATION_NANOSECONDS: return cudf::column( cudf::data_type{type_id}, test_durations_ns.size(), rmm::device_buffer{test_durations_ns.data(), test_durations_ns.size() * sizeof(test_durations_ns.front()), - cudf::default_stream_value}); + cudf::get_default_stream()}); default: CUDF_FAIL("Unsupported type_id"); } }; diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index d0fc92b0bb5..080bb3ef916 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -66,7 +66,7 @@ std::unique_ptr generate_all_row_indices(size_type num_rows) { auto indices = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows, mask_state::UNALLOCATED); - thrust::sequence(rmm::exec_policy(cudf::default_stream_value), + 
thrust::sequence(rmm::exec_policy(cudf::get_default_stream()), indices->mutable_view().begin(), indices->mutable_view().end(), 0); @@ -103,7 +103,7 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, // if we are checking for exact equality, we should be checking for "unsanitized" data that may // be hiding underneath nulls. so check all rows instead of just non-null rows if (check_exact_equality) { - return generate_all_row_indices(c.get_sliced_child(cudf::default_stream_value).size()); + return generate_all_row_indices(c.get_sliced_child(cudf::get_default_stream()).size()); } // Example input @@ -132,7 +132,7 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, ? (offsets[true_index + 1] - offsets[true_index]) : 0; }); - auto const output_size = thrust::reduce(rmm::exec_policy(cudf::default_stream_value), + auto const output_size = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()), row_size_iter, row_size_iter + row_indices.size()); // no output. done. 
@@ -147,7 +147,7 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, // auto output_row_start = cudf::make_fixed_width_column( data_type{type_id::INT32}, row_indices.size(), mask_state::UNALLOCATED); - thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value), + thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()), row_size_iter, row_size_iter + row_indices.size(), output_row_start->mutable_view().begin()); @@ -156,7 +156,7 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, // // result = [1, 1, 1, 1, 1] // - thrust::generate(rmm::exec_policy(cudf::default_stream_value), + thrust::generate(rmm::exec_policy(cudf::get_default_stream()), result->mutable_view().begin(), result->mutable_view().end(), [] __device__() { return 1; }); @@ -171,11 +171,11 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, offsets = c.offsets().begin(), offset = c.offset(), first_offset = cudf::detail::get_value( - c.offsets(), c.offset(), cudf::default_stream_value)] __device__(int index) { + c.offsets(), c.offset(), cudf::get_default_stream())] __device__(int index) { auto const true_index = row_indices[index] + offset; return offsets[true_index] - first_offset; }); - thrust::scatter_if(rmm::exec_policy(cudf::default_stream_value), + thrust::scatter_if(rmm::exec_policy(cudf::get_default_stream()), output_row_iter, output_row_iter + row_indices.size(), output_row_start->view().begin(), @@ -189,18 +189,18 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, // auto keys = cudf::make_fixed_width_column(data_type{type_id::INT32}, output_size, mask_state::UNALLOCATED); - thrust::generate(rmm::exec_policy(cudf::default_stream_value), + thrust::generate(rmm::exec_policy(cudf::get_default_stream()), keys->mutable_view().begin(), keys->mutable_view().end(), [] __device__() { return 0; }); - thrust::scatter_if(rmm::exec_policy(cudf::default_stream_value), + 
thrust::scatter_if(rmm::exec_policy(cudf::get_default_stream()), row_size_iter, row_size_iter + row_indices.size(), output_row_start->view().begin(), row_size_iter, keys->mutable_view().begin(), [] __device__(auto row_size) { return row_size != 0; }); - thrust::inclusive_scan(rmm::exec_policy(cudf::default_stream_value), + thrust::inclusive_scan(rmm::exec_policy(cudf::get_default_stream()), keys->view().begin(), keys->view().end(), keys->mutable_view().begin()); @@ -213,7 +213,7 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, // output // result = [6, 7, 11, 12, 13] // - thrust::inclusive_scan_by_key(rmm::exec_policy(cudf::default_stream_value), + thrust::inclusive_scan_by_key(rmm::exec_policy(cudf::get_default_stream()), keys->view().begin(), keys->view().end(), result->view().begin(), @@ -256,7 +256,7 @@ struct column_property_comparator { auto const true_index = row_indices[index] + offset; return !validity || cudf::bit_is_set(validity, true_index) ? 0 : 1; }); - return thrust::reduce(rmm::exec_policy(cudf::default_stream_value), + return thrust::reduce(rmm::exec_policy(cudf::get_default_stream()), validity_iter, validity_iter + row_indices.size()); } @@ -328,8 +328,8 @@ struct column_property_comparator { auto lhs_child_indices = generate_child_row_indices(lhs_l, lhs_row_indices, check_exact_equality); if (lhs_child_indices->size() > 0) { - auto lhs_child = lhs_l.get_sliced_child(cudf::default_stream_value); - auto rhs_child = rhs_l.get_sliced_child(cudf::default_stream_value); + auto lhs_child = lhs_l.get_sliced_child(cudf::get_default_stream()); + auto rhs_child = rhs_l.get_sliced_child(cudf::get_default_stream()); auto rhs_child_indices = generate_child_row_indices(rhs_l, rhs_row_indices, check_exact_equality); return cudf::type_dispatcher(lhs_child.type(), @@ -516,9 +516,9 @@ std::string stringify_column_differences(cudf::device_span difference auto const index = h_differences[0]; // only stringify first difference auto const 
lhs_index = - cudf::detail::get_value(lhs_row_indices, index, cudf::default_stream_value); + cudf::detail::get_value(lhs_row_indices, index, cudf::get_default_stream()); auto const rhs_index = - cudf::detail::get_value(rhs_row_indices, index, cudf::default_stream_value); + cudf::detail::get_value(rhs_row_indices, index, cudf::get_default_stream()); auto diff_lhs = cudf::detail::slice(lhs, lhs_index, lhs_index + 1); auto diff_rhs = cudf::detail::slice(rhs, rhs_index, rhs_index + 1); return depth_str + "first difference: " + "lhs[" + std::to_string(index) + @@ -549,17 +549,17 @@ struct column_comparator_impl { corresponding_rows_not_equivalent>; auto differences = rmm::device_uvector( - lhs.size(), cudf::default_stream_value); // worst case: everything different + lhs.size(), cudf::get_default_stream()); // worst case: everything different auto input_iter = thrust::make_counting_iterator(0); auto diff_iter = thrust::copy_if( - rmm::exec_policy(cudf::default_stream_value), + rmm::exec_policy(cudf::get_default_stream()), input_iter, input_iter + lhs_row_indices.size(), differences.begin(), ComparatorType(*d_lhs, *d_rhs, *d_lhs_row_indices, *d_rhs_row_indices, fp_ulps)); differences.resize(thrust::distance(differences.begin(), diff_iter), - cudf::default_stream_value); // shrink back down + cudf::get_default_stream()); // shrink back down if (not differences.is_empty()) { if (verbosity != debug_output_level::QUIET) { @@ -597,13 +597,13 @@ struct column_comparator_impl { if (lhs_row_indices.is_empty()) { return true; } // worst case - everything is different - rmm::device_uvector differences(lhs_row_indices.size(), cudf::default_stream_value); + rmm::device_uvector differences(lhs_row_indices.size(), cudf::get_default_stream()); // compare offsets, taking slicing into account // left side size_type lhs_shift = cudf::detail::get_value( - lhs_l.offsets(), lhs_l.offset(), cudf::default_stream_value); + lhs_l.offsets(), lhs_l.offset(), cudf::get_default_stream()); auto 
lhs_offsets = thrust::make_transform_iterator( lhs_l.offsets().begin() + lhs_l.offset(), [lhs_shift] __device__(size_type offset) { return offset - lhs_shift; }); @@ -615,7 +615,7 @@ struct column_comparator_impl { // right side size_type rhs_shift = cudf::detail::get_value( - rhs_l.offsets(), rhs_l.offset(), cudf::default_stream_value); + rhs_l.offsets(), rhs_l.offset(), cudf::get_default_stream()); auto rhs_offsets = thrust::make_transform_iterator( rhs_l.offsets().begin() + rhs_l.offset(), [rhs_shift] __device__(size_type offset) { return offset - rhs_shift; }); @@ -643,7 +643,7 @@ struct column_comparator_impl { // auto input_iter = thrust::make_counting_iterator(0); auto diff_iter = thrust::copy_if( - rmm::exec_policy(cudf::default_stream_value), + rmm::exec_policy(cudf::get_default_stream()), input_iter, input_iter + lhs_row_indices.size(), differences.begin(), @@ -679,7 +679,7 @@ struct column_comparator_impl { }); differences.resize(thrust::distance(differences.begin(), diff_iter), - cudf::default_stream_value); // shrink back down + cudf::get_default_stream()); // shrink back down if (not differences.is_empty()) { if (verbosity != debug_output_level::QUIET) { @@ -698,8 +698,8 @@ struct column_comparator_impl { auto lhs_child_indices = generate_child_row_indices(lhs_l, lhs_row_indices, check_exact_equality); if (lhs_child_indices->size() > 0) { - auto lhs_child = lhs_l.get_sliced_child(cudf::default_stream_value); - auto rhs_child = rhs_l.get_sliced_child(cudf::default_stream_value); + auto lhs_child = lhs_l.get_sliced_child(cudf::get_default_stream()); + auto rhs_child = rhs_l.get_sliced_child(cudf::get_default_stream()); auto rhs_child_indices = generate_child_row_indices(rhs_l, rhs_row_indices, check_exact_equality); return cudf::type_dispatcher(lhs_child.type(), @@ -875,7 +875,7 @@ void expect_equal_buffers(void const* lhs, void const* rhs, std::size_t size_byt auto typed_lhs = static_cast(lhs); auto typed_rhs = static_cast(rhs); 
EXPECT_TRUE(thrust::equal( - rmm::exec_policy(cudf::default_stream_value), typed_lhs, typed_lhs + size_bytes, typed_rhs)); + rmm::exec_policy(cudf::get_default_stream()), typed_lhs, typed_lhs + size_bytes, typed_rhs)); } /** @@ -964,13 +964,13 @@ std::string nested_offsets_to_string(NestedColumnView const& c, std::string cons // the first offset value to normalize everything against size_type first = - cudf::detail::get_value(offsets, c.offset(), cudf::default_stream_value); - rmm::device_uvector shifted_offsets(output_size, cudf::default_stream_value); + cudf::detail::get_value(offsets, c.offset(), cudf::get_default_stream()); + rmm::device_uvector shifted_offsets(output_size, cudf::get_default_stream()); // normalize the offset values for the column offset size_type const* d_offsets = offsets.head() + c.offset(); thrust::transform( - rmm::exec_policy(cudf::default_stream_value), + rmm::exec_policy(cudf::get_default_stream()), d_offsets, d_offsets + output_size, shifted_offsets.begin(), @@ -1146,7 +1146,7 @@ struct column_view_printer { lists_column_view lcv(col); // propagate slicing to the child if necessary - column_view child = lcv.get_sliced_child(cudf::default_stream_value); + column_view child = lcv.get_sliced_child(cudf::get_default_stream()); bool const is_sliced = lcv.offset() > 0 || child.offset() > 0; std::string tmp = diff --git a/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt b/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt new file mode 100644 index 00000000000..89f40303550 --- /dev/null +++ b/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt @@ -0,0 +1,60 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR) + +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake) + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/RAPIDS.cmake + ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake + ) +endif() +include(${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake) + +project( + IDENTIFY_STREAM_USAGE + VERSION 0.0.1 + LANGUAGES CXX CUDA +) + +include(rapids-cpm) +include(${rapids-cmake-dir}/cpm/rmm.cmake) +rapids_cpm_init() +rapids_cpm_rmm() + +set(CMAKE_CUDA_RUNTIME_LIBRARY SHARED) +add_library(identify_stream_usage SHARED identify_stream_usage.cpp) + +find_package(CUDAToolkit REQUIRED) + +set_target_properties(identify_stream_usage PROPERTIES CUDA_RUNTIME_LIBRARY SHARED) +target_link_libraries(identify_stream_usage PUBLIC CUDA::cudart rmm::rmm) + +set_target_properties( + identify_stream_usage + PROPERTIES # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON +) + +# Add the test file. 
+include(CTest) + +add_executable(Tests test_default_stream_identification.cu) +add_test(NAME default_stream_identification COMMAND Tests) + +set_tests_properties( + default_stream_identification PROPERTIES ENVIRONMENT + LD_PRELOAD=$ +) diff --git a/cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp b/cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp new file mode 100644 index 00000000000..4a1a8f04791 --- /dev/null +++ b/cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +/** + * @brief Print a backtrace and raise an error if stream is a default stream. + */ +void check_stream_and_error(cudaStream_t stream) +{ + // We explicitly list the possibilities rather than using + // `cudf::get_default_stream().value()` for two reasons: + // 1. There is no guarantee that `thrust::device` and the default value of + // `cudf::get_default_stream().value()` are actually the same. At present, + // the former is `cudaStreamLegacy` while the latter is 0. + // 2. Using the cudf default stream would require linking against cudf, which + // adds unnecessary complexity to the build process (especially in CI) + // when this simple approach is sufficient. 
+ if (stream == cudaStreamDefault || (stream == cudaStreamLegacy) || + (stream == cudaStreamPerThread)) { +#ifdef __GNUC__ + // If we're on the wrong stream, print the stack trace from the current frame. + // Adapted from https://panthema.net/2008/0901-stacktrace-demangled/ + constexpr int kMaxStackDepth = 64; + void* stack[kMaxStackDepth]; + auto depth = backtrace(stack, kMaxStackDepth); + auto strings = backtrace_symbols(stack, depth); + + if (strings == nullptr) { + std::cout << "No stack trace could be found!" << std::endl; + } else { + // If we were able to extract a trace, parse it, demangle symbols, and + // print a readable output. + + // allocate string which will be filled with the demangled function name + size_t funcnamesize = 256; + char* funcname = (char*)malloc(funcnamesize); + + // Start at frame 1 to skip check_stream_and_error itself. + for (int i = 1; i < depth; ++i) { + char* begin_name = nullptr; + char* begin_offset = nullptr; + char* end_offset = nullptr; + + // find parentheses and +address offset surrounding the mangled name: + // ./module(function+0x15c) [0x8048a6d] + for (char* p = strings[i]; *p; ++p) { + if (*p == '(') { + begin_name = p; + } else if (*p == '+') { + begin_offset = p; + } else if (*p == ')' && begin_offset) { + end_offset = p; + break; + } + } + + if (begin_name && begin_offset && end_offset && begin_name < begin_offset) { + *begin_name++ = '\0'; + *begin_offset++ = '\0'; + *end_offset = '\0'; + + // mangled name is now in [begin_name, begin_offset) and caller offset + // in [begin_offset, end_offset). now apply __cxa_demangle(): + + int status; + char* ret = abi::__cxa_demangle(begin_name, funcname, &funcnamesize, &status); + if (status == 0) { + funcname = + ret; // use possibly realloc()-ed string (__cxa_demangle may realloc funcname) + std::cout << "#" << i << " in " << strings[i] << " : " << funcname << "+" + << begin_offset << std::endl; + } else { + // demangling failed.
Output function name as a C function with no arguments. + std::cout << "#" << i << " in " << strings[i] << " : " << begin_name << "()+" + << begin_offset << std::endl; + } + } else { + std::cout << "#" << i << " in " << strings[i] << std::endl; + } + } + + free(funcname); + } + free(strings); +#else + std::cout << "Backtraces are only when built with a GNU compiler." << std::endl; +#endif // __GNUC__ + throw std::runtime_error("Found unexpected default stream!"); + } +} + +/** + * @brief Container for CUDA APIs that have been overloaded using DEFINE_OVERLOAD. + * + * This variable must be initialized before everything else. + * + * @see find_originals for a description of the priorities + */ +__attribute__((init_priority(1001))) std::unordered_map originals; + +/** + * @brief Macro for generating functions to override existing CUDA functions. + * + * Define a new function with the provided signature that checks the used + * stream and raises an exception if it is one of CUDA's default streams. If + * not, the new function forwards all arguments to the original function. + * + * Note that since this only defines the function, we do not need default + * parameter values since those will be provided by the original declarations + * in CUDA itself. + * + * @see find_originals for a description of the priorities + * + * @param function The function to overload. + * @param signature The function signature (must include names, not just types). + * @param arguments The function arguments (names only, no types). + */ +#define DEFINE_OVERLOAD(function, signature, arguments) \ + using function##_t = cudaError_t (*)(signature); \ + \ + cudaError_t function(signature) \ + { \ + check_stream_and_error(stream); \ + return ((function##_t)originals[#function])(arguments); \ + } \ + __attribute__((constructor(1002))) void queue_##function() { originals[#function] = nullptr; } + +/** + * @brief Helper macro to define macro arguments that contain a comma. + */ +#define ARG(...)
__VA_ARGS__ + +// clang-format off +/* + We need to overload all the functions from the runtime API (assuming that we + don't use the driver API) that accept streams. The main webpage for APIs is + https://docs.nvidia.com/cuda/cuda-runtime-api/modules.html#modules. Here are + the modules containing any APIs using streams as of 9/20/2022: + - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html + - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html#group__CUDART__EVENT - Done + - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EXTRES__INTEROP.html#group__CUDART__EXTRES__INTEROP + - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EXECUTION.html#group__CUDART__EXECUTION - Done + - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY - Done + - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html#group__CUDART__MEMORY__POOLS - Done + - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__OPENGL__DEPRECATED.html#group__CUDART__OPENGL__DEPRECATED + - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EGL.html#group__CUDART__EGL + - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP + - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__GRAPH.html#group__CUDART__GRAPH + - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__HIGHLEVEL.html#group__CUDART__HIGHLEVEL + */ +// clang-format on + +// Event APIS: +// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html#group__CUDART__EVENT +DEFINE_OVERLOAD(cudaEventRecord, ARG(cudaEvent_t event, cudaStream_t stream), ARG(event, stream)); + +DEFINE_OVERLOAD(cudaEventRecordWithFlags, + ARG(cudaEvent_t event, cudaStream_t stream, unsigned int flags), + ARG(event, stream, flags)); + +// Execution APIS: +// 
https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EXECUTION.html#group__CUDART__EXECUTION +DEFINE_OVERLOAD(cudaLaunchKernel, + ARG(const void* func, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMem, + cudaStream_t stream), + ARG(func, gridDim, blockDim, args, sharedMem, stream)); +DEFINE_OVERLOAD(cudaLaunchCooperativeKernel, + ARG(const void* func, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMem, + cudaStream_t stream), + ARG(func, gridDim, blockDim, args, sharedMem, stream)); +DEFINE_OVERLOAD(cudaLaunchHostFunc, + ARG(cudaStream_t stream, cudaHostFn_t fn, void* userData), + ARG(stream, fn, userData)); + +// Memory transfer APIS: +// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY +DEFINE_OVERLOAD(cudaMemPrefetchAsync, + ARG(const void* devPtr, size_t count, int dstDevice, cudaStream_t stream), + ARG(devPtr, count, dstDevice, stream)); +DEFINE_OVERLOAD(cudaMemcpy2DAsync, + ARG(void* dst, + size_t dpitch, + const void* src, + size_t spitch, + size_t width, + size_t height, + cudaMemcpyKind kind, + cudaStream_t stream), + ARG(dst, dpitch, src, spitch, width, height, kind, stream)); +DEFINE_OVERLOAD(cudaMemcpy2DFromArrayAsync, + ARG(void* dst, + size_t dpitch, + cudaArray_const_t src, + size_t wOffset, + size_t hOffset, + size_t width, + size_t height, + cudaMemcpyKind kind, + cudaStream_t stream), + ARG(dst, dpitch, src, wOffset, hOffset, width, height, kind, stream)); +DEFINE_OVERLOAD(cudaMemcpy2DToArrayAsync, + ARG(cudaArray_t dst, + size_t wOffset, + size_t hOffset, + const void* src, + size_t spitch, + size_t width, + size_t height, + cudaMemcpyKind kind, + cudaStream_t stream), + ARG(dst, wOffset, hOffset, src, spitch, width, height, kind, stream)); +DEFINE_OVERLOAD(cudaMemcpy3DAsync, + ARG(const cudaMemcpy3DParms* p, cudaStream_t stream), + ARG(p, stream)); +DEFINE_OVERLOAD(cudaMemcpy3DPeerAsync, + ARG(const cudaMemcpy3DPeerParms* p, cudaStream_t stream), + ARG(p, 
stream)); +DEFINE_OVERLOAD( + cudaMemcpyAsync, + ARG(void* dst, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream), + ARG(dst, src, count, kind, stream)); +DEFINE_OVERLOAD(cudaMemcpyFromSymbolAsync, + ARG(void* dst, + const void* symbol, + size_t count, + size_t offset, + cudaMemcpyKind kind, + cudaStream_t stream), + ARG(dst, symbol, count, offset, kind, stream)); +DEFINE_OVERLOAD(cudaMemcpyToSymbolAsync, + ARG(const void* symbol, + const void* src, + size_t count, + size_t offset, + cudaMemcpyKind kind, + cudaStream_t stream), + ARG(symbol, src, count, offset, kind, stream)); +DEFINE_OVERLOAD( + cudaMemset2DAsync, + ARG(void* devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream), + ARG(devPtr, pitch, value, width, height, stream)); +DEFINE_OVERLOAD( + cudaMemset3DAsync, + ARG(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent, cudaStream_t stream), + ARG(pitchedDevPtr, value, extent, stream)); +DEFINE_OVERLOAD(cudaMemsetAsync, + ARG(void* devPtr, int value, size_t count, cudaStream_t stream), + ARG(devPtr, value, count, stream)); + +// Memory allocation APIS: +// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html#group__CUDART__MEMORY__POOLS +DEFINE_OVERLOAD(cudaFreeAsync, ARG(void* devPtr, cudaStream_t stream), ARG(devPtr, stream)); +DEFINE_OVERLOAD(cudaMallocAsync, + ARG(void** devPtr, size_t size, cudaStream_t stream), + ARG(devPtr, size, stream)); +DEFINE_OVERLOAD(cudaMallocFromPoolAsync, + ARG(void** ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream), + ARG(ptr, size, memPool, stream)); + +namespace cudf { + +/** + * @brief Get the current default stream + * + * Overload the default function to return a new stream here. + * + * @return The current default stream. 
+ */ +rmm::cuda_stream_view const get_default_stream() +{ + static rmm::cuda_stream stream{}; + return {stream}; +} + +} // namespace cudf + +/** + * @brief Function to collect all the original CUDA symbols corresponding to overloaded functions. + * + * Note on priorities: + * - `originals` must be initialized first, so it is 1001. + * - The function names must be added to originals next in the macro, so those are 1002. + * - Finally, this function actually finds the original symbols so it is 1003. + */ +__attribute__((constructor(1003))) void find_originals() +{ + for (auto it : originals) { + originals[it.first] = dlsym(RTLD_NEXT, it.first.data()); + } +} diff --git a/cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu b/cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu new file mode 100644 index 00000000000..022244b148b --- /dev/null +++ b/cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +__global__ void kernel() { printf("The kernel ran!\n"); } + +void test_cudaLaunchKernel() +{ + cudaStream_t stream; + cudaStreamCreate(&stream); + kernel<<<1, 1, 0, stream>>>(); + cudaError_t err{cudaDeviceSynchronize()}; + if (err != cudaSuccess) { throw std::runtime_error("Kernel failed on non-default stream!"); } + err = cudaGetLastError(); + if (err != cudaSuccess) { throw std::runtime_error("Kernel failed on non-default stream!"); } + + try { + kernel<<<1, 1>>>(); + } catch (std::runtime_error) { + return; + } + throw std::runtime_error("No exception raised for kernel on default stream!"); +} + +int main() { test_cudaLaunchKernel(); } diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu index cccef4b6284..a043e723eda 100644 --- a/cpp/tests/utilities_tests/span_tests.cu +++ b/cpp/tests/utilities_tests/span_tests.cu @@ -212,11 +212,14 @@ TEST(SpanTest, CanConstructFromHostContainers) (void)host_span(h_vector_c); } +// This test is the only place in libcudf's test suite where using a +// thrust::device_vector (and therefore the CUDA default stream) is acceptable +// since we are explicitly testing conversions from thrust::device_vector. 
TEST(SpanTest, CanConstructFromDeviceContainers) { auto d_thrust_vector = thrust::device_vector(1); auto d_vector = rmm::device_vector(1); - auto d_uvector = rmm::device_uvector(1, cudf::default_stream_value); + auto d_uvector = rmm::device_uvector(1, cudf::get_default_stream()); (void)device_span(d_thrust_vector); (void)device_span(d_vector); @@ -236,13 +239,13 @@ __global__ void simple_device_kernel(device_span result) { result[0] = tru TEST(SpanTest, CanUseDeviceSpan) { auto d_message = - cudf::detail::make_zeroed_device_uvector_async(1, cudf::default_stream_value); + cudf::detail::make_zeroed_device_uvector_async(1, cudf::get_default_stream()); auto d_span = device_span(d_message.data(), d_message.size()); - simple_device_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(d_span); + simple_device_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(d_span); - ASSERT_TRUE(d_message.element(0, cudf::default_stream_value)); + ASSERT_TRUE(d_message.element(0, cudf::get_default_stream())); } class MdSpanTest : public cudf::test::BaseFixture { @@ -250,9 +253,9 @@ class MdSpanTest : public cudf::test::BaseFixture { TEST(MdSpanTest, CanDetermineEmptiness) { - auto const vector = hostdevice_2dvector(1, 2, cudf::default_stream_value); - auto const no_rows_vector = hostdevice_2dvector(0, 2, cudf::default_stream_value); - auto const no_columns_vector = hostdevice_2dvector(1, 0, cudf::default_stream_value); + auto const vector = hostdevice_2dvector(1, 2, cudf::get_default_stream()); + auto const no_rows_vector = hostdevice_2dvector(0, 2, cudf::get_default_stream()); + auto const no_columns_vector = hostdevice_2dvector(1, 0, cudf::get_default_stream()); EXPECT_FALSE(host_2dspan{vector}.is_empty()); EXPECT_FALSE(device_2dspan{vector}.is_empty()); @@ -273,17 +276,17 @@ __global__ void readwrite_kernel(device_2dspan result) TEST(MdSpanTest, DeviceReadWrite) { - auto vector = hostdevice_2dvector(11, 23, cudf::default_stream_value); + auto vector = hostdevice_2dvector(11, 
23, cudf::get_default_stream()); - readwrite_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(vector); - readwrite_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(vector); - vector.device_to_host(cudf::default_stream_value, true); + readwrite_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(vector); + readwrite_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(vector); + vector.device_to_host(cudf::get_default_stream(), true); EXPECT_EQ(vector[5][6], 30); } TEST(MdSpanTest, HostReadWrite) { - auto vector = hostdevice_2dvector(11, 23, cudf::default_stream_value); + auto vector = hostdevice_2dvector(11, 23, cudf::get_default_stream()); auto span = host_2dspan{vector}; span[5][6] = 5; if (span[5][6] == 5) { span[5][6] *= 6; } @@ -293,7 +296,7 @@ TEST(MdSpanTest, HostReadWrite) TEST(MdSpanTest, CanGetSize) { - auto const vector = hostdevice_2dvector(1, 2, cudf::default_stream_value); + auto const vector = hostdevice_2dvector(1, 2, cudf::get_default_stream()); EXPECT_EQ(host_2dspan{vector}.size(), vector.size()); EXPECT_EQ(device_2dspan{vector}.size(), vector.size()); @@ -301,7 +304,7 @@ TEST(MdSpanTest, CanGetSize) TEST(MdSpanTest, CanGetCount) { - auto const vector = hostdevice_2dvector(11, 23, cudf::default_stream_value); + auto const vector = hostdevice_2dvector(11, 23, cudf::get_default_stream()); EXPECT_EQ(host_2dspan{vector}.count(), 11ul * 23); EXPECT_EQ(device_2dspan{vector}.count(), 11ul * 23); diff --git a/cpp/tests/wrappers/timestamps_test.cu b/cpp/tests/wrappers/timestamps_test.cu index 73bfd15744a..9aad90788e0 100644 --- a/cpp/tests/wrappers/timestamps_test.cu +++ b/cpp/tests/wrappers/timestamps_test.cu @@ -38,7 +38,7 @@ template struct ChronoColumnTest : public cudf::test::BaseFixture { - rmm::cuda_stream_view stream() { return cudf::default_stream_value; } + rmm::cuda_stream_view stream() { return cudf::get_default_stream(); } cudf::size_type size() { return cudf::size_type(100); } cudf::data_type type() { return 
cudf::data_type{cudf::type_to_id()}; } }; @@ -93,9 +93,9 @@ TYPED_TEST(ChronoColumnTest, ChronoDurationsMatchPrimitiveRepresentation) auto primitive_col = fixed_width_column_wrapper(chrono_col_data.begin(), chrono_col_data.end()); - rmm::device_uvector indices(this->size(), cudf::default_stream_value); - thrust::sequence(rmm::exec_policy(cudf::default_stream_value), indices.begin(), indices.end()); - EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value), + rmm::device_uvector indices(this->size(), cudf::get_default_stream()); + thrust::sequence(rmm::exec_policy(cudf::get_default_stream()), indices.begin(), indices.end()); + EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()), indices.begin(), indices.end(), compare_chrono_elements_to_primitive_representation{ @@ -147,11 +147,11 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode) auto chrono_rhs_col = generate_timestamps(this->size(), time_point_ms(start_rhs), time_point_ms(stop_rhs)); - rmm::device_uvector indices(this->size(), cudf::default_stream_value); - thrust::sequence(rmm::exec_policy(cudf::default_stream_value), indices.begin(), indices.end()); + rmm::device_uvector indices(this->size(), cudf::get_default_stream()); + thrust::sequence(rmm::exec_policy(cudf::get_default_stream()), indices.begin(), indices.end()); EXPECT_TRUE(thrust::all_of( - rmm::exec_policy(cudf::default_stream_value), + rmm::exec_policy(cudf::get_default_stream()), indices.begin(), indices.end(), compare_chrono_elements{cudf::binary_operator::LESS, @@ -159,7 +159,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode) *cudf::column_device_view::create(chrono_rhs_col)})); EXPECT_TRUE(thrust::all_of( - rmm::exec_policy(cudf::default_stream_value), + rmm::exec_policy(cudf::get_default_stream()), indices.begin(), indices.end(), compare_chrono_elements{cudf::binary_operator::GREATER, @@ -167,7 +167,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode) 
*cudf::column_device_view::create(chrono_lhs_col)})); EXPECT_TRUE(thrust::all_of( - rmm::exec_policy(cudf::default_stream_value), + rmm::exec_policy(cudf::get_default_stream()), indices.begin(), indices.end(), compare_chrono_elements{cudf::binary_operator::LESS_EQUAL, @@ -175,7 +175,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode) *cudf::column_device_view::create(chrono_lhs_col)})); EXPECT_TRUE(thrust::all_of( - rmm::exec_policy(cudf::default_stream_value), + rmm::exec_policy(cudf::get_default_stream()), indices.begin(), indices.end(), compare_chrono_elements{cudf::binary_operator::GREATER_EQUAL, diff --git a/java/src/main/native/include/maps_column_view.hpp b/java/src/main/native/include/maps_column_view.hpp index b9b60f4e3b2..5ac8d5c5713 100644 --- a/java/src/main/native/include/maps_column_view.hpp +++ b/java/src/main/native/include/maps_column_view.hpp @@ -38,7 +38,7 @@ namespace jni { class maps_column_view { public: maps_column_view(lists_column_view const &lists_of_structs, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); // Rule of 5. maps_column_view(maps_column_view const &maps_view) = default; @@ -82,7 +82,7 @@ class maps_column_view { * @return std::unique_ptr Column of values corresponding the value of the lookup key. 
*/ std::unique_ptr get_values_for( - column_view const &keys, rmm::cuda_stream_view stream = cudf::default_stream_value, + column_view const &keys, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) const; /** @@ -100,7 +100,7 @@ class maps_column_view { * @return std::unique_ptr */ std::unique_ptr get_values_for( - scalar const &key, rmm::cuda_stream_view stream = cudf::default_stream_value, + scalar const &key, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) const; /** @@ -120,7 +120,7 @@ class maps_column_view { * @return std::unique_ptr */ std::unique_ptr - contains(scalar const &key, rmm::cuda_stream_view stream = cudf::default_stream_value, + contains(scalar const &key, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) const; /** @@ -141,7 +141,7 @@ class maps_column_view { */ std::unique_ptr - contains(column_view const &key, rmm::cuda_stream_view stream = cudf::default_stream_value, + contains(column_view const &key, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) const; private: diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index f16ead009a8..979c1f9f772 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -486,7 +486,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dropListDuplicatesWithKey "Input column has child that does not have 2 children.", 0); return release_as_jlong( - cudf::jni::lists_distinct_by_key(lists_keys_vals, cudf::default_stream_value)); + cudf::jni::lists_distinct_by_key(lists_keys_vals, cudf::get_default_stream())); } CATCH_STD(env, 0); } diff --git 
a/java/src/main/native/src/ColumnViewJni.hpp b/java/src/main/native/src/ColumnViewJni.hpp index 2cbdb65653e..29158cbd98f 100644 --- a/java/src/main/native/src/ColumnViewJni.hpp +++ b/java/src/main/native/src/ColumnViewJni.hpp @@ -51,7 +51,7 @@ new_column_with_boolean_column_as_validity(cudf::column_view const &exemplar, */ std::unique_ptr generate_list_offsets(cudf::column_view const &list_length, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Perform a special treatment for the results of `cudf::lists::have_overlap` to produce the @@ -73,7 +73,7 @@ generate_list_offsets(cudf::column_view const &list_length, */ void post_process_list_overlap(cudf::column_view const &lhs, cudf::column_view const &rhs, std::unique_ptr const &overlap_result, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Generates lists column by copying elements that are distinct by key from each input list diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index cbd0aee335e..b70a7b5a615 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -3467,7 +3467,7 @@ JNIEXPORT jobject JNICALL Java_ai_rapids_cudf_Table_contiguousSplitGroups( auto const size = cudf::distance(begin, end); auto const vec = thrust::host_vector(begin, end); auto buf = rmm::device_buffer{vec.data(), size * sizeof(cudf::size_type), - cudf::default_stream_value}; + cudf::get_default_stream()}; auto gather_map_col = std::make_unique(cudf::data_type{cudf::type_id::INT32}, size, std::move(buf)); diff --git a/java/src/main/native/src/aggregation128_utils.hpp b/java/src/main/native/src/aggregation128_utils.hpp index 70658976dad..a1437606cdf 100644 --- a/java/src/main/native/src/aggregation128_utils.hpp +++ b/java/src/main/native/src/aggregation128_utils.hpp @@ -41,7 +41,7 @@ 
namespace cudf::jni { */ std::unique_ptr extract_chunk32(cudf::column_view const &col, cudf::data_type dtype, int chunk_idx, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Reassemble a 128-bit column from four 64-bit integer columns with overflow detection. @@ -65,6 +65,6 @@ extract_chunk32(cudf::column_view const &col, cudf::data_type dtype, int chunk_i */ std::unique_ptr assemble128_from_sum(cudf::table_view const &chunks_table, cudf::data_type output_type, - rmm::cuda_stream_view stream = cudf::default_stream_value); + rmm::cuda_stream_view stream = cudf::get_default_stream()); } // namespace cudf::jni diff --git a/java/src/main/native/src/row_conversion.cu b/java/src/main/native/src/row_conversion.cu index 578915ee2ce..3913de720f9 100644 --- a/java/src/main/native/src/row_conversion.cu +++ b/java/src/main/native/src/row_conversion.cu @@ -1885,7 +1885,7 @@ std::vector> convert_to_rows( return make_lists_column( batch_info.row_batches[batch].row_count, std::move(offsets), std::move(data), - 0, rmm::device_buffer{0, cudf::default_stream_value, mr}, stream, mr); + 0, rmm::device_buffer{0, cudf::get_default_stream(), mr}, stream, mr); }); return ret; diff --git a/java/src/main/native/src/row_conversion.hpp b/java/src/main/native/src/row_conversion.hpp index e260ea44089..e4631875152 100644 --- a/java/src/main/native/src/row_conversion.hpp +++ b/java/src/main/native/src/row_conversion.hpp @@ -29,23 +29,23 @@ namespace jni { std::vector> convert_to_rows_fixed_width_optimized( cudf::table_view const &tbl, // TODO need something for validity - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); std::vector> convert_to_rows(cudf::table_view const &tbl, // TODO need something for validity - rmm::cuda_stream_view stream = 
cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); std::unique_ptr convert_from_rows_fixed_width_optimized( cudf::lists_column_view const &input, std::vector const &schema, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); std::unique_ptr convert_from_rows(cudf::lists_column_view const &input, std::vector const &schema, - rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); } // namespace jni diff --git a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu index dfef1be39f5..89952dadb6c 100644 --- a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu +++ b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu @@ -40,7 +40,7 @@ std::unique_ptr to_string_view_array(cudf::column_view const std::unique_ptr to_string_view_array(cudf::column_view const input) { - return detail::to_string_view_array(input, rmm::cuda_stream_default); + return detail::to_string_view_array(input, cudf::get_default_stream()); } } // namespace udf From 9c06330363db4da99803a3728b8bf44f9829f0b9 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Oct 2022 08:23:14 -0700 Subject: [PATCH 058/202] Accept const refs instead of const unique_ptr refs in reduce and scan APIs. (#11960) There is almost never a good reason to pass arguments as `unique_ptr const&`. Since those arguments cannot be modified, the only use case is accessing the underlying pointer, at which point the function better communicates its intent by accepting the underlying pointer/reference as an argument instead and is also more flexible as a result. 
Resolves #10393 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/11960 --- cpp/benchmarks/reduction/anyall.cpp | 2 +- cpp/benchmarks/reduction/dictionary.cpp | 2 +- cpp/benchmarks/reduction/reduce.cpp | 2 +- cpp/benchmarks/reduction/scan.cpp | 2 +- cpp/include/cudf/detail/scan.hpp | 8 +- cpp/include/cudf/reduction.hpp | 6 +- cpp/src/reductions/reductions.cpp | 63 +- cpp/src/reductions/scan/scan.cpp | 8 +- cpp/src/reductions/scan/scan.cuh | 4 +- cpp/src/reductions/scan/scan_exclusive.cu | 2 +- cpp/src/reductions/scan/scan_inclusive.cu | 2 +- cpp/tests/quantiles/percentile_approx_test.cu | 2 +- cpp/tests/reductions/collect_ops_tests.cpp | 22 +- cpp/tests/reductions/list_rank_test.cpp | 16 +- cpp/tests/reductions/rank_tests.cpp | 62 +- cpp/tests/reductions/reduction_tests.cpp | 565 +++++++++--------- cpp/tests/reductions/scan_tests.cpp | 155 +++-- cpp/tests/reductions/tdigest_tests.cu | 6 +- java/src/main/native/src/ColumnViewJni.cpp | 10 +- python/cudf/cudf/_lib/cpp/reduce.pxd | 4 +- python/cudf/cudf/_lib/reduce.pyx | 6 +- 21 files changed, 478 insertions(+), 471 deletions(-) diff --git a/cpp/benchmarks/reduction/anyall.cpp b/cpp/benchmarks/reduction/anyall.cpp index 80a85b0f217..755fa1ca2ad 100644 --- a/cpp/benchmarks/reduction/anyall.cpp +++ b/cpp/benchmarks/reduction/anyall.cpp @@ -41,7 +41,7 @@ void BM_reduction_anyall(benchmark::State& state, for (auto _ : state) { cuda_event_timer timer(state, true); - auto result = cudf::reduce(*values, agg, output_dtype); + auto result = cudf::reduce(*values, *agg, output_dtype); } } diff --git a/cpp/benchmarks/reduction/dictionary.cpp b/cpp/benchmarks/reduction/dictionary.cpp index 219564d6b5c..8f2f0be33ca 100644 --- a/cpp/benchmarks/reduction/dictionary.cpp +++ b/cpp/benchmarks/reduction/dictionary.cpp @@ -51,7 +51,7 @@ void BM_reduction_dictionary(benchmark::State& state, for 
(auto _ : state) { cuda_event_timer timer(state, true); - auto result = cudf::reduce(*values, agg, output_dtype); + auto result = cudf::reduce(*values, *agg, output_dtype); } } diff --git a/cpp/benchmarks/reduction/reduce.cpp b/cpp/benchmarks/reduction/reduce.cpp index 4e354352c11..4dfa7f0bbdc 100644 --- a/cpp/benchmarks/reduction/reduce.cpp +++ b/cpp/benchmarks/reduction/reduce.cpp @@ -45,7 +45,7 @@ void BM_reduction(benchmark::State& state, std::unique_ptr(), cudf::scan_type::INCLUSIVE); + *column, *cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE); } } diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp index 13dddd3b0c8..f4b2d51d0cb 100644 --- a/cpp/include/cudf/detail/scan.hpp +++ b/cpp/include/cudf/detail/scan.hpp @@ -38,7 +38,7 @@ namespace detail { * `agg` is not Min or Max. * * @param input The input column view for the scan. - * @param agg unique_ptr to aggregation operator applied by the scan. + * @param agg Aggregation operator applied by the scan * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE. * Include nulls if null_policy::INCLUDE. Any operation with a null results in * a null. @@ -47,7 +47,7 @@ namespace detail { * @returns Column with scan results. */ std::unique_ptr scan_exclusive(column_view const& input, - std::unique_ptr const& agg, + scan_aggregation const& agg, null_policy null_handling, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); @@ -64,7 +64,7 @@ std::unique_ptr scan_exclusive(column_view const& input, * but the `agg` is not Min or Max. * * @param input The input column view for the scan. - * @param agg unique_ptr to aggregation operator applied by the scan. + * @param agg Aggregation operator applied by the scan * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE. * Include nulls if null_policy::INCLUDE. Any operation with a null results in * a null. 
@@ -73,7 +73,7 @@ std::unique_ptr scan_exclusive(column_view const& input, * @returns Column with scan results. */ std::unique_ptr scan_inclusive(column_view const& input, - std::unique_ptr const& agg, + scan_aggregation const& agg, null_policy null_handling, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index 083892aa856..7aa7ada6896 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -72,7 +72,7 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; */ std::unique_ptr reduce( column_view const& col, - std::unique_ptr const& agg, + reduce_aggregation const& agg, data_type output_dtype, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -89,7 +89,7 @@ std::unique_ptr reduce( */ std::unique_ptr reduce( column_view const& col, - std::unique_ptr const& agg, + reduce_aggregation const& agg, data_type output_dtype, std::optional> init, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -187,7 +187,7 @@ std::unique_ptr segmented_reduce( */ std::unique_ptr scan( const column_view& input, - std::unique_ptr const& agg, + scan_aggregation const& agg, scan_type inclusive, null_policy null_handling = null_policy::EXCLUDE, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 4166becbf4d..a7d7e14a193 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -49,7 +49,7 @@ struct reduce_dispatch_functor { } template - std::unique_ptr operator()(std::unique_ptr const& agg) + std::unique_ptr operator()(reduce_aggregation const& agg) { switch (k) { case aggregation::SUM: return reduction::sum(col, output_dtype, init, stream, mr); @@ -62,12 +62,12 @@ struct reduce_dispatch_functor { return reduction::sum_of_squares(col, output_dtype, stream, mr); 
case aggregation::MEAN: return reduction::mean(col, output_dtype, stream, mr); case aggregation::VARIANCE: { - auto var_agg = dynamic_cast(agg.get()); - return reduction::variance(col, output_dtype, var_agg->_ddof, stream, mr); + auto var_agg = static_cast(agg); + return reduction::variance(col, output_dtype, var_agg._ddof, stream, mr); } case aggregation::STD: { - auto var_agg = dynamic_cast(agg.get()); - return reduction::standard_deviation(col, output_dtype, var_agg->_ddof, stream, mr); + auto var_agg = static_cast(agg); + return reduction::standard_deviation(col, output_dtype, var_agg._ddof, stream, mr); } case aggregation::MEDIAN: { auto sorted_indices = sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream); @@ -78,60 +78,59 @@ struct reduce_dispatch_functor { return get_element(*col_ptr, 0, stream, mr); } case aggregation::QUANTILE: { - auto quantile_agg = dynamic_cast(agg.get()); - CUDF_EXPECTS(quantile_agg->_quantiles.size() == 1, + auto quantile_agg = static_cast(agg); + CUDF_EXPECTS(quantile_agg._quantiles.size() == 1, "Reduction quantile accepts only one quantile value"); auto sorted_indices = sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream); auto valid_sorted_indices = split(*sorted_indices, {col.size() - col.null_count()}, stream)[0]; auto col_ptr = quantile(col, - quantile_agg->_quantiles, - quantile_agg->_interpolation, + quantile_agg._quantiles, + quantile_agg._interpolation, valid_sorted_indices, true, stream); return get_element(*col_ptr, 0, stream, mr); } case aggregation::NUNIQUE: { - auto nunique_agg = dynamic_cast(agg.get()); + auto nunique_agg = static_cast(agg); return make_fixed_width_scalar( - detail::distinct_count( - col, nunique_agg->_null_handling, nan_policy::NAN_IS_VALID, stream), + detail::distinct_count(col, nunique_agg._null_handling, nan_policy::NAN_IS_VALID, stream), stream, mr); } case aggregation::NTH_ELEMENT: { - auto nth_agg = dynamic_cast(agg.get()); - return reduction::nth_element(col, 
nth_agg->_n, nth_agg->_null_handling, stream, mr); + auto nth_agg = static_cast(agg); + return reduction::nth_element(col, nth_agg._n, nth_agg._null_handling, stream, mr); } case aggregation::COLLECT_LIST: { - auto col_agg = dynamic_cast(agg.get()); - return reduction::collect_list(col, col_agg->_null_handling, stream, mr); + auto col_agg = static_cast(agg); + return reduction::collect_list(col, col_agg._null_handling, stream, mr); } case aggregation::COLLECT_SET: { - auto col_agg = dynamic_cast(agg.get()); + auto col_agg = static_cast(agg); return reduction::collect_set( - col, col_agg->_null_handling, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr); + col, col_agg._null_handling, col_agg._nulls_equal, col_agg._nans_equal, stream, mr); } case aggregation::MERGE_LISTS: { return reduction::merge_lists(col, stream, mr); } case aggregation::MERGE_SETS: { - auto col_agg = dynamic_cast(agg.get()); - return reduction::merge_sets(col, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr); + auto col_agg = static_cast(agg); + return reduction::merge_sets(col, col_agg._nulls_equal, col_agg._nans_equal, stream, mr); } case aggregation::TDIGEST: { CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT, "Tdigest aggregations expect output type to be STRUCT"); - auto td_agg = dynamic_cast(agg.get()); - return detail::tdigest::reduce_tdigest(col, td_agg->max_centroids, stream, mr); + auto td_agg = static_cast(agg); + return detail::tdigest::reduce_tdigest(col, td_agg.max_centroids, stream, mr); } case aggregation::MERGE_TDIGEST: { CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT, "Tdigest aggregations expect output type to be STRUCT"); - auto td_agg = dynamic_cast(agg.get()); - return detail::tdigest::reduce_merge_tdigest(col, td_agg->max_centroids, stream, mr); + auto td_agg = static_cast(agg); + return detail::tdigest::reduce_merge_tdigest(col, td_agg.max_centroids, stream, mr); } default: CUDF_FAIL("Unsupported reduction operator"); } @@ -140,7 +139,7 @@ struct 
reduce_dispatch_functor { std::unique_ptr reduce( column_view const& col, - std::unique_ptr const& agg, + reduce_aggregation const& agg, data_type output_dtype, std::optional> init, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -148,16 +147,16 @@ std::unique_ptr reduce( { CUDF_EXPECTS(!init.has_value() || col.type() == init.value().get().type(), "column and initial value must be the same type"); - if (init.has_value() && !(agg->kind == aggregation::SUM || agg->kind == aggregation::PRODUCT || - agg->kind == aggregation::MIN || agg->kind == aggregation::MAX || - agg->kind == aggregation::ANY || agg->kind == aggregation::ALL)) { + if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || + agg.kind == aggregation::MIN || agg.kind == aggregation::MAX || + agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) { CUDF_FAIL( "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, and ALL aggregation types"); } // Returns default scalar if input column is non-valid. In terms of nested columns, we need to // handcraft the default scalar with input column. 
if (col.size() <= col.null_count()) { - if (agg->kind == aggregation::TDIGEST || agg->kind == aggregation::MERGE_TDIGEST) { + if (agg.kind == aggregation::TDIGEST || agg.kind == aggregation::MERGE_TDIGEST) { return detail::tdigest::make_empty_tdigest_scalar(); } if (col.type().id() == type_id::EMPTY || col.type() != output_dtype) { @@ -176,12 +175,12 @@ std::unique_ptr reduce( } return aggregation_dispatcher( - agg->kind, reduce_dispatch_functor{col, output_dtype, init, stream, mr}, agg); + agg.kind, reduce_dispatch_functor{col, output_dtype, init, stream, mr}, agg); } } // namespace detail std::unique_ptr reduce(column_view const& col, - std::unique_ptr const& agg, + reduce_aggregation const& agg, data_type output_dtype, rmm::mr::device_memory_resource* mr) { @@ -190,7 +189,7 @@ std::unique_ptr reduce(column_view const& col, } std::unique_ptr reduce(column_view const& col, - std::unique_ptr const& agg, + reduce_aggregation const& agg, data_type output_dtype, std::optional> init, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/reductions/scan/scan.cpp b/cpp/src/reductions/scan/scan.cpp index c0b787b3a1d..2871ee283ba 100644 --- a/cpp/src/reductions/scan/scan.cpp +++ b/cpp/src/reductions/scan/scan.cpp @@ -25,16 +25,16 @@ namespace cudf { namespace detail { std::unique_ptr scan(column_view const& input, - std::unique_ptr const& agg, + scan_aggregation const& agg, scan_type inclusive, null_policy null_handling, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (agg->kind == aggregation::RANK) { + if (agg.kind == aggregation::RANK) { CUDF_EXPECTS(inclusive == scan_type::INCLUSIVE, "Rank aggregation operator requires an inclusive scan"); - auto const& rank_agg = dynamic_cast(*agg); + auto const& rank_agg = static_cast(agg); if (rank_agg._method == rank_method::MIN) { if (rank_agg._percentage == rank_percentage::NONE) { return inclusive_rank_scan(input, stream, mr); @@ -55,7 +55,7 @@ std::unique_ptr scan(column_view const& input, } // 
namespace detail std::unique_ptr scan(column_view const& input, - std::unique_ptr const& agg, + scan_aggregation const& agg, scan_type inclusive, null_policy null_handling, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/reductions/scan/scan.cuh b/cpp/src/reductions/scan/scan.cuh index 127f2ae95b4..2ad6124cdd0 100644 --- a/cpp/src/reductions/scan/scan.cuh +++ b/cpp/src/reductions/scan/scan.cuh @@ -35,12 +35,12 @@ rmm::device_buffer mask_scan(column_view const& input_view, template